[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[commits] r9125 - in /fsf/trunk/libc: ./ localedata/ localedata/locales/ sysdeps/x86_64/multiarch/



Author: eglibc
Date: Fri Oct 23 00:04:16 2009
New Revision: 9125

Log:
Import glibc-mainline for 2009-10-23

Added:
    fsf/trunk/libc/localedata/locales/ps_AF
    fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/strend-sse4.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/strrchr.S
Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/NEWS
    fsf/trunk/libc/localedata/ChangeLog
    fsf/trunk/libc/localedata/SUPPORTED
    fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Fri Oct 23 00:04:16 2009
@@ -1,3 +1,11 @@
+2009-10-21  H.J. Lu  <hongjiu.lu@xxxxxxxxx>
+
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+	strend-sse4.
+	* sysdeps/x86_64/multiarch/strchr.S: New file.
+	* sysdeps/x86_64/multiarch/strend-sse4.S: New file.
+	* sysdeps/x86_64/multiarch/strrchr.S: New file.
+
 2009-10-21  Andreas Schwab  <schwab@xxxxxxxxxx>
 
 	* elf/dl-sym.c (do_sym): Resolve STT_GNU_IFUNC symbols.

Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Fri Oct 23 00:04:16 2009
@@ -1,4 +1,4 @@
-GNU C Library NEWS -- history of user-visible changes.  2009-8-8
+GNU C Library NEWS -- history of user-visible changes.  2009-10-15
 Copyright (C) 1992-2008, 2009 Free Software Foundation, Inc.
 See the end for copying conditions.
 
@@ -54,6 +54,8 @@
 * Using condvars with PI mutexes is now more efficient due to kernel
   support for requeueing to PI futexes.  NPTL support added for x86-64.
   Implemented by Ulrich Drepper.
+
+* New locale: ps_AF
 
 
 Version 2.10

Modified: fsf/trunk/libc/localedata/ChangeLog
==============================================================================
--- fsf/trunk/libc/localedata/ChangeLog (original)
+++ fsf/trunk/libc/localedata/ChangeLog Fri Oct 23 00:04:16 2009
@@ -1,3 +1,11 @@
+2009-10-15  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	* SUPPORTED (SUPPORTED-LOCALES): Add ps_AF.UTF-8.
+
+	* locales/ps_AF: New file.
+	Contributed by Sayamindu Dasgupta <sayamindu@xxxxxxxxx> and
+	Pravin Satpute <psatpute@xxxxxxxxxx>.
+
 2009-06-16  Ulrich Drepper  <drepper@xxxxxxxxxx>
 
 	[BZ #10011]

Modified: fsf/trunk/libc/localedata/SUPPORTED
==============================================================================
--- fsf/trunk/libc/localedata/SUPPORTED (original)
+++ fsf/trunk/libc/localedata/SUPPORTED Fri Oct 23 00:04:16 2009
@@ -317,6 +317,7 @@
 pap_AN/UTF-8 \
 pl_PL.UTF-8/UTF-8 \
 pl_PL/ISO-8859-2 \
+ps_AF/UTF-8 \
 pt_BR.UTF-8/UTF-8 \
 pt_BR/ISO-8859-1 \
 pt_PT.UTF-8/UTF-8 \

Added: fsf/trunk/libc/localedata/locales/ps_AF
==============================================================================
--- fsf/trunk/libc/localedata/locales/ps_AF (added)
+++ fsf/trunk/libc/localedata/locales/ps_AF Fri Oct 23 00:04:16 2009
@@ -1,0 +1,282 @@
+escape_char /
+comment_char %
+
+%%%%%%%%%%%%%%%%%%%%%%% locale definition file "ps_AF"  %%%%%%%%%%%%%%%%%%%%%%
+
+% Pashto Locale for Afghanistan
+% Filename: ps_AF
+% Locale name: ps_AF.UTF-8
+% Language: Pashto
+% Territory: Afghanistan
+% Charset: UTF-8
+% Revision: 0.1
+% By: Nasir Gulzade<nasirgulzade@xxxxxxxxxxx>
+% Creation Date: Thu, 15 Jan 2009 18:16:15 +0500 GMT
+% Last Modification Date: Fri, 16 Jan 2009 20:33:40 +0500 GMT
+%
+
+
+LC_IDENTIFICATION
+	title      "Pashto locale for Afghanistan"
+	source     "Nasir Gulzade"
+	address    "see e-mail."
+	contact    "Nasir Gulzade"
+	email      "nasirgulzade@xxxxxxxxxxx"
+	tel        "+93 700530286"
+	fax        ""
+	language   "Pashto"
+	territory  "Afghanistan"
+	revision   "0.2"
+	date       "2009-01-16"
+
+	category  "ps_AF:2007";LC_IDENTIFICATION
+	category  "ps_AF:2007";LC_CTYPE
+	category  "ps_AF:2007";LC_COLLATE
+	category  "ps_AF:2007";LC_TIME
+	category  "ps_AF:2007";LC_NUMERIC
+	category  "ps_AF:2007";LC_MONETARY
+	category  "ps_AF:2007";LC_MESSAGES
+	category  "ps_AF:2007";LC_MEASUREMENT
+	category  "ps_AF:2007";LC_PAPER
+	category  "ps_AF:2007";LC_NAME
+	category  "ps_AF:2007";LC_ADDRESS
+	category  "ps_AF:2007";LC_TELEPHONE
+END LC_IDENTIFICATION
+
+LC_CTYPE
+	copy	"i18n"
+	outdigit	<U0660>..<U0663>;<U06F4>;<U0665>..<U0669>
+	map to_outpunct;  (<U002E>,<U066B>);(<U002C>,<U066C>)
+END LC_CTYPE
+
+LC_COLLATE
+% Collation rules updated as per requirement of glibc by Pravin Satpute <psatpute@xxxxxxxxxx>
+% see rh bug 482881
+copy "iso14651_t1"
+
+collating-symbol  <teh_with_ring>
+collating-symbol  <hah_with_hamza_above>
+collating-symbol  <hah_with_three_dots>
+collating-symbol  <dal_with_ring>
+collating-symbol  <reh_with_ring>
+collating-symbol <reh_with_dot_below_and_above>
+collating-symbol  <seen_with_dot_below_and_above>
+collating-symbol  <kaaf_with_ring>
+collating-symbol  <noon_with_ring>
+collating-symbol  <farsi_yeh>
+collating-symbol  <arabic_letter_e>
+collating-symbol  <yeh_with_tail>
+
+reorder-after <teh>
+<teh_with_ring>
+
+reorder-after <jeem>
+<hah_with_hamza_above>
+
+reorder-after <tcheh>
+<hah_with_three_dots>
+
+reorder-after <dal>
+<dal_with_ring>
+
+reorder-after <reh>
+<reh_with_ring>
+
+reorder-after <jeh>
+<reh_with_dot_below_and_above>
+
+reorder-after <sheen>
+<seen_with_dot_below_and_above>
+
+reorder-after <keheh>
+<kaaf_with_ring>
+
+reorder-after <noon>
+<noon_with_ring>
+
+reorder-after <heh>
+<farsi_yeh>
+
+reorder-after <alef_maksura>
+<arabic_letter_e>
+<yeh_with_tail>
+
+
+reorder-after <U062A>
+<U067C>  <teh_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U062C>
+<U0681> <hah_with_hamza_above>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0686>
+<U0685> <hah_with_three_dots>;<BAS>;<MIN>;IGNORE
+
+reorder-after  <U062F>
+<U0689> <dal_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0631>
+<U0693> <reh_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0698>
+<U0696> <reh_with_dot_below_and_above>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0634>
+<U069A> <seen_with_dot_below_and_above>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U06A9>
+<U06AB> <kaaf_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after  <U0646>
+<U06BC> <noon_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0648>
+<U0647> <heh>;<BAS>;<MIN>;IGNORE
+<U06CC> <farsi_yeh>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U064A>
+<U06D0> <arabic_letter_e>;<AYE>;<MIN>;IGNORE
+<U0626> "<alef_maksura><hamza>";"<BAS><BAS>";"<MIN><MIN>";IGNORE
+<U06CD> <yeh_with_tail>;<AYE>;<MIN>;IGNORE
+
+reorder-end
+
+END LC_COLLATE
+
+LC_TIME
+	abday	"<U06CC><U002E>";/
+		"<U062F><U002E>";/
+		"<U0633><U002E>";/
+		"<U0686><U002E>";/
+		"<U067E><U002E>";/
+		"<U062C><U002E>";/
+		"<U0634><U002E>"
+	day	"<U06CC><U06A9><U0634><U0646><U0628><U0647>";/
+		"<U062F><U0648><U0634><U0646><U0628><U0647>";/
+		"<U0633><U0647><U200C><U0634><U0646><U0628><U0647>";/
+		"<U0686><U0627><U0631><U0634><U0646><U0628><U0647>";/
+		"<U067E><U0646><U062C><U0634><U0646><U0628><U0647>";/
+		"<U062C><U0645><U0639><U0647>";/
+		"<U0634><U0646><U0628><U0647>"
+	abmon	"<U062C><U0646><U0648>";/
+		"<U0641><U0628><U0631>";/
+		"<U0645><U0627><U0631>";/
+		"<U0627><U067E><U0631>";/
+		"<U0645><U0640><U06D0>";/
+		"<U062C><U0648><U0646>";/
+		"<U062C><U0648><U0644>";/
+		"<U0627><U06AB><U0633>";/
+		"<U0633><U067E><U062A>";/
+		"<U0627><U06A9><U062A>";/
+		"<U0646><U0648><U0645>";/
+		"<U062F><U0633><U0645>"
+	mon	"<U062C><U0646><U0648><U0631><U064A>";/
+		"<U0641><U0628><U0631><U0648><U0631><U064A>";/
+		"<U0645><U0627><U0631><U0686>";/
+		"<U0627><U067E><U0631><U06CC><U0644>";/
+		"<U0645><U06D0>";/
+		"<U062C><U0648><U0646>";/
+		"<U062C><U0648><U0644><U0627><U064A>";/
+		"<U0627><U06AB><U0633><U062A>";/
+		"<U0633><U067E><U062A><U0645><U0628><U0631>";/
+		"<U0627><U06A9><U062A><U0648><U0628><U0631>";/
+		"<U0646><U0648><U0645><U0628><U0631>";/
+		"<U062F><U0633><U0645><U0628><U0631>"
+	d_t_fmt	"<U0025><U0041><U0020><U062F><U0020><U0025><U0059>/
+<U0020><U062F><U0020><U0025><U0042><U0020><U0025><U0065><U060C>/
+<U0020><U0025><U005A><U0020><U0025><U0048><U003A><U0025><U004D>/
+<U003A><U0025><U0053>"
+	d_fmt	"<U062F><U0020><U0025><U0059><U0020><U062F><U0020>/
+<U0025><U0042><U0020><U0025><U0065>"
+	t_fmt	"<U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
+	am_pm	"<U063A><U002E><U0645><U002E>";/
+		"<U063A><U002E><U0648><U002E>"
+	t_fmt_ampm	"<U202B><U0025><U0049><U003A><U0025><U004D><U003A>/
+<U0025><U0053><U0020><U0025><U0070><U202C>"
+	first_weekday	7
+	first_workday	7
+	cal_direction	3
+END LC_TIME
+
+LC_NUMERIC
+	decimal_point	"<U066B>"
+	thousands_sep	"<U066C>"
+	grouping	3
+END LC_NUMERIC
+
+LC_MONETARY
+	int_curr_symbol	"<U0041><U0046><U004E><U0020>"
+	currency_symbol	"<U0627><U0641><U063A><U0627><U0646><U06CD>"
+	mon_decimal_point	"<U066B>"
+	mon_thousands_sep	"<U066C>"
+	mon_grouping	3
+	positive_sign	""
+	negative_sign	"<U002D>"
+	int_frac_digits	0
+	frac_digits	0
+	p_cs_precedes	0
+	p_sep_by_space	1
+	n_cs_precedes	0
+	n_sep_by_space	1
+	p_sign_posn	1
+	n_sign_posn	1
+END LC_MONETARY
+
+LC_MESSAGES
+	yesexpr "<U005E><U005B><U0079><U0059><U0628>/
+<U0066><U005D><U002E><U002A>"
+	noexpr  "<U005E><U005B><U006E><U004E><U062E>/
+<U0646><U006F><U005D><U002E><U002A>"
+END LC_MESSAGES
+
+LC_MEASUREMENT
+	measurement	1
+END LC_MEASUREMENT
+
+LC_PAPER
+	height	297
+	width	210
+END LC_PAPER
+
+LC_NAME
+	name_fmt	"<U0025><U0073><U0025><U0074><U0025><U0070><U0025>/
+<U0074><U0025><U0067><U0025><U0074><U0025><U006D><U0025><U0074><U0025><U0066>"
+	name_gen	"" % No general salutation for all persons in Pashto.
+	name_mr	"<U069A><U0627><U063A><U0644><U06D0>"
+	name_mrs	"<U0628><U064A><U0020><U0628><U064A>"
+	name_miss	"<U0628><U064A><U0020><U0628><U064A>"
+	name_ms	"<U0628><U064A><U0020><U0628><U064A>"
+END LC_NAME
+
+LC_ADDRESS
+	postal_fmt	"<U0025><U0061><U0025><U004E><U0025><U0066><U0025>/
+<U004E><U0025><U0064><U0025><U004E><U0025><U0062><U0025><U004E><U0025>/
+<U0073><U0020><U0025><U0068><U0020><U0025><U0065><U0020><U0025><U0072>/
+<U0025><U004E><U0025><U0043><U002D><U0025><U007A><U0020><U0025><U0054>/
+<U0025><U004E><U0025><U0063><U0025><U004E>"
+	country_name	"<U0627><U0641><U063A><U0627><U0646>/
+<U0633><U062A><U0627><U0646>"
+	%country_post	"<U0041><U0046><U0047>" % FIXME: Not confirmed.
+	country_ab2	"<U0041><U0046>"
+	country_ab3	"<U0041><U0046><U0047>"
+	country_num	004
+	country_car	"<U0041><U0046><U0047>"
+
+	%country_isbn	"" % Unfortunately not yet assigned :-(
+			   % Since to date there is no ISBN agency working
+			   % in Afghanistan.
+
+	lang_name	"<U067E><U069A><U062A><U0648>"
+	lang_ab	"<U0070><U0073>"
+	lang_term	"<U0070><U0075><U0073>"
+	lang_lib	"<U0070><U0075><U0073>"
+END LC_ADDRESS
+
+LC_TELEPHONE
+	tel_int_fmt	"<U002B><U0025><U0063><U0020><U0025><U0061><U0020>/
+<U0025><U006C>"
+	tel_dom_fmt	"<U0025><U0041><U2012><U0025><U006C>"
+	int_select	"<U0030><U0030>"
+	int_prefix	"<U0039><U0033>"
+END LC_TELEPHONE
+
+%%%%%%%%%%%%%%%%%%%% locale definition file "ps_AF" ends %%%%%%%%%%%%%%%%%%%%%

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile Fri Oct 23 00:04:16 2009
@@ -4,7 +4,8 @@
 endif
 
 ifeq ($(subdir),string)
-sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3
+sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
+		   strend-sse4
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
 CFLAGS-strcspn-c.c += -msse4

Added: fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S Fri Oct 23 00:04:16 2009
@@ -1,0 +1,177 @@
+/* strchr with SSE4.2
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <ifunc-defines.h>
+
+
+/* Define multiple versions only for the definition in libc.  */
+#ifndef NOT_IN_libc
+	.text
+ENTRY(strchr)
+	.type	strchr, @gnu_indirect_function	/* IFUNC: this code returns the implementation address in %rax.  */
+	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)	/* Kind field still zero?  */
+	jne	1f
+	call	__init_cpu_features	/* Presumably fills in __cpu_features on first use.  */
+1:	leaq	__strchr_sse2(%rip), %rax	/* Default answer: the SSE2 version.  */
+	testl	$(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
+	jz	2f	/* Bit 20 (SSE4.2 in CPUID leaf 1 ECX) clear: keep the default.  */
+	leaq	__strchr_sse42(%rip), %rax	/* Bit set: answer with the SSE4.2 version.  */
+2:	ret
+END(strchr)
+
+
+/*
+   This implementation uses SSE4 instructions to compare up to 16 bytes
+   at a time looking for the first occurrence of the character c in the
+   string s:
+
+   char *strchr (const char *s, int c);
+
+   We use 0x2:
+	_SIDD_SBYTE_OPS
+	| _SIDD_CMP_EQUAL_ANY
+	| _SIDD_LEAST_SIGNIFICANT
+   on pcmpistri to compare xmm/mem128
+
+   0 1 2 3 4 5 6 7 8 9 A B C D E F
+   X X X X X X X X X X X X X X X X
+
+   against xmm
+
+   0 1 2 3 4 5 6 7 8 9 A B C D E F
+   C C C C C C C C C C C C C C C C
+
+   to find out if the first 16byte data element has a byte C and the
+   offset of the first byte.  There are 3 cases:
+
+   1. The first 16byte data element has the byte C at the offset X.
+   2. The first 16byte data element has EOS and doesn't have the byte C.
+   3. The first 16byte data element is valid and doesn't have the byte C.
+
+   Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
+
+   case		ECX	CFlag	ZFlag	SFlag
+    1		 X	  1	 0/1	  0
+    2		16	  0	  1	  0
+    3		16	  0	  0	  0
+
+   We exit from the loop for cases 1 and 2 with jbe which branches
+   when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
+   X for case 1.  */
+
+	.section .text.sse4.2,"ax",@progbits
+	.align	16
+	.type	__strchr_sse42, @function
+__strchr_sse42:	/* char *strchr (const char *s, int c): s in %rdi, c in %esi, result in %rax.  */
+	cfi_startproc
+	CALL_MCOUNT
+	testb	%sil, %sil
+	je	__strend_sse4	/* c == 0: tail-jump to the end-of-string finder.  */
+	pxor	%xmm2, %xmm2	/* xmm2 = 0: NUL pattern and pshufb selector.  */
+	movd	%esi, %xmm1	/* Low dword of xmm1 = all 32 bits of c.  */
+	pshufb  %xmm2, %xmm1	/* Broadcast byte 0 of c before the branch: pcmpistri $0x2 treats every non-zero byte of xmm1 as a character to find.  */
+	movl	%edi, %ecx
+	andl	$15, %ecx	/* ecx = misalignment of s; ZF set when s is 16-byte aligned.  */
+	movq	%rdi, %r8	/* Neither pshufb nor mov changes the flags tested below.  */
+	je	L(aligned_start)
+
+/* Handle unaligned string.  */
+	andq	$-16, %r8	/* r8 = 16-byte block containing s.  */
+	movdqa	(%r8), %xmm0
+	pcmpeqb	 %xmm0, %xmm2	/* xmm2: 0xff in each lane holding NUL.  */
+	pcmpeqb	 %xmm1, %xmm0	/* xmm0: 0xff in each lane holding c.  */
+	/* Find where the NUL terminator is.  */
+	pmovmskb %xmm2, %edx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %esi
+	/* Remove the leading bytes that precede s.  */
+	sarl	%cl, %edx
+	sarl	%cl, %esi
+	testl	%esi, %esi
+	je	L(unaligned_no_match)
+	/* Check which byte is a match.  */
+	bsfl	%esi, %eax
+	/* Is there a NUL terminator as well?  */
+	testl	%edx, %edx
+	je      L(unaligned_match)
+	bsfl	%edx, %esi
+	cmpl	%esi, %eax
+	/* Return NULL if the terminator comes first.  */
+	ja	L(return_null)
+L(unaligned_match):
+	addq	%rdi, %rax	/* Masks were shifted by the misalignment, so the index is relative to s.  */
+	ret
+
+	.p2align 4
+L(unaligned_no_match):
+	testl	%edx, %edx
+	jne	L(return_null)	/* String ended in the first block without a match.  */
+
+/* Loop start on aligned string.  */
+L(loop):
+	addq	$16, %r8
+L(aligned_start):
+	pcmpistri	$0x2, (%r8), %xmm1
+	jbe	L(wrap)	/* CF: c found; ZF: this block contains EOS.  */
+	addq	$16, %r8
+	pcmpistri	$0x2, (%r8), %xmm1
+	jbe	L(wrap)
+	addq	$16, %r8
+	pcmpistri	$0x2, (%r8), %xmm1
+	jbe	L(wrap)
+	addq	$16, %r8
+	pcmpistri	$0x2, (%r8), %xmm1
+	jbe	L(wrap)
+	jmp	L(loop)
+L(wrap):
+	jc	L(loop_exit)	/* CF set: %ecx holds the offset of the first match.  */
+
+/* Return NULL.  */
+L(return_null):
+	xorl	%eax, %eax
+	ret
+
+/* Loop exit.  */
+	.p2align 4
+L(loop_exit):
+	leaq	(%r8,%rcx), %rax	/* Block address + match offset.  */
+	ret
+	cfi_endproc
+	.size	__strchr_sse42, .-__strchr_sse42
+
+
+# undef ENTRY	/* Replace ENTRY/END so code using them is emitted as __strchr_sse2.  */
+# define ENTRY(name) \
+	.type __strchr_sse2, @function; \
+	.align 16; \
+	__strchr_sse2: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size __strchr_sse2, .-__strchr_sse2
+# undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal strchr calls through a PLT.
+   The speedup we get from using SSE4.2 instruction is likely eaten away
+   by the indirect call in the PLT.  */
+# define libc_hidden_builtin_def(name) \
+	.globl __GI_strchr; __GI_strchr = __strchr_sse2
+#endif	/* !NOT_IN_libc  */
+
+#include "../strchr.S"	/* Presumably wraps its code in ENTRY/END, so it picks up the overrides above.  */

Added: fsf/trunk/libc/sysdeps/x86_64/multiarch/strend-sse4.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strend-sse4.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strend-sse4.S Fri Oct 23 00:04:16 2009
@@ -1,0 +1,49 @@
+/* Return the pointer to the end of string, using SSE4.2
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+	.section .text.sse4.2,"ax",@progbits
+ENTRY (__strend_sse4)	/* In: %rdi = s.  Out: %rax = address of the terminating NUL.  */
+	pxor	%xmm2, %xmm2	/* xmm2 = 0.  */
+	movq	%rdi, %rcx	/* Keep the original pointer.  */
+	andq	$~15, %rdi	/* Round s down to a 16-byte boundary.  */
+	movdqa	%xmm2, %xmm1	/* xmm1 = 0 too, for the pcmpistri loop.  */
+	pcmpeqb	(%rdi), %xmm2	/* 0xff in each lane of the first block that holds NUL.  */
+	orl	$0xffffffff, %esi	/* esi = all ones.  */
+	subq	%rdi, %rcx	/* rcx = s - aligned base (0..15).  */
+	shll	%cl, %esi	/* Clear the mask bits for bytes before s.  */
+	pmovmskb %xmm2, %edx
+	andl	%esi, %edx	/* Any NUL at or after s in the first block?  */
+	jnz	1f
+
+2:	pcmpistri $0x08, 16(%rdi), %xmm1	/* Scan the next block: ZF = it has a NUL, %ecx = index of that NUL.  */
+	leaq	16(%rdi), %rdi	/* Advance; lea leaves the flags untouched.  */
+	jnz	2b	/* ZF clear: no NUL in that block, keep going.  */
+
+	leaq	(%rdi,%rcx), %rax	/* Block address + index from pcmpistri.  */
+	ret
+
+1:	bsfl	%edx, %eax	/* Index of the first NUL within the first block.  */
+	addq	%rdi, %rax
+	ret
+
+END (__strend_sse4)

Added: fsf/trunk/libc/sysdeps/x86_64/multiarch/strrchr.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strrchr.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strrchr.S Fri Oct 23 00:04:16 2009
@@ -1,0 +1,278 @@
+/* strrchr with SSE4.2
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <ifunc-defines.h>
+
+
+/* Define multiple versions only for the definition in libc and for
+   the DSO.  In static binaries we need strrchr before the initialization
+   happened.  */
+#if defined SHARED && !defined NOT_IN_libc
+	.text
+ENTRY(strrchr)
+	.type	strrchr, @gnu_indirect_function	/* IFUNC: this code returns the implementation address in %rax.  */
+	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)	/* Kind field still zero?  */
+	jne	1f
+	call	__init_cpu_features	/* Presumably fills in __cpu_features on first use.  */
+1:	leaq	__strrchr_sse2(%rip), %rax	/* Default answer: the SSE2 version.  */
+	testl	$(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
+	jz	2f	/* Bit 20 (SSE4.2 in CPUID leaf 1 ECX) clear: keep the default.  */
+	leaq	__strrchr_sse42(%rip), %rax	/* Bit set: answer with the SSE4.2 version.  */
+2:	ret
+END(strrchr)
+
+/*
+   This implementation uses SSE4 instructions to compare up to 16 bytes
+   at a time looking for the last occurrence of the character c in the
+   string s:
+
+   char *strrchr (const char *s, int c);
+
+   We use 0x4a:
+	_SIDD_SBYTE_OPS
+	| _SIDD_CMP_EQUAL_EACH
+	| _SIDD_MOST_SIGNIFICANT
+   on pcmpistri to compare xmm/mem128
+
+   0 1 2 3 4 5 6 7 8 9 A B C D E F
+   X X X X X X X X X X X X X X X X
+
+   against xmm
+
+   0 1 2 3 4 5 6 7 8 9 A B C D E F
+   C C C C C C C C C C C C C C C C
+
+   to find out if the first 16byte data element has a byte C and the
+   last offset.  There are 4 cases:
+
+   1. The first 16byte data element has EOS and has the byte C at the
+      last offset X.
+   2. The first 16byte data element is valid and has the byte C at the
+      last offset X.
+   3. The first 16byte data element has EOS and doesn't have the byte C.
+   4. The first 16byte data element is valid and doesn't have the byte C.
+
+   Here is the table of ECX, CFlag, ZFlag and SFlag for 4 cases:
+
+   case		ECX	CFlag	ZFlag	SFlag
+    1		 X	  1	  1	  0
+    2		 X	  1	  0	  0
+    3		16	  0	  1	  0
+    4		16	  0	  0	  0
+
+   We exit from the loop for cases 1 and 3 with jz which branches
+   when ZFlag is 1.  If CFlag == 1, ECX has the offset X for case 1.  */
+
+
+	.section .text.sse4.2,"ax",@progbits
+	.align	16
+	.type	__strrchr_sse42, @function
+__strrchr_sse42:	/* char *strrchr (const char *s, int c): s in %rdi, c in %esi, result in %rax.  */
+	cfi_startproc
+	CALL_MCOUNT
+	testb	%sil, %sil
+	je	__strend_sse4	/* c == 0: tail-jump to the end-of-string finder.  */
+	xor	%eax,%eax	/* RAX holds the last occurrence of c found so far; NULL initially.  */
+	movd	%esi, %xmm1
+	punpcklbw	%xmm1, %xmm1	/* Two byte-unpacks plus the pshufd below broadcast the low byte of c.  */
+	movl	%edi, %esi
+	punpcklbw	%xmm1, %xmm1
+	andl	$15, %esi	/* esi = misalignment of s; ZF set when s is 16-byte aligned.  */
+	pshufd	$0, %xmm1, %xmm1	/* xmm1 = c in all 16 bytes (SSE ops leave the flags alone).  */
+	movq	%rdi, %r8
+	je	L(loop)
+
+/* Handle unaligned string using psrldq.  */
+	leaq	L(psrldq_table)(%rip), %rdx
+	andq	$-16, %r8	/* r8 = 16-byte block containing s.  */
+	movslq	(%rdx,%rsi,4),%r9	/* Table entry for this misalignment.  */
+	movdqa	(%r8), %xmm0
+	addq	%rdx, %r9	/* Entries are stored relative to the table base.  */
+	jmp	*%r9
+
+/* Handle unaligned string with offset 1 using psrldq.  */
+	.p2align 4
+L(psrldq_1):
+	psrldq	$1, %xmm0	/* Falls through (across alignment padding) to the code below.  */
+
+	.p2align 4
+L(unaligned_pcmpistri):
+	pcmpistri	$0x4a, %xmm1, %xmm0	/* xmm0 now starts at s[0]; zeros were shifted in at the top.  */
+	jnc	L(unaligned_no_byte)
+	leaq	(%rdi,%rcx), %rax	/* Last match in the fragment, indexed from s.  */
+L(unaligned_no_byte):
+	/* Find the length of the unaligned string.  */
+	pcmpistri	$0x3a, %xmm0, %xmm0
+	movl	$16, %edx
+	subl	%esi, %edx	/* edx = bytes from s to the next 16-byte boundary.  */
+	cmpl	%ecx, %edx
+	/* Return RAX if the unaligned fragment to next 16B already
+	   contain the NULL terminator.  */
+	jg	L(exit)
+	addq	$16, %r8
+
+/* Loop start on aligned string.  */
+	.p2align 4
+L(loop):
+	pcmpistri	$0x4a, (%r8), %xmm1
+	jbe	L(match_or_eos)	/* CF: c found; ZF: this block contains EOS.  */
+	addq	$16, %r8
+	jmp	L(loop)
+	.p2align 4
+L(match_or_eos):
+	je	L(had_eos)	/* Flags are still those of the pcmpistri above.  */
+L(match_no_eos):
+	leaq	(%r8,%rcx), %rax	/* Remember this match and keep scanning.  */
+	addq	$16, %r8
+	jmp     L(loop)
+	.p2align 4
+L(had_eos):
+	jnc     L(exit)	/* EOS without a match in this block: keep the earlier result.  */
+	leaq	(%r8,%rcx), %rax	/* EOS with a match: it is the last occurrence.  */
+	.p2align 4
+L(exit):
+	ret
+
+/* Handle unaligned string with offset 15 using psrldq.  */
+	.p2align 4
+L(psrldq_15):
+	psrldq	$15, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 14 using psrldq.  */
+	.p2align 4
+L(psrldq_14):
+	psrldq	$14, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 13 using psrldq.  */
+	.p2align 4
+L(psrldq_13):
+	psrldq	$13, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 12 using psrldq.  */
+	.p2align 4
+L(psrldq_12):
+	psrldq	$12, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 11 using psrldq.  */
+	.p2align 4
+L(psrldq_11):
+	psrldq	$11, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 10 using psrldq.  */
+	.p2align 4
+L(psrldq_10):
+	psrldq	$10, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 9 using psrldq.  */
+	.p2align 4
+L(psrldq_9):
+	psrldq	$9, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 8 using psrldq.  */
+	.p2align 4
+L(psrldq_8):
+	psrldq	$8, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 7 using psrldq.  */
+	.p2align 4
+L(psrldq_7):
+	psrldq	$7, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 6 using psrldq.  */
+	.p2align 4
+L(psrldq_6):
+	psrldq	$6, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 5 using psrldq.  */
+	.p2align 4
+L(psrldq_5):
+	psrldq	$5, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 4 using psrldq.  */
+	.p2align 4
+L(psrldq_4):
+	psrldq	$4, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 3 using psrldq.  */
+	.p2align 4
+L(psrldq_3):
+	psrldq	$3, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 2 using psrldq.  */
+	.p2align 4
+L(psrldq_2):
+	psrldq	$2, %xmm0
+	jmp	L(unaligned_pcmpistri)
+
+	cfi_endproc
+	.size	__strrchr_sse42, .-__strrchr_sse42
+
+	.section .rodata.sse4.2,"a",@progbits
+	.p2align 4
+L(psrldq_table):	/* 32-bit offsets from this label, indexed by (s & 15).  */
+	.int	L(loop) - L(psrldq_table)	/* Offset 0: not used by the dispatch, aligned strings branch to L(loop) before the lookup.  */
+	.int	L(psrldq_1) - L(psrldq_table)
+	.int	L(psrldq_2) - L(psrldq_table)
+	.int	L(psrldq_3) - L(psrldq_table)
+	.int	L(psrldq_4) - L(psrldq_table)
+	.int	L(psrldq_5) - L(psrldq_table)
+	.int	L(psrldq_6) - L(psrldq_table)
+	.int	L(psrldq_7) - L(psrldq_table)
+	.int	L(psrldq_8) - L(psrldq_table)
+	.int	L(psrldq_9) - L(psrldq_table)
+	.int	L(psrldq_10) - L(psrldq_table)
+	.int	L(psrldq_11) - L(psrldq_table)
+	.int	L(psrldq_12) - L(psrldq_table)
+	.int	L(psrldq_13) - L(psrldq_table)
+	.int	L(psrldq_14) - L(psrldq_table)
+	.int	L(psrldq_15) - L(psrldq_table)
+
+
+# undef ENTRY	/* Replace ENTRY/END so code using them is emitted as __strrchr_sse2.  */
+# define ENTRY(name) \
+	.type __strrchr_sse2, @function; \
+	.align 16; \
+	__strrchr_sse2: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size __strrchr_sse2, .-__strrchr_sse2
+# undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal strrchr calls through a PLT.
+   The speedup we get from using SSE4.2 instruction is likely eaten away
+   by the indirect call in the PLT.  */
+# define libc_hidden_builtin_def(name) \
+	.globl __GI_strrchr; __GI_strrchr = __strrchr_sse2
+#endif	/* SHARED && !NOT_IN_libc  */
+
+#include "../strrchr.S"	/* Presumably wraps its code in ENTRY/END, so it picks up the overrides above.  */