[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[commits] r9125 - in /fsf/trunk/libc: ./ localedata/ localedata/locales/ sysdeps/x86_64/multiarch/
- To: commits@xxxxxxxxxx
- Subject: [commits] r9125 - in /fsf/trunk/libc: ./ localedata/ localedata/locales/ sysdeps/x86_64/multiarch/
- From: eglibc@xxxxxxxxxx
- Date: Fri, 23 Oct 2009 07:04:17 -0000
Author: eglibc
Date: Fri Oct 23 00:04:16 2009
New Revision: 9125
Log:
Import glibc-mainline for 2009-10-23
Added:
fsf/trunk/libc/localedata/locales/ps_AF
fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/strend-sse4.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/strrchr.S
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/NEWS
fsf/trunk/libc/localedata/ChangeLog
fsf/trunk/libc/localedata/SUPPORTED
fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Fri Oct 23 00:04:16 2009
@@ -1,3 +1,11 @@
+2009-10-21 H.J. Lu <hongjiu.lu@xxxxxxxxx>
+
+ * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+ strend-sse4.
+ * sysdeps/x86_64/multiarch/strchr.S: New file.
+ * sysdeps/x86_64/multiarch/strend-sse4.S: New file.
+ * sysdeps/x86_64/multiarch/strrchr.S: New file.
+
2009-10-21 Andreas Schwab <schwab@xxxxxxxxxx>
* elf/dl-sym.c (do_sym): Resolve STT_GNU_IFUNC symbols.
Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Fri Oct 23 00:04:16 2009
@@ -1,4 +1,4 @@
-GNU C Library NEWS -- history of user-visible changes. 2009-8-8
+GNU C Library NEWS -- history of user-visible changes. 2009-10-15
Copyright (C) 1992-2008, 2009 Free Software Foundation, Inc.
See the end for copying conditions.
@@ -54,6 +54,8 @@
* Using condvars with PI mutexes is now more efficient due to kernel
support for requeueing to PI futexes. NPTL support added for x86-64.
Implemented by Ulrich Drepper.
+
+* New locale: ps_AF
Version 2.10
Modified: fsf/trunk/libc/localedata/ChangeLog
==============================================================================
--- fsf/trunk/libc/localedata/ChangeLog (original)
+++ fsf/trunk/libc/localedata/ChangeLog Fri Oct 23 00:04:16 2009
@@ -1,3 +1,11 @@
+2009-10-15 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * SUPPORTED (SUPPORTED-LOCALES): Add ps_AF.UTF-8.
+
+ * locales/ps_AF: New file.
+ Contributed by Sayamindu Dasgupta <sayamindu@xxxxxxxxx> and
+ Pravin Satpute <psatpute@xxxxxxxxxx>.
+
2009-06-16 Ulrich Drepper <drepper@xxxxxxxxxx>
[BZ #10011]
Modified: fsf/trunk/libc/localedata/SUPPORTED
==============================================================================
--- fsf/trunk/libc/localedata/SUPPORTED (original)
+++ fsf/trunk/libc/localedata/SUPPORTED Fri Oct 23 00:04:16 2009
@@ -317,6 +317,7 @@
pap_AN/UTF-8 \
pl_PL.UTF-8/UTF-8 \
pl_PL/ISO-8859-2 \
+ps_AF/UTF-8 \
pt_BR.UTF-8/UTF-8 \
pt_BR/ISO-8859-1 \
pt_PT.UTF-8/UTF-8 \
Added: fsf/trunk/libc/localedata/locales/ps_AF
==============================================================================
--- fsf/trunk/libc/localedata/locales/ps_AF (added)
+++ fsf/trunk/libc/localedata/locales/ps_AF Fri Oct 23 00:04:16 2009
@@ -1,0 +1,282 @@
+escape_char /
+comment_char %
+
+%%%%%%%%%%%%%%%%%%%%%%% locale definition file "ps_AF" %%%%%%%%%%%%%%%%%%%%%%
+
+% Pashto Locale for Afghanistan
+% Filename: ps_AF
+% Locale name: ps_AF.UTF-8
+% Language: Pashto
+% Territory: Afghanistan
+% Charset: UTF-8
+% Revision: 0.1
+% By: Nasir Gulzade<nasirgulzade@xxxxxxxxxxx>
+% Creation Date: Thu, 15 Jan 2009 18:16:15 +0500 GMT
+% Last Modification Date: Fri, 16 Jan 2009 20:33:40 +0500 GMT
+%
+
+
+LC_IDENTIFICATION
+ title "Pashto locale for Afghanistan"
+ source "Nasir Gulzade"
+ address "see e-mail."
+ contact "Nasir Gulzade"
+ email "nasirgulzade@xxxxxxxxxxx"
+ tel "+93 700530286"
+ fax ""
+ language "Pashto"
+ territory "Afghanistan"
+ revision "0.2"
+ date "2009-01-16"
+
+ category "ps_AF:2007";LC_IDENTIFICATION
+ category "ps_AF:2007";LC_CTYPE
+ category "ps_AF:2007";LC_COLLATE
+ category "ps_AF:2007";LC_TIME
+ category "ps_AF:2007";LC_NUMERIC
+ category "ps_AF:2007";LC_MONETARY
+ category "ps_AF:2007";LC_MESSAGES
+ category "ps_AF:2007";LC_MEASUREMENT
+ category "ps_AF:2007";LC_PAPER
+ category "ps_AF:2007";LC_NAME
+ category "ps_AF:2007";LC_ADDRESS
+ category "ps_AF:2007";LC_TELEPHONE
+END LC_IDENTIFICATION
+
+LC_CTYPE
+ copy "i18n"
+ outdigit <U0660>..<U0663>;<U06F4>;<U0665>..<U0669>
+ map to_outpunct; (<U002E>,<U066B>);(<U002C>,<U066C>)
+END LC_CTYPE
+
+LC_COLLATE
+% Collation rules updated as per requirement of glibc by Pravin Satpute <psatpute@xxxxxxxxxx>
+% see rh bug 482881
+copy "iso14651_t1"
+
+collating-symbol <teh_with_ring>
+collating-symbol <hah_with_hamza_above>
+collating-symbol <hah_with_three_dots>
+collating-symbol <dal_with_ring>
+collating-symbol <reh_with_ring>
+collating-symbol <reh_with_dot_below_and_above>
+collating-symbol <seen_with_dot_below_and_above>
+collating-symbol <kaaf_with_ring>
+collating-symbol <noon_with_ring>
+collating-symbol <farsi_yeh>
+collating-symbol <arabic_letter_e>
+collating-symbol <yeh_with_tail>
+
+reorder-after <teh>
+<teh_with_ring>
+
+reorder-after <jeem>
+<hah_with_hamza_above>
+
+reorder-after <tcheh>
+<hah_with_three_dots>
+
+reorder-after <dal>
+<dal_with_ring>
+
+reorder-after <reh>
+<reh_with_ring>
+
+reorder-after <jeh>
+<reh_with_dot_below_and_above>
+
+reorder-after <sheen>
+<seen_with_dot_below_and_above>
+
+reorder-after <keheh>
+<kaaf_with_ring>
+
+reorder-after <noon>
+<noon_with_ring>
+
+reorder-after <heh>
+<farsi_yeh>
+
+reorder-after <alef_maksura>
+<arabic_letter_e>
+<yeh_with_tail>
+
+
+reorder-after <U062A>
+<U067C> <teh_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U062C>
+<U0681> <hah_with_hamza_above>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0686>
+<U0685> <hah_with_three_dots>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U062F>
+<U0689> <dal_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0631>
+<U0693> <reh_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0698>
+<U0696> <reh_with_dot_below_and_above>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0634>
+<U069A> <seen_with_dot_below_and_above>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U06A9>
+<U06AB> <kaaf_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0646>
+<U06BC> <noon_with_ring>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U0648>
+<U0647> <heh>;<BAS>;<MIN>;IGNORE
+<U06CC> <farsi_yeh>;<BAS>;<MIN>;IGNORE
+
+reorder-after <U064A>
+<U06D0> <arabic_letter_e>;<AYE>;<MIN>;IGNORE
+<U0626> "<alef_maksura><hamza>";"<BAS><BAS>";"<MIN><MIN>";IGNORE
+<U06CD> <yeh_with_tail>;<AYE>;<MIN>;IGNORE
+
+reorder-end
+
+END LC_COLLATE
+
+LC_TIME
+ abday "<U06CC><U002E>";/
+ "<U062F><U002E>";/
+ "<U0633><U002E>";/
+ "<U0686><U002E>";/
+ "<U067E><U002E>";/
+ "<U062C><U002E>";/
+ "<U0634><U002E>"
+ day "<U06CC><U06A9><U0634><U0646><U0628><U0647>";/
+ "<U062F><U0648><U0634><U0646><U0628><U0647>";/
+ "<U0633><U0647><U200C><U0634><U0646><U0628><U0647>";/
+ "<U0686><U0627><U0631><U0634><U0646><U0628><U0647>";/
+ "<U067E><U0646><U062C><U0634><U0646><U0628><U0647>";/
+ "<U062C><U0645><U0639><U0647>";/
+ "<U0634><U0646><U0628><U0647>"
+ abmon "<U062C><U0646><U0648>";/
+ "<U0641><U0628><U0631>";/
+ "<U0645><U0627><U0631>";/
+ "<U0627><U067E><U0631>";/
+ "<U0645><U0640><U06D0>";/
+ "<U062C><U0648><U0646>";/
+ "<U062C><U0648><U0644>";/
+ "<U0627><U06AB><U0633>";/
+ "<U0633><U067E><U062A>";/
+ "<U0627><U06A9><U062A>";/
+ "<U0646><U0648><U0645>";/
+ "<U062F><U0633><U0645>"
+ mon "<U062C><U0646><U0648><U0631><U064A>";/
+ "<U0641><U0628><U0631><U0648><U0631><U064A>";/
+ "<U0645><U0627><U0631><U0686>";/
+ "<U0627><U067E><U0631><U06CC><U0644>";/
+ "<U0645><U06D0>";/
+ "<U062C><U0648><U0646>";/
+ "<U062C><U0648><U0644><U0627><U064A>";/
+ "<U0627><U06AB><U0633><U062A>";/
+ "<U0633><U067E><U062A><U0645><U0628><U0631>";/
+ "<U0627><U06A9><U062A><U0648><U0628><U0631>";/
+ "<U0646><U0648><U0645><U0628><U0631>";/
+ "<U062F><U0633><U0645><U0628><U0631>"
+ d_t_fmt "<U0025><U0041><U0020><U062F><U0020><U0025><U0059>/
+<U0020><U062F><U0020><U0025><U0042><U0020><U0025><U0065><U060C>/
+<U0020><U0025><U005A><U0020><U0025><U0048><U003A><U0025><U004D>/
+<U003A><U0025><U0053>"
+ d_fmt "<U062F><U0020><U0025><U0059><U0020><U062F><U0020>/
+<U0025><U0042><U0020><U0025><U0065>"
+ t_fmt "<U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
+ am_pm "<U063A><U002E><U0645><U002E>";/
+ "<U063A><U002E><U0648><U002E>"
+ t_fmt_ampm "<U202B><U0025><U0049><U003A><U0025><U004D><U003A>/
+<U0025><U0053><U0020><U0025><U0070><U202C>"
+ first_weekday 7
+ first_workday 7
+ cal_direction 3
+END LC_TIME
+
+LC_NUMERIC
+ decimal_point "<U066B>"
+ thousands_sep "<U066C>"
+ grouping 3
+END LC_NUMERIC
+
+LC_MONETARY
+ int_curr_symbol "<U0041><U0046><U004E><U0020>"
+ currency_symbol "<U0627><U0641><U063A><U0627><U0646><U06CD>"
+ mon_decimal_point "<U066B>"
+ mon_thousands_sep "<U066C>"
+ mon_grouping 3
+ positive_sign ""
+ negative_sign "<U002D>"
+ int_frac_digits 0
+ frac_digits 0
+ p_cs_precedes 0
+ p_sep_by_space 1
+ n_cs_precedes 0
+ n_sep_by_space 1
+ p_sign_posn 1
+ n_sign_posn 1
+END LC_MONETARY
+
+LC_MESSAGES
+ yesexpr "<U005E><U005B><U0079><U0059><U0628>/
+<U0066><U005D><U002E><U002A>"
+ noexpr "<U005E><U005B><U006E><U004E><U062E>/
+<U0646><U006F><U005D><U002E><U002A>"
+END LC_MESSAGES
+
+LC_MEASUREMENT
+ measurement 1
+END LC_MEASUREMENT
+
+LC_PAPER
+ height 297
+ width 210
+END LC_PAPER
+
+LC_NAME
+ name_fmt "<U0025><U0073><U0025><U0074><U0025><U0070><U0025>/
+<U0074><U0025><U0067><U0025><U0074><U0025><U006D><U0025><U0074><U0025><U0066>"
+ name_gen "" % No general salutation for all persons in Pashto.
+ name_mr "<U069A><U0627><U063A><U0644><U06D0>"
+ name_mrs "<U0628><U064A><U0020><U0628><U064A>"
+ name_miss "<U0628><U064A><U0020><U0628><U064A>"
+ name_ms "<U0628><U064A><U0020><U0628><U064A>"
+END LC_NAME
+
+LC_ADDRESS
+ postal_fmt "<U0025><U0061><U0025><U004E><U0025><U0066><U0025>/
+<U004E><U0025><U0064><U0025><U004E><U0025><U0062><U0025><U004E><U0025>/
+<U0073><U0020><U0025><U0068><U0020><U0025><U0065><U0020><U0025><U0072>/
+<U0025><U004E><U0025><U0043><U002D><U0025><U007A><U0020><U0025><U0054>/
+<U0025><U004E><U0025><U0063><U0025><U004E>"
+ country_name "<U0627><U0641><U063A><U0627><U0646>/
+<U0633><U062A><U0627><U0646>"
+ %country_post "<U0041><U0046><U0047>" % FIXME: Not confirmed.
+ country_ab2 "<U0041><U0046>"
+ country_ab3 "<U0041><U0046><U0047>"
+ country_num 004
+ country_car "<U0041><U0046><U0047>"
+
+ %country_isbn "" % Unfortunately not yet assigned :-(
+ % Since to date there is no ISBN agency working
+ % in Afghanistan.
+
+ lang_name "<U067E><U069A><U062A><U0648>"
+ lang_ab "<U0070><U0073>"
+ lang_term "<U0070><U0075><U0073>"
+ lang_lib "<U0070><U0075><U0073>"
+END LC_ADDRESS
+
+LC_TELEPHONE
+ tel_int_fmt "<U002B><U0025><U0063><U0020><U0025><U0061><U0020>/
+<U0025><U006C>"
+ tel_dom_fmt "<U0025><U0041><U2012><U0025><U006C>"
+ int_select "<U0030><U0030>"
+ int_prefix "<U0039><U0033>"
+END LC_TELEPHONE
+
+%%%%%%%%%%%%%%%%%%%% locale definition file "ps_AF" ends %%%%%%%%%%%%%%%%%%%%%
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile Fri Oct 23 00:04:16 2009
@@ -4,7 +4,8 @@
endif
ifeq ($(subdir),string)
-sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3
+sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
+ strend-sse4
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-strcspn-c.c += -msse4
Added: fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S Fri Oct 23 00:04:16 2009
@@ -1,0 +1,177 @@
+/* strchr with SSE4.2
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <ifunc-defines.h>
+
+
+/* Define multiple versions only for the definition in libc. */
+#ifndef NOT_IN_libc
+ .text
+ENTRY(strchr) /* IFUNC resolver: returns in %rax the address of the strchr implementation to use. */
+ .type strchr, @gnu_indirect_function
+ cmpl $0, __cpu_features+KIND_OFFSET(%rip) /* Skip the init call when the kind field is already nonzero. */
+ jne 1f
+ call __init_cpu_features /* NOTE(review): unless ENTRY adjusts %rsp, the stack is 8 bytes off 16-byte alignment at this call -- confirm the callee tolerates it. */
+1: leaq __strchr_sse2(%rip), %rax /* Default choice: the SSE2 version. */
+ testl $(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip) /* Bit 20 of CPUID leaf 1 ECX is the SSE4.2 feature flag. */
+ jz 2f
+ leaq __strchr_sse42(%rip), %rax /* SSE4.2 present: pick the pcmpistri version. */
+2: ret
+END(strchr)
+
+
+/*
+ This implementation uses SSE4 instructions to compare up to 16 bytes
+ at a time looking for the first occurrence of the character c in the
+ string s:
+
+ char *strchr (const char *s, int c);
+
+ We use 0x2:
+ _SIDD_SBYTE_OPS
+ | _SIDD_CMP_EQUAL_ANY
+ | _SIDD_LEAST_SIGNIFICANT
+ on pcmpistri to compare xmm/mem128
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ X X X X X X X X X X X X X X X X
+
+ against xmm
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ C C C C C C C C C C C C C C C C
+
+ to find out if the first 16byte data element has a byte C and the
+ offset of the first byte. There are 3 cases:
+
+ 1. The first 16byte data element has the byte C at the offset X.
+ 2. The first 16byte data element has EOS and doesn't have the byte C.
+ 3. The first 16byte data element is valid and doesn't have the byte C.
+
+ Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
+
+ case ECX CFlag ZFlag SFlag
+ 1 X 1 0/1 0
+ 2 16 0 1 0
+ 3 16 0 0 0
+
+ We exit from the loop for cases 1 and 2 with jbe which branches
+ when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
+ X for case 1. */
+
+ .section .text.sse4.2,"ax",@progbits
+ .align 16
+ .type __strchr_sse42, @function
+__strchr_sse42: /* In: %rdi = s, %esi = c. Out: %rax = address of the first byte equal to (char) c, or NULL. */
+ cfi_startproc
+ CALL_MCOUNT
+ testb %sil, %sil
+ je __strend_sse4 /* c == 0: tail-jump to __strend_sse4. */
+ pxor %xmm2, %xmm2
+ movd %esi, %xmm1
+ pshufb %xmm2, %xmm1 /* Broadcast the low byte of c on BOTH paths: otherwise nonzero upper bytes of %esi (e.g. a sign-extended char) join the pcmpistri match set in the aligned loop. */
+ movl %edi, %ecx
+ andl $15, %ecx /* ecx = misalignment of s within its 16-byte block. */
+ movq %rdi, %r8
+ je L(aligned_start) /* Flags still come from the andl above. */
+
+/* Handle unaligned string. */
+ andq $-16, %r8
+ movdqa (%r8), %xmm0
+ pcmpeqb %xmm0, %xmm2 /* xmm2 was all zero: marks the NUL bytes. */
+ pcmpeqb %xmm1, %xmm0 /* Marks the bytes equal to c. */
+ /* Find where NULL is. */
+ pmovmskb %xmm2, %edx
+ /* Check if there is a match. */
+ pmovmskb %xmm0, %esi
+ /* Remove the leading bytes. */
+ sarl %cl, %edx
+ sarl %cl, %esi
+ testl %esi, %esi
+ je L(unaligned_no_match)
+ /* Check which byte is a match. */
+ bsfl %esi, %eax
+ /* Is there a NULL? */
+ testl %edx, %edx
+ je L(unaligned_match)
+ bsfl %edx, %esi
+ cmpl %esi, %eax
+ /* Return NULL if NULL comes first. */
+ ja L(return_null)
+L(unaligned_match):
+ addq %rdi, %rax /* eax was the offset of the match from s. */
+ ret
+
+ .p2align 4
+L(unaligned_no_match):
+ testl %edx, %edx
+ jne L(return_null) /* The string ended inside the first block. */
+
+/* Loop start on aligned string. */
+L(loop):
+ addq $16, %r8
+L(aligned_start):
+ pcmpistri $0x2, (%r8), %xmm1
+ jbe L(wrap) /* CF (match) or ZF (end of string) is set. */
+ addq $16, %r8
+ pcmpistri $0x2, (%r8), %xmm1
+ jbe L(wrap)
+ addq $16, %r8
+ pcmpistri $0x2, (%r8), %xmm1
+ jbe L(wrap)
+ addq $16, %r8
+ pcmpistri $0x2, (%r8), %xmm1
+ jbe L(wrap)
+ jmp L(loop)
+L(wrap):
+ jc L(loop_exit) /* CF set: ECX holds the offset of the match. */
+
+/* Return NULL. */
+L(return_null):
+ xorl %eax, %eax
+ ret
+
+/* Loop exit. */
+ .p2align 4
+L(loop_exit):
+ leaq (%r8,%rcx), %rax
+ ret
+ cfi_endproc
+ .size __strchr_sse42, .-__strchr_sse42
+
+
+# undef ENTRY /* Redefine ENTRY/END so the code bracketed by them from here on is emitted under the label __strchr_sse2, whatever name is passed. */
+# define ENTRY(name) \
+ .type __strchr_sse2, @function; \
+ .align 16; \
+ __strchr_sse2: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __strchr_sse2, .-__strchr_sse2
+# undef libc_hidden_builtin_def /* Redefined below to bind __GI_strchr straight to __strchr_sse2. */
+/* It doesn't make sense to send libc-internal strchr calls through a PLT.
+ The speedup we get from using SSE4.2 instruction is likely eaten away
+ by the indirect call in the PLT. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_strchr; __GI_strchr = __strchr_sse2
+#endif
+
+#include "../strchr.S"
Added: fsf/trunk/libc/sysdeps/x86_64/multiarch/strend-sse4.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strend-sse4.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strend-sse4.S Fri Oct 23 00:04:16 2009
@@ -1,0 +1,49 @@
+/* Return the pointer to the end of string, using SSE4.2
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+ .section .text.sse4.2,"ax",@progbits
+ENTRY (__strend_sse4) /* In: %rdi = s. Out: %rax = address of the terminating NUL of s. */
+ pxor %xmm2, %xmm2 /* xmm2 = 0, compared against the first block to find NULs. */
+ movq %rdi, %rcx
+ andq $~15, %rdi /* Round s down to a 16-byte boundary. */
+ movdqa %xmm2, %xmm1 /* xmm1 = 0 for the pcmpistri loop below. */
+ pcmpeqb (%rdi), %xmm2 /* 0xff in every byte lane that holds a NUL. */
+ orl $0xffffffff, %esi
+ subq %rdi, %rcx /* rcx = offset of s within its aligned block. */
+ shll %cl, %esi /* Clear the mask bits for lanes that precede s. */
+ pmovmskb %xmm2, %edx
+ andl %esi, %edx
+ jnz 1f /* A NUL lies at or after s in the first block. */
+
+2: pcmpistri $0x08, 16(%rdi), %xmm1 /* Sets ZF when the block contains a NUL and writes an index to ECX. */
+ leaq 16(%rdi), %rdi /* Advance with lea so the flags from pcmpistri survive. */
+ jnz 2b /* ZF clear: no NUL in this block, keep scanning. */
+
+ leaq (%rdi,%rcx), %rax /* Block address plus the index pcmpistri left in ECX. */
+ ret
+
+1: bsfl %edx, %eax /* Index of the lowest set mask bit, i.e. the first NUL. */
+ addq %rdi, %rax
+ ret
+
+END (__strend_sse4)
Added: fsf/trunk/libc/sysdeps/x86_64/multiarch/strrchr.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strrchr.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strrchr.S Fri Oct 23 00:04:16 2009
@@ -1,0 +1,278 @@
+/* strrchr with SSE4.2
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <ifunc-defines.h>
+
+
+/* Define multiple versions only for the definition in libc and for
+ the DSO. In static binaries we need strrchr before the initialization
+ happened. */
+#if defined SHARED && !defined NOT_IN_libc
+ .text
+ENTRY(strrchr) /* IFUNC resolver: returns in %rax the address of the strrchr implementation to use. */
+ .type strrchr, @gnu_indirect_function
+ cmpl $0, __cpu_features+KIND_OFFSET(%rip) /* Skip the init call when the kind field is already nonzero. */
+ jne 1f
+ call __init_cpu_features /* NOTE(review): unless ENTRY adjusts %rsp, the stack is 8 bytes off 16-byte alignment at this call -- confirm the callee tolerates it. */
+1: leaq __strrchr_sse2(%rip), %rax /* Default choice: the SSE2 version. */
+ testl $(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip) /* Bit 20 of CPUID leaf 1 ECX is the SSE4.2 feature flag. */
+ jz 2f
+ leaq __strrchr_sse42(%rip), %rax /* SSE4.2 present: pick the pcmpistri version. */
+2: ret
+END(strrchr)
+
+/*
+ This implementation uses SSE4 instructions to compare up to 16 bytes
+ at a time looking for the last occurrence of the character c in the
+ string s:
+
+ char *strrchr (const char *s, int c);
+
+ We use 0x4a:
+ _SIDD_SBYTE_OPS
+ | _SIDD_CMP_EQUAL_EACH
+ | _SIDD_MOST_SIGNIFICANT
+ on pcmpistri to compare xmm/mem128
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ X X X X X X X X X X X X X X X X
+
+ against xmm
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ C C C C C C C C C C C C C C C C
+
+ to find out if the first 16byte data element has a byte C and the
+ last offset. There are 4 cases:
+
+ 1. The first 16byte data element has EOS and has the byte C at the
+ last offset X.
+ 2. The first 16byte data element is valid and has the byte C at the
+ last offset X.
+ 3. The first 16byte data element has EOS and doesn't have the byte C.
+ 4. The first 16byte data element is valid and doesn't have the byte C.
+
+ Here is the table of ECX, CFlag, ZFlag and SFlag for 4 cases:
+
+ case ECX CFlag ZFlag SFlag
+ 1 X 1 1 0
+ 2 X 1 0 0
+ 3 16 0 1 0
+ 4 16 0 0 0
+
+ We exit from the loop for cases 1 and 3 with jz which branches
+ when ZFlag is 1. If CFlag == 1, ECX has the offset X for case 1. */
+
+
+ .section .text.sse4.2,"ax",@progbits
+ .align 16
+ .type __strrchr_sse42, @function
+__strrchr_sse42: /* In: %rdi = s, %esi = c. Out: %rax = address of the last byte equal to c, or NULL. */
+ cfi_startproc
+ CALL_MCOUNT
+ testb %sil, %sil
+ je __strend_sse4 /* c == 0: tail-jump to __strend_sse4. */
+ xor %eax,%eax /* RAX holds the last occurrence of c found so far (NULL for now). */
+ movd %esi, %xmm1
+ punpcklbw %xmm1, %xmm1 /* Together with the second punpcklbw and the pshufd: replicate the low byte of c into all 16 bytes of xmm1. */
+ movl %edi, %esi
+ punpcklbw %xmm1, %xmm1
+ andl $15, %esi /* esi = misalignment of s within its 16-byte block. */
+ pshufd $0, %xmm1, %xmm1
+ movq %rdi, %r8
+ je L(loop) /* Already aligned; flags still come from the andl above. */
+
+/* Handle unaligned string using psrldq. */
+ leaq L(psrldq_table)(%rip), %rdx
+ andq $-16, %r8
+ movslq (%rdx,%rsi,4),%r9 /* Table entry for this misalignment (offset relative to the table). */
+ movdqa (%r8), %xmm0
+ addq %rdx, %r9
+ jmp *%r9 /* Dispatch to the psrldq stub with the matching immediate. */
+
+/* Handle unaligned string with offset 1 using psrldq. */
+ .p2align 4
+L(psrldq_1):
+ psrldq $1, %xmm0
+
+ .p2align 4
+L(unaligned_pcmpistri):
+ pcmpistri $0x4a, %xmm1, %xmm0
+ jnc L(unaligned_no_byte) /* CF clear: c does not occur in this fragment. */
+ leaq (%rdi,%rcx), %rax /* Record the last match in the fragment. */
+L(unaligned_no_byte):
+ /* Find the length of the unaligned string. */
+ pcmpistri $0x3a, %xmm0, %xmm0
+ movl $16, %edx
+ subl %esi, %edx /* edx = number of bytes from s to the end of its block. */
+ cmpl %ecx, %edx
+ /* Return RAX if the unaligned fragment to next 16B already
+ contain the NULL terminator. */
+ jg L(exit)
+ addq $16, %r8
+
+/* Loop start on aligned string. */
+ .p2align 4
+L(loop):
+ pcmpistri $0x4a, (%r8), %xmm1
+ jbe L(match_or_eos) /* CF (match) or ZF (end of string) is set. */
+ addq $16, %r8
+ jmp L(loop)
+ .p2align 4
+L(match_or_eos):
+ je L(had_eos) /* ZF set: this block holds the terminating NUL. */
+L(match_no_eos):
+ leaq (%r8,%rcx), %rax /* Remember this match and keep scanning. */
+ addq $16, %r8
+ jmp L(loop)
+ .p2align 4
+L(had_eos):
+ jnc L(exit) /* No match in the final block: return what was recorded. */
+ leaq (%r8,%rcx), %rax
+ .p2align 4
+L(exit):
+ ret
+
+/* Handle unaligned string with offset 15 using psrldq. */
+ .p2align 4
+L(psrldq_15):
+ psrldq $15, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 14 using psrldq. */
+ .p2align 4
+L(psrldq_14):
+ psrldq $14, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 13 using psrldq. */
+ .p2align 4
+L(psrldq_13):
+ psrldq $13, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 12 using psrldq. */
+ .p2align 4
+L(psrldq_12):
+ psrldq $12, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 11 using psrldq. */
+ .p2align 4
+L(psrldq_11):
+ psrldq $11, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 10 using psrldq. */
+ .p2align 4
+L(psrldq_10):
+ psrldq $10, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 9 using psrldq. */
+ .p2align 4
+L(psrldq_9):
+ psrldq $9, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 8 using psrldq. */
+ .p2align 4
+L(psrldq_8):
+ psrldq $8, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 7 using psrldq. */
+ .p2align 4
+L(psrldq_7):
+ psrldq $7, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 6 using psrldq. */
+ .p2align 4
+L(psrldq_6):
+ psrldq $6, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 5 using psrldq. */
+ .p2align 4
+L(psrldq_5):
+ psrldq $5, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 4 using psrldq. */
+ .p2align 4
+L(psrldq_4):
+ psrldq $4, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 3 using psrldq. */
+ .p2align 4
+L(psrldq_3):
+ psrldq $3, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+/* Handle unaligned string with offset 2 using psrldq. */
+ .p2align 4
+L(psrldq_2):
+ psrldq $2, %xmm0
+ jmp L(unaligned_pcmpistri)
+
+ cfi_endproc
+ .size __strrchr_sse42, .-__strrchr_sse42
+
+ .section .rodata.sse4.2,"a",@progbits
+ .p2align 4
+L(psrldq_table): /* 32-bit offsets, relative to this table, of the code to run for each value of (s & 15). */
+ .int L(loop) - L(psrldq_table) /* Entry 0: the aligned case, which __strrchr_sse42 branches to directly before consulting the table. */
+ .int L(psrldq_1) - L(psrldq_table)
+ .int L(psrldq_2) - L(psrldq_table)
+ .int L(psrldq_3) - L(psrldq_table)
+ .int L(psrldq_4) - L(psrldq_table)
+ .int L(psrldq_5) - L(psrldq_table)
+ .int L(psrldq_6) - L(psrldq_table)
+ .int L(psrldq_7) - L(psrldq_table)
+ .int L(psrldq_8) - L(psrldq_table)
+ .int L(psrldq_9) - L(psrldq_table)
+ .int L(psrldq_10) - L(psrldq_table)
+ .int L(psrldq_11) - L(psrldq_table)
+ .int L(psrldq_12) - L(psrldq_table)
+ .int L(psrldq_13) - L(psrldq_table)
+ .int L(psrldq_14) - L(psrldq_table)
+ .int L(psrldq_15) - L(psrldq_table)
+
+
+# undef ENTRY /* Redefine ENTRY/END so the code bracketed by them from here on is emitted under the label __strrchr_sse2, whatever name is passed. */
+# define ENTRY(name) \
+ .type __strrchr_sse2, @function; \
+ .align 16; \
+ __strrchr_sse2: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __strrchr_sse2, .-__strrchr_sse2
+# undef libc_hidden_builtin_def /* Redefined below to bind __GI_strrchr straight to __strrchr_sse2. */
+/* It doesn't make sense to send libc-internal strrchr calls through a PLT.
+ The speedup we get from using SSE4.2 instruction is likely eaten away
+ by the indirect call in the PLT. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_strrchr; __GI_strrchr = __strrchr_sse2
+#endif
+
+#include "../strrchr.S"