[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[commits] r11108 - in /fsf/trunk/libc: ./ string/ sysdeps/x86_64/ sysdeps/x86_64/multiarch/
- To: commits@xxxxxxxxxx
- Subject: [commits] r11108 - in /fsf/trunk/libc: ./ string/ sysdeps/x86_64/ sysdeps/x86_64/multiarch/
- From: eglibc@xxxxxxxxxx
- Date: Sat, 31 Jul 2010 07:03:05 -0000
Author: eglibc
Date: Sat Jul 31 00:03:04 2010
New Revision: 11108
Log:
Import glibc-mainline for 2010-07-31
Added:
fsf/trunk/libc/string/test-strcasecmp.c
fsf/trunk/libc/sysdeps/x86_64/locale-defines.sym
fsf/trunk/libc/sysdeps/x86_64/strcasecmp.S
fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l-nonascii.c
fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l.S
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/NEWS
fsf/trunk/libc/string/Makefile
fsf/trunk/libc/string/test-strcasestr.c
fsf/trunk/libc/sysdeps/x86_64/Makefile
fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/strstr.c
fsf/trunk/libc/sysdeps/x86_64/strcmp.S
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Sat Jul 31 00:03:04 2010
@@ -1,3 +1,21 @@
+2010-07-30 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * sysdeps/x86_64/multiarch/strcmp.S: Pretty printing.
+
+ * string/Makefile (strop-tests): Add strcasecmp.
+ * sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add
+ strcasecmp_l-nonascii.
+ (gen-as-const-headers): Add locale-defines.sym.
+ * sysdeps/x86_64/strcmp.S: Add support for strcasecmp implementation.
+ * sysdeps/x86_64/strcasecmp.S: New file.
+ * sysdeps/x86_64/strcasecmp_l.S: New file.
+ * sysdeps/x86_64/strcasecmp_l-nonascii.c: New file.
+ * sysdeps/x86_64/locale-defines.sym: New file.
+ * string/test-strcasecmp.c: New file.
+
+ * string/test-strcasestr.c: Test both ends of the range of characters.
+ * sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition.
+
2010-07-29 Roland McGrath <roland@xxxxxxxxxx>
[BZ #11856]
Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Sat Jul 31 00:03:04 2010
@@ -13,7 +13,7 @@
* POWER7 optimizations: memset, memcmp, strncmp
-* New optimized string functions for x86-64: strnlen
+* New optimized string functions for x86-64: strnlen, strcasecmp
Implemented by Ulrich Drepper.
Version 2.12
Modified: fsf/trunk/libc/string/Makefile
==============================================================================
--- fsf/trunk/libc/string/Makefile (original)
+++ fsf/trunk/libc/string/Makefile Sat Jul 31 00:03:04 2010
@@ -49,7 +49,7 @@
strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
strlen strncmp strncpy strpbrk strrchr strspn memmem \
- strstr strcasestr strnlen
+ strstr strcasestr strnlen strcasecmp
tests := tester inl-tester noinl-tester testcopy test-ffs \
tst-strlen stratcliff tst-svc tst-inlcall \
bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \
Added: fsf/trunk/libc/string/test-strcasecmp.c
==============================================================================
--- fsf/trunk/libc/string/test-strcasecmp.c (added)
+++ fsf/trunk/libc/string/test-strcasecmp.c Sat Jul 31 00:03:04 2010
@@ -1,0 +1,276 @@
+/* Test and measure strcasecmp functions.
+ Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Jakub Jelinek <jakub@xxxxxxxxxx>, 1999.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <ctype.h>
+#define TEST_MAIN
+#include "test-string.h"
+
+typedef int (*proto_t) (const char *, const char *);
+static int simple_strcasecmp (const char *, const char *);
+static int stupid_strcasecmp (const char *, const char *);
+
+IMPL (stupid_strcasecmp, 0)
+IMPL (simple_strcasecmp, 0)
+IMPL (strcasecmp, 1)
+
+static int
+simple_strcasecmp (const char *s1, const char *s2)
+{
+ int ret;
+
+ while ((ret = ((unsigned char) tolower (*s1)
+ - (unsigned char) tolower (*s2))) == 0
+ && *s1++)
+ ++s2;
+ return ret;
+}
+
+static int
+stupid_strcasecmp (const char *s1, const char *s2)
+{
+ size_t ns1 = strlen (s1) + 1, ns2 = strlen (s2) + 1;
+ size_t n = ns1 < ns2 ? ns1 : ns2;
+ int ret = 0;
+
+ while (n--)
+ {
+ if ((ret = ((unsigned char) tolower (*s1)
+ - (unsigned char) tolower (*s2))) != 0)
+ break;
+ ++s1;
+ ++s2;
+ }
+ return ret;
+}
+
+static void
+do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
+{
+ int result = CALL (impl, s1, s2);
+ if ((exp_result == 0 && result != 0)
+ || (exp_result < 0 && result >= 0)
+ || (exp_result > 0 && result <= 0))
+ {
+ error (0, 0, "Wrong result in function %s %d %d", impl->name,
+ result, exp_result);
+ ret = 1;
+ return;
+ }
+
+ if (HP_TIMING_AVAIL)
+ {
+ hp_timing_t start __attribute ((unused));
+ hp_timing_t stop __attribute ((unused));
+ hp_timing_t best_time = ~ (hp_timing_t) 0;
+ size_t i;
+
+ for (i = 0; i < 32; ++i)
+ {
+ HP_TIMING_NOW (start);
+ CALL (impl, s1, s2);
+ HP_TIMING_NOW (stop);
+ HP_TIMING_BEST (best_time, start, stop);
+ }
+
+ printf ("\t%zd", (size_t) best_time);
+ }
+}
+
+static void
+do_test (size_t align1, size_t align2, size_t len, int max_char,
+ int exp_result)
+{
+ size_t i;
+ char *s1, *s2;
+
+ if (len == 0)
+ return;
+
+ align1 &= 7;
+ if (align1 + len + 1 >= page_size)
+ return;
+
+ align2 &= 7;
+ if (align2 + len + 1 >= page_size)
+ return;
+
+ s1 = (char *) (buf1 + align1);
+ s2 = (char *) (buf2 + align2);
+
+ for (i = 0; i < len; i++)
+ {
+ s1[i] = toupper (1 + 23 * i % max_char);
+ s2[i] = tolower (s1[i]);
+ }
+
+ s1[len] = s2[len] = 0;
+ s1[len + 1] = 23;
+ s2[len + 1] = 24 + exp_result;
+ if ((s2[len - 1] == 'z' && exp_result == -1)
+ || (s2[len - 1] == 'a' && exp_result == 1))
+ s1[len - 1] += exp_result;
+ else
+ s2[len - 1] -= exp_result;
+
+ if (HP_TIMING_AVAIL)
+ printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+
+ FOR_EACH_IMPL (impl, 0)
+ do_one_test (impl, s1, s2, exp_result);
+
+ if (HP_TIMING_AVAIL)
+ putchar ('\n');
+}
+
+static void
+do_random_tests (void)
+{
+ size_t i, j, n, align1, align2, pos, len1, len2;
+ int result;
+ long r;
+ unsigned char *p1 = buf1 + page_size - 512;
+ unsigned char *p2 = buf2 + page_size - 512;
+
+ for (n = 0; n < ITERATIONS; n++)
+ {
+ align1 = random () & 31;
+ if (random () & 1)
+ align2 = random () & 31;
+ else
+ align2 = align1 + (random () & 24);
+ pos = random () & 511;
+ j = align1 > align2 ? align1 : align2;
+ if (pos + j >= 511)
+ pos = 510 - j - (random () & 7);
+ len1 = random () & 511;
+ if (pos >= len1 && (random () & 1))
+ len1 = pos + (random () & 7);
+ if (len1 + j >= 512)
+ len1 = 511 - j - (random () & 7);
+ if (pos >= len1)
+ len2 = len1;
+ else
+ len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
+ j = (pos > len2 ? pos : len2) + align1 + 64;
+ if (j > 512)
+ j = 512;
+ for (i = 0; i < j; ++i)
+ {
+ p1[i] = tolower (random () & 255);
+ if (i < len1 + align1 && !p1[i])
+ {
+ p1[i] = tolower (random () & 255);
+ if (!p1[i])
+ p1[i] = tolower (1 + (random () & 127));
+ }
+ }
+ for (i = 0; i < j; ++i)
+ {
+ p2[i] = toupper (random () & 255);
+ if (i < len2 + align2 && !p2[i])
+ {
+ p2[i] = toupper (random () & 255);
+ if (!p2[i])
+ toupper (p2[i] = 1 + (random () & 127));
+ }
+ }
+
+ result = 0;
+ memcpy (p2 + align2, p1 + align1, pos);
+ if (pos < len1)
+ {
+ if (tolower (p2[align2 + pos]) == p1[align1 + pos])
+ {
+ p2[align2 + pos] = toupper (random () & 255);
+ if (tolower (p2[align2 + pos]) == p1[align1 + pos])
+ p2[align2 + pos] = toupper (p1[align1 + pos]
+ + 3 + (random () & 127));
+ }
+
+ if (p1[align1 + pos] < tolower (p2[align2 + pos]))
+ result = -1;
+ else
+ result = 1;
+ }
+ p1[len1 + align1] = 0;
+ p2[len2 + align2] = 0;
+
+ FOR_EACH_IMPL (impl, 1)
+ {
+ r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2));
+ /* Test whether on 64-bit architectures where ABI requires
+ callee to promote has the promotion been done. */
+ asm ("" : "=g" (r) : "0" (r));
+ if ((r == 0 && result)
+ || (r < 0 && result >= 0)
+ || (r > 0 && result <= 0))
+ {
+ error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
+ n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
+ ret = 1;
+ }
+ }
+ }
+}
+
+int
+test_main (void)
+{
+ size_t i;
+
+ test_init ();
+
+ printf ("%23s", "");
+ FOR_EACH_IMPL (impl, 0)
+ printf ("\t%s", impl->name);
+ putchar ('\n');
+
+ for (i = 1; i < 16; ++i)
+ {
+ do_test (i, i, i, 127, 0);
+ do_test (i, i, i, 127, 1);
+ do_test (i, i, i, 127, -1);
+ }
+
+ for (i = 1; i < 10; ++i)
+ {
+ do_test (0, 0, 2 << i, 127, 0);
+ do_test (0, 0, 2 << i, 254, 0);
+ do_test (0, 0, 2 << i, 127, 1);
+ do_test (0, 0, 2 << i, 254, 1);
+ do_test (0, 0, 2 << i, 127, -1);
+ do_test (0, 0, 2 << i, 254, -1);
+ }
+
+ for (i = 1; i < 8; ++i)
+ {
+ do_test (i, 2 * i, 8 << i, 127, 0);
+ do_test (2 * i, i, 8 << i, 254, 0);
+ do_test (i, 2 * i, 8 << i, 127, 1);
+ do_test (2 * i, i, 8 << i, 254, 1);
+ do_test (i, 2 * i, 8 << i, 127, -1);
+ do_test (2 * i, i, 8 << i, 254, -1);
+ }
+
+ do_random_tests ();
+ return ret;
+}
+
+#include "../test-skeleton.c"
Modified: fsf/trunk/libc/string/test-strcasestr.c
==============================================================================
--- fsf/trunk/libc/string/test-strcasestr.c (original)
+++ fsf/trunk/libc/string/test-strcasestr.c Sat Jul 31 00:03:04 2010
@@ -97,7 +97,7 @@
char *s1 = (char *) (buf1 + align1);
char *s2 = (char *) (buf2 + align2);
- static const char d[] = "1234567890abcdef";
+ static const char d[] = "1234567890abcxyz";
#define dl (sizeof (d) - 1)
char *ss2 = s2;
for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)
Modified: fsf/trunk/libc/sysdeps/x86_64/Makefile
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/Makefile (original)
+++ fsf/trunk/libc/sysdeps/x86_64/Makefile Sat Jul 31 00:03:04 2010
@@ -12,7 +12,8 @@
endif
ifeq ($(subdir),string)
-sysdep_routines += cacheinfo
+sysdep_routines += cacheinfo strcasecmp_l-nonascii
+gen-as-const-headers += locale-defines.sym
endif
ifeq ($(subdir),elf)
Added: fsf/trunk/libc/sysdeps/x86_64/locale-defines.sym
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/locale-defines.sym (added)
+++ fsf/trunk/libc/sysdeps/x86_64/locale-defines.sym Sat Jul 31 00:03:04 2010
@@ -1,0 +1,11 @@
+#include <locale/localeinfo.h>
+#include <langinfo.h>
+#include <stddef.h>
+
+--
+
+LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales)
+LC_CTYPE
+_NL_CTYPE_NONASCII_CASE
+LOCALE_DATA_VALUES offsetof (struct __locale_data, values)
+SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0])
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S Sat Jul 31 00:03:04 2010
@@ -1,5 +1,5 @@
/* strcmp with SSE4.2
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -24,7 +24,7 @@
#ifdef USE_AS_STRNCMP
/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
if the new counter > the old one or is 0. */
-#define UPDATE_STRNCMP_COUNTER \
+# define UPDATE_STRNCMP_COUNTER \
/* calculate left number to compare */ \
lea -16(%rcx, %r11), %r9; \
cmp %r9, %r11; \
@@ -33,23 +33,23 @@
je LABEL(strcmp_exitz_sse4_2); \
mov %r9, %r11
-#define STRCMP_SSE42 __strncmp_sse42
-#define STRCMP_SSSE3 __strncmp_ssse3
-#define STRCMP_SSE2 __strncmp_sse2
-#define __GI_STRCMP __GI_strncmp
+# define STRCMP_SSE42 __strncmp_sse42
+# define STRCMP_SSSE3 __strncmp_ssse3
+# define STRCMP_SSE2 __strncmp_sse2
+# define __GI_STRCMP __GI_strncmp
#else
-#define UPDATE_STRNCMP_COUNTER
-#ifndef STRCMP
-#define STRCMP strcmp
-#define STRCMP_SSE42 __strcmp_sse42
-#define STRCMP_SSSE3 __strcmp_ssse3
-#define STRCMP_SSE2 __strcmp_sse2
-#define __GI_STRCMP __GI_strcmp
-#endif
+# define UPDATE_STRNCMP_COUNTER
+# ifndef STRCMP
+# define STRCMP strcmp
+# define STRCMP_SSE42 __strcmp_sse42
+# define STRCMP_SSSE3 __strcmp_ssse3
+# define STRCMP_SSE2 __strcmp_sse2
+# define __GI_STRCMP __GI_strcmp
+# endif
#endif
#ifndef LABEL
-#define LABEL(l) L(l)
+# define LABEL(l) L(l)
#endif
/* Define multiple versions only for the definition in libc. Don't
@@ -101,7 +101,7 @@
/* Put all SSE 4.2 functions together. */
.section .text.sse4.2,"ax",@progbits
- .align 16
+ .align 16
.type STRCMP_SSE42, @function
STRCMP_SSE42:
cfi_startproc
@@ -241,7 +241,7 @@
/*
* The following cases will be handled by ashr_1
- * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
* n(15) n -15 0(15 +(n-15) - n) ashr_1
*/
.p2align 4
@@ -320,7 +320,7 @@
/*
* The following cases will be handled by ashr_2
- * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
* n(14~15) n -14 1(15 +(n-14) - n) ashr_2
*/
.p2align 4
@@ -559,7 +559,7 @@
/*
* The following cases will be handled by ashr_5
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
+ * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
*/
.p2align 4
LABEL(ashr_5_sse4_2):
@@ -640,7 +640,7 @@
/*
* The following cases will be handled by ashr_6
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
+ * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
*/
.p2align 4
LABEL(ashr_6_sse4_2):
@@ -720,7 +720,7 @@
/*
* The following cases will be handled by ashr_7
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
+ * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
*/
.p2align 4
LABEL(ashr_7_sse4_2):
@@ -800,7 +800,7 @@
/*
* The following cases will be handled by ashr_8
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
+ * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
*/
.p2align 4
LABEL(ashr_8_sse4_2):
@@ -880,7 +880,7 @@
/*
* The following cases will be handled by ashr_9
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
+ * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
*/
.p2align 4
LABEL(ashr_9_sse4_2):
@@ -961,7 +961,7 @@
/*
* The following cases will be handled by ashr_10
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
+ * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
*/
.p2align 4
LABEL(ashr_10_sse4_2):
@@ -1041,7 +1041,7 @@
/*
* The following cases will be handled by ashr_11
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
+ * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
*/
.p2align 4
LABEL(ashr_11_sse4_2):
@@ -1121,7 +1121,7 @@
/*
* The following cases will be handled by ashr_12
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
+ * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
*/
.p2align 4
LABEL(ashr_12_sse4_2):
@@ -1201,7 +1201,7 @@
/*
* The following cases will be handled by ashr_13
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
+ * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
*/
.p2align 4
LABEL(ashr_13_sse4_2):
@@ -1282,7 +1282,7 @@
/*
* The following cases will be handled by ashr_14
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
+ * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
*/
.p2align 4
LABEL(ashr_14_sse4_2):
@@ -1363,7 +1363,7 @@
/*
* The following cases will be handled by ashr_15
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
- * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
+ * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
*/
.p2align 4
LABEL(ashr_15_sse4_2):
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strstr.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strstr.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strstr.c Sat Jul 31 00:03:04 2010
@@ -174,7 +174,7 @@
__m128i frag = __m128i_strloadu (p);
#define UCLOW 0x4040404040404040ULL
-#define UCHIGH 0x5a5a5a5a5a5a5a5aULL
+#define UCHIGH 0x5b5b5b5b5b5b5b5bULL
#define LCQWORD 0x2020202020202020ULL
/* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'. */
__m128i r2 = _mm_cmpgt_epi8 (_mm_set1_epi64x (UCHIGH), frag);
Added: fsf/trunk/libc/sysdeps/x86_64/strcasecmp.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strcasecmp.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/strcasecmp.S Sat Jul 31 00:03:04 2010
@@ -1,0 +1,1 @@
+/* In strcasecmp_l.S. */
Added: fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l-nonascii.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l-nonascii.c (added)
+++ fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l-nonascii.c Sat Jul 31 00:03:04 2010
@@ -1,0 +1,5 @@
+#include <string.h>
+
+#define __strcasecmp_l __strcasecmp_l_nonascii
+#define USE_IN_EXTENDED_LOCALE_MODEL 1
+#include <string/strcasecmp.c>
Added: fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l.S Sat Jul 31 00:03:04 2010
@@ -1,0 +1,6 @@
+#define STRCMP __strcasecmp_l
+#define USE_AS_STRCASECMP_L
+#include "strcmp.S"
+
+weak_alias (__strcasecmp_l, strcasecmp_l)
+libc_hidden_def (strcasecmp_l)
Modified: fsf/trunk/libc/sysdeps/x86_64/strcmp.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strcmp.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/strcmp.S Sat Jul 31 00:03:04 2010
@@ -51,6 +51,15 @@
je LABEL(strcmp_exitz); \
mov %r9, %r11
+#elif defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
+
+/* No support for strcasecmp outside libc so far since it is not needed. */
+# ifdef NOT_IN_lib
+# error "strcasecmp_l not implemented so far"
+# endif
+
+# define UPDATE_STRNCMP_COUNTER
#else
# define UPDATE_STRNCMP_COUNTER
# ifndef STRCMP
@@ -62,6 +71,19 @@
.text
#else
.section .text.ssse3,"ax",@progbits
+#endif
+
+#ifdef USE_AS_STRCASECMP_L
+ENTRY (__strcasecmp)
+ movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
+ movq %fs:(%rax),%rdx
+
+ /* 5-byte NOP. */
+ .byte 0x0f,0x1f,0x44,0x00,0x00
+END (__strcasecmp)
+weak_alias (__strcasecmp, strcasecmp)
+libc_hidden_def (__strcasecmp)
+ /* FALLTHROUGH to strcasecmp_l. */
#endif
ENTRY (BP_SYM (STRCMP))
@@ -84,6 +106,18 @@
ret
END (BP_SYM (STRCMP))
#else /* NOT_IN_libc */
+# ifdef USE_AS_STRCASECMP_L
+ /* We have to fall back on the C implementation for locales
+ with encodings not matching ASCII for single bytes. */
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax
+# else
+ movq (%rdx), %rax
+# endif
+ testl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+ jne __strcasecmp_l_nonascii
+# endif
+
/*
* This implementation uses SSE to compare up to 16 bytes at a time.
*/
@@ -99,6 +133,26 @@
/* Use 64bit AND here to avoid long NOP padding. */
and $0x3f, %rcx /* rsi alignment in cache line */
and $0x3f, %rax /* rdi alignment in cache line */
+# ifdef USE_AS_STRCASECMP_L
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 16
+.Lbelowupper:
+ .quad 0x4040404040404040
+ .quad 0x4040404040404040
+.Ltopupper:
+ .quad 0x5b5b5b5b5b5b5b5b
+ .quad 0x5b5b5b5b5b5b5b5b
+.Ltouppermask:
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .previous
+ movdqa .Lbelowupper(%rip), %xmm5
+# define UCLOW_reg %xmm5
+ movdqa .Ltopupper(%rip), %xmm6
+# define UCHIGH_reg %xmm6
+ movdqa .Ltouppermask(%rip), %xmm7
+# define LCQWORD_reg %xmm7
+# endif
cmp $0x30, %ecx
ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
cmp $0x30, %eax
@@ -107,6 +161,26 @@
movlpd (%rsi), %xmm2
movhpd 8(%rdi), %xmm1
movhpd 8(%rsi), %xmm2
+# ifdef USE_AS_STRCASECMP_L
+# define TOLOWER(reg1, reg2) \
+ movdqa reg1, %xmm8; \
+ movdqa UCHIGH_reg, %xmm9; \
+ movdqa reg2, %xmm10; \
+ movdqa UCHIGH_reg, %xmm11; \
+ pcmpgtb UCLOW_reg, %xmm8; \
+ pcmpgtb reg1, %xmm9; \
+ pcmpgtb UCLOW_reg, %xmm10; \
+ pcmpgtb reg2, %xmm11; \
+ pand %xmm9, %xmm8; \
+ pand %xmm11, %xmm10; \
+ pand LCQWORD_reg, %xmm8; \
+ pand LCQWORD_reg, %xmm10; \
+ por %xmm8, reg1; \
+ por %xmm10, reg2
+ TOLOWER (%xmm1, %xmm2)
+# else
+# define TOLOWER(reg1, reg2)
+# endif
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
@@ -159,7 +233,13 @@
movdqa (%rsi), %xmm1
pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
+# ifndef USE_AS_STRCASECMP_L
pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
+# else
+ movdqa (%rdi), %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
+# endif
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
@@ -183,6 +263,7 @@
LABEL(loop_ashr_0):
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -198,6 +279,7 @@
add $16, %rcx
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -214,7 +296,7 @@
/*
* The following cases will be handled by ashr_1
- * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
* n(15) n -15 0(15 +(n-15) - n) ashr_1
*/
.p2align 4
@@ -224,6 +306,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
pslldq $15, %xmm2 /* shift first string to align with second */
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %r9d
@@ -263,6 +346,7 @@
# else
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -292,6 +376,7 @@
# else
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -351,6 +436,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $14, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -390,6 +476,7 @@
# else
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -420,6 +507,7 @@
# else
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -472,6 +560,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $13, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -512,6 +601,7 @@
# else
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -542,6 +632,7 @@
# else
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -594,6 +685,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $12, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -634,6 +726,7 @@
# else
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -664,6 +757,7 @@
# else
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -716,6 +810,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $11, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -756,6 +851,7 @@
# else
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -786,6 +882,7 @@
# else
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -838,6 +935,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $10, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -878,6 +976,7 @@
# else
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -908,6 +1007,7 @@
# else
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -960,6 +1060,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $9, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1000,6 +1101,7 @@
# else
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1030,6 +1132,7 @@
# else
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1082,6 +1185,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $8, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1122,6 +1226,7 @@
# else
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1152,6 +1257,7 @@
# else
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1204,6 +1310,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $7, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1244,6 +1351,7 @@
# else
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1274,6 +1382,7 @@
# else
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1326,6 +1435,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $6, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1366,6 +1476,7 @@
# else
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1396,6 +1507,7 @@
# else
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1448,6 +1560,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $5, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1488,6 +1601,7 @@
# else
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1518,6 +1632,7 @@
# else
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1570,6 +1685,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $4, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1610,6 +1726,7 @@
# else
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1640,6 +1757,7 @@
# else
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1692,6 +1810,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1732,6 +1851,7 @@
# else
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1762,6 +1882,7 @@
# else
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1814,6 +1935,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $2, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1854,6 +1976,7 @@
# else
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1884,6 +2007,7 @@
# else
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1936,6 +2060,7 @@
movdqa (%rsi), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $1, %xmm2
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
@@ -1978,6 +2103,7 @@
# else
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -2008,6 +2134,7 @@
# else
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
# endif
+ TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -2049,6 +2176,7 @@
.p2align 4
LABEL(aftertail):
+ TOLOWER (%xmm1, %xmm3)
pcmpeqb %xmm3, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edx
@@ -2075,6 +2203,12 @@
# endif
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
+
+# ifdef USE_AS_STRCASECMP_L
+ leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
+ movl (%rdx,%rcx,4), %ecx
+ movl (%rdx,%rax,4), %eax
+# endif
sub %ecx, %eax
ret