[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[commits] r11108 - in /fsf/trunk/libc: ./ string/ sysdeps/x86_64/ sysdeps/x86_64/multiarch/



Author: eglibc
Date: Sat Jul 31 00:03:04 2010
New Revision: 11108

Log:
Import glibc-mainline for 2010-07-31

Added:
    fsf/trunk/libc/string/test-strcasecmp.c
    fsf/trunk/libc/sysdeps/x86_64/locale-defines.sym
    fsf/trunk/libc/sysdeps/x86_64/strcasecmp.S
    fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l-nonascii.c
    fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l.S
Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/NEWS
    fsf/trunk/libc/string/Makefile
    fsf/trunk/libc/string/test-strcasestr.c
    fsf/trunk/libc/sysdeps/x86_64/Makefile
    fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/strstr.c
    fsf/trunk/libc/sysdeps/x86_64/strcmp.S

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Sat Jul 31 00:03:04 2010
@@ -1,3 +1,21 @@
+2010-07-30  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	* sysdeps/x86_64/multiarch/strcmp.S: Pretty printing.
+
+	* string/Makefile (strop-tests): Add strcasecmp.
+	* sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add
+	strcasecmp_l-nonascii.
+	(gen-as-const-headers): Add locale-defines.sym.
+	* sysdeps/x86_64/strcmp.S: Add support for strcasecmp implementation.
+	* sysdeps/x86_64/strcasecmp.S: New file.
+	* sysdeps/x86_64/strcasecmp_l.S: New file.
+	* sysdeps/x86_64/strcasecmp_l-nonascii.c: New file.
+	* sysdeps/x86_64/locale-defines.sym: New file.
+	* string/test-strcasecmp.c: New file.
+
+	* string/test-strcasestr.c: Test both ends of the range of characters.
+	* sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition.
+
 2010-07-29  Roland McGrath  <roland@xxxxxxxxxx>
 
 	[BZ #11856]

Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Sat Jul 31 00:03:04 2010
@@ -13,7 +13,7 @@
 
 * POWER7 optimizations: memset, memcmp, strncmp
 
-* New optimized string functions for x86-64: strnlen
+* New optimized string functions for x86-64: strnlen, strcasecmp
   Implemented by Ulrich Drepper.
 
 Version 2.12

Modified: fsf/trunk/libc/string/Makefile
==============================================================================
--- fsf/trunk/libc/string/Makefile (original)
+++ fsf/trunk/libc/string/Makefile Sat Jul 31 00:03:04 2010
@@ -49,7 +49,7 @@
 strop-tests	:= memchr memcmp memcpy memmove mempcpy memset memccpy	\
 		   stpcpy stpncpy strcat strchr strcmp strcpy strcspn	\
 		   strlen strncmp strncpy strpbrk strrchr strspn memmem	\
-		   strstr strcasestr strnlen
+		   strstr strcasestr strnlen strcasecmp
 tests		:= tester inl-tester noinl-tester testcopy test-ffs	\
 		   tst-strlen stratcliff tst-svc tst-inlcall		\
 		   bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap	\

Added: fsf/trunk/libc/string/test-strcasecmp.c
==============================================================================
--- fsf/trunk/libc/string/test-strcasecmp.c (added)
+++ fsf/trunk/libc/string/test-strcasecmp.c Sat Jul 31 00:03:04 2010
@@ -1,0 +1,276 @@
+/* Test and measure strcasecmp functions.
+   Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Jakub Jelinek <jakub@xxxxxxxxxx>, 1999.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <ctype.h>
+#define TEST_MAIN
+#include "test-string.h"
+
+typedef int (*proto_t) (const char *, const char *);	/* signature shared by every variant registered below */
+static int simple_strcasecmp (const char *, const char *);	/* defined further down in this file */
+static int stupid_strcasecmp (const char *, const char *);	/* defined further down in this file */
+
+IMPL (stupid_strcasecmp, 0)	/* NOTE(review): the second argument presumably marks the real libc function -- confirm in test-string.h */
+IMPL (simple_strcasecmp, 0)
+IMPL (strcasecmp, 1)
+/* Reference strcasecmp: walk both strings in step, comparing tolower'd bytes.  */
+static int
+simple_strcasecmp (const char *s1, const char *s2)
+{
+  int ret;	/* difference of the current pair of case-folded bytes */
+
+  while ((ret = ((unsigned char) tolower (*s1)
+		 - (unsigned char) tolower (*s2))) == 0
+	 && *s1++)	/* stop at the first mismatch, or once the NUL of S1 has compared equal */
+    ++s2;
+  return ret;	/* 0 when equal; otherwise the difference of the first mismatching folded bytes */
+}
+/* Reference strcasecmp that bounds the scan up front with strlen of both inputs.  */
+static int
+stupid_strcasecmp (const char *s1, const char *s2)
+{
+  size_t ns1 = strlen (s1) + 1, ns2 = strlen (s2) + 1;	/* lengths including the terminating NUL */
+  size_t n = ns1 < ns2 ? ns1 : ns2;	/* compare up to and including the shorter string's NUL */
+  int ret = 0;	/* stays 0 if no difference is found within N bytes */
+
+  while (n--)
+    {
+      if ((ret = ((unsigned char) tolower (*s1)
+		  - (unsigned char) tolower (*s2))) != 0)
+	break;	/* the first case-insensitive difference decides the result */
+      ++s1;
+      ++s2;
+    }
+  return ret;
+}
+/* Check that IMPL orders S1/S2 with the sign EXP_RESULT demands, then time it if timing is available.  */
+static void
+do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
+{
+  int result = CALL (impl, s1, s2);
+  if ((exp_result == 0 && result != 0)	/* only the sign of the result is checked, not its magnitude */
+      || (exp_result < 0 && result >= 0)
+      || (exp_result > 0 && result <= 0))
+    {
+      error (0, 0, "Wrong result in function %s %d %d", impl->name,
+	     result, exp_result);
+      ret = 1;	/* record the failure in the flag test_main returns */
+      return;	/* a failing implementation is not timed */
+    }
+
+  if (HP_TIMING_AVAIL)
+    {
+      hp_timing_t start __attribute ((unused));
+      hp_timing_t stop __attribute ((unused));
+      hp_timing_t best_time = ~ (hp_timing_t) 0;	/* start from the largest value so any measurement improves it */
+      size_t i;
+
+      for (i = 0; i < 32; ++i)	/* keep the best of 32 timed calls */
+	{
+	  HP_TIMING_NOW (start);
+	  CALL (impl, s1, s2);
+	  HP_TIMING_NOW (stop);
+	  HP_TIMING_BEST (best_time, start, stop);
+	}
+
+      printf ("\t%zu", (size_t) best_time);	/* %zu, not %zd: the argument is an unsigned size_t */
+    }
+}
+/* Build one LEN-byte string pair whose comparison has the sign EXP_RESULT and run every implementation on it.  */
+static void
+do_test (size_t align1, size_t align2, size_t len, int max_char,
+	 int exp_result)
+{
+  size_t i;
+  char *s1, *s2;
+
+  if (len == 0)	/* the last byte is adjusted below, so at least one byte is needed */
+    return;
+
+  align1 &= 7;	/* only offsets 0..7 into the buffer are used */
+  if (align1 + len + 1 >= page_size)	/* bytes up to index LEN + 1 are written; skip sizes that do not fit */
+    return;
+
+  align2 &= 7;
+  if (align2 + len + 1 >= page_size)
+    return;
+
+  s1 = (char *) (buf1 + align1);
+  s2 = (char *) (buf2 + align2);
+
+  for (i = 0; i < len; i++)
+    {
+      s1[i] = toupper (1 + 23 * i % max_char);	/* value in 1..MAX_CHAR, never NUL; S1 holds the upper-case form */
+      s2[i] = tolower (s1[i]);	/* S2 holds the lower-case form of the same byte */
+    }
+
+  s1[len] = s2[len] = 0;
+  s1[len + 1] = 23;	/* bytes after the NUL differ between the buffers (equal only for EXP_RESULT == -1) */
+  s2[len + 1] = 24 + exp_result;
+  if ((s2[len - 1] == 'z' && exp_result == -1)	/* at either end of the alphabet, move S1's byte instead ... */
+      || (s2[len - 1] == 'a' && exp_result == 1))
+    s1[len - 1] += exp_result;
+  else	/* ... otherwise move S2's byte; NOTE(review): assumes this never wraps or folds across a letter boundary -- confirm before adding lengths */
+    s2[len - 1] -= exp_result;
+
+  if (HP_TIMING_AVAIL)
+    printf ("Length %4zu, alignment %2zu/%2zu:", len, align1, align2);	/* %zu: all three arguments are size_t */
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, s1, s2, exp_result);
+
+  if (HP_TIMING_AVAIL)
+    putchar ('\n');
+}
+/* Compare random string pairs with a known first difference and check the sign each implementation returns.  */
+static void
+do_random_tests (void)
+{
+  size_t i, j, n, align1, align2, pos, len1, len2;
+  int result;	/* expected sign: -1, 0 or 1 */
+  long r;	/* wider than int so that a missing promotion of the return value shows up */
+  unsigned char *p1 = buf1 + page_size - 512;	/* 512-byte window ending at buf1 + page_size */
+  unsigned char *p2 = buf2 + page_size - 512;	/* likewise for the second buffer */
+
+  for (n = 0; n < ITERATIONS; n++)
+    {
+      align1 = random () & 31;
+      if (random () & 1)
+	align2 = random () & 31;
+      else
+	align2 = align1 + (random () & 24);	/* same alignment modulo 8 as ALIGN1 */
+      pos = random () & 511;	/* index of the first difference when it lies inside string 1 */
+      j = align1 > align2 ? align1 : align2;
+      if (pos + j >= 511)	/* keep POS inside the window for either alignment */
+	pos = 510 - j - (random () & 7);
+      len1 = random () & 511;
+      if (pos >= len1 && (random () & 1))
+	len1 = pos + (random () & 7);
+      if (len1 + j >= 512)	/* keep string 1 and its NUL inside the window */
+	len1 = 511 - j - (random () & 7);
+      if (pos >= len1)
+	len2 = len1;	/* no difference before the end: both strings get the same length */
+      else
+	len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
+      j = (pos > len2 ? pos : len2) + align1 + 64;	/* number of bytes to fill, capped at the window size */
+      if (j > 512)
+	j = 512;
+      for (i = 0; i < j; ++i)
+	{
+	  p1[i] = tolower (random () & 255);	/* string 1 is stored in lower case */
+	  if (i < len1 + align1 && !p1[i])	/* no NUL before the intended end of the string */
+	    {
+	      p1[i] = tolower (random () & 255);
+	      if (!p1[i])
+		p1[i] = tolower (1 + (random () & 127));
+	    }
+	}
+      for (i = 0; i < j; ++i)
+	{
+	  p2[i] = toupper (random () & 255);	/* string 2 is stored in upper case */
+	  if (i < len2 + align2 && !p2[i])
+	    {
+	      p2[i] = toupper (random () & 255);
+	      if (!p2[i])
+		p2[i] = toupper (1 + (random () & 127));	/* store the converted byte, as done for P1 above */
+	    }
+	}
+
+      result = 0;
+      memcpy (p2 + align2, p1 + align1, pos);	/* the first POS bytes of both strings are identical */
+      if (pos < len1)
+	{
+	  if (tolower (p2[align2 + pos]) == p1[align1 + pos])	/* force a case-insensitive difference at POS */
+	    {
+	      p2[align2 + pos] = toupper (random () & 255);
+	      if (tolower (p2[align2 + pos]) == p1[align1 + pos])
+		p2[align2 + pos] = toupper (p1[align1 + pos]
+					    + 3 + (random () & 127));
+	    }
+
+	  if (p1[align1 + pos] < tolower (p2[align2 + pos]))
+	    result = -1;
+	  else
+	    result = 1;
+	}
+      p1[len1 + align1] = 0;	/* terminate both strings at their chosen lengths */
+      p2[len2 + align2] = 0;
+
+      FOR_EACH_IMPL (impl, 1)
+	{
+	  r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2));
+	  /* On 64-bit architectures whose ABI requires the callee to
+	     promote the int return value, check that this was done.  */
+	  asm ("" : "=g" (r) : "0" (r));
+	  if ((r == 0 && result)
+	      || (r < 0 && result >= 0)
+	      || (r > 0 && result <= 0))
+	    {
+	      error (0, 0, "Iteration %zu - wrong result in function %s (%zu, %zu, %zu, %zu, %zu) %ld != %d, p1 %p p2 %p",
+		     n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
+	      ret = 1;	/* record the failure but keep testing */
+	    }
+	}
+    }
+}
+/* Run the fixed-pattern cases, then the randomized ones; return the global failure flag RET.  */
+int
+test_main (void)
+{
+  size_t i;
+
+  test_init ();
+
+  printf ("%23s", "");	/* 23 spaces of padding before the implementation names */
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);	/* one tab-separated heading per implementation */
+  putchar ('\n');
+
+  for (i = 1; i < 16; ++i)	/* lengths 1..15; do_test reduces the alignment arguments modulo 8 */
+    {
+      do_test (i, i, i, 127, 0);	/* last argument: expected sign (0 equal, 1 greater, -1 less) */
+      do_test (i, i, i, 127, 1);
+      do_test (i, i, i, 127, -1);
+    }
+
+  for (i = 1; i < 10; ++i)	/* lengths 4, 8, ..., 1024 with both strings at offset 0 */
+    {
+      do_test (0, 0, 2 << i, 127, 0);
+      do_test (0, 0, 2 << i, 254, 0);	/* max_char 254 also generates bytes above 127 */
+      do_test (0, 0, 2 << i, 127, 1);
+      do_test (0, 0, 2 << i, 254, 1);
+      do_test (0, 0, 2 << i, 127, -1);
+      do_test (0, 0, 2 << i, 254, -1);
+    }
+
+  for (i = 1; i < 8; ++i)	/* lengths 16, 32, ..., 1024 with differing alignment arguments */
+    {
+      do_test (i, 2 * i, 8 << i, 127, 0);
+      do_test (2 * i, i, 8 << i, 254, 0);
+      do_test (i, 2 * i, 8 << i, 127, 1);
+      do_test (2 * i, i, 8 << i, 254, 1);
+      do_test (i, 2 * i, 8 << i, 127, -1);
+      do_test (2 * i, i, 8 << i, 254, -1);
+    }
+
+  do_random_tests ();
+  return ret;	/* nonzero if any check above reported a wrong result */
+}
+
+#include "../test-skeleton.c"

Modified: fsf/trunk/libc/string/test-strcasestr.c
==============================================================================
--- fsf/trunk/libc/string/test-strcasestr.c (original)
+++ fsf/trunk/libc/string/test-strcasestr.c Sat Jul 31 00:03:04 2010
@@ -97,7 +97,7 @@
   char *s1 = (char *) (buf1 + align1);
   char *s2 = (char *) (buf2 + align2);
 
-  static const char d[] = "1234567890abcdef";
+  static const char d[] = "1234567890abcxyz";
 #define dl (sizeof (d) - 1)
   char *ss2 = s2;
   for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)

Modified: fsf/trunk/libc/sysdeps/x86_64/Makefile
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/Makefile (original)
+++ fsf/trunk/libc/sysdeps/x86_64/Makefile Sat Jul 31 00:03:04 2010
@@ -12,7 +12,8 @@
 endif
 
 ifeq ($(subdir),string)
-sysdep_routines += cacheinfo
+sysdep_routines += cacheinfo strcasecmp_l-nonascii
+gen-as-const-headers += locale-defines.sym
 endif
 
 ifeq ($(subdir),elf)

Added: fsf/trunk/libc/sysdeps/x86_64/locale-defines.sym
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/locale-defines.sym (added)
+++ fsf/trunk/libc/sysdeps/x86_64/locale-defines.sym Sat Jul 31 00:03:04 2010
@@ -1,0 +1,11 @@
+#include <locale/localeinfo.h>
+#include <langinfo.h>
+#include <stddef.h>
+
+--
+
+LOCALE_T___LOCALES		offsetof (struct __locale_struct, __locales)
+LC_CTYPE
+_NL_CTYPE_NONASCII_CASE
+LOCALE_DATA_VALUES		offsetof (struct __locale_data, values)
+SIZEOF_VALUES			sizeof (((struct __locale_data *) 0)->values[0])

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S Sat Jul 31 00:03:04 2010
@@ -1,5 +1,5 @@
 /* strcmp with SSE4.2
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -24,7 +24,7 @@
 #ifdef USE_AS_STRNCMP
 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
    if the new counter > the old one or is 0.  */
-#define UPDATE_STRNCMP_COUNTER				\
+# define UPDATE_STRNCMP_COUNTER				\
 	/* calculate left number to compare */		\
 	lea	-16(%rcx, %r11), %r9;			\
 	cmp	%r9, %r11;				\
@@ -33,23 +33,23 @@
 	je	LABEL(strcmp_exitz_sse4_2);		\
 	mov	%r9, %r11
 
-#define STRCMP_SSE42	__strncmp_sse42
-#define STRCMP_SSSE3	__strncmp_ssse3
-#define STRCMP_SSE2	__strncmp_sse2
-#define __GI_STRCMP	__GI_strncmp
+# define STRCMP_SSE42	__strncmp_sse42
+# define STRCMP_SSSE3	__strncmp_ssse3
+# define STRCMP_SSE2	__strncmp_sse2
+# define __GI_STRCMP	__GI_strncmp
 #else
-#define UPDATE_STRNCMP_COUNTER
-#ifndef STRCMP
-#define STRCMP		strcmp
-#define STRCMP_SSE42	__strcmp_sse42
-#define STRCMP_SSSE3	__strcmp_ssse3
-#define STRCMP_SSE2	__strcmp_sse2
-#define __GI_STRCMP	__GI_strcmp
-#endif
+# define UPDATE_STRNCMP_COUNTER
+# ifndef STRCMP
+#  define STRCMP	strcmp
+#  define STRCMP_SSE42	__strcmp_sse42
+#  define STRCMP_SSSE3	__strcmp_ssse3
+#  define STRCMP_SSE2	__strcmp_sse2
+#  define __GI_STRCMP	__GI_strcmp
+# endif
 #endif
 
 #ifndef LABEL
-#define LABEL(l) L(l)
+# define LABEL(l) L(l)
 #endif
 
 /* Define multiple versions only for the definition in libc.  Don't
@@ -101,7 +101,7 @@
 
 	/* Put all SSE 4.2 functions together.  */
 	.section .text.sse4.2,"ax",@progbits
-	.align 	16
+	.align	16
 	.type	STRCMP_SSE42, @function
 STRCMP_SSE42:
 	cfi_startproc
@@ -241,7 +241,7 @@
 
 /*
  * The following cases will be handled by ashr_1
- * rcx(offset of rsi)  rax(offset of rdi)   relative offset   	corresponding case
+ * rcx(offset of rsi)  rax(offset of rdi)   relative offset	corresponding case
  *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
  */
 	.p2align 4
@@ -320,7 +320,7 @@
 
 /*
  * The following cases will be handled by ashr_2
- * rcx(offset of rsi)  rax(offset of rdi)   relative offset   	corresponding case
+ * rcx(offset of rsi)  rax(offset of rdi)   relative offset	corresponding case
  *        n(14~15)            n -14         1(15 +(n-14) - n)         ashr_2
  */
 	.p2align 4
@@ -559,7 +559,7 @@
 /*
  * The following cases will be handled by ashr_5
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset      corresponding case
- *        n(11~15)          n - 11      	  4(15 +(n-11) - n)         ashr_5
+ *        n(11~15)          n - 11		  4(15 +(n-11) - n)         ashr_5
  */
 	.p2align 4
 LABEL(ashr_5_sse4_2):
@@ -640,7 +640,7 @@
 /*
  * The following cases will be handled by ashr_6
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset      corresponding case
- *        n(10~15)          n - 10      	  5(15 +(n-10) - n)         ashr_6
+ *        n(10~15)          n - 10		  5(15 +(n-10) - n)         ashr_6
  */
 	.p2align 4
 LABEL(ashr_6_sse4_2):
@@ -720,7 +720,7 @@
 /*
  * The following cases will be handled by ashr_7
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset      corresponding case
- *        n(9~15)          n - 9      	        6(15 +(n - 9) - n)         ashr_7
+ *        n(9~15)          n - 9		  6(15 +(n - 9) - n)         ashr_7
  */
 	.p2align 4
 LABEL(ashr_7_sse4_2):
@@ -800,7 +800,7 @@
 /*
  *  The following cases will be handled by ashr_8
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
- *        n(8~15)          n - 8      	        7(15 +(n - 8) - n)         ashr_8
+ *        n(8~15)          n - 8		  7(15 +(n - 8) - n)         ashr_8
  */
 	.p2align 4
 LABEL(ashr_8_sse4_2):
@@ -880,7 +880,7 @@
 /*
  *  The following cases will be handled by ashr_9
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
- *        n(7~15)          n - 7      	        8(15 +(n - 7) - n)         ashr_9
+ *        n(7~15)          n - 7		  8(15 +(n - 7) - n)         ashr_9
  */
 	.p2align 4
 LABEL(ashr_9_sse4_2):
@@ -961,7 +961,7 @@
 /*
  *  The following cases will be handled by ashr_10
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
- *        n(6~15)          n - 6      	        9(15 +(n - 6) - n)         ashr_10
+ *        n(6~15)          n - 6		  9(15 +(n - 6) - n)         ashr_10
  */
 	.p2align 4
 LABEL(ashr_10_sse4_2):
@@ -1041,7 +1041,7 @@
 /*
  *  The following cases will be handled by ashr_11
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
- *        n(5~15)          n - 5      	        10(15 +(n - 5) - n)         ashr_11
+ *        n(5~15)          n - 5		  10(15 +(n - 5) - n)         ashr_11
  */
 	.p2align 4
 LABEL(ashr_11_sse4_2):
@@ -1121,7 +1121,7 @@
 /*
  *  The following cases will be handled by ashr_12
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
- *        n(4~15)          n - 4      	        11(15 +(n - 4) - n)         ashr_12
+ *        n(4~15)          n - 4		  11(15 +(n - 4) - n)         ashr_12
  */
 	.p2align 4
 LABEL(ashr_12_sse4_2):
@@ -1201,7 +1201,7 @@
 /*
  *  The following cases will be handled by ashr_13
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
- *        n(3~15)          n - 3      	        12(15 +(n - 3) - n)         ashr_13
+ *        n(3~15)          n - 3		  12(15 +(n - 3) - n)         ashr_13
  */
 	.p2align 4
 LABEL(ashr_13_sse4_2):
@@ -1282,7 +1282,7 @@
 /*
  *  The following cases will be handled by ashr_14
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
- *        n(2~15)          n - 2      	        13(15 +(n - 2) - n)         ashr_14
+ *        n(2~15)          n - 2		  13(15 +(n - 2) - n)         ashr_14
  */
 	.p2align 4
 LABEL(ashr_14_sse4_2):
@@ -1363,7 +1363,7 @@
 /*
  *  The following cases will be handled by ashr_15
  *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
- *        n(1~15)          n - 1      	        14(15 +(n - 1) - n)         ashr_15
+ *        n(1~15)          n - 1		  14(15 +(n - 1) - n)         ashr_15
  */
 	.p2align 4
 LABEL(ashr_15_sse4_2):

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strstr.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strstr.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strstr.c Sat Jul 31 00:03:04 2010
@@ -174,7 +174,7 @@
   __m128i frag = __m128i_strloadu (p);
 
 #define UCLOW 0x4040404040404040ULL
-#define UCHIGH 0x5a5a5a5a5a5a5a5aULL
+#define UCHIGH 0x5b5b5b5b5b5b5b5bULL
 #define LCQWORD 0x2020202020202020ULL
   /* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'.  */
   __m128i r2 = _mm_cmpgt_epi8 (_mm_set1_epi64x (UCHIGH), frag);

Added: fsf/trunk/libc/sysdeps/x86_64/strcasecmp.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strcasecmp.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/strcasecmp.S Sat Jul 31 00:03:04 2010
@@ -1,0 +1,1 @@
+/* In strcasecmp_l.S.  */

Added: fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l-nonascii.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l-nonascii.c (added)
+++ fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l-nonascii.c Sat Jul 31 00:03:04 2010
@@ -1,0 +1,5 @@
+#include <string.h>
+
+#define __strcasecmp_l __strcasecmp_l_nonascii	/* build the C code under the name the assembly fallback jumps to */
+#define USE_IN_EXTENDED_LOCALE_MODEL    1	/* NOTE(review): presumably selects the locale-taking (_l) variant of the included file -- confirm */
+#include <string/strcasecmp.c>

Added: fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/strcasecmp_l.S Sat Jul 31 00:03:04 2010
@@ -1,0 +1,6 @@
+#define STRCMP __strcasecmp_l	/* name strcmp.S gives to the function it defines via ENTRY (BP_SYM (STRCMP)) */
+#define USE_AS_STRCASECMP_L	/* enables the case-insensitive additions in strcmp.S */
+#include "strcmp.S"
+
+weak_alias (__strcasecmp_l, strcasecmp_l)
+libc_hidden_def (strcasecmp_l)

Modified: fsf/trunk/libc/sysdeps/x86_64/strcmp.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strcmp.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/strcmp.S Sat Jul 31 00:03:04 2010
@@ -51,6 +51,15 @@
 	je	LABEL(strcmp_exitz);			\
 	mov	%r9, %r11
 
+#elif defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
+
+/* No support for strcasecmp outside libc so far since it is not needed.  */
+# ifdef NOT_IN_libc
+#  error "strcasecmp_l not implemented so far"
+# endif
+
+# define UPDATE_STRNCMP_COUNTER
 #else
 # define UPDATE_STRNCMP_COUNTER
 # ifndef STRCMP
@@ -62,6 +71,19 @@
 	.text
 #else
 	.section .text.ssse3,"ax",@progbits
+#endif
+
+#ifdef USE_AS_STRCASECMP_L
+ENTRY (__strcasecmp)	/* two-argument entry: supply the thread's locale, then continue in the _l code */
+	movq	__libc_tsd_LOCALE@gottpoff(%rip),%rax	/* thread-pointer-relative offset of __libc_tsd_LOCALE */
+	movq	%fs:(%rax),%rdx	/* load that thread-local value into %rdx, where the code below expects the locale */
+
+	/* 5-byte NOP.  NOTE(review): presumably pads this stub to 16 bytes so the aligned entry below follows directly -- confirm.  */
+	.byte	0x0f,0x1f,0x44,0x00,0x00
+END (__strcasecmp)
+weak_alias (__strcasecmp, strcasecmp)
+libc_hidden_def (__strcasecmp)
+	/* FALLTHROUGH to strcasecmp_l.  */
 #endif
 
 ENTRY (BP_SYM (STRCMP))
@@ -84,6 +106,18 @@
 	ret
 END (BP_SYM (STRCMP))
 #else	/* NOT_IN_libc */
+# ifdef USE_AS_STRCASECMP_L
+	/* We have to fall back on the C implementation for locales
+	   with encodings not matching ASCII for single bytes.  */
+#  if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+	movq	LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax	/* %rax = LC_CTYPE data of the locale passed in %rdx */
+#  else
+	movq	(%rdx), %rax	/* same load when both offsets are zero */
+#  endif
+	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)	/* mask must be nonzero or ZF is always set; NOTE(review): assumes a 0/1 flag -- confirm */
+	jne	__strcasecmp_l_nonascii	/* flag set: leave the comparison to the C implementation */
+# endif
+
 /*
  * This implementation uses SSE to compare up to 16 bytes at a time.
  */
@@ -99,6 +133,26 @@
 /* Use 64bit AND here to avoid long NOP padding.  */
 	and	$0x3f, %rcx		/* rsi alignment in cache line */
 	and	$0x3f, %rax		/* rdi alignment in cache line */
+# ifdef USE_AS_STRCASECMP_L
+	.section .rodata.cst16,"aM",@progbits,16
+	.align 16
+.Lbelowupper:	/* 0x40 ('A' - 1) in every byte */
+	.quad	0x4040404040404040
+	.quad	0x4040404040404040
+.Ltopupper:	/* 0x5b ('Z' + 1) in every byte */
+	.quad	0x5b5b5b5b5b5b5b5b
+	.quad	0x5b5b5b5b5b5b5b5b
+.Ltouppermask:	/* 0x20, the ASCII case bit; TOLOWER ORs it in, so despite the name it lowers case */
+	.quad	0x2020202020202020
+	.quad	0x2020202020202020
+	.previous
+	movdqa	.Lbelowupper(%rip), %xmm5	/* the three constants are loaded once into %xmm5..%xmm7 */
+# define UCLOW_reg %xmm5
+	movdqa	.Ltopupper(%rip), %xmm6
+# define UCHIGH_reg %xmm6
+	movdqa	.Ltouppermask(%rip), %xmm7
+# define LCQWORD_reg %xmm7
+# endif
 	cmp	$0x30, %ecx
 	ja	LABEL(crosscache)	/* rsi: 16-byte load will cross cache line */
 	cmp	$0x30, %eax
@@ -107,6 +161,26 @@
 	movlpd	(%rsi), %xmm2
 	movhpd	8(%rdi), %xmm1
 	movhpd	8(%rsi), %xmm2
+# ifdef USE_AS_STRCASECMP_L	/* TOLOWER folds ASCII 'A'..'Z' to lower case in both registers; clobbers %xmm8..%xmm11 */
+#  define TOLOWER(reg1, reg2) \
+	movdqa	reg1, %xmm8;					\
+	movdqa	UCHIGH_reg, %xmm9;				\
+	movdqa	reg2, %xmm10;					\
+	movdqa	UCHIGH_reg, %xmm11;				\
+	pcmpgtb	UCLOW_reg, %xmm8;	/* byte > 0x40 */	\
+	pcmpgtb	reg1, %xmm9;		/* 0x5b > byte */	\
+	pcmpgtb	UCLOW_reg, %xmm10;				\
+	pcmpgtb	reg2, %xmm11;					\
+	pand	%xmm9, %xmm8;		/* 0xff where upper case */	\
+	pand	%xmm11, %xmm10;					\
+	pand	LCQWORD_reg, %xmm8;	/* 0x20 where upper case */	\
+	pand	LCQWORD_reg, %xmm10;				\
+	por	%xmm8, reg1;		/* set the case bit */	\
+	por	%xmm10, reg2
+	TOLOWER (%xmm1, %xmm2)
+# else
+#  define TOLOWER(reg1, reg2)	/* expands to nothing when USE_AS_STRCASECMP_L is not defined */
+# endif
 	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
 	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
 	pcmpeqb	%xmm2, %xmm1		/* compare first 16 bytes for equality */
@@ -159,7 +233,13 @@
 	movdqa	(%rsi), %xmm1
 	pxor	%xmm0, %xmm0			/* clear %xmm0 for null char check */
 	pcmpeqb	%xmm1, %xmm0			/* Any null chars? */
+# ifndef USE_AS_STRCASECMP_L
 	pcmpeqb	(%rdi), %xmm1			/* compare 16 bytes for equality */
+# else
+	movdqa	(%rdi), %xmm2
+	TOLOWER (%xmm1, %xmm2)
+	pcmpeqb	%xmm2, %xmm1			/* compare 16 bytes for equality */
+# endif
 	psubb	%xmm0, %xmm1			/* packed sub of comparison results*/
 	pmovmskb %xmm1, %r9d
 	shr	%cl, %edx			/* adjust 0xffff for offset */
@@ -183,6 +263,7 @@
 LABEL(loop_ashr_0):
 	movdqa	(%rsi, %rcx), %xmm1
 	movdqa	(%rdi, %rcx), %xmm2
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -198,6 +279,7 @@
 	add	$16, %rcx
 	movdqa	(%rsi, %rcx), %xmm1
 	movdqa	(%rdi, %rcx), %xmm2
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -214,7 +296,7 @@
 
 /*
  * The following cases will be handled by ashr_1
- * rcx(offset of rsi)  rax(offset of rdi)   relative offset   	corresponding case
+ * rcx(offset of rsi)  rax(offset of rdi)   relative offset	corresponding case
  *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
  */
 	.p2align 4
@@ -224,6 +306,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
 	pslldq	$15, %xmm2		/* shift first string to align with second */
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
 	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
 	pmovmskb %xmm2, %r9d
@@ -263,6 +346,7 @@
 # else
 	palignr	$1, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -292,6 +376,7 @@
 # else
 	palignr	$1, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -351,6 +436,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$14, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -390,6 +476,7 @@
 # else
 	palignr	$2, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -420,6 +507,7 @@
 # else
 	palignr	$2, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -472,6 +560,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$13, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -512,6 +601,7 @@
 # else
 	palignr	$3, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -542,6 +632,7 @@
 # else
 	palignr	$3, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -594,6 +685,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$12, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -634,6 +726,7 @@
 # else
 	palignr	$4, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -664,6 +757,7 @@
 # else
 	palignr	$4, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -716,6 +810,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$11, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -756,6 +851,7 @@
 # else
 	palignr	$5, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -786,6 +882,7 @@
 # else
 	palignr	$5, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -838,6 +935,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$10, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -878,6 +976,7 @@
 # else
 	palignr	$6, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -908,6 +1007,7 @@
 # else
 	palignr	$6, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -960,6 +1060,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$9, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1000,6 +1101,7 @@
 # else
 	palignr	$7, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1030,6 +1132,7 @@
 # else
 	palignr	$7, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1082,6 +1185,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$8, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1122,6 +1226,7 @@
 # else
 	palignr	$8, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1152,6 +1257,7 @@
 # else
 	palignr	$8, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1204,6 +1310,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$7, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1244,6 +1351,7 @@
 # else
 	palignr	$9, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1274,6 +1382,7 @@
 # else
 	palignr	$9, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1326,6 +1435,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$6, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1366,6 +1476,7 @@
 # else
 	palignr	$10, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1396,6 +1507,7 @@
 # else
 	palignr	$10, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1448,6 +1560,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$5, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1488,6 +1601,7 @@
 # else
 	palignr	$11, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1518,6 +1632,7 @@
 # else
 	palignr	$11, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1570,6 +1685,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$4, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1610,6 +1726,7 @@
 # else
 	palignr	$12, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1640,6 +1757,7 @@
 # else
 	palignr	$12, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1692,6 +1810,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$3, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1732,6 +1851,7 @@
 # else
 	palignr	$13, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1762,6 +1882,7 @@
 # else
 	palignr	$13, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1814,6 +1935,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq  $2, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1854,6 +1976,7 @@
 # else
 	palignr	$14, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1884,6 +2007,7 @@
 # else
 	palignr	$14, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -1936,6 +2060,7 @@
 	movdqa	(%rsi), %xmm1
 	pcmpeqb	%xmm1, %xmm0
 	pslldq	$1, %xmm2
+	TOLOWER (%xmm1, %xmm2)
 	pcmpeqb	%xmm1, %xmm2
 	psubb	%xmm0, %xmm2
 	pmovmskb %xmm2, %r9d
@@ -1978,6 +2103,7 @@
 # else
 	palignr	$15, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -2008,6 +2134,7 @@
 # else
 	palignr	$15, %xmm3, %xmm2	/* merge into one 16byte value */
 # endif
+	TOLOWER (%xmm1, %xmm2)
 
 	pcmpeqb	%xmm1, %xmm0
 	pcmpeqb	%xmm2, %xmm1
@@ -2049,6 +2176,7 @@
 
 	.p2align 4
 LABEL(aftertail):
+	TOLOWER (%xmm1, %xmm3)
 	pcmpeqb	%xmm3, %xmm1
 	psubb	%xmm0, %xmm1
 	pmovmskb %xmm1, %edx
@@ -2075,6 +2203,12 @@
 # endif
 	movzbl	(%rsi, %rdx), %ecx
 	movzbl	(%rdi, %rdx), %eax
+
+# ifdef USE_AS_STRCASECMP_L
+	leaq	_nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
+	movl	(%rdx,%rcx,4), %ecx
+	movl	(%rdx,%rax,4), %eax
+# endif
 
 	sub	%ecx, %eax
 	ret