[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[commits] r14824 - in /fsf/trunk/libc: ./ resolv/ string/ sysdeps/i386/i486/bits/ sysdeps/i386/i686/multiarch/ sysdeps/ieee754/dbl-64/...



Author: eglibc
Date: Fri Aug  5 00:02:24 2011
New Revision: 14824

Log:
Import glibc-mainline for 2011-08-05

Added:
    fsf/trunk/libc/string/test-wcscmp.c
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat.S
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-c.c
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat.S
Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/resolv/res_init.c
    fsf/trunk/libc/string/Makefile
    fsf/trunk/libc/string/test-strcmp.c
    fsf/trunk/libc/string/test-string.h
    fsf/trunk/libc/sysdeps/i386/i486/bits/string.h
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/Makefile
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S
    fsf/trunk/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
    fsf/trunk/libc/sysdeps/posix/getaddrinfo.c
    fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Fri Aug  5 00:02:24 2011
@@ -1,3 +1,67 @@
+2011-08-04  Ulrich Drepper  <drepper@xxxxxxxxx>
+
+	* string/test-string.h (IMPL): Use __STRING to expand name and then
+	stringify it.
+
+	* string/test-strcmp.c: Unify most of the WIDE and !WIDE code.  Lots
+	of cleanups.
+
+2011-07-22  Liubov Dmitrieva  <liubov.dmitrieva@xxxxxxxxx>
+
+	* string/Makefile: Update.
+	(strop-tests): Append strncat.
+	* string/test-wcscmp.c: New file.
+	New comprehensive test for wcscmp.
+	* string/test-strcmp.c: Update.
+	(WIDE): New define.
+
+2011-07-22  Andreas Schwab  <schwab@xxxxxxxxxx>
+
+	* resolv/res_init.c (__res_vinit): Properly tokenize nameserver
+	line.
+
+2011-07-26  Andreas Schwab  <schwab@xxxxxxxxxx>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Don't discard result of
+	encoding to ACE if AI_IDN.
+
+2011-08-01  Jakub Jelinek  <jakub@xxxxxxxxxx>
+
+	* sysdeps/ieee754/dbl-64/k_rem_pio2.c (__kernel_rem_pio2): Fix up fq
+	to y conversion for prec 3 and __FLT_EVAL_METHOD__ != 0.
+
+2011-07-22  Liubov Dmitrieva  <liubov.dmitrieva@xxxxxxxxx>
+
+	* sysdeps/i386/i686/multiarch/strcat-sse2.S: Update.
+	Fix overflow bug in strncat.
+	* sysdeps/i386/i686/multiarch/strcpy-ssse3.S: Likewise.
+
+	* string/test-strncat.c: Update.
+	Add new tests for checking overflow bugs.
+
+2011-07-15  Liubov Dmitrieva  <liubov.dmitrieva@xxxxxxxxx>
+
+	* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
+	strcat-ssse3 strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c.
+	* sysdeps/i386/i686/multiarch/strcat.S: New file.
+	* sysdeps/i386/i686/multiarch/strcat-c.c: New file.
+	* sysdeps/i386/i686/multiarch/strcat-sse2.S: New file.
+	* sysdeps/i386/i686/multiarch/strcat-ssse3.S: New file.
+	* sysdeps/i386/i686/multiarch/strncat.S: New file.
+	* sysdeps/i386/i686/multiarch/strncat-sse2.S: New file.
+	* sysdeps/i386/i686/multiarch/strncat-ssse3.S: New file.
+
+	* sysdeps/i386/i686/multiarch/strcpy-ssse3.S
+	(USE_AS_STRCAT): Define.
+	Add strcat and strncat support.
+	* sysdeps/i386/i686/multiarch/strlen-sse2.S: Likewise.
+
+2011-07-25  Andreas Schwab  <schwab@xxxxxxxxxx>
+
+	* sysdeps/i386/i486/bits/string.h (__strncat_g): Correctly handle
+	__n bigger than INT_MAX+1.
+	(__strncmp_g): Likewise.
+
 2011-07-23  Ulrich Drepper  <drepper@xxxxxxxxx>
 
 	* posix/unistd.h: Define SEEK_DATA and SEEK_HOLE.

Modified: fsf/trunk/libc/resolv/res_init.c
==============================================================================
--- fsf/trunk/libc/resolv/res_init.c (original)
+++ fsf/trunk/libc/resolv/res_init.c Fri Aug  5 00:02:24 2011
@@ -318,7 +318,7 @@
 			struct in6_addr a6;
 			char *el;
 
-			if ((el = strchr(cp, '\n')) != NULL)
+			if ((el = strpbrk(cp, " \t\n")) != NULL)
 			    *el = '\0';
 			if ((el = strchr(cp, SCOPE_DELIMITER)) != NULL)
 			    *el = '\0';

Modified: fsf/trunk/libc/string/Makefile
==============================================================================
--- fsf/trunk/libc/string/Makefile (original)
+++ fsf/trunk/libc/string/Makefile Fri Aug  5 00:02:24 2011
@@ -46,7 +46,7 @@
 # for -fbounded-pointer compiles.  Glibc uses memchr for explicit checks.
 o-objects.ob	:= memcpy.o memset.o memchr.o
 
-strop-tests	:= memchr memcmp memcpy memmove mempcpy memset memccpy	\
+strop-tests	:=  wcscmp memchr memcmp memcpy memmove mempcpy memset memccpy	\
 		   stpcpy stpncpy strcat strchr strcmp strcpy strcspn	\
 		   strlen strncmp strncpy strpbrk strrchr strspn memmem	\
 		   strstr strcasestr strnlen strcasecmp strncasecmp	\

Modified: fsf/trunk/libc/string/test-strcmp.c
==============================================================================
--- fsf/trunk/libc/string/test-strcmp.c (original)
+++ fsf/trunk/libc/string/test-strcmp.c Fri Aug  5 00:02:24 2011
@@ -1,7 +1,8 @@
-/* Test and measure strcmp functions.
-   Copyright (C) 1999, 2002, 2003, 2005 Free Software Foundation, Inc.
+/* Test and measure STRCMP functions.
+   Copyright (C) 1999, 2002, 2003, 2005, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Written by Jakub Jelinek <jakub@xxxxxxxxxx>, 1999.
+   Added wcscmp support by Liubov Dmitrieva <liubov.dmitrieva@xxxxxxxxx>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -21,39 +22,73 @@
 #define TEST_MAIN
 #include "test-string.h"
 
-typedef int (*proto_t) (const char *, const char *);
-int simple_strcmp (const char *, const char *);
-int stupid_strcmp (const char *, const char *);
-
-IMPL (stupid_strcmp, 0)
-IMPL (simple_strcmp, 0)
-IMPL (strcmp, 1)
+#ifdef WIDE
+# include <inttypes.h>
+# include <wchar.h>
+
+# define L(str) L##str
+# define STRCMP wcscmp
+# define STRCPY wcscpy
+# define STRLEN wcslen
+# define MEMCPY wmemcpy
+# define SIMPLE_STRCMP simple_wcscmp
+# define STUPID_STRCMP stupid_wcscmp
+# define CHAR wchar_t
+# define UCHAR uint32_t
+# define CHARBYTES 4
+# define CHARBYTESLOG 2
+# define CHARALIGN __alignof__ (CHAR)
+# define MIDCHAR 0x7fffffff
+# define LARGECHAR 0xfffffffe
+#else
+# define L(str) str
+# define STRCMP strcmp
+# define STRCPY strcpy
+# define STRLEN strlen
+# define MEMCPY memcpy
+# define SIMPLE_STRCMP simple_strcmp
+# define STUPID_STRCMP stupid_strcmp
+# define CHAR char
+# define UCHAR unsigned char
+# define CHARBYTES 1
+# define CHARBYTESLOG 0
+# define CHARALIGN 1
+# define MIDCHAR 0x7f
+# define LARGECHAR 0xfe
+#endif
+typedef int (*proto_t) (const CHAR *, const CHAR *);
 
 int
-simple_strcmp (const char *s1, const char *s2)
+SIMPLE_STRCMP (const CHAR *s1, const CHAR *s2)
 {
   int ret;
 
-  while ((ret = *(unsigned char *) s1 - *(unsigned char *) s2++) == 0
-	 && *s1++);
+  while ((ret = *(UCHAR *) s1 - *(UCHAR *) s2++) == 0 && *s1++);
   return ret;
 }
 
 int
-stupid_strcmp (const char *s1, const char *s2)
-{
-  size_t ns1 = strlen (s1) + 1, ns2 = strlen (s2) + 1;
+STUPID_STRCMP (const CHAR *s1, const CHAR *s2)
+{
+  size_t ns1 = STRLEN (s1) + 1;
+  size_t ns2 = STRLEN (s2) + 1;
   size_t n = ns1 < ns2 ? ns1 : ns2;
   int ret = 0;
 
   while (n--)
-    if ((ret = *(unsigned char *) s1++ - *(unsigned char *) s2++) != 0)
+    if ((ret = *(UCHAR *) s1++ - *(UCHAR *) s2++) != 0)
       break;
   return ret;
 }
 
-static void
-do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
+IMPL (STUPID_STRCMP, 1)
+IMPL (SIMPLE_STRCMP, 1)
+IMPL (STRCMP, 1)
+
+static int
+check_result (impl_t *impl,
+	     const CHAR *s1, const CHAR *s2,
+	     int exp_result)
 {
   int result = CALL (impl, s1, s2);
   if ((exp_result == 0 && result != 0)
@@ -63,8 +98,19 @@
       error (0, 0, "Wrong result in function %s %d %d", impl->name,
 	     result, exp_result);
       ret = 1;
-      return;
-    }
+      return -1;
+    }
+
+  return 0;
+}
+
+static void
+do_one_test (impl_t *impl,
+	     const CHAR *s1, const CHAR *s2,
+	     int exp_result)
+{
+  if (check_result (impl, s1, s2, exp_result) < 0)
+    return;
 
   if (HP_TIMING_AVAIL)
     {
@@ -90,24 +136,28 @@
 	 int exp_result)
 {
   size_t i;
-  char *s1, *s2;
+
+  CHAR *s1, *s2;
 
   if (len == 0)
     return;
 
-  align1 &= 7;
-  if (align1 + len + 1 >= page_size)
+  align1 &= 63;
+  if (align1 + (len + 1) * CHARBYTES >= page_size)
     return;
 
-  align2 &= 7;
-  if (align2 + len + 1 >= page_size)
+  align2 &= 63;
+  if (align2 + (len + 1) * CHARBYTES >= page_size)
     return;
 
-  s1 = (char *) (buf1 + align1);
-  s2 = (char *) (buf2 + align2);
+  /* Put them close to the end of page.  */
+  i = align1 + CHARBYTES * (len + 2);
+  s1 = (CHAR *) (buf1 + ((page_size - i) / 16 * 16) + align1);
+  i = align2 + CHARBYTES * (len + 2);
+  s2 = (CHAR *) (buf2 + ((page_size - i) / 16 * 16)  + align2);
 
   for (i = 0; i < len; i++)
-    s1[i] = s2[i] = 1 + 23 * i % max_char;
+    s1[i] = s2[i] = 1 + (23 << ((CHARBYTES - 1) * 8)) * i % max_char;
 
   s1[len] = s2[len] = 0;
   s1[len + 1] = 23;
@@ -127,92 +177,116 @@
 static void
 do_random_tests (void)
 {
-  size_t i, j, n, align1, align2, pos, len1, len2;
-  int result;
-  long r;
-  unsigned char *p1 = buf1 + page_size - 512;
-  unsigned char *p2 = buf2 + page_size - 512;
-
-  for (n = 0; n < ITERATIONS; n++)
-    {
-      align1 = random () & 31;
-      if (random () & 1)
-	align2 = random () & 31;
-      else
-	align2 = align1 + (random () & 24);
-      pos = random () & 511;
-      j = align1 > align2 ? align1 : align2;
-      if (pos + j >= 511)
-	pos = 510 - j - (random () & 7);
-      len1 = random () & 511;
-      if (pos >= len1 && (random () & 1))
-        len1 = pos + (random () & 7);
-      if (len1 + j >= 512)
-        len1 = 511 - j - (random () & 7);
-      if (pos >= len1)
-	len2 = len1;
-      else
-	len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
-      j = (pos > len2 ? pos : len2) + align1 + 64;
-      if (j > 512)
-	j = 512;
-      for (i = 0; i < j; ++i)
-	{
-	  p1[i] = random () & 255;
-	  if (i < len1 + align1 && !p1[i])
-	    {
-	      p1[i] = random () & 255;
-	      if (!p1[i])
-		p1[i] = 1 + (random () & 127);
-	    }
-	}
-      for (i = 0; i < j; ++i)
-	{
-	  p2[i] = random () & 255;
-	  if (i < len2 + align2 && !p2[i])
-	    {
-	      p2[i] = random () & 255;
-	      if (!p2[i])
-		p2[i] = 1 + (random () & 127);
-	    }
-	}
-
-      result = 0;
-      memcpy (p2 + align2, p1 + align1, pos);
-      if (pos < len1)
-	{
-	  if (p2[align2 + pos] == p1[align1 + pos])
-	    {
-	      p2[align2 + pos] = random () & 255;
-	      if (p2[align2 + pos] == p1[align1 + pos])
-		p2[align2 + pos] = p1[align1 + pos] + 3 + (random () & 127);
-	    }
-
-	  if (p1[align1 + pos] < p2[align2 + pos])
-	    result = -1;
-	  else
-	    result = 1;
-	}
-      p1[len1 + align1] = 0;
-      p2[len2 + align2] = 0;
-
-      FOR_EACH_IMPL (impl, 1)
-	{
-	  r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2));
-	  /* Test whether on 64-bit architectures where ABI requires
-	     callee to promote has the promotion been done.  */
-	  asm ("" : "=g" (r) : "0" (r));
-	  if ((r == 0 && result)
-	      || (r < 0 && result >= 0)
-	      || (r > 0 && result <= 0))
-	    {
-	      error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
-		     n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
-	      ret = 1;
-	    }
-	}
-    }
-}
+  for (size_t a = 0; a < CHARBYTES; a += CHARALIGN)
+    for (size_t b = 0; b < CHARBYTES; b += CHARALIGN)
+      {
+	UCHAR *p1 = (UCHAR *) (buf1 + page_size - 512 * CHARBYTES - a);
+	UCHAR *p2 = (UCHAR *) (buf2 + page_size - 512 * CHARBYTES - b);
+
+	for (size_t n = 0; n < ITERATIONS; n++)
+	  {
+	    size_t align1 = random () & 31;
+	    size_t align2;
+	    if (random () & 1)
+	      align2 = random () & 31;
+	    else
+	      align2 = align1 + (random () & 24);
+	    size_t pos = random () & 511;
+	    size_t j = align1 > align2 ? align1 : align2;
+	    if (pos + j >= 511)
+	      pos = 510 - j - (random () & 7);
+	    size_t len1 = random () & 511;
+	    if (pos >= len1 && (random () & 1))
+	      len1 = pos + (random () & 7);
+	    if (len1 + j >= 512)
+	      len1 = 511 - j - (random () & 7);
+	    size_t len2;
+	    if (pos >= len1)
+	      len2 = len1;
+	    else
+	      len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
+	    j = (pos > len2 ? pos : len2) + align1 + 64;
+	    if (j > 512)
+	      j = 512;
+	    for (size_t i = 0; i < j; ++i)
+	      {
+		p1[i] = random () & 255;
+		if (i < len1 + align1 && !p1[i])
+		  {
+		    p1[i] = random () & 255;
+		    if (!p1[i])
+		      p1[i] = 1 + (random () & 127);
+		  }
+	      }
+	    for (size_t i = 0; i < j; ++i)
+	      {
+		p2[i] = random () & 255;
+		if (i < len2 + align2 && !p2[i])
+		  {
+		    p2[i] = random () & 255;
+		    if (!p2[i])
+		      p2[i] = 1 + (random () & 127);
+		  }
+	      }
+
+	    int result = 0;
+	    MEMCPY ((CHAR *) (p2 + align2), (CHAR *) (p1 + align1), pos);
+	    if (pos < len1)
+	      {
+		if (p2[align2 + pos] == p1[align1 + pos])
+		  {
+		    p2[align2 + pos] = random () & 255;
+		    if (p2[align2 + pos] == p1[align1 + pos])
+		      p2[align2 + pos] = p1[align1 + pos] + 3 + (random () & 127);
+		  }
+
+		if (p1[align1 + pos] < p2[align2 + pos])
+		  result = -1;
+		else
+		  result = 1;
+	      }
+	    p1[len1 + align1] = 0;
+	    p2[len2 + align2] = 0;
+
+	    FOR_EACH_IMPL (impl, 1)
+	      {
+		int r = CALL (impl, (CHAR *) (p1 + align1), (CHAR *) (p2 + align2));
+		/* Test whether on 64-bit architectures where ABI requires
+		   callee to promote has the promotion been done.  */
+		asm ("" : "=g" (r) : "0" (r));
+		if ((r == 0 && result)
+		    || (r < 0 && result >= 0)
+		    || (r > 0 && result <= 0))
+		  {
+		    error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %d != %d, p1 %p p2 %p",
+			   n, impl->name, (size_t) (p1 + align1) & 63, (size_t) (p1 + align2) & 63, len1, len2, pos, r, result, p1, p2);
+		    ret = 1;
+		  }
+	      }
+	  }
+      }
+}
+
+static void
+check (void)
+{
+  CHAR *s1 = (CHAR *) (buf1 + 0xb2c);
+  CHAR *s2 = (CHAR *) (buf1 + 0xfd8);
+
+  STRCPY(s1, L("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrs"));
+  STRCPY(s2, L("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkLMNOPQRSTUV"));
+
+  size_t l1 = STRLEN (s1);
+  size_t l2 = STRLEN (s2);
+  for (size_t i1 = 0; i1 < l1; i1++)
+    for (size_t i2 = 0; i2 < l2; i2++)
+      {
+	int exp_result = SIMPLE_STRCMP (s1 + i1, s2 + i2);
+	FOR_EACH_IMPL (impl, 0)
+	  check_result (impl, s1 + i1, s2 + i2, exp_result);
+      }
+}
+
 
 int
 test_main (void)
@@ -220,37 +294,40 @@
   size_t i;
 
   test_init ();
+  check();
 
   printf ("%23s", "");
   FOR_EACH_IMPL (impl, 0)
     printf ("\t%s", impl->name);
   putchar ('\n');
 
-  for (i = 1; i < 16; ++i)
-    {
-      do_test (i, i, i, 127, 0);
-      do_test (i, i, i, 127, 1);
-      do_test (i, i, i, 127, -1);
-    }
-
-  for (i = 1; i < 10; ++i)
-    {
-      do_test (0, 0, 2 << i, 127, 0);
-      do_test (0, 0, 2 << i, 254, 0);
-      do_test (0, 0, 2 << i, 127, 1);
-      do_test (0, 0, 2 << i, 254, 1);
-      do_test (0, 0, 2 << i, 127, -1);
-      do_test (0, 0, 2 << i, 254, -1);
+  for (i = 1; i < 32; ++i)
+    {
+      do_test (CHARBYTES * i, CHARBYTES * i, i, MIDCHAR, 0);
+      do_test (CHARBYTES * i, CHARBYTES * i, i, MIDCHAR, 1);
+      do_test (CHARBYTES * i, CHARBYTES * i, i, MIDCHAR, -1);
+    }
+
+  for (i = 1; i < 10 + CHARBYTESLOG; ++i)
+    {
+      do_test (0, 0, 2 << i, MIDCHAR, 0);
+      do_test (0, 0, 2 << i, LARGECHAR, 0);
+      do_test (0, 0, 2 << i, MIDCHAR, 1);
+      do_test (0, 0, 2 << i, LARGECHAR, 1);
+      do_test (0, 0, 2 << i, MIDCHAR, -1);
+      do_test (0, 0, 2 << i, LARGECHAR, -1);
+      do_test (0, CHARBYTES * i, 2 << i, MIDCHAR, 1);
+      do_test (CHARBYTES * i, CHARBYTES * (i + 1), 2 << i, LARGECHAR, 1);
     }
 
   for (i = 1; i < 8; ++i)
     {
-      do_test (i, 2 * i, 8 << i, 127, 0);
-      do_test (2 * i, i, 8 << i, 254, 0);
-      do_test (i, 2 * i, 8 << i, 127, 1);
-      do_test (2 * i, i, 8 << i, 254, 1);
-      do_test (i, 2 * i, 8 << i, 127, -1);
-      do_test (2 * i, i, 8 << i, 254, -1);
+      do_test (CHARBYTES * i, 2 * CHARBYTES * i, 8 << i, MIDCHAR, 0);
+      do_test (2 * CHARBYTES * i, CHARBYTES * i, 8 << i, LARGECHAR, 0);
+      do_test (CHARBYTES * i, 2 * CHARBYTES * i, 8 << i, MIDCHAR, 1);
+      do_test (2 * CHARBYTES * i, CHARBYTES * i, 8 << i, LARGECHAR, 1);
+      do_test (CHARBYTES * i, 2 * CHARBYTES * i, 8 << i, MIDCHAR, -1);
+      do_test (2 * CHARBYTES * i, CHARBYTES * i, 8 << i, LARGECHAR, -1);
     }
 
   do_random_tests ();

Modified: fsf/trunk/libc/string/test-string.h
==============================================================================
--- fsf/trunk/libc/string/test-string.h (original)
+++ fsf/trunk/libc/string/test-string.h Fri Aug  5 00:02:24 2011
@@ -1,5 +1,5 @@
 /* Test and measure string and memory functions.
-   Copyright (C) 1999, 2002, 2004, 2008 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2002, 2004, 2008, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Written by Jakub Jelinek <jakub@xxxxxxxxxx>, 1999.
 
@@ -18,6 +18,8 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <sys/cdefs.h>
+
 typedef struct
 {
   const char *name;
@@ -29,7 +31,7 @@
 #define IMPL(name, test) \
   impl_t tst_ ## name							\
   __attribute__ ((section ("impls"), aligned (sizeof (void *))))	\
-    = { #name, (void (*) (void))name, test };
+       = { __STRING (name), (void (*) (void))name, test };
 
 #ifdef TEST_MAIN
 

Added: fsf/trunk/libc/string/test-wcscmp.c
==============================================================================
--- fsf/trunk/libc/string/test-wcscmp.c (added)
+++ fsf/trunk/libc/string/test-wcscmp.c Fri Aug  5 00:02:24 2011
@@ -1,0 +1,2 @@
+#define WIDE 1
+#include "test-strcmp.c"

Modified: fsf/trunk/libc/sysdeps/i386/i486/bits/string.h
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i486/bits/string.h (original)
+++ fsf/trunk/libc/sysdeps/i386/i486/bits/string.h Fri Aug  5 00:02:24 2011
@@ -1,6 +1,6 @@
 /* Optimized, inlined string functions.  i486 version.
-   Copyright (C) 1997,1998,1999,2000,2001,2002,2003,2004,2007
-   	Free Software Foundation, Inc.
+   Copyright (C) 1997,1998,1999,2000,2001,2002,2003,2004,2007,2011
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -1058,8 +1058,8 @@
      "movl %4, %3\n\t"
      "decl %1\n\t"
      "1:\n\t"
-     "decl	%3\n\t"
-     "js	2f\n\t"
+     "subl	$1,%3\n\t"
+     "jc	2f\n\t"
      "movb	(%2),%b0\n\t"
      "movsb\n\t"
      "testb	%b0,%b0\n\t"
@@ -1078,8 +1078,8 @@
      "leal	1(%1),%1\n\t"
      "jne	1b\n"
      "2:\n\t"
-     "decl	%3\n\t"
-     "js	3f\n\t"
+     "subl	$1,%3\n\t"
+     "jc	3f\n\t"
      "movb	(%2),%b0\n\t"
      "leal	1(%2),%2\n\t"
      "movb	%b0,(%1)\n\t"
@@ -1219,8 +1219,8 @@
   register int __res;
   __asm__ __volatile__
     ("1:\n\t"
-     "decl	%3\n\t"
-     "js	2f\n\t"
+     "subl	$1,%3\n\t"
+     "jc	2f\n\t"
      "movb	(%1),%b0\n\t"
      "incl	%1\n\t"
      "cmpb	%b0,(%2)\n\t"

Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/Makefile
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/Makefile (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/Makefile Fri Aug  5 00:02:24 2011
@@ -12,7 +12,8 @@
 		   memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
 		   strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
 		   strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
-		   strncpy-sse2 stpcpy-sse2 stpncpy-sse2
+		   strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
+		   strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
 CFLAGS-varshift.c += -msse4

Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S Fri Aug  5 00:02:24 2011
@@ -1,0 +1,1244 @@
+/* strcat with SSE2
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+
+# define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifdef SHARED
+#  define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
+	jump table with relative offsets.  INDEX is a register contains the
+	index into the jump table.   SCALE is the scale of INDEX. */
+
+#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
+	/* We first load PC into ECX.  */	\
+	call	__i686.get_pc_thunk.cx;	\
+	/* Get the address of the jump table.  */	\
+	addl	$(TABLE - .), %ecx;	\
+	/* Get the entry and convert the relative offset to the	\
+	absolute address.  */	\
+	addl	(%ecx,INDEX,SCALE), %ecx;	\
+	/* We loaded the jump table and adjuested ECX. Go.  */	\
+	jmp	*%ecx
+# else
+#  define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table.  TABLE is a jump table with
+	absolute offsets.  INDEX is a register contains the index into the
+	jump table.  SCALE is the scale of INDEX. */
+
+#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
+	jmp	*TABLE(,INDEX,SCALE)
+# endif
+
+# ifndef STRCAT
+#  define STRCAT  __strcat_sse2
+# endif
+
+# define PARMS  4
+# define STR1  PARMS+4
+# define STR2  STR1+4
+
+# ifdef USE_AS_STRNCAT
+#  define LEN    STR2+8
+#  define STR3   STR1+4
+# else
+#  define STR3   STR1
+# endif
+
+# define USE_AS_STRCAT
+# ifdef USE_AS_STRNCAT
+#  define RETURN  POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
+# else
+#  define RETURN  POP(%esi); ret; CFI_PUSH(%esi);
+# endif
+
+.text
+ENTRY (STRCAT)
+	PUSH	(%esi)
+	mov	STR1(%esp), %eax
+	mov	STR2(%esp), %esi
+# ifdef USE_AS_STRNCAT
+	PUSH	(%ebx)
+	movl	LEN(%esp), %ebx
+	test	%ebx, %ebx
+	jz	L(ExitZero)
+# endif
+	cmpb	$0, (%esi)
+	mov	%esi, %ecx
+	mov	%eax, %edx
+	jz	L(ExitZero)
+
+	and	$63, %ecx
+	and	$63, %edx
+	cmp	$32, %ecx
+	ja	L(StrlenCore7_1)
+	cmp	$48, %edx
+	ja	L(alignment_prolog)
+
+	pxor	%xmm0, %xmm0
+	pxor	%xmm4, %xmm4
+	pxor	%xmm7, %xmm7
+	movdqu	(%eax), %xmm1
+	movdqu	(%esi), %xmm5
+	pcmpeqb	%xmm1, %xmm0
+	movdqu	16(%esi), %xmm6
+	pmovmskb %xmm0, %ecx
+	pcmpeqb	%xmm5, %xmm4
+	pcmpeqb	%xmm6, %xmm7
+	test	%ecx, %ecx
+	jnz	L(exit_less16_)
+	mov	%eax, %ecx
+	and	$-16, %eax
+	jmp	L(loop_prolog)
+
+L(alignment_prolog):
+	pxor	%xmm0, %xmm0
+	pxor	%xmm4, %xmm4
+	mov	%edx, %ecx
+	pxor	%xmm7, %xmm7
+	and	$15, %ecx
+	and	$-16, %eax
+	pcmpeqb	(%eax), %xmm0
+	movdqu	(%esi), %xmm5
+	movdqu	16(%esi), %xmm6
+	pmovmskb %xmm0, %edx
+	pcmpeqb	%xmm5, %xmm4
+	shr	%cl, %edx
+	pcmpeqb	%xmm6, %xmm7
+	test	%edx, %edx
+	jnz	L(exit_less16)
+	add	%eax, %ecx
+
+	pxor	%xmm0, %xmm0
+L(loop_prolog):
+	pxor	%xmm1, %xmm1
+	pxor	%xmm2, %xmm2
+	pxor	%xmm3, %xmm3
+	.p2align 4
+L(align16_loop):
+	pcmpeqb	16(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit16)
+
+	pcmpeqb	32(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit32)
+
+	pcmpeqb	48(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit48)
+
+	pcmpeqb	64(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	64(%eax), %eax
+	test	%edx, %edx
+	jz	L(align16_loop)
+	bsf	%edx, %edx
+	add	%edx, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit16):
+	bsf	%edx, %edx
+	lea	16(%eax, %edx), %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit32):
+	bsf	%edx, %edx
+	lea	32(%eax, %edx), %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit48):
+	bsf	%edx, %edx
+	lea	48(%eax, %edx), %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_less16):
+	bsf	%edx, %edx
+	add	%ecx, %eax
+	add	%edx, %eax
+	jmp	L(StartStrcpyPart)
+
+	.p2align 4
+L(exit_less16_):
+	bsf	%ecx, %ecx
+	add	%ecx, %eax
+
+	.p2align 4
+L(StartStrcpyPart):
+	pmovmskb %xmm4, %edx
+# ifdef USE_AS_STRNCAT
+	cmp	$16, %ebx
+	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16BytesTail1)
+
+	movdqu	%xmm5, (%eax)
+	pmovmskb %xmm7, %edx
+# ifdef USE_AS_STRNCAT
+	cmp	$32, %ebx
+	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To32Bytes1)
+
+	mov	%esi, %ecx
+	and	$-16, %esi
+	and	$15, %ecx
+	pxor	%xmm0, %xmm0
+# ifdef USE_AS_STRNCAT
+	add	%ecx, %ebx
+# endif
+	sub	%ecx, %eax
+	jmp	L(Unalign16Both)
+
+L(StrlenCore7_1):
+	mov	%eax, %ecx
+	pxor	%xmm0, %xmm0
+	and	$15, %ecx
+	and	$-16, %eax
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	shr	%cl, %edx
+	test	%edx, %edx
+	jnz	L(exit_less16_1)
+	add	%eax, %ecx
+
+	pxor	%xmm0, %xmm0
+	pxor	%xmm1, %xmm1
+	pxor	%xmm2, %xmm2
+	pxor	%xmm3, %xmm3
+
+	.p2align 4
+L(align16_loop_1):
+	pcmpeqb	16(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(exit16_1)
+
+	pcmpeqb	32(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	test	%edx, %edx
+	jnz	L(exit32_1)
+
+	pcmpeqb	48(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
+	jnz	L(exit48_1)
+
+	pcmpeqb	64(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	64(%eax), %eax
+	test	%edx, %edx
+	jz	L(align16_loop_1)
+	bsf	%edx, %edx
+	add	%edx, %eax
+	jmp	L(StartStrcpyPart_1)
+
+	.p2align 4
+L(exit16_1):
+	bsf	%edx, %edx
+	lea	16(%eax, %edx), %eax
+	jmp	L(StartStrcpyPart_1)
+
+	.p2align 4
+L(exit32_1):
+	bsf	%edx, %edx
+	lea	32(%eax, %edx), %eax
+	jmp	L(StartStrcpyPart_1)
+
+	.p2align 4
+L(exit48_1):
+	bsf	%edx, %edx
+	lea	48(%eax, %edx), %eax
+	jmp	L(StartStrcpyPart_1)
+
+	.p2align 4
+L(exit_less16_1):
+	bsf	%edx, %edx
+	add	%ecx, %eax
+	add	%edx, %eax
+
+	.p2align 4
+L(StartStrcpyPart_1):
+	mov	%esi, %ecx
+	and	$15, %ecx
+	and	$-16, %esi
+	pxor	%xmm0, %xmm0
+	pxor	%xmm1, %xmm1
+
+# ifdef USE_AS_STRNCAT
+	cmp	$48, %ebx
+	ja      L(BigN)
+# endif
+	pcmpeqb	(%esi), %xmm1
+# ifdef USE_AS_STRNCAT
+	add	%ecx, %ebx
+# endif
+	pmovmskb %xmm1, %edx
+	shr	%cl, %edx
+# ifdef USE_AS_STRNCAT
+	cmp	$16, %ebx
+	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16BytesTail)
+
+	pcmpeqb	16(%esi), %xmm0
+	pmovmskb %xmm0, %edx
+# ifdef USE_AS_STRNCAT
+	cmp	$32, %ebx
+	jbe	L(CopyFrom1To32BytesCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To32Bytes)
+
+	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
+	movdqu	%xmm1, (%eax)
+	sub	%ecx, %eax
+
+	.p2align 4
+L(Unalign16Both):
+	mov	$16, %ecx
+	movdqa	(%esi, %ecx), %xmm1
+	movaps	16(%esi, %ecx), %xmm2
+	movdqu	%xmm1, (%eax, %ecx)
+	pcmpeqb	%xmm2, %xmm0
+	pmovmskb %xmm0, %edx
+	add	$16, %ecx
+# ifdef USE_AS_STRNCAT
+	sub	$48, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+L(Unalign16BothBigN):
+	movaps	16(%esi, %ecx), %xmm3
+	movdqu	%xmm2, (%eax, %ecx)
+	pcmpeqb	%xmm3, %xmm0
+	pmovmskb %xmm0, %edx
+	add	$16, %ecx
+# ifdef USE_AS_STRNCAT
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%esi, %ecx), %xmm4
+	movdqu	%xmm3, (%eax, %ecx)
+	pcmpeqb	%xmm4, %xmm0
+	pmovmskb %xmm0, %edx
+	add	$16, %ecx
+# ifdef USE_AS_STRNCAT
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%esi, %ecx), %xmm1
+	movdqu	%xmm4, (%eax, %ecx)
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %edx
+	add	$16, %ecx
+# ifdef USE_AS_STRNCAT
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%esi, %ecx), %xmm2
+	movdqu	%xmm1, (%eax, %ecx)
+	pcmpeqb	%xmm2, %xmm0
+	pmovmskb %xmm0, %edx
+	add	$16, %ecx
+# ifdef USE_AS_STRNCAT
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%esi, %ecx), %xmm3
+	movdqu	%xmm2, (%eax, %ecx)
+	pcmpeqb	%xmm3, %xmm0
+	pmovmskb %xmm0, %edx
+	add	$16, %ecx
+# ifdef USE_AS_STRNCAT
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+
+	movdqu	%xmm3, (%eax, %ecx)
+	mov	%esi, %edx
+	lea	16(%esi, %ecx), %esi
+	and	$-0x40, %esi
+	sub	%esi, %edx
+	sub	%edx, %eax
+# ifdef USE_AS_STRNCAT
+	lea	128(%ebx, %edx), %ebx
+# endif
+	movaps	(%esi), %xmm2
+	movaps	%xmm2, %xmm4
+	movaps	16(%esi), %xmm5
+	movaps	32(%esi), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	48(%esi), %xmm7
+	pminub	%xmm5, %xmm2
+	pminub	%xmm7, %xmm3
+	pminub	%xmm2, %xmm3
+	pcmpeqb	%xmm0, %xmm3
+	pmovmskb %xmm3, %edx
+# ifdef USE_AS_STRNCAT
+	sub	$64, %ebx
+	jbe	L(UnalignedLeaveCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jnz	L(Unaligned64Leave)
+
+	.p2align 4
+L(Unaligned64Loop_start):
+	add	$64, %eax
+	add	$64, %esi
+	movdqu	%xmm4, -64(%eax)
+	movaps	(%esi), %xmm2
+	movdqa	%xmm2, %xmm4
+	movdqu	%xmm5, -48(%eax)
+	movaps	16(%esi), %xmm5
+	pminub	%xmm5, %xmm2
+	movaps	32(%esi), %xmm3
+	movdqu	%xmm6, -32(%eax)
+	movaps	%xmm3, %xmm6
+	movdqu	%xmm7, -16(%eax)
+	movaps	48(%esi), %xmm7
+	pminub	%xmm7, %xmm3
+	pminub	%xmm2, %xmm3
+	pcmpeqb	%xmm0, %xmm3
+	pmovmskb %xmm3, %edx
+# ifdef USE_AS_STRNCAT
+	sub	$64, %ebx
+	jbe	L(UnalignedLeaveCase2OrCase3)
+# endif
+	test	%edx, %edx
+	jz	L(Unaligned64Loop_start)
+
+L(Unaligned64Leave):
+	pxor	%xmm1, %xmm1
+
+	pcmpeqb	%xmm4, %xmm0
+	pcmpeqb	%xmm5, %xmm1
+	pmovmskb %xmm0, %edx
+	pmovmskb %xmm1, %ecx
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16BytesUnaligned_0)
+	test	%ecx, %ecx
+	jnz	L(CopyFrom1To16BytesUnaligned_16)
+
+	pcmpeqb	%xmm6, %xmm0
+	pcmpeqb	%xmm7, %xmm1
+	pmovmskb %xmm0, %edx
+	pmovmskb %xmm1, %ecx
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16BytesUnaligned_32)
+
+	bsf	%ecx, %edx
+	movdqu	%xmm4, (%eax)
+	movdqu	%xmm5, 16(%eax)
+	movdqu	%xmm6, 32(%eax)
+	add	$48, %esi
+	add	$48, %eax
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+# ifdef USE_AS_STRNCAT
+	.p2align 4
+L(BigN):
+	pcmpeqb	(%esi), %xmm1
+	pmovmskb %xmm1, %edx
+	shr	%cl, %edx
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16BytesTail)
+
+	pcmpeqb	16(%esi), %xmm0
+	pmovmskb %xmm0, %edx
+	test	%edx, %edx
+	jnz	L(CopyFrom1To32Bytes)
+
+	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
+	movdqu	%xmm1, (%eax)
+	sub	%ecx, %eax
+	sub     $48, %ebx
+	add     %ecx, %ebx
+
+	mov	$16, %ecx
+	movdqa	(%esi, %ecx), %xmm1
+	movaps	16(%esi, %ecx), %xmm2
+	movdqu	%xmm1, (%eax, %ecx)
+	pcmpeqb	%xmm2, %xmm0
+	pmovmskb %xmm0, %edx
+	add	$16, %ecx
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+	jmp	L(Unalign16BothBigN)
+# endif
+
+/*------------end of main part-------------------------------*/
+
+/* Case1 */
+	.p2align 4
+L(CopyFrom1To16Bytes):
+	add	%ecx, %eax
+	add	%ecx, %esi
+	bsf	%edx, %edx
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+	.p2align 4
+L(CopyFrom1To16BytesTail):
+	add	%ecx, %esi
+	bsf	%edx, %edx
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+	.p2align 4
+L(CopyFrom1To32Bytes1):
+	add	$16, %esi
+	add	$16, %eax
+L(CopyFrom1To16BytesTail1):
+	bsf	%edx, %edx
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+	.p2align 4
+L(CopyFrom1To32Bytes):
+	bsf	%edx, %edx
+	add	%ecx, %esi
+	add	$16, %edx
+	sub	%ecx, %edx
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+	.p2align 4
+L(CopyFrom1To16BytesUnaligned_0):
+	bsf	%edx, %edx
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+	.p2align 4
+L(CopyFrom1To16BytesUnaligned_16):
+	bsf	%ecx, %edx
+	movdqu	%xmm4, (%eax)
+	add	$16, %esi
+	add	$16, %eax
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+	.p2align 4
+L(CopyFrom1To16BytesUnaligned_32):
+	bsf	%edx, %edx
+	movdqu	%xmm4, (%eax)
+	movdqu	%xmm5, 16(%eax)
+	add	$32, %esi
+	add	$32, %eax
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+# ifdef USE_AS_STRNCAT
+
+	.p2align 4
+L(CopyFrom1To16BytesExit):
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+/* Case2 */
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2):
+	add	$16, %ebx
+	add	%ecx, %eax
+	add	%ecx, %esi
+	bsf	%edx, %edx
+	cmp	%ebx, %edx
+	jb	L(CopyFrom1To16BytesExit)
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+	.p2align 4
+L(CopyFrom1To32BytesCase2):
+	sub	%ecx, %ebx
+	add	%ecx, %esi
+	bsf	%edx, %edx
+	add	$16, %edx
+	sub	%ecx, %edx
+	cmp	%ebx, %edx
+	jb	L(CopyFrom1To16BytesExit)
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+L(CopyFrom1To16BytesTailCase2):
+	sub	%ecx, %ebx
+	add	%ecx, %esi
+	bsf	%edx, %edx
+	cmp	%ebx, %edx
+	jb	L(CopyFrom1To16BytesExit)
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+L(CopyFrom1To16BytesTail1Case2):
+	bsf	%edx, %edx
+	cmp	%ebx, %edx
+	jb	L(CopyFrom1To16BytesExit)
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+/* Case2 or Case3,  Case3 */
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2OrCase3):
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16BytesCase2)
+L(CopyFrom1To16BytesCase3):
+	add	$16, %ebx
+	add	%ecx, %eax
+	add	%ecx, %esi
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+	.p2align 4
+L(CopyFrom1To32BytesCase2OrCase3):
+	test	%edx, %edx
+	jnz	L(CopyFrom1To32BytesCase2)
+	sub	%ecx, %ebx
+	add	%ecx, %esi
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+	.p2align 4
+L(CopyFrom1To16BytesTailCase2OrCase3):
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16BytesTailCase2)
+	sub	%ecx, %ebx
+	add	%ecx, %esi
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+	.p2align 4
+L(CopyFrom1To32Bytes1Case2OrCase3):
+	add	$16, %eax
+	add	$16, %esi
+	sub	$16, %ebx
+L(CopyFrom1To16BytesTail1Case2OrCase3):
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16BytesTail1Case2)
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+# endif
+
+# ifdef USE_AS_STRNCAT
+	.p2align 4
+L(StrncatExit0):
+	movb	%bh, (%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+# endif
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit1):
+	movb	%bh, 1(%eax)
+# endif
+L(Exit1):
+# ifdef USE_AS_STRNCAT
+	movb	(%esi), %dh
+# endif
+	movb	%dh, (%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit2):
+	movb	%bh, 2(%eax)
+# endif
+L(Exit2):
+	movw	(%esi), %dx
+	movw	%dx, (%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit3):
+	movb	%bh, 3(%eax)
+# endif
+L(Exit3):
+	movw	(%esi), %cx
+	movw	%cx, (%eax)
+# ifdef USE_AS_STRNCAT
+	movb	2(%esi), %dh
+# endif
+	movb	%dh, 2(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit4):
+	movb	%bh, 4(%eax)
+# endif
+L(Exit4):
+	movl	(%esi), %edx
+	movl	%edx, (%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit5):
+	movb	%bh, 5(%eax)
+# endif
+L(Exit5):
+	movl	(%esi), %ecx
+# ifdef USE_AS_STRNCAT
+	movb	4(%esi), %dh
+# endif
+	movb	%dh, 4(%eax)
+	movl	%ecx, (%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit6):
+	movb	%bh, 6(%eax)
+# endif
+L(Exit6):
+	movl	(%esi), %ecx
+	movw	4(%esi), %dx
+	movl	%ecx, (%eax)
+	movw	%dx, 4(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit7):
+	movb	%bh, 7(%eax)
+# endif
+L(Exit7):
+	movl	(%esi), %ecx
+	movl	3(%esi), %edx
+	movl	%ecx, (%eax)
+	movl	%edx, 3(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit8):
+	movb	%bh, 8(%eax)
+# endif
+L(Exit8):
+	movlpd	(%esi), %xmm0
+	movlpd	%xmm0, (%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit9):
+	movb	%bh, 9(%eax)
+# endif
+L(Exit9):
+	movlpd	(%esi), %xmm0
+# ifdef USE_AS_STRNCAT
+	movb	8(%esi), %dh
+# endif
+	movb	%dh, 8(%eax)
+	movlpd	%xmm0, (%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit10):
+	movb	%bh, 10(%eax)
+# endif
+L(Exit10):
+	movlpd	(%esi), %xmm0
+	movw	8(%esi), %dx
+	movlpd	%xmm0, (%eax)
+	movw	%dx, 8(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit11):
+	movb	%bh, 11(%eax)
+# endif
+L(Exit11):
+	movlpd	(%esi), %xmm0
+	movl	7(%esi), %edx
+	movlpd	%xmm0, (%eax)
+	movl	%edx, 7(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit12):
+	movb	%bh, 12(%eax)
+# endif
+L(Exit12):
+	movlpd	(%esi), %xmm0
+	movl	8(%esi), %edx
+	movlpd	%xmm0, (%eax)
+	movl	%edx, 8(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit13):
+	movb	%bh, 13(%eax)
+# endif
+L(Exit13):
+	movlpd	(%esi), %xmm0
+	movlpd	5(%esi), %xmm1
+	movlpd	%xmm0, (%eax)
+	movlpd	%xmm1, 5(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit14):
+	movb	%bh, 14(%eax)
+# endif
+L(Exit14):
+	movlpd	(%esi), %xmm0
+	movlpd	6(%esi), %xmm1
+	movlpd	%xmm0, (%eax)
+	movlpd	%xmm1, 6(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit15):
+	movb	%bh, 15(%eax)
+# endif
+L(Exit15):
+	movlpd	(%esi), %xmm0
+	movlpd	7(%esi), %xmm1
+	movlpd	%xmm0, (%eax)
+	movlpd	%xmm1, 7(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit16):
+	movb	%bh, 16(%eax)
+# endif
+L(Exit16):
+	movdqu	(%esi), %xmm0
+	movdqu	%xmm0, (%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit17):
+	movb	%bh, 17(%eax)
+# endif
+L(Exit17):
+	movdqu	(%esi), %xmm0
+# ifdef USE_AS_STRNCAT
+	movb	16(%esi), %dh
+# endif
+	movdqu	%xmm0, (%eax)
+	movb	%dh, 16(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit18):
+	movb	%bh, 18(%eax)
+# endif
+L(Exit18):
+	movdqu	(%esi), %xmm0
+	movw	16(%esi), %cx
+	movdqu	%xmm0, (%eax)
+	movw	%cx, 16(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit19):
+	movb	%bh, 19(%eax)
+# endif
+L(Exit19):
+	movdqu	(%esi), %xmm0
+	movl	15(%esi), %ecx
+	movdqu	%xmm0, (%eax)
+	movl	%ecx, 15(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit20):
+	movb	%bh, 20(%eax)
+# endif
+L(Exit20):
+	movdqu	(%esi), %xmm0
+	movl	16(%esi), %ecx
+	movdqu	%xmm0, (%eax)
+	movl	%ecx, 16(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit21):
+	movb	%bh, 21(%eax)
+# endif
+L(Exit21):
+	movdqu	(%esi), %xmm0
+	movl	16(%esi), %ecx
+# ifdef USE_AS_STRNCAT
+	movb	20(%esi), %dh
+# endif
+	movdqu	%xmm0, (%eax)
+	movl	%ecx, 16(%eax)
+	movb	%dh, 20(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit22):
+	movb	%bh, 22(%eax)
+# endif
+L(Exit22):
+	movdqu	(%esi), %xmm0
+	movlpd	14(%esi), %xmm3
+	movdqu	%xmm0, (%eax)
+	movlpd	%xmm3, 14(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit23):
+	movb	%bh, 23(%eax)
+# endif
+L(Exit23):
+	movdqu	(%esi), %xmm0
+	movlpd	15(%esi), %xmm3
+	movdqu	%xmm0, (%eax)
+	movlpd	%xmm3, 15(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit24):
+	movb	%bh, 24(%eax)
+# endif
+L(Exit24):
+	movdqu	(%esi), %xmm0
+	movlpd	16(%esi), %xmm2
+	movdqu	%xmm0, (%eax)
+	movlpd	%xmm2, 16(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit25):
+	movb	%bh, 25(%eax)
+# endif
+L(Exit25):
+	movdqu	(%esi), %xmm0
+	movlpd	16(%esi), %xmm2
+# ifdef USE_AS_STRNCAT
+	movb	24(%esi), %dh
+# endif
+	movdqu	%xmm0, (%eax)
+	movlpd	%xmm2, 16(%eax)
+	movb	%dh, 24(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit26):
+	movb	%bh, 26(%eax)
+# endif
+L(Exit26):
+	movdqu	(%esi), %xmm0
+	movlpd	16(%esi), %xmm2
+	movw	24(%esi), %cx
+	movdqu	%xmm0, (%eax)
+	movlpd	%xmm2, 16(%eax)
+	movw	%cx, 24(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit27):
+	movb	%bh, 27(%eax)
+# endif
+L(Exit27):
+	movdqu	(%esi), %xmm0
+	movlpd	16(%esi), %xmm2
+	movl	23(%esi), %ecx
+	movdqu	%xmm0, (%eax)
+	movlpd	%xmm2, 16(%eax)
+	movl	%ecx, 23(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit28):
+	movb	%bh, 28(%eax)
+# endif
+L(Exit28):
+	movdqu	(%esi), %xmm0
+	movlpd	16(%esi), %xmm2
+	movl	24(%esi), %ecx
+	movdqu	%xmm0, (%eax)
+	movlpd	%xmm2, 16(%eax)
+	movl	%ecx, 24(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit29):
+	movb	%bh, 29(%eax)
+# endif
+L(Exit29):
+	movdqu	(%esi), %xmm0
+	movdqu	13(%esi), %xmm2
+	movdqu	%xmm0, (%eax)
+	movdqu	%xmm2, 13(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit30):
+	movb	%bh, 30(%eax)
+# endif
+L(Exit30):
+	movdqu	(%esi), %xmm0
+	movdqu	14(%esi), %xmm2
+	movdqu	%xmm0, (%eax)
+	movdqu	%xmm2, 14(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit31):
+	movb	%bh, 31(%eax)
+# endif
+L(Exit31):
+	movdqu	(%esi), %xmm0
+	movdqu	15(%esi), %xmm2
+	movdqu	%xmm0, (%eax)
+	movdqu	%xmm2, 15(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit32):
+	movb	%bh, 32(%eax)
+# endif
+L(Exit32):
+	movdqu	(%esi), %xmm0
+	movdqu	16(%esi), %xmm2
+	movdqu	%xmm0, (%eax)
+	movdqu	%xmm2, 16(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+# ifdef USE_AS_STRNCAT
+
+	.p2align 4
+L(UnalignedLeaveCase2OrCase3):
+	test	%edx, %edx
+	jnz	L(Unaligned64LeaveCase2)
+L(Unaligned64LeaveCase3):
+	lea	64(%ebx), %ecx
+	and	$-16, %ecx
+	add	$48, %ebx
+	jl	L(CopyFrom1To16BytesCase3)
+	movdqu	%xmm4, (%eax)
+	sub	$16, %ebx
+	jb	L(CopyFrom1To16BytesCase3)
+	movdqu	%xmm5, 16(%eax)
+	sub	$16, %ebx
+	jb	L(CopyFrom1To16BytesCase3)
+	movdqu	%xmm6, 32(%eax)
+	sub	$16, %ebx
+	jb	L(CopyFrom1To16BytesCase3)
+	movdqu	%xmm7, 48(%eax)
+	xor	%bh, %bh
+	movb	%bh, 64(%eax)
+	mov	STR3(%esp), %eax
+	RETURN
+
+	.p2align 4
+L(Unaligned64LeaveCase2):
+	xor	%ecx, %ecx
+	pcmpeqb	%xmm4, %xmm0
+	pmovmskb %xmm0, %edx
+	add	$48, %ebx
+	jle	L(CopyFrom1To16BytesCase2OrCase3)
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+
+	pcmpeqb	%xmm5, %xmm0
+	pmovmskb %xmm0, %edx
+	movdqu	%xmm4, (%eax)
+	add	$16, %ecx
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+
+	pcmpeqb	%xmm6, %xmm0
+	pmovmskb %xmm0, %edx
+	movdqu	%xmm5, 16(%eax)
+	add	$16, %ecx
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+	test	%edx, %edx
+	jnz	L(CopyFrom1To16Bytes)
+
+	pcmpeqb	%xmm7, %xmm0
+	pmovmskb %xmm0, %edx
+	movdqu	%xmm6, 32(%eax)
+	lea	16(%eax, %ecx), %eax
+	lea	16(%esi, %ecx), %esi
+	bsf	%edx, %edx
+	cmp	%ebx, %edx
+	jb	L(CopyFrom1To16BytesExit)
+	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+# endif
+	.p2align 4
+L(ExitZero):
+	RETURN
+
+END (STRCAT)
+
+	.p2align 4
+	.section .rodata
+L(ExitTable):
+	.int	JMPTBL(L(Exit1), L(ExitTable))
+	.int	JMPTBL(L(Exit2), L(ExitTable))
+	.int	JMPTBL(L(Exit3), L(ExitTable))
+	.int	JMPTBL(L(Exit4), L(ExitTable))
+	.int	JMPTBL(L(Exit5), L(ExitTable))
+	.int	JMPTBL(L(Exit6), L(ExitTable))
+	.int	JMPTBL(L(Exit7), L(ExitTable))
+	.int	JMPTBL(L(Exit8), L(ExitTable))
+	.int	JMPTBL(L(Exit9), L(ExitTable))
+	.int	JMPTBL(L(Exit10), L(ExitTable))
+	.int	JMPTBL(L(Exit11), L(ExitTable))
+	.int	JMPTBL(L(Exit12), L(ExitTable))
+	.int	JMPTBL(L(Exit13), L(ExitTable))
+	.int	JMPTBL(L(Exit14), L(ExitTable))
+	.int	JMPTBL(L(Exit15), L(ExitTable))
+	.int	JMPTBL(L(Exit16), L(ExitTable))
+	.int	JMPTBL(L(Exit17), L(ExitTable))
+	.int	JMPTBL(L(Exit18), L(ExitTable))
+	.int	JMPTBL(L(Exit19), L(ExitTable))
+	.int	JMPTBL(L(Exit20), L(ExitTable))
+	.int	JMPTBL(L(Exit21), L(ExitTable))
+	.int	JMPTBL(L(Exit22), L(ExitTable))
+	.int	JMPTBL(L(Exit23), L(ExitTable))
+	.int	JMPTBL(L(Exit24), L(ExitTable))
+	.int	JMPTBL(L(Exit25), L(ExitTable))
+	.int	JMPTBL(L(Exit26), L(ExitTable))
+	.int	JMPTBL(L(Exit27), L(ExitTable))
+	.int	JMPTBL(L(Exit28), L(ExitTable))
+	.int	JMPTBL(L(Exit29), L(ExitTable))
+	.int	JMPTBL(L(Exit30), L(ExitTable))
+	.int	JMPTBL(L(Exit31), L(ExitTable))
+	.int	JMPTBL(L(Exit32), L(ExitTable))
+# ifdef USE_AS_STRNCAT
+L(ExitStrncatTable):
+	.int	JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
+	.int	JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
+# endif
+#endif

Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S Fri Aug  5 00:02:24 2011
@@ -1,0 +1,573 @@
+/* strcat with SSSE3
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY	or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifndef STRCAT
+#  define STRCAT  __strcat_ssse3
+# endif
+
+# define PARMS  4
+# define STR1  PARMS+4
+# define STR2  STR1+4
+
+# ifdef USE_AS_STRNCAT
+#  define LEN STR2+8
+# endif
+
+# define USE_AS_STRCAT
+
+.text
+ENTRY (STRCAT)
+	PUSH	(%edi)
+	mov	STR1(%esp), %edi
+	mov	%edi, %edx
+
+# define RETURN  jmp L(StartStrcpyPart)
+# include "strlen-sse2.S"
+
+L(StartStrcpyPart):
+	mov	STR2(%esp), %ecx
+	lea	(%edi, %eax), %edx
+# ifdef USE_AS_STRNCAT
+	PUSH	(%ebx)
+	mov	LEN(%esp), %ebx
+	test	%ebx, %ebx
+	jz	L(StrncatExit0)
+	cmp	$8, %ebx
+	jbe	L(StrncatExit8Bytes)
+# endif
+	cmpb	$0, (%ecx)
+	jz	L(Exit1)
+	cmpb	$0, 1(%ecx)
+	jz	L(Exit2)
+	cmpb	$0, 2(%ecx)
+	jz	L(Exit3)
+	cmpb	$0, 3(%ecx)
+	jz	L(Exit4)
+	cmpb	$0, 4(%ecx)
+	jz	L(Exit5)
+	cmpb	$0, 5(%ecx)
+	jz	L(Exit6)
+	cmpb	$0, 6(%ecx)
+	jz	L(Exit7)
+	cmpb	$0, 7(%ecx)
+	jz	L(Exit8)
+	cmpb	$0, 8(%ecx)
+	jz	L(Exit9)
+# ifdef USE_AS_STRNCAT
+	cmp	$16, %ebx
+	jb	L(StrncatExit15Bytes)
+# endif
+	cmpb	$0, 9(%ecx)
+	jz	L(Exit10)
+	cmpb	$0, 10(%ecx)
+	jz	L(Exit11)
+	cmpb	$0, 11(%ecx)
+	jz	L(Exit12)
+	cmpb	$0, 12(%ecx)
+	jz	L(Exit13)
+	cmpb	$0, 13(%ecx)
+	jz	L(Exit14)
+	cmpb	$0, 14(%ecx)
+	jz	L(Exit15)
+	cmpb	$0, 15(%ecx)
+	jz	L(Exit16)
+# ifdef USE_AS_STRNCAT
+	cmp	$16, %ebx
+	je	L(StrncatExit16)
+
+#  define RETURN1	\
+	POP	(%ebx);	\
+	POP	(%edi);	\
+	ret;	\
+	CFI_PUSH	(%ebx);	\
+	CFI_PUSH	(%edi)
+#  define USE_AS_STRNCPY
+# else
+#  define RETURN1  POP (%edi); ret; CFI_PUSH (%edi)
+# endif
+# include "strcpy-ssse3.S"
+	.p2align 4
+L(CopyFrom1To16Bytes):
+	add	%esi, %edx
+	add	%esi, %ecx
+
+	POP	(%esi)
+	test	%al, %al
+	jz	L(ExitHigh)
+	test	$0x01, %al
+	jnz	L(Exit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	test	$0x10, %al
+	jnz	L(Exit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(ExitHigh):
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	test	$0x08, %ah
+	jnz	L(Exit12)
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	movlpd	(%ecx), %xmm0
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit1):
+	movb	%bh, 1(%edx)
+L(Exit1):
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit2):
+	movb	%bh, 2(%edx)
+L(Exit2):
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit3):
+	movb	%bh, 3(%edx)
+L(Exit3):
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit4):
+	movb	%bh, 4(%edx)
+L(Exit4):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit5):
+	movb	%bh, 5(%edx)
+L(Exit5):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit6):
+	movb	%bh, 6(%edx)
+L(Exit6):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit7):
+	movb	%bh, 7(%edx)
+L(Exit7):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit8):
+	movb	%bh, 8(%edx)
+L(Exit8):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit9):
+	movb	%bh, 9(%edx)
+L(Exit9):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movb	8(%ecx), %al
+	movb	%al, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit10):
+	movb	%bh, 10(%edx)
+L(Exit10):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movw	8(%ecx), %ax
+	movw	%ax, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit11):
+	movb	%bh, 11(%edx)
+L(Exit11):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	7(%ecx), %eax
+	movl	%eax, 7(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit12):
+	movb	%bh, 12(%edx)
+L(Exit12):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit13):
+	movb	%bh, 13(%edx)
+L(Exit13):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	5(%ecx), %xmm0
+	movlpd	%xmm0, 5(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit14):
+	movb	%bh, 14(%edx)
+L(Exit14):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	6(%ecx), %xmm0
+	movlpd	%xmm0, 6(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit15):
+	movb	%bh, 15(%edx)
+L(Exit15):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit16):
+	movb	%bh, 16(%edx)
+L(Exit16):
+	movlpd	(%ecx), %xmm0
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+# ifdef USE_AS_STRNCPY
+
+	CFI_PUSH(%esi)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2):
+	add	$16, %ebx
+	add	%esi, %ecx
+	lea	(%esi, %edx), %esi
+	lea	-9(%ebx), %edx
+	and	$1<<7, %dh
+	or	%al, %dh
+	test	%dh, %dh
+	lea	(%esi), %edx
+	POP	(%esi)
+	jz	L(ExitHighCase2)
+
+	test	$0x01, %al
+	jnz	L(Exit1)
+	cmp	$1, %ebx
+	je	L(StrncatExit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	cmp	$2, %ebx
+	je	L(StrncatExit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	cmp	$3, %ebx
+	je	L(StrncatExit3)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	cmp	$4, %ebx
+	je	L(StrncatExit4)
+	test	$0x10, %al
+	jnz	L(Exit5)
+	cmp	$5, %ebx
+	je	L(StrncatExit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	cmp	$6, %ebx
+	je	L(StrncatExit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	cmp	$7, %ebx
+	je	L(StrncatExit7)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	lea	7(%edx), %eax
+	cmpb	$1, (%eax)
+	sbb	$-1, %eax
+	xor	%cl, %cl
+	movb	%cl, (%eax)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(ExitHighCase2):
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	cmp	$9, %ebx
+	je	L(StrncatExit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	cmp	$10, %ebx
+	je	L(StrncatExit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	cmp	$11, %ebx
+	je	L(StrncatExit11)
+	test	$0x8, %ah
+	jnz	L(Exit12)
+	cmp	$12, %ebx
+	je	L(StrncatExit12)
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	cmp	$13, %ebx
+	je	L(StrncatExit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	cmp	$14, %ebx
+	je	L(StrncatExit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	cmp	$15, %ebx
+	je	L(StrncatExit15)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm1, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	CFI_PUSH(%esi)
+
+L(CopyFrom1To16BytesCase2OrCase3):
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase3):
+	add	$16, %ebx
+	add	%esi, %edx
+	add	%esi, %ecx
+
+	POP	(%esi)
+
+	cmp	$8, %ebx
+	ja	L(ExitHighCase3)
+	cmp	$1, %ebx
+	je	L(StrncatExit1)
+	cmp	$2, %ebx
+	je	L(StrncatExit2)
+	cmp	$3, %ebx
+	je	L(StrncatExit3)
+	cmp	$4, %ebx
+	je	L(StrncatExit4)
+	cmp	$5, %ebx
+	je	L(StrncatExit5)
+	cmp	$6, %ebx
+	je	L(StrncatExit6)
+	cmp	$7, %ebx
+	je	L(StrncatExit7)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movb	%bh, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(ExitHighCase3):
+	cmp	$9, %ebx
+	je	L(StrncatExit9)
+	cmp	$10, %ebx
+	je	L(StrncatExit10)
+	cmp	$11, %ebx
+	je	L(StrncatExit11)
+	cmp	$12, %ebx
+	je	L(StrncatExit12)
+	cmp	$13, %ebx
+	je	L(StrncatExit13)
+	cmp	$14, %ebx
+	je	L(StrncatExit14)
+	cmp	$15, %ebx
+	je	L(StrncatExit15)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm1, 8(%edx)
+	movb	%bh, 16(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit0):
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit15Bytes):
+	cmp	$9, %ebx
+	je	L(StrncatExit9)
+	cmpb	$0, 9(%ecx)
+	jz	L(Exit10)
+	cmp	$10, %ebx
+	je	L(StrncatExit10)
+	cmpb	$0, 10(%ecx)
+	jz	L(Exit11)
+	cmp	$11, %ebx
+	je	L(StrncatExit11)
+	cmpb	$0, 11(%ecx)
+	jz	L(Exit12)
+	cmp	$12, %ebx
+	je	L(StrncatExit12)
+	cmpb	$0, 12(%ecx)
+	jz	L(Exit13)
+	cmp	$13, %ebx
+	je	L(StrncatExit13)
+	cmpb	$0, 13(%ecx)
+	jz	L(Exit14)
+	cmp	$14, %ebx
+	je	L(StrncatExit14)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+	lea	14(%edx), %eax
+	cmpb	$1, (%eax)
+	sbb	$-1, %eax
+	movb	%bh, (%eax)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit8Bytes):
+	cmpb	$0, (%ecx)
+	jz	L(Exit1)
+	cmp	$1, %ebx
+	je	L(StrncatExit1)
+	cmpb	$0, 1(%ecx)
+	jz	L(Exit2)
+	cmp	$2, %ebx
+	je	L(StrncatExit2)
+	cmpb	$0, 2(%ecx)
+	jz	L(Exit3)
+	cmp	$3, %ebx
+	je	L(StrncatExit3)
+	cmpb	$0, 3(%ecx)
+	jz	L(Exit4)
+	cmp	$4, %ebx
+	je	L(StrncatExit4)
+	cmpb	$0, 4(%ecx)
+	jz	L(Exit5)
+	cmp	$5, %ebx
+	je	L(StrncatExit5)
+	cmpb	$0, 5(%ecx)
+	jz	L(Exit6)
+	cmp	$6, %ebx
+	je	L(StrncatExit6)
+	cmpb	$0, 6(%ecx)
+	jz	L(Exit7)
+	cmp	$7, %ebx
+	je	L(StrncatExit7)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	lea	7(%edx), %eax
+	cmpb	$1, (%eax)
+	sbb	$-1, %eax
+	movb	%bh, (%eax)
+	movl	%edi, %eax
+	RETURN1
+
+# endif
+END (STRCAT)
+#endif

Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat.S Fri Aug  5 00:02:24 2011
@@ -1,0 +1,130 @@
+/* Multiple versions of strcat
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef USE_AS_STRNCAT
+# ifndef STRCAT
+#  define STRCAT strcat
+# endif
+#endif
+
+#ifdef USE_AS_STRNCAT
+# define STRCAT_SSSE3	__strncat_ssse3
+# define STRCAT_SSE2		__strncat_sse2
+# define STRCAT_IA32		__strncat_ia32
+# define __GI_STRCAT		__GI_strncat
+#else
+# define STRCAT_SSSE3	__strcat_ssse3
+# define STRCAT_SSE2		__strcat_sse2
+# define STRCAT_IA32		__strcat_ia32
+# define __GI_STRCAT		__GI_strcat
+#endif
+
+
+/* Define multiple versions only for the definition in libc.  Don't
+   define multiple versions for strncat in static library since we
+   need strncat before the initialization happened.  */
+#ifndef NOT_IN_libc
+
+# ifdef SHARED
+	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+	.globl	__i686.get_pc_thunk.bx
+	.hidden	__i686.get_pc_thunk.bx
+	.p2align 4
+	.type	__i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+	movl	(%esp), %ebx
+	ret
+
+	.text
+ENTRY(STRCAT)
+	.type	STRCAT, @gnu_indirect_function
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	call	__i686.get_pc_thunk.bx
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+	jne	1f
+	call	__init_cpu_features
+1:	leal	STRCAT_IA32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	STRCAT_SSE2@GOTOFF(%ebx), %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	STRCAT_SSSE3@GOTOFF(%ebx), %eax
+2:	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	ret
+END(STRCAT)
+# else
+
+ENTRY(STRCAT)
+	.type	STRCAT, @gnu_indirect_function
+	cmpl	$0, KIND_OFFSET+__cpu_features
+	jne	1f
+	call	__init_cpu_features
+1:	leal	STRCAT_IA32, %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+	jz	2f
+	leal	STRCAT_SSE2, %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
+	jnz	2f
+	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+	jz	2f
+	leal	STRCAT_SSSE3, %eax
+2:	ret
+END(STRCAT)
+
+# endif
+
+# undef ENTRY
+# define ENTRY(name) \
+	.type STRCAT_IA32, @function; \
+	.align 16; \
+	STRCAT_IA32: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32
+
+# ifdef SHARED
+#  undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal strcat calls through a PLT.
+   The speedup we get from using SSSE3 instruction is likely eaten away
+   by the indirect call in the PLT.  */
+#  define libc_hidden_builtin_def(name) \
+	.globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32
+#  undef libc_hidden_def
+#  define libc_hidden_def(name) \
+	.globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32
+
+# endif
+#endif
+
+#ifndef USE_AS_STRNCAT
+# include "../../i486/strcat.S"
+#endif

Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S Fri Aug  5 00:02:24 2011
@@ -20,39 +20,39 @@
 
 
 #ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)                  \
-	cfi_adjust_cfa_offset (4);     \
+# ifndef USE_AS_STRCAT
+#  include <sysdep.h>
+
+#  define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
 	cfi_rel_offset (REG, 0)
 
-# define CFI_POP(REG)                   \
-	cfi_adjust_cfa_offset (-4);    \
+#  define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
 	cfi_restore (REG)
 
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCPY
-#  define STRCPY  __strcpy_ssse3
-# endif
-
-# ifdef USE_AS_STRNCPY
-#  define PARMS  8
-#  define ENTRANCE PUSH(%ebx)
-#  define RETURN  POP(%ebx); ret; CFI_PUSH(%ebx);
-#  define RETURN1  POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi)
-# else
-#  define PARMS  4
-#  define ENTRANCE
-#  define RETURN  ret
-#  define RETURN1  POP(%edi); ret; CFI_PUSH(%edi)
-# endif
-
-# define STR1  PARMS
-# define STR2  STR1+4
-# define LEN  STR2+4
+#  define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#  define POP(REG) popl REG; CFI_POP (REG)
+
+#  ifndef STRCPY
+#   define STRCPY  __strcpy_ssse3
+#  endif
+
+#  ifdef USE_AS_STRNCPY
+#   define PARMS  8
+#   define ENTRANCE PUSH(%ebx)
+#   define RETURN  POP(%ebx); ret; CFI_PUSH(%ebx);
+#   define RETURN1  POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi)
+#  else
+#   define PARMS  4
+#   define ENTRANCE
+#   define RETURN  ret
+#   define RETURN1  POP(%edi); ret; CFI_PUSH(%edi)
+#  endif
+
+#  define STR1  PARMS
+#  define STR2  STR1+4
+#  define LEN  STR2+4
 
 /* In this code following instructions are used for copying:
 	movb	- 1 byte
@@ -60,9 +60,9 @@
 	movl	- 4 byte
 	movlpd	- 8 byte
 	movaps	- 16 byte - requires 16 byte alignment
-	of	sourse and destination adresses.
+	of sourse and destination adresses.
 	16 byte alignment: adress is 32bit value,
-	right	four bit of adress shall be 0.
+	right four bit of adress shall be 0.
 */
 
 .text
@@ -70,13 +70,13 @@
 	ENTRANCE
 	mov	STR1(%esp), %edx
 	mov	STR2(%esp), %ecx
-# ifdef USE_AS_STRNCPY
+#  ifdef USE_AS_STRNCPY
 	movl	LEN(%esp), %ebx
 	test	%ebx, %ebx
 	jz	L(ExitTail0)
 	cmp	$8, %ebx
 	jbe	L(StrncpyExit8Bytes)
-# endif
+#  endif
 	cmpb	$0, (%ecx)
 	jz	L(ExitTail1)
 	cmpb	$0, 1(%ecx)
@@ -93,10 +93,10 @@
 	jz	L(ExitTail7)
 	cmpb	$0, 7(%ecx)
 	jz	L(ExitTail8)
-# ifdef USE_AS_STRNCPY
+#  ifdef USE_AS_STRNCPY
 	cmp	$16, %ebx
 	jb	L(StrncpyExit15Bytes)
-# endif
+#  endif
 	cmpb	$0, 8(%ecx)
 	jz	L(ExitTail9)
 	cmpb	$0, 9(%ecx)
@@ -111,18 +111,20 @@
 	jz	L(ExitTail14)
 	cmpb	$0, 14(%ecx)
 	jz	L(ExitTail15)
-# ifdef USE_AS_STRNCPY
+#  ifdef USE_AS_STRNCPY
 	cmp	$16, %ebx
 	je	L(ExitTail16)
-# endif
+#  endif
 	cmpb	$0, 15(%ecx)
 	jz	L(ExitTail16)
 
 	PUSH	(%edi)
 	mov	%edx, %edi
+# endif
 	PUSH	(%esi)
 # ifdef USE_AS_STRNCPY
 	mov	%ecx, %esi
+	sub	$16, %ebx
 	and	$0xf, %esi
 
 /* add 16 bytes ecx_shift to ebx */
@@ -159,7 +161,7 @@
 /* eax = 0: there isn't end of string from position esi to esi+15 */
 
 # ifdef USE_AS_STRNCPY
-	sub	$32, %ebx
+	sub	$16, %ebx
 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
 # endif
 	test	%eax, %eax
@@ -2217,12 +2219,17 @@
 	mov	$1, %esi
 	palignr	$15, %xmm1, %xmm6
 	movaps	%xmm6, (%edx)
+# ifdef USE_AS_STRCAT
+	jmp	L(CopyFrom1To16Bytes)
+# endif
+
+# ifndef USE_AS_STRCAT
 
 	.p2align 4
 L(CopyFrom1To16Bytes):
-# ifdef USE_AS_STRNCPY
+#  ifdef USE_AS_STRNCPY
 	add	$16, %ebx
-# endif
+#  endif
 	add	%esi, %edx
 	add	%esi, %ecx
 
@@ -2248,20 +2255,20 @@
 L(Exit8):
 	movlpd	(%ecx), %xmm0
 	movlpd	%xmm0, (%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	7(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$8, %ebx
 	lea	8(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2287,23 +2294,23 @@
 	movlpd	%xmm0, (%edx)
 	movlpd	8(%ecx), %xmm0
 	movlpd	%xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	15(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$16, %ebx
 	lea	16(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
-# ifdef USE_AS_STRNCPY
+#  ifdef USE_AS_STRNCPY
 
 	CFI_PUSH(%esi)
 
@@ -2425,46 +2432,46 @@
 	jl	L(Exit9)
 	je	L(Exit10)
 	jg	L(Exit11)
-# endif
+#  endif
 
 	.p2align 4
 L(Exit1):
 	movb	(%ecx), %al
 	movb	%al, (%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$1, %ebx
 	lea	1(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
 L(Exit2):
 	movw	(%ecx), %ax
 	movw	%ax, (%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	1(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$2, %ebx
 	lea	2(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2473,40 +2480,40 @@
 	movw	%ax, (%edx)
 	movb	2(%ecx), %al
 	movb	%al, 2(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	2(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$3, %ebx
 	lea	3(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
 L(Exit4):
 	movl	(%ecx), %eax
 	movl	%eax, (%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	3(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$4, %ebx
 	lea	4(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2515,20 +2522,20 @@
 	movl	%eax, (%edx)
 	movb	4(%ecx), %al
 	movb	%al, 4(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	4(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$5, %ebx
 	lea	5(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2537,20 +2544,20 @@
 	movl	%eax, (%edx)
 	movw	4(%ecx), %ax
 	movw	%ax, 4(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	5(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$6, %ebx
 	lea	6(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2559,20 +2566,20 @@
 	movl	%eax, (%edx)
 	movl	3(%ecx), %eax
 	movl	%eax, 3(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	6(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$7, %ebx
 	lea	7(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2581,20 +2588,20 @@
 	movlpd	%xmm0, (%edx)
 	movb	8(%ecx), %al
 	movb	%al, 8(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	8(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$9, %ebx
 	lea	9(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2603,20 +2610,20 @@
 	movlpd	%xmm0, (%edx)
 	movw	8(%ecx), %ax
 	movw	%ax, 8(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	9(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$10, %ebx
 	lea	10(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2625,20 +2632,20 @@
 	movlpd	%xmm0, (%edx)
 	movl	7(%ecx), %eax
 	movl	%eax, 7(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	10(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$11, %ebx
 	lea	11(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2647,20 +2654,20 @@
 	movlpd	%xmm0, (%edx)
 	movl	8(%ecx), %eax
 	movl	%eax, 8(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	11(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$12, %ebx
 	lea	12(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2669,20 +2676,20 @@
 	movlpd	%xmm0, (%edx)
 	movlpd	5(%ecx), %xmm0
 	movlpd	%xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	12(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$13, %ebx
 	lea	13(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2691,20 +2698,20 @@
 	movlpd	%xmm0, (%edx)
 	movlpd	6(%ecx), %xmm0
 	movlpd	%xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	13(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$14, %ebx
 	lea	14(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 	.p2align 4
@@ -2713,25 +2720,25 @@
 	movlpd	%xmm0, (%edx)
 	movlpd	7(%ecx), %xmm0
 	movlpd	%xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	14(%edx), %eax
-# else
+#  else
 	movl	%edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$15, %ebx
 	lea	15(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero1)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN1
 
 CFI_POP	(%edi)
 
-# ifdef USE_AS_STRNCPY
+#  ifdef USE_AS_STRNCPY
 	.p2align 4
 L(Fill0):
 	RETURN
@@ -2865,11 +2872,11 @@
 	je	L(Fill10)
 	jmp	L(Fill11)
 
-        CFI_PUSH(%edi)
+	CFI_PUSH	(%edi)
 
 	.p2align 4
 L(StrncpyFillTailWithZero1):
-        POP     (%edi)
+	POP	(%edi)
 L(StrncpyFillTailWithZero):
 	pxor	%xmm0, %xmm0
 	xor	%edx, %edx
@@ -2916,46 +2923,46 @@
 	movdqa	%xmm0, (%ecx)
 	lea	16(%ecx), %ecx
 	jmp	L(FillFrom1To16Bytes)
-# endif
+#  endif
 
 	.p2align 4
 L(ExitTail1):
 	movb	(%ecx), %al
 	movb	%al, (%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$1, %ebx
 	lea	1(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
 L(ExitTail2):
 	movw	(%ecx), %ax
 	movw	%ax, (%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	1(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$2, %ebx
 	lea	2(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -2964,40 +2971,40 @@
 	movw	%ax, (%edx)
 	movb	2(%ecx), %al
 	movb	%al, 2(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	2(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$3, %ebx
 	lea	3(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
 L(ExitTail4):
 	movl	(%ecx), %eax
 	movl	%eax, (%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	3(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$4, %ebx
 	lea	4(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3006,20 +3013,20 @@
 	movl	%eax, (%edx)
 	movb	4(%ecx), %al
 	movb	%al, 4(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	4(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$5, %ebx
 	lea	5(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3028,20 +3035,20 @@
 	movl	%eax, (%edx)
 	movw	4(%ecx), %ax
 	movw	%ax, 4(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	5(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$6, %ebx
 	lea	6(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3050,20 +3057,40 @@
 	movl	%eax, (%edx)
 	movl	3(%ecx), %eax
 	movl	%eax, 3(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	6(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$7, %ebx
 	lea	7(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
+	RETURN
+
+	.p2align 4
+L(ExitTail8):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+#  ifdef USE_AS_STPCPY
+	lea	7(%edx), %eax
+#  else
+	movl	%edx, %eax
+#  endif
+#  ifdef USE_AS_STRNCPY
+	sub	$8, %ebx
+	lea	8(%edx), %ecx
+	jnz	L(StrncpyFillTailWithZero)
+#   ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)
+	sbb	$-1, %eax
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3072,20 +3099,20 @@
 	movlpd	%xmm0, (%edx)
 	movb	8(%ecx), %al
 	movb	%al, 8(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	8(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$9, %ebx
 	lea	9(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3094,20 +3121,20 @@
 	movlpd	%xmm0, (%edx)
 	movw	8(%ecx), %ax
 	movw	%ax, 8(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	9(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$10, %ebx
 	lea	10(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3116,20 +3143,20 @@
 	movlpd	%xmm0, (%edx)
 	movl	7(%ecx), %eax
 	movl	%eax, 7(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	10(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$11, %ebx
 	lea	11(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3138,20 +3165,20 @@
 	movlpd	%xmm0, (%edx)
 	movl	8(%ecx), %eax
 	movl	%eax, 8(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	11(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$12, %ebx
 	lea	12(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3160,20 +3187,20 @@
 	movlpd	%xmm0, (%edx)
 	movlpd	5(%ecx), %xmm0
 	movlpd	%xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	12(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$13, %ebx
 	lea	13(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3182,20 +3209,42 @@
 	movlpd	%xmm0, (%edx)
 	movlpd	6(%ecx), %xmm0
 	movlpd	%xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	13(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$14, %ebx
 	lea	14(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
+	RETURN
+
+	.p2align 4
+L(ExitTail15):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+#  ifdef USE_AS_STPCPY
+	lea	14(%edx), %eax
+#  else
+	movl	%edx, %eax
+#  endif
+#  ifdef USE_AS_STRNCPY
+	sub	$15, %ebx
+	lea	15(%edx), %ecx
+	jnz	L(StrncpyFillTailWithZero)
+#   ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)
+	sbb	$-1, %eax
+#   endif
+#  endif
 	RETURN
 
 	.p2align 4
@@ -3204,24 +3253,28 @@
 	movlpd	%xmm0, (%edx)
 	movlpd	8(%ecx), %xmm0
 	movlpd	%xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
+#  ifdef USE_AS_STPCPY
 	lea	15(%edx), %eax
-# else
+#  else
 	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+#  endif
+#  ifdef USE_AS_STRNCPY
 	sub	$16, %ebx
 	lea	16(%edx), %ecx
 	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   endif
+#  endif
 	RETURN
-# ifdef USE_AS_STRNCPY
-	CFI_PUSH (%esi)
-	CFI_PUSH (%edi)
+#endif
+
+# ifdef USE_AS_STRNCPY
+#  ifndef USE_AS_STRCAT
+	CFI_PUSH	(%esi)
+	CFI_PUSH	(%edi)
+#  endif
 L(StrncpyLeaveCase2OrCase3):
 	test	%eax, %eax
 	jnz	L(Aligned64LeaveCase2)
@@ -3979,9 +4032,13 @@
 	movaps	%xmm6, (%edx, %esi)
 	lea	1(%esi), %esi
 	jmp	L(CopyFrom1To16BytesCase3)
-
-	CFI_POP (%esi)
-	CFI_POP (%edi)
+# endif
+
+# ifndef USE_AS_STRCAT
+#  ifdef USE_AS_STRNCPY
+	CFI_POP	(%esi)
+	CFI_POP	(%edi)
+
 	.p2align 4
 L(ExitTail0):
 	movl	%edx, %eax
@@ -4013,31 +4070,19 @@
 	je	L(ExitTail14)
 	cmpb	$0, 13(%ecx)
 	jz	L(ExitTail14)
-# endif
-
-	.p2align 4
-L(ExitTail15):
 	movlpd	(%ecx), %xmm0
 	movlpd	%xmm0, (%edx)
 	movlpd	7(%ecx), %xmm0
 	movlpd	%xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	lea	14(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
-	sub	$15, %ebx
-	lea	15(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   else
+	movl	%edx, %eax
+#   endif
 	RETURN
 
-# ifdef USE_AS_STRNCPY
 	.p2align 4
 L(StrncpyExit8Bytes):
 	cmp	$1, %ebx
@@ -4068,27 +4113,19 @@
 	je	L(ExitTail7)
 	cmpb	$0, 6(%ecx)
 	jz	L(ExitTail7)
-# endif
-	.p2align 4
-L(ExitTail8):
 	movlpd	(%ecx), %xmm0
 	movlpd	%xmm0, (%edx)
-# ifdef USE_AS_STPCPY
+#   ifdef USE_AS_STPCPY
 	lea	7(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
-	sub	$8, %ebx
-	lea	8(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
 	cmpb	$1, (%eax)
 	sbb	$-1, %eax
-#  endif
-# endif
+#   else
+	movl	%edx, %eax
+#   endif
 	RETURN
+#  endif
+
 
 END (STRCPY)
-
+# endif
 #endif

Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S Fri Aug  5 00:02:24 2011
@@ -1,5 +1,5 @@
 /* strlen with SSE2
-   Copyright (C) 2010 Free Software Foundation, Inc.
+   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -18,30 +18,32 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#if defined SHARED && !defined NOT_IN_libc
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-#define PARMS		4
-#define	STR		PARMS
-#define ENTRANCE
-#define RETURN		ret
+#if (defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc
+# ifndef USE_AS_STRCAT
+
+#  include <sysdep.h>
+#  include "asm-syntax.h"
+
+#  define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#  define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#  define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#  define POP(REG)	popl REG; CFI_POP (REG)
+#  define PARMS		4
+#  define STR		PARMS
+#  define ENTRANCE
+#  define RETURN		ret
 
 	.text
 ENTRY (__strlen_sse2)
 	ENTRANCE
 	mov	STR(%esp), %edx
+# endif
 	xor	%eax, %eax
 	cmpb	$0, (%edx)
 	jz	L(exit_tail0)
@@ -77,9 +79,8 @@
 	jz	L(exit_tail15)
 	pxor	%xmm0, %xmm0
 	mov	%edx, %eax
-	mov	%edx, %ecx
+	lea	16(%edx), %ecx
 	and	$-16, %eax
-	add	$16, %ecx
 	add	$16, %eax
 
 	pcmpeqb	(%eax), %xmm0
@@ -183,51 +184,41 @@
 	jnz	L(exit)
 
 	and	$-0x40, %eax
-	PUSH (%esi)
-	PUSH (%edi)
-	PUSH (%ebx)
-	PUSH (%ebp)
-	xor	%ebp, %ebp
 L(aligned_64):
-	pcmpeqb	(%eax), %xmm0
-	pcmpeqb	16(%eax), %xmm1
-	pcmpeqb	32(%eax), %xmm2
-	pcmpeqb	48(%eax), %xmm3
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %esi
-	pmovmskb %xmm2, %edi
-	pmovmskb %xmm3, %ebx
-	or	%edx, %ebp
-	or	%esi, %ebp
-	or	%edi, %ebp
-	or	%ebx, %ebp
+	movaps	(%eax), %xmm0
+	movaps	16(%eax), %xmm1
+	movaps	32(%eax), %xmm2
+	movaps	48(%eax), %xmm6
+	pminub	%xmm1, %xmm0
+	pminub	%xmm6, %xmm2
+	pminub	%xmm0, %xmm2
+	pcmpeqb	%xmm3, %xmm2
+	pmovmskb %xmm2, %edx
+	test	%edx, %edx
 	lea	64(%eax), %eax
 	jz	L(aligned_64)
-L(48leave):
-	test	%edx, %edx
-	jnz	L(aligned_64_exit_16)
-	test	%esi, %esi
-	jnz	L(aligned_64_exit_32)
-	test	%edi, %edi
-	jnz	L(aligned_64_exit_48)
-	mov	%ebx, %edx
-	lea	(%eax), %eax
-	jmp	L(aligned_64_exit)
-L(aligned_64_exit_48):
-	lea	-16(%eax), %eax
-	mov	%edi, %edx
-	jmp	L(aligned_64_exit)
-L(aligned_64_exit_32):
-	lea	-32(%eax), %eax
-	mov	%esi, %edx
-	jmp	L(aligned_64_exit)
-L(aligned_64_exit_16):
-	lea	-48(%eax), %eax
-L(aligned_64_exit):
-	POP (%ebp)
-	POP (%ebx)
-	POP (%edi)
-	POP (%esi)
+
+	pcmpeqb	-64(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	48(%ecx), %ecx
+	jnz	L(exit)
+
+	pcmpeqb	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	-16(%ecx), %ecx
+	jnz	L(exit)
+
+	pcmpeqb	-32(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	test	%edx, %edx
+	lea	-16(%ecx), %ecx
+	jnz	L(exit)
+
+	pcmpeqb	%xmm6, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
 L(exit):
 	sub	%ecx, %eax
 	test	%dl, %dl
@@ -340,8 +331,8 @@
 
 L(exit_tail15):
 	add	$15, %eax
+# ifndef USE_AS_STRCAT
 	ret
-
 END (__strlen_sse2)
-
+# endif
 #endif

Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-c.c
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-c.c (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-c.c Fri Aug  5 00:02:24 2011
@@ -1,0 +1,8 @@
+#define STRNCAT __strncat_ia32
+#ifdef SHARED
+#undef libc_hidden_def
+#define libc_hidden_def(name) \
+  __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32);
+#endif
+
+#include "string/strncat.c"

Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S Fri Aug  5 00:02:24 2011
@@ -1,0 +1,4 @@
+#define STRCAT  __strncat_sse2
+#define USE_AS_STRNCAT
+
+#include "strcat-sse2.S"

Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S Fri Aug  5 00:02:24 2011
@@ -1,0 +1,4 @@
+#define STRCAT  __strncat_ssse3
+#define USE_AS_STRNCAT
+
+#include "strcat-ssse3.S"

Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat.S Fri Aug  5 00:02:24 2011
@@ -1,0 +1,3 @@
+#define STRCAT strncat
+#define USE_AS_STRNCAT
+#include "strcat.S"

Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c Fri Aug  5 00:02:24 2011
@@ -300,14 +300,20 @@
 		break;
 	    case 3:	/* painful */
 		for (i=jz;i>0;i--) {
-		    fw      = fq[i-1]+fq[i];
-		    fq[i]  += fq[i-1]-fw;
-		    fq[i-1] = fw;
+#if __FLT_EVAL_METHOD__ != 0
+		    volatile
+#endif
+		    double fv = (double)(fq[i-1]+fq[i]);
+		    fq[i]  += fq[i-1]-fv;
+		    fq[i-1] = fv;
 		}
 		for (i=jz;i>1;i--) {
-		    fw      = fq[i-1]+fq[i];
-		    fq[i]  += fq[i-1]-fw;
-		    fq[i-1] = fw;
+#if __FLT_EVAL_METHOD__ != 0
+		    volatile
+#endif
+		    double fv = (double)(fq[i-1]+fq[i]);
+		    fq[i]  += fq[i-1]-fv;
+		    fq[i-1] = fv;
 		}
 		for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
 		if(ih==0) {

Modified: fsf/trunk/libc/sysdeps/posix/getaddrinfo.c
==============================================================================
--- fsf/trunk/libc/sysdeps/posix/getaddrinfo.c (original)
+++ fsf/trunk/libc/sysdeps/posix/getaddrinfo.c Fri Aug  5 00:02:24 2011
@@ -432,7 +432,10 @@
 	  /* In case the output string is the same as the input string
 	     no new string has been allocated.  */
 	  if (p != name)
-	    malloc_name = true;
+	    {
+	      name = p;
+	      malloc_name = true;
+	    }
 	}
 #endif
 

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S Fri Aug  5 00:02:24 2011
@@ -1,5 +1,5 @@
 /* strlen(str) -- determine the length of the string STR.
-   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@xxxxxxxxxx>.
    This file is part of the GNU C Library.