[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commits] r16347 - in /fsf/trunk/libc: ./ bits/ elf/ iconvdata/ include/ inet/ localedata/ localedata/locales/ nptl/ nptl/sysdeps/pthr...



Author: eglibc
Date: Sat Dec 24 00:05:56 2011
New Revision: 16347

Log:
Import glibc-mainline for 2011-12-24

Added:
    fsf/trunk/libc/localedata/locales/ta_LK
Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/NEWS
    fsf/trunk/libc/bits/byteswap.h
    fsf/trunk/libc/elf/dl-addr.c
    fsf/trunk/libc/elf/tst-auditmod4b.c
    fsf/trunk/libc/elf/tst-auditmod6b.c
    fsf/trunk/libc/elf/tst-auditmod6c.c
    fsf/trunk/libc/elf/tst-auditmod7b.c
    fsf/trunk/libc/iconvdata/cp1258.c
    fsf/trunk/libc/iconvdata/tcvn5712-1.c
    fsf/trunk/libc/include/features.h
    fsf/trunk/libc/inet/getnameinfo.c
    fsf/trunk/libc/localedata/ChangeLog
    fsf/trunk/libc/localedata/SUPPORTED
    fsf/trunk/libc/localedata/locales/fi_FI
    fsf/trunk/libc/localedata/locales/si_LK
    fsf/trunk/libc/localedata/locales/sv_SE
    fsf/trunk/libc/localedata/locales/wal_ET
    fsf/trunk/libc/nptl/ChangeLog
    fsf/trunk/libc/nptl/Versions
    fsf/trunk/libc/nptl/nptl-init.c
    fsf/trunk/libc/nptl/pthreadP.h
    fsf/trunk/libc/nptl/sysdeps/pthread/gai_misc.h
    fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/mq_notify.c
    fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/timer_routines.c
    fsf/trunk/libc/sysdeps/i386/bits/byteswap.h
    fsf/trunk/libc/sysdeps/i386/fpu/bits/fenv.h
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
    fsf/trunk/libc/sysdeps/ia64/bits/byteswap.h
    fsf/trunk/libc/sysdeps/s390/bits/byteswap.h
    fsf/trunk/libc/sysdeps/x86_64/bits/byteswap.h
    fsf/trunk/libc/sysdeps/x86_64/dl-machine.h
    fsf/trunk/libc/sysdeps/x86_64/multiarch/strcpy-ssse3.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
    fsf/trunk/libc/version.h

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Sat Dec 24 00:05:56 2011
@@ -1,4 +1,58 @@
+2011-12-23  Ulrich Drepper  <drepper@xxxxxxxxx>
+
+	* version.h (RELEASE): Bump for 2.15 release.
+	* include/features.h (__GLIBC_MINOR__): Bump to 15.
+
+	* sysdeps/x86_64/dl-machine.h: Fix typos in comments.
+	Patch by Marek Polacek <mpolacek@xxxxxxxxxx>.
+
+	* bits/byteswap.h: Protect long long constants with __extension__.
+	* sysdeps/i386/bits/byteswap.h: Likewise.
+	* sysdeps/ia64/bits/byteswap.h: Likewise.
+	* sysdeps/s390/bits/byteswap.h: Likewise.
+	* sysdeps/x86_64/bits/byteswap.h: Likewise.
+
+2011-12-23  Liubov Dmitrieva  <liubov.dmitrieva@xxxxxxxxx>
+
+	[BZ #13540]
+	* sysdeps/x86_64/multiarch/strcpy-ssse3.S: Fix overrun in
+	destination buffer.
+	* sysdeps/x86_64/multiarch/wcscpy-ssse3.S: Likewise.
+
+2011-12-23  Marek Polacek  <polacek@xxxxxxxxxx>
+
+	* elf/dl-addr.c (determine_info): Add inline keyword.
+	* elf/tst-auditmod4b.c (check_avx): Likewise.
+	* elf/tst-auditmod6b.c (check_avx): Likewise.
+	* elf/tst-auditmod6c.c (check_avx): Likewise.
+	* elf/tst-auditmod7b.c (check_avx): Likewise.
+
+2011-12-23  Ulrich Drepper  <drepper@xxxxxxxxx>
+
+	* sysdeps/i386/fpu/bits/fenv.h (feraiseexcept): Also enable for
+	!__SSE_MATH__.
+
+2011-12-23  Liubov Dmitrieva  <liubov.dmitrieva@xxxxxxxxx>
+
+	[BZ #13540]
+	* sysdeps/i386/i686/multiarch/wcscpy-ssse3.S: Fix wrong copying
+	processing for last bytes.
+
+2011-08-06  Bruno Haible  <bruno@xxxxxxxxx>
+
+	[BZ #13061]
+	* iconvdata/cp1258.c (comp_table_data): Combine U+00A8 U+0301 to
+	U+0385, not to U+1FEE.
+
+	[BZ #13062]
+	* iconvdata/tcvn5712-1.c (comp_table_data): Remove useless and wrong
+	entry for U+00A5 U+0301.
+
 2011-12-22  Ulrich Drepper  <drepper@xxxxxxxxx>
+
+	[BZ #13166]
+	* inet/getnameinfo.c (getnameinfo): Return EAI_OVERFLOW if the
+	buffer for the output is too small.
 
 	* sysdeps/i386/fpu/bits/fenv.h [__SSE_MATH__]: Add feraiseexcept
 	optimization.
@@ -68,6 +122,7 @@
 2011-11-18  Richard B. Kreckel  <kreckel@xxxxxxxx>
 
 	[BZ #13305]
+	[BZ #12786]
 	* math/s_cacosh.c: Fix rare miscomputation in cacosh().
 	* math/s_cacoshf.c: Likewise.
 	* math/s_cacoshl.c: Likewise.

Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Sat Dec 24 00:05:56 2011
@@ -1,4 +1,4 @@
-GNU C Library NEWS -- history of user-visible changes.  2011-12-22
+GNU C Library NEWS -- history of user-visible changes.  2011-12-23
 Copyright (C) 1992-2009, 2010, 2011 Free Software Foundation, Inc.
 See the end for copying conditions.
 
@@ -9,9 +9,10 @@
 
 * The following bugs are resolved with this release:
 
-  6779, 6783, 9696, 10103, 10709, 11589, 12403, 12847, 12868, 12852, 12874,
-  12885, 12892, 12907, 12922, 12935, 13007, 13021, 13067, 13068, 13090,
-  13092, 13114, 13118, 13123, 13134, 13138, 13147, 13150, 13179, 13185,
+  6779, 6783, 9696, 10103, 10709, 11589, 12403, 12786, 12840, 12847, 12868,
+  12852, 12874, 12885, 12892, 12906, 12907, 12922, 12935, 12962, 13007,
+  13021, 13061, 13062, 13067, 13068, 13085, 13088, 13090, 13092, 13096,
+  13114, 13118, 13123, 13134, 13138, 13147, 13150, 13166, 13179, 13185,
   13189, 13192, 13268, 13276, 13282, 13291, 13305, 13328, 13335, 13337,
   13344, 13358, 13367, 13413, 13416, 13423, 13439, 13446, 13472, 13484,
   13506, 13515, 13523, 13524, 13538, 13540
@@ -66,7 +67,7 @@
 * Optimized nearbyint and strcasecmp for PPC.
   Implemented by Adhemerval Zanella.
 
-* New locales: bho_IN, unm_US, es_CU
+* New locales: bho_IN, unm_US, es_CU, ta_LK
 
 Version 2.14
 

Modified: fsf/trunk/libc/bits/byteswap.h
==============================================================================
--- fsf/trunk/libc/bits/byteswap.h (original)
+++ fsf/trunk/libc/bits/byteswap.h Sat Dec 24 00:05:56 2011
@@ -1,5 +1,6 @@
 /* Macros to swap the order of bytes in integer values.
-   Copyright (C) 1997,1998,2000-2002,2005,2008 Free Software Foundation, Inc.
+   Copyright (C) 1997,1998,2000-2002,2005,2008,2011
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -60,20 +61,20 @@
 #if defined __GNUC__ && __GNUC__ >= 2
 /* Swap bytes in 64 bit value.  */
 # define __bswap_constant_64(x) \
-     ((((x) & 0xff00000000000000ull) >> 56)				      \
-      | (((x) & 0x00ff000000000000ull) >> 40)				      \
-      | (((x) & 0x0000ff0000000000ull) >> 24)				      \
-      | (((x) & 0x000000ff00000000ull) >> 8)				      \
-      | (((x) & 0x00000000ff000000ull) << 8)				      \
-      | (((x) & 0x0000000000ff0000ull) << 24)				      \
-      | (((x) & 0x000000000000ff00ull) << 40)				      \
-      | (((x) & 0x00000000000000ffull) << 56))
+     (__extension__ ((((x) & 0xff00000000000000ull) >> 56)		      \
+		     | (((x) & 0x00ff000000000000ull) >> 40)		      \
+		     | (((x) & 0x0000ff0000000000ull) >> 24)		      \
+		     | (((x) & 0x000000ff00000000ull) >> 8)		      \
+		     | (((x) & 0x00000000ff000000ull) << 8)		      \
+		     | (((x) & 0x0000000000ff0000ull) << 24)		      \
+		     | (((x) & 0x000000000000ff00ull) << 40)		      \
+		     | (((x) & 0x00000000000000ffull) << 56)))
 
 # define __bswap_64(x) \
      (__extension__							      \
       ({ union { __extension__ unsigned long long int __ll;		      \
 		 unsigned int __l[2]; } __w, __r;			      \
-         if (__builtin_constant_p (x))					      \
+	 if (__builtin_constant_p (x))					      \
 	   __r.__ll = __bswap_constant_64 (x);				      \
 	 else								      \
 	   {								      \

Modified: fsf/trunk/libc/elf/dl-addr.c
==============================================================================
--- fsf/trunk/libc/elf/dl-addr.c (original)
+++ fsf/trunk/libc/elf/dl-addr.c Sat Dec 24 00:05:56 2011
@@ -1,5 +1,5 @@
 /* Locate the shared object symbol nearest a given address.
-   Copyright (C) 1996-2007, 2009 Free Software Foundation, Inc.
+   Copyright (C) 1996-2007, 2009, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 #include <ldsodefs.h>
 
 
-static void
+static inline void
 __attribute ((always_inline))
 determine_info (const ElfW(Addr) addr, struct link_map *match, Dl_info *info,
 		struct link_map **mapp, const ElfW(Sym) **symbolp)

Modified: fsf/trunk/libc/elf/tst-auditmod4b.c
==============================================================================
--- fsf/trunk/libc/elf/tst-auditmod4b.c (original)
+++ fsf/trunk/libc/elf/tst-auditmod4b.c Sat Dec 24 00:05:56 2011
@@ -108,7 +108,7 @@
 
 static int avx = -1;
 
-static int
+static inline int
 __attribute ((always_inline))
 check_avx (void)
 {

Modified: fsf/trunk/libc/elf/tst-auditmod6b.c
==============================================================================
--- fsf/trunk/libc/elf/tst-auditmod6b.c (original)
+++ fsf/trunk/libc/elf/tst-auditmod6b.c Sat Dec 24 00:05:56 2011
@@ -108,7 +108,7 @@
 
 static int avx = -1;
 
-static int
+static inline int
 __attribute ((always_inline))
 check_avx (void)
 {

Modified: fsf/trunk/libc/elf/tst-auditmod6c.c
==============================================================================
--- fsf/trunk/libc/elf/tst-auditmod6c.c (original)
+++ fsf/trunk/libc/elf/tst-auditmod6c.c Sat Dec 24 00:05:56 2011
@@ -108,7 +108,7 @@
 
 static int avx = -1;
 
-static int
+static inline int
 __attribute ((always_inline))
 check_avx (void)
 {

Modified: fsf/trunk/libc/elf/tst-auditmod7b.c
==============================================================================
--- fsf/trunk/libc/elf/tst-auditmod7b.c (original)
+++ fsf/trunk/libc/elf/tst-auditmod7b.c Sat Dec 24 00:05:56 2011
@@ -108,7 +108,7 @@
 
 static int avx = -1;
 
-static int
+static inline int
 __attribute ((always_inline))
 check_avx (void)
 {

Modified: fsf/trunk/libc/iconvdata/cp1258.c
==============================================================================
--- fsf/trunk/libc/iconvdata/cp1258.c (original)
+++ fsf/trunk/libc/iconvdata/cp1258.c Sat Dec 24 00:05:56 2011
@@ -197,8 +197,7 @@
     { 0x0077, 0x1E83 },
     { 0x0079, 0x00FD },
     { 0x007A, 0x017A },
-    /* { 0x00A5, 0x0385 }, Wrong, A5 is Yen sign */
-    { 0x00A8, 0x1FEE },
+    { 0x00A8, 0x0385 }, /* prefer U+0385 over U+1FEE */
     { 0x00C2, 0x1EA4 },
     { 0x00C5, 0x01FA },
     { 0x00C6, 0x01FC },

Modified: fsf/trunk/libc/iconvdata/tcvn5712-1.c
==============================================================================
--- fsf/trunk/libc/iconvdata/tcvn5712-1.c (original)
+++ fsf/trunk/libc/iconvdata/tcvn5712-1.c Sat Dec 24 00:05:56 2011
@@ -1,5 +1,5 @@
 /* Conversion to and from TCVN5712-1.
-   Copyright (C) 2001, 2002, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2001, 2002, 2004, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@xxxxxxxxxx>, 2001.
 
@@ -158,7 +158,7 @@
     { 0x01AF, 0x1EEA },
     { 0x01B0, 0x1EEB },
 #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
-#define COMP_TABLE_LEN_0301 51
+#define COMP_TABLE_LEN_0301 50
     { 0x0041, 0x00C1 },
     { 0x0043, 0x0106 },
     { 0x0045, 0x00C9 },
@@ -193,8 +193,7 @@
     { 0x0077, 0x1E83 },
     { 0x0079, 0x00FD },
     { 0x007A, 0x017A },
-    { 0x00A5, 0x0385 },
-  /*{ 0x00A8, 0x1FEE },*/
+  /*{ 0x00A8, 0x0385 },*//* prefer U+0385 over U+1FEE */
     { 0x00C2, 0x1EA4 },
   /*{ 0x00C5, 0x01FA },*/
   /*{ 0x00C6, 0x01FC },*/
@@ -492,7 +491,7 @@
 #include <iconv/loop.c>
 
 
-/* Next, define the conversion function from UCS4 to CP1258.  */
+/* Next, define the conversion function from UCS4 to TCVN5712-1.  */
 
 static const unsigned char from_ucs4[] =
   {

Modified: fsf/trunk/libc/include/features.h
==============================================================================
--- fsf/trunk/libc/include/features.h (original)
+++ fsf/trunk/libc/include/features.h Sat Dec 24 00:05:56 2011
@@ -339,7 +339,7 @@
 /* Major and minor version number of the GNU C library package.  Use
    these macros to test for features in specific releases.  */
 #define	__GLIBC__	2
-#define	__GLIBC_MINOR__	14
+#define	__GLIBC_MINOR__	15
 
 #define __GLIBC_PREREQ(maj, min) \
 	((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min))

Modified: fsf/trunk/libc/inet/getnameinfo.c
==============================================================================
--- fsf/trunk/libc/inet/getnameinfo.c (original)
+++ fsf/trunk/libc/inet/getnameinfo.c Sat Dec 24 00:05:56 2011
@@ -346,10 +346,11 @@
 						     "%u", scopeid);
 
 			if (real_hostlen + scopelen + 1 > hostlen)
-			  /* XXX We should not fail here.  Simply enlarge
-			     the buffer or return with out of memory.  */
-			  return EAI_SYSTEM;
-			memcpy (host + real_hostlen, scopebuf, scopelen + 1);
+			  /* Signal the buffer is too small.  This is
+			     what inet_ntop does.  */
+			  c = NULL;
+			else
+			  memcpy (host + real_hostlen, scopebuf, scopelen + 1);
 		      }
 		  }
 		else
@@ -357,7 +358,7 @@
 				 (const void *) &(((const struct sockaddr_in *) sa)->sin_addr),
 				 host, hostlen);
 		if (c == NULL)
-		  return EAI_SYSTEM;
+		  return EAI_OVERFLOW;
 	      }
 	    ok = 1;
 	  }

Modified: fsf/trunk/libc/localedata/ChangeLog
==============================================================================
--- fsf/trunk/libc/localedata/ChangeLog (original)
+++ fsf/trunk/libc/localedata/ChangeLog Sat Dec 24 00:05:56 2011
@@ -1,4 +1,27 @@
+2011-12-23  Ulrich Drepper  <drepper@xxxxxxxxx>
+
+	[BZ #12840]
+	* locales/sv_SE: Modernize date format.
+
+	[BZ #12906]
+	* SUPPORTED (SUPPORTED-LOCALES): Add wal_ET entry.
+
+	* locales/wal_ET: Remove lang_ab entry.
+
+	[BZ #12962]
+	* locales/fi_FI: Various fixups.
+	Patch by Marko Myllynen <myllynen@xxxxxxxxxx>.
+
+	[BZ #13085]
+	* locales/ta_LK: New file.
+	* SUPPORTED (SUPPORTED-LOCALES): Add appropriate entry.
+
+	* locales/si_LK: Add country_ab2, country_ab3, country_num.
+
 2011-12-22  Ulrich Drepper  <drepper@xxxxxxxxx>
+
+	[BZ #13096]
+	* locales/fi_FI: Fix collation reordering rules.
 
 	[BZ #13189]
 	* SUPPORTED (SUPPORTED-LOCALES): Add ur_IN entry.

Modified: fsf/trunk/libc/localedata/SUPPORTED
==============================================================================
--- fsf/trunk/libc/localedata/SUPPORTED (original)
+++ fsf/trunk/libc/localedata/SUPPORTED Sat Dec 24 00:05:56 2011
@@ -380,6 +380,7 @@
 sw_KE/UTF-8 \
 sw_TZ/UTF-8 \
 ta_IN/UTF-8 \
+ta_LK/UTF-8 \
 te_IN/UTF-8 \
 tg_TJ.UTF-8/UTF-8 \
 tg_TJ/KOI8-T \
@@ -414,6 +415,7 @@
 wa_BE@euro/ISO-8859-15 \
 wa_BE.UTF-8/UTF-8 \
 wae_CH/UTF-8 \
+wal_ET/UTF-8 \
 wo_SN/UTF-8 \
 xh_ZA.UTF-8/UTF-8 \
 xh_ZA/ISO-8859-1 \

Modified: fsf/trunk/libc/localedata/locales/fi_FI
==============================================================================
--- fsf/trunk/libc/localedata/locales/fi_FI (original)
+++ fsf/trunk/libc/localedata/locales/fi_FI Sat Dec 24 00:05:56 2011
@@ -1,4 +1,4 @@
-escape_char	/
+escape_char     /
 comment_char    %
 
 % Finnish language locale for Finland
@@ -45,10 +45,10 @@
 category  "fi_FI:2000";LC_MONETARY
 category  "fi_FI:2000";LC_MESSAGES
 category  "fi_FI:2000";LC_PAPER
+category  "fi_FI:2000";LC_MEASUREMENT
 category  "fi_FI:2000";LC_NAME
 category  "fi_FI:2000";LC_ADDRESS
 category  "fi_FI:2000";LC_TELEPHONE
-
 END LC_IDENTIFICATION
 
 LC_COLLATE
@@ -63,64 +63,68 @@
 <a-diaerisis>
 <o-diaerisis>
 
+reorder-after <U007A>
+<U00E5> <a-ring>;<BAS>;<MIN>;IGNORE
+<U01FB> <a-ring>;<ACA>;<MIN>;IGNORE
+<U00E4> <a-diaerisis>;<BAS>;<MIN>;IGNORE
+<U00E6> <a-diaerisis>;<REU>;<MIN>;IGNORE
+<U01FD> <a-diaerisis>;<U01FD>;<MIN>;IGNORE
+<U01E3> <a-diaerisis>;<MAC>;<MIN>;IGNORE
+<U00F6> <o-diaerisis>;<BAS>;<MIN>;IGNORE
+<U00F8> <o-diaerisis>;<U00D8>;<MIN>;IGNORE
+<U01FF> <o-diaerisis>;<U01FF>;<MIN>;IGNORE
+<U00F5> <o-diaerisis>;<TIL>;<MIN>;IGNORE
 reorder-after <U005A>
-<U00E5> <a-ring>;<BAS>;<MIN>;IGNORE
 <U00C5> <a-ring>;<BAS>;<CAP>;IGNORE
-<U01FB> <a-ring>;<ACA>;<MIN>;IGNORE
 <U01FA> <a-ring>;<ACA>;<CAP>;IGNORE
-<U00E4> <a-diaerisis>;<BAS>;<MIN>;IGNORE
 <U00C4> <a-diaerisis>;<BAS>;<CAP>;IGNORE
-<U00E6> <a-diaerisis>;<REU>;<MIN>;IGNORE
 <U00C6> <a-diaerisis>;<REU>;<CAP>;IGNORE
-<U01FD> <a-diaerisis>;<U01FD>;<MIN>;IGNORE
 <U01FC> <a-diaerisis>;<U01FD>;<CAP>;IGNORE
-<U01E3> <a-diaerisis>;<MAC>;<MIN>;IGNORE
 <U01E2> <a-diaerisis>;<MAC>;<CAP>;IGNORE
-<U00F6> <o-diaerisis>;<BAS>;<MIN>;IGNORE
 <U00D6> <o-diaerisis>;<BAS>;<CAP>;IGNORE
-<U00F8> <o-diaerisis>;<U00D8>;<MIN>;IGNORE
 <U00D8> <o-diaerisis>;<U00D8>;<CAP>;IGNORE
-<U01FF> <o-diaerisis>;<U01FF>;<MIN>;IGNORE
 <U01FE> <o-diaerisis>;<U01FF>;<CAP>;IGNORE
-<U00F5> <o-diaerisis>;<TIL>;<MIN>;IGNORE
 <U00D5> <o-diaerisis>;<TIL>;<CAP>;IGNORE
 
+reorder-after <U016B>
+<U0076> <v>;<U0056>;<BAS>;<MIN>
+<U1E7D> <v>;<U0056>;<TIL>;<MIN>
+<U0077> <w>;<U0057>;<BAS>;<MIN>
+<U1E83> <w>;<U0057>;<ACA>;<MIN>
+<U1E81> <w>;<U0057>;<GRA>;<MIN>
+<U0175> <w>;<U0057>;<CIR>;<MIN>
+<U1E85> <w>;<U0057>;<REU>;<MIN>
+<U1E87> <w>;<U0057>;<PCT>;<MIN>
 reorder-after <U016A>
-<U0076> <v>;<U0056>;<BAS>;<MIN>
 <U0056> <v>;<U0056>;<BAS>;<CAP>
-<U1E7D> <v>;<U0056>;<TIL>;<MIN>
 <U1E7C> <v>;<U0056>;<TIL>;<CAP>
-<U0077> <w>;<U0057>;<BAS>;<MIN>
 <U0057> <w>;<U0057>;<BAS>;<CAP>
-<U1E83> <w>;<U0057>;<ACA>;<MIN>
 <U1E82> <w>;<U0057>;<ACA>;<CAP>
-<U1E81> <w>;<U0057>;<GRA>;<MIN>
 <U1E80> <w>;<U0057>;<GRA>;<CAP>
-<U0175> <w>;<U0057>;<CIR>;<MIN>
 <U0174> <w>;<U0057>;<CIR>;<CAP>
-<U1E85> <w>;<U0057>;<REU>;<MIN>
 <U1E84> <w>;<U0057>;<REU>;<CAP>
-<U1E87> <w>;<U0057>;<PCT>;<MIN>
 <U1E86> <w>;<U0057>;<PCT>;<CAP>
 
 reorder-after <U00FF>
 <U00FC> <y>;<DTT>;<MIN>;IGNORE
+reorder-after <U0178>
 <U00DC> <y>;<DTT>;<CAP>;IGNORE
 
 %  Present in iso14651_t1, but these definitions seem to have been
 %  removed from latest iso14651 tables.
+reorder-after <U0163>
+<U00FE> "<t><h>";"<LIG><LIG>";"<MIN><MIN>";IGNORE
 reorder-after <U0162>
-<U00FE> "<t><h>";"<LIG><LIG>";"<MIN><MIN>";IGNORE
 <U00DE> "<t><h>";"<LIG><LIG>";"<CAP><CAP>";IGNORE
 
 reorder-after <U0064>
 <U00F0> <d>;<PCL>;<MIN>;IGNORE
+<U0111> <d>;<OBL>;<MIN>;IGNORE
+reorder-after <U0044>
 <U00D0> <d>;<PCL>;<CAP>;IGNORE
-<U0111> <d>;<OBL>;<MIN>;IGNORE
 <U0110> <d>;<OBL>;<CAP>;IGNORE
 
 reorder-end
-
 END LC_COLLATE
 
 LC_CTYPE
@@ -141,12 +145,10 @@
 negative_sign        "<U002D>"
 int_frac_digits      2
 frac_digits          2
-% int_curr_symbol precedes
-% curr_symbol succeeds
 p_cs_precedes        0
-p_sep_by_space       2
+p_sep_by_space       1
 n_cs_precedes        0
-n_sep_by_space       2
+n_sep_by_space       1
 p_sign_posn          1
 n_sign_posn          1
 END LC_MONETARY
@@ -168,18 +170,18 @@
          "<U0074><U006F><U0072><U0073><U0074><U0061><U0069>";/
          "<U0070><U0065><U0072><U006A><U0061><U006E><U0074><U0061><U0069>";/
          "<U006C><U0061><U0075><U0061><U006E><U0074><U0061><U0069>"
-abmon    "<U0074><U0061><U006D><U006D><U0069><U00A0>";/
-         "<U0068><U0065><U006C><U006D><U0069><U00A0>";/
+abmon    "<U0074><U0061><U006D><U006D><U0069>";/
+         "<U0068><U0065><U006C><U006D><U0069>";/
          "<U006D><U0061><U0061><U006C><U0069><U0073>";/
-         "<U0068><U0075><U0068><U0074><U0069><U00A0>";/
-         "<U0074><U006F><U0075><U006B><U006F><U00A0>";/
-         "<U006B><U0065><U0073><U00E4><U00A0><U00A0>";/
-         "<U0068><U0065><U0069><U006E><U00E4><U00A0>";/
-         "<U0065><U006C><U006F><U00A0><U00A0><U00A0>";/
-         "<U0073><U0079><U0079><U0073><U00A0><U00A0>";/
-         "<U006C><U006F><U006B><U0061><U00A0><U00A0>";/
+         "<U0068><U0075><U0068><U0074><U0069>";/
+         "<U0074><U006F><U0075><U006B><U006F>";/
+         "<U006B><U0065><U0073><U00E4>";/
+         "<U0068><U0065><U0069><U006E><U00E4>";/
+         "<U0065><U006C><U006F>";/
+         "<U0073><U0079><U0079><U0073>";/
+         "<U006C><U006F><U006B><U0061>";/
          "<U006D><U0061><U0072><U0072><U0061><U0073>";/
-         "<U006A><U006F><U0075><U006C><U0075><U00A0>"
+         "<U006A><U006F><U0075><U006C><U0075>"
 mon      "<U0074><U0061><U006D><U006D><U0069><U006B><U0075><U0075>";/
          "<U0068><U0065><U006C><U006D><U0069><U006B><U0075><U0075>";/
          "<U006D><U0061><U0061><U006C><U0069><U0073><U006B><U0075><U0075>";/
@@ -202,13 +204,14 @@
 date_fmt       "<U0025><U0061><U0020><U0025><U002D><U0064><U002E><U0025>/
 <U002D><U006D><U002E><U0025><U0059><U0020><U0025><U0048><U002E><U0025>/
 <U004D><U002E><U0025><U0053><U0020><U0025><U007A>"
+week     7;19971130;4
 first_weekday 2 % Monday
 first_workday 2 % Monday
 END LC_TIME
 
 LC_MESSAGES
-yesexpr  "<U005E><U005B><U004B><U006B><U004A><U006A><U0059><U0079><U005D><U002E><U002A>"
-noexpr   "<U005E><U005B><U004E><U006E><U0045><U0065><U005D><U002E><U002A>"
+yesexpr  "<U005E><U005B><U004B><U006B><U0059><U0079><U005D><U002E><U002A>"
+noexpr   "<U005E><U005B><U0045><U0065><U004E><U006E><U005D><U002E><U002A>"
 END LC_MESSAGES
 
 LC_PAPER
@@ -217,6 +220,7 @@
 END LC_PAPER
 
 LC_TELEPHONE
+tel_dom_fmt    "<U0028><U0025><U0041><U0029><U0020><U0025><U006C>"
 tel_int_fmt    "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025>/
 <U006C>"
 int_prefix     "<U0033><U0035><U0038>"
@@ -230,15 +234,25 @@
 LC_NAME
 name_fmt    "<U0025><U0064><U0025><U0074><U0025><U0067><U0025><U0074>/
 <U0025><U006D><U0025><U0074><U0025><U0066>"
+% Finnish equivalents for Mr/Mrs/Miss/Ms are herra/rouva/rouva/neiti
+% but they are practically never used, thus we don't define them here.
 END LC_NAME
 
 LC_ADDRESS
-postal_fmt    "<U0025><U0066><U0025><U004E><U0025><U0061><U0025><U004E>/
-<U0025><U0064><U0025><U004E><U0025><U0062><U0025><U004E><U0025><U0073>/
-<U0020><U0025><U0068><U0020><U0025><U0065><U0020><U0025><U0072><U0025>/
-<U004E><U0025><U007A><U0020><U0025><U0054><U0025>/
+postal_fmt   "<U0025><U0066><U0025><U004E><U0025><U0064><U0025><U004E>/
+<U0025><U0062><U0025><U004E><U0025><U0061><U0025><U004E><U0025><U0073>/
+<U0020><U0025><U0068><U0025><U0074><U0025><U0065><U0025><U0074><U0025>/
+<U0072><U0025><U004E><U0025><U007A><U0020><U0025><U0054><U0025>/
 <U004E><U0025><U0063><U0025><U004E>"
 country_ab2 "<U0046><U0049>"
 country_ab3 "<U0046><U0049><U004E>"
 country_num 246
+country_name "<U0053><U0075><U006F><U006D><U0069>"
+country_post "<U0046><U0049>"
+country_car  "<U0046><U0049><U004E>"
+country_isbn 952
+lang_name    "<U0073><U0075><U006F><U006D><U0069>"
+lang_ab      "<U0066><U0069>"
+lang_term    "<U0066><U0069><U006E>"
+lang_lib     "<U0066><U0069><U006E>"
 END LC_ADDRESS

Modified: fsf/trunk/libc/localedata/locales/si_LK
==============================================================================
--- fsf/trunk/libc/localedata/locales/si_LK (original)
+++ fsf/trunk/libc/localedata/locales/si_LK Sat Dec 24 00:05:56 2011
@@ -85,46 +85,46 @@
 %
 % Abbreviated weekday names (%a)
 abday       "<U0D89>";"<U0DC3>";/
-            "<U0D85>";"<U0DB6>";/
-            "<U0DB6><U0DCA><U200D><U0DBB>";"<U0DC3><U0DD2>";/
-            "<U0DC3><U0DD9>"
+	    "<U0D85>";"<U0DB6>";/
+	    "<U0DB6><U0DCA><U200D><U0DBB>";"<U0DC3><U0DD2>";/
+	    "<U0DC3><U0DD9>"
 %
 % Full weekday names (%A)
 day         "<U0D89><U0DBB><U0DD2><U0DAF><U0DCF>";/
-            "<U0DC3><U0DB3><U0DD4><U0DAF><U0DCF>";/
-            "<U0D85><U0D9F><U0DC4><U0DBB><U0DD4><U0DC0><U0DCF><U0DAF><U0DCF>";/
-            "<U0DB6><U0DAF><U0DCF><U0DAF><U0DCF>";/
-            "<U0DB6><U0DCA><U200D><U0DBB><U0DC4><U0DC3><U0DCA><U0DB4><U0DAD><U0DD2><U0DB1><U0DCA><U0DAF><U0DCF>";/
+	    "<U0DC3><U0DB3><U0DD4><U0DAF><U0DCF>";/
+	    "<U0D85><U0D9F><U0DC4><U0DBB><U0DD4><U0DC0><U0DCF><U0DAF><U0DCF>";/
+	    "<U0DB6><U0DAF><U0DCF><U0DAF><U0DCF>";/
+	    "<U0DB6><U0DCA><U200D><U0DBB><U0DC4><U0DC3><U0DCA><U0DB4><U0DAD><U0DD2><U0DB1><U0DCA><U0DAF><U0DCF>";/
 	    "<U0DC3><U0DD2><U0D9A><U0DD4><U0DBB><U0DCF><U0DAF><U0DCF>";/
-            "<U0DC3><U0DD9><U0DB1><U0DC3><U0DD4><U0DBB><U0DCF><U0DAF><U0DCF>"
+	    "<U0DC3><U0DD9><U0DB1><U0DC3><U0DD4><U0DBB><U0DCF><U0DAF><U0DCF>"
 %
 % Abbreviated month names (%b)
 abmon       "<U0DA2><U0DB1>";/
-            "<U0DB4><U0DD9><U0DB6>";/
-            "<U0DB8><U0DCF><U0DBB><U0DCA>";/
-            "<U0D85><U0DB4><U0DCA><U200D><U0DBB><U0DD2>";/
-            "<U0DB8><U0DD0><U0DBA><U0DD2>";/
-            "<U0DA2><U0DD6><U0DB1><U0DD2>";/
-            "<U0DA2><U0DD6><U0DBD><U0DD2>";/
-            "<U0D85><U0D9C><U0DDD>";/
-            "<U0DC3><U0DD0><U0DB4><U0DCA>";/
-            "<U0D94><U0D9A><U0DCA>";/
-            "<U0DB1><U0DD9><U0DC0><U0DD0>";/
-            "<U0DAF><U0DD9><U0DC3><U0DD0>"
+	    "<U0DB4><U0DD9><U0DB6>";/
+	    "<U0DB8><U0DCF><U0DBB><U0DCA>";/
+	    "<U0D85><U0DB4><U0DCA><U200D><U0DBB><U0DD2>";/
+	    "<U0DB8><U0DD0><U0DBA><U0DD2>";/
+	    "<U0DA2><U0DD6><U0DB1><U0DD2>";/
+	    "<U0DA2><U0DD6><U0DBD><U0DD2>";/
+	    "<U0D85><U0D9C><U0DDD>";/
+	    "<U0DC3><U0DD0><U0DB4><U0DCA>";/
+	    "<U0D94><U0D9A><U0DCA>";/
+	    "<U0DB1><U0DD9><U0DC0><U0DD0>";/
+	    "<U0DAF><U0DD9><U0DC3><U0DD0>"
 %
 % Full month names (%B)
 mon         "<U0DA2><U0DB1><U0DC0><U0DCF><U0DBB><U0DD2>";/
 	    "<U0DB4><U0DD9><U0DB6><U0DBB><U0DC0><U0DCF><U0DBB><U0DD2>";/
-            "<U0DB8><U0DCF><U0DBB><U0DCA><U0DAD><U0DD4>";/
-            "<U0D85><U0DB4><U0DCA><U200D><U0DBB><U0DD2><U0DBA><U0DD9><U0DBD><U0DCA>";/
-            "<U0DB8><U0DD0><U0DBA><U0DD2>";/
-            "<U0DA2><U0DD6><U0DB1><U0DD2>";/
-            "<U0DA2><U0DD6><U0DBD><U0DD2>";/
-            "<U0D85><U0D9C><U0DDD><U0DC3><U0DCA><U0DAD><U0DD4>";/
-            "<U0DC3><U0DD0><U0DB4><U0DCA><U0DAD><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>";/
-            "<U0D94><U0D9A><U0DCA><U0DAD><U0DDD><U0DB6><U0DBB><U0DCA>";/
-            "<U0DB1><U0DDC><U0DC0><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>";/
-            "<U0DAF><U0DD9><U0DC3><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>"
+	    "<U0DB8><U0DCF><U0DBB><U0DCA><U0DAD><U0DD4>";/
+	    "<U0D85><U0DB4><U0DCA><U200D><U0DBB><U0DD2><U0DBA><U0DD9><U0DBD><U0DCA>";/
+	    "<U0DB8><U0DD0><U0DBA><U0DD2>";/
+	    "<U0DA2><U0DD6><U0DB1><U0DD2>";/
+	    "<U0DA2><U0DD6><U0DBD><U0DD2>";/
+	    "<U0D85><U0D9C><U0DDD><U0DC3><U0DCA><U0DAD><U0DD4>";/
+	    "<U0DC3><U0DD0><U0DB4><U0DCA><U0DAD><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>";/
+	    "<U0D94><U0D9A><U0DCA><U0DAD><U0DDD><U0DB6><U0DBB><U0DCA>";/
+	    "<U0DB1><U0DDC><U0DC0><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>";/
+	    "<U0DAF><U0DD9><U0DC3><U0DD0><U0DB8><U0DCA><U0DB6><U0DBB><U0DCA>"
 %
 % Equivalent of AM PM
 am_pm       "<U0DB4><U0DD9><U002E><U0DC0><U002E>";"<U0DB4><U002E><U0DC0><U002E>"
@@ -196,6 +196,10 @@
 %
 postal_fmt  "<U0025><U007A><U0025><U0063><U0025><U0054><U0025><U0073><U0025><U0062><U0025><U0065><U0025><U0072>"
 
+country_ab2 "<U004C><U004B>"
+country_ab3 "<U004C><U004B><U0041>"
+country_num 144
+
 END LC_ADDRESS
 
 

Modified: fsf/trunk/libc/localedata/locales/sv_SE
==============================================================================
--- fsf/trunk/libc/localedata/locales/sv_SE (original)
+++ fsf/trunk/libc/localedata/locales/sv_SE Sat Dec 24 00:05:56 2011
@@ -169,9 +169,9 @@
           "<U006F><U006B><U0074><U006F><U0062><U0065><U0072>";/
           "<U006E><U006F><U0076><U0065><U006D><U0062><U0065><U0072>";/
           "<U0064><U0065><U0063><U0065><U006D><U0062><U0065><U0072>"
-d_t_fmt   "<U0025><U0061><U0020><U0025><U0065><U0020><U0025><U0062><U0020><U0025><U0059><U0020><U0025><U0048><U002E><U0025><U004D><U002E><U0025><U0053>"
+d_t_fmt   "<U0025><U0061><U0020><U0025><U0065><U0020><U0025><U0062><U0020><U0025><U0059><U0020><U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
 d_fmt     "<U0025><U0059><U002D><U0025><U006D><U002D><U0025><U0064>"
-t_fmt     "<U0025><U0048><U002E><U0025><U004D><U002E><U0025><U0053>"
+t_fmt     "<U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
 am_pm     "";""
 t_fmt_ampm  ""
 date_fmt       "<U0025><U0061><U0020><U0025><U0062><U0020><U0025><U0065>/

Added: fsf/trunk/libc/localedata/locales/ta_LK
==============================================================================
--- fsf/trunk/libc/localedata/locales/ta_LK (added)
+++ fsf/trunk/libc/localedata/locales/ta_LK Sat Dec 24 00:05:56 2011
@@ -1,0 +1,85 @@
+comment_char %
+escape_char  /
+%
+% Tamil language locale for Sri Lanka
+% Language: ta
+% Territory: LK
+% Revision: 1.0
+% Date: 2011,August,11
+% Application: general
+% Users: general
+% Charset: SLS 1326:2008
+% Distribution and use is free, also
+% for commercial purposes.
+
+LC_IDENTIFICATION
+title      "Tamil language locale for Sri Lanka"
+source     "J.Yogaraj"
+address    "30/36Q -2/1,Charles Apartments, De Silva Cross Rd,/
+	    Kalubowila,Dehiwela,SriLanka."
+contact    "94-777-315206"
+email      "yogaraj.ubuntu@xxxxxxxxx"
+tel        "94-112-765773"
+fax        ""
+language   "Tamil"
+territory  "Sri Lanka"
+revision   "1.0"
+date       "2011,August,11"
+%
+category  "ta_LK:2000";LC_IDENTIFICATION
+category  "ta_LK:2000";LC_CTYPE
+category  "ta_LK:2000";LC_COLLATE
+category  "ta_LK:2000";LC_TIME
+category  "ta_LK:2000";LC_NUMERIC
+category  "ta_LK:2000";LC_MONETARY
+category  "ta_LK:2000";LC_MESSAGES
+category  "ta_LK:2000";LC_PAPER
+category  "ta_LK:2000";LC_NAME
+category  "ta_LK:2000";LC_ADDRESS
+category  "ta_LK:2000";LC_TELEPHONE
+
+END LC_IDENTIFICATION
+
+LC_COLLATE
+copy "ta_IN"
+END LC_COLLATE
+
+LC_CTYPE
+copy "ta_IN"
+END LC_CTYPE
+
+LC_MESSAGES
+copy "ta_IN"
+END LC_MESSAGES
+
+LC_MONETARY
+copy "ta_IN"
+END LC_MONETARY
+
+LC_NUMERIC
+copy "ta_IN"
+END LC_NUMERIC
+
+LC_TIME
+copy "ta_IN"
+END LC_TIME
+
+LC_PAPER
+copy "si_LK"
+END LC_PAPER
+
+LC_TELEPHONE
+copy "si_LK"
+END LC_TELEPHONE
+
+LC_MEASUREMENT
+copy "si_LK"
+END LC_MEASUREMENT
+
+LC_NAME
+copy "ta_IN"
+END LC_NAME
+
+LC_ADDRESS
+copy "si_LK"
+END LC_ADDRESS

Modified: fsf/trunk/libc/localedata/locales/wal_ET
==============================================================================
--- fsf/trunk/libc/localedata/locales/wal_ET (original)
+++ fsf/trunk/libc/localedata/locales/wal_ET Sat Dec 24 00:05:56 2011
@@ -110,7 +110,6 @@
 % country_car   unknown
 % country_isbn  unknown, Need ISO 2108
 lang_name     "<U12C8><U120B><U12ED><U1273><U1271>"
-lang_ab       "<U0077><U0061><U006C>"
 lang_term     "<U0077><U0061><U006C>"
 lang_lib      "<U0077><U0061><U006C>"
 
@@ -144,12 +143,12 @@
 % Abbreviated weekday names (%a)
 %
 abday   "<U12C8><U130B> ";/
-        "<U1233><U12ED><U1296>";/
-        "<U121B><U1246><U1233>";/
-        "<U12A0><U1229><U12CB>";/
-        "<U1203><U1219><U1233>";/
-        "<U12A0><U122D><U1263>";/
-        "<U1244><U122B> "
+	"<U1233><U12ED><U1296>";/
+	"<U121B><U1246><U1233>";/
+	"<U12A0><U1229><U12CB>";/
+	"<U1203><U1219><U1233>";/
+	"<U12A0><U122D><U1263>";/
+	"<U1244><U122B> "
 %
 % Full weekday names (%A)
 %
@@ -169,32 +168,32 @@
 % Abbreviated month names (%b)
 %
 abmon    "<U1303><U1295><U12E9>";/
-         "<U134C><U1265><U1229>";/
-         "<U121B><U122D><U127D>";/
-         "<U12A4><U1355><U1228>";/
-         "<U121C><U12ED><U0020>";/
-         "<U1301><U1295><U0020>";/
-         "<U1301><U120B><U12ED>";/
-         "<U12A6><U1308><U1235>";/
-         "<U1234><U1355><U1274>";/
-         "<U12A6><U12AD><U1270>";/
-         "<U1296><U126C><U121D>";/
-         "<U12F2><U1234><U121D>"
+	 "<U134C><U1265><U1229>";/
+	 "<U121B><U122D><U127D>";/
+	 "<U12A4><U1355><U1228>";/
+	 "<U121C><U12ED><U0020>";/
+	 "<U1301><U1295><U0020>";/
+	 "<U1301><U120B><U12ED>";/
+	 "<U12A6><U1308><U1235>";/
+	 "<U1234><U1355><U1274>";/
+	 "<U12A6><U12AD><U1270>";/
+	 "<U1296><U126C><U121D>";/
+	 "<U12F2><U1234><U121D>"
 %
 % Full month names (%B)
 %
 mon      "<U1303><U1295><U12E9><U12C8><U122A>";/
-         "<U134C><U1265><U1229><U12C8><U122A>";/
-         "<U121B><U122D><U127D>";/
-         "<U12A4><U1355><U1228><U120D>";/
-         "<U121C><U12ED>";/
-         "<U1301><U1295>";/
-         "<U1301><U120B><U12ED>";/
-         "<U12A6><U1308><U1235><U1275>";/
-         "<U1234><U1355><U1274><U121D><U1260><U122D>";/
-         "<U12A6><U12AD><U1270><U12CD><U1260><U122D>";/
-         "<U1296><U126C><U121D><U1260><U122D>";/
-         "<U12F2><U1234><U121D><U1260><U122D>"
+	 "<U134C><U1265><U1229><U12C8><U122A>";/
+	 "<U121B><U122D><U127D>";/
+	 "<U12A4><U1355><U1228><U120D>";/
+	 "<U121C><U12ED>";/
+	 "<U1301><U1295>";/
+	 "<U1301><U120B><U12ED>";/
+	 "<U12A6><U1308><U1235><U1275>";/
+	 "<U1234><U1355><U1274><U121D><U1260><U122D>";/
+	 "<U12A6><U12AD><U1270><U12CD><U1260><U122D>";/
+	 "<U1296><U126C><U121D><U1260><U122D>";/
+	 "<U12F2><U1234><U121D><U1260><U122D>"
 %
 % Equivalent of AM PM
 %
@@ -202,7 +201,7 @@
 % also <U12A1><U1218><U122D><U1232>
 %
 am_pm    "<U121B><U1208><U12F6>";/
-         "<U1243><U121B>"
+	 "<U1243><U121B>"
 %
 % Appropriate date representation (%x)
 %	"%d/%m/%Y"

Modified: fsf/trunk/libc/nptl/ChangeLog
==============================================================================
--- fsf/trunk/libc/nptl/ChangeLog (original)
+++ fsf/trunk/libc/nptl/ChangeLog Sat Dec 24 00:05:56 2011
@@ -1,3 +1,18 @@
+2011-12-22  Ulrich Drepper  <drepper@xxxxxxxxx>
+
+	* sysdeps/pthread/gai_misc.h (__gai_create_helper_thread): Use
+	__pthread_get_minstack.
+	* sysdeps/unix/sysv/linux/mq_notify.c (init_mq_netlink): Likewise.
+
+	[BZ #13088]
+	* sysdeps/unix/sysv/linux/timer_routines.c: Get minimum stack size
+	through __pthread_get_minstack.
+	* nptl-init.c (__pthread_initialize_minimal_internal): Get page size
+	directly from _rtld_global_ro.
+	(__pthread_get_minstack): New function.
+	* pthreadP.h: Declare __pthread_get_minstack.
+	* Versions (libpthread) [GLIBC_PRIVATE]: Add __pthread_get_minstack.
+
 2011-12-21  Ulrich Drepper  <drepper@xxxxxxxxx>
 
 	[BZ #13515]

Modified: fsf/trunk/libc/nptl/Versions
==============================================================================
--- fsf/trunk/libc/nptl/Versions (original)
+++ fsf/trunk/libc/nptl/Versions Sat Dec 24 00:05:56 2011
@@ -255,6 +255,6 @@
   GLIBC_PRIVATE {
     __pthread_initialize_minimal;
     __pthread_clock_gettime; __pthread_clock_settime;
-    __pthread_unwind;
+    __pthread_unwind; __pthread_get_minstack;
   }
 }

Modified: fsf/trunk/libc/nptl/nptl-init.c
==============================================================================
--- fsf/trunk/libc/nptl/nptl-init.c (original)
+++ fsf/trunk/libc/nptl/nptl-init.c Sat Dec 24 00:05:56 2011
@@ -427,7 +427,7 @@
 
   /* Make sure it meets the minimum size that allocate_stack
      (allocatestack.c) will demand, which depends on the page size.  */
-  const uintptr_t pagesz = __sysconf (_SC_PAGESIZE);
+  const uintptr_t pagesz = GLRO(dl_pagesize);
   const size_t minstack = pagesz + __static_tls_size + MINIMAL_REST_STACK;
   if (limit.rlim_cur < minstack)
     limit.rlim_cur = minstack;
@@ -469,3 +469,13 @@
 }
 strong_alias (__pthread_initialize_minimal_internal,
 	      __pthread_initialize_minimal)
+
+
+size_t
+__pthread_get_minstack (const pthread_attr_t *attr)
+{
+  struct pthread_attr *iattr = (struct pthread_attr *) attr;
+
+  return (GLRO(dl_pagesize) + __static_tls_size + PTHREAD_STACK_MIN
+	  + iattr->guardsize);
+}

Modified: fsf/trunk/libc/nptl/pthreadP.h
==============================================================================
--- fsf/trunk/libc/nptl/pthreadP.h (original)
+++ fsf/trunk/libc/nptl/pthreadP.h Sat Dec 24 00:05:56 2011
@@ -397,6 +397,7 @@
 
 extern void __pthread_init_static_tls (struct link_map *) attribute_hidden;
 
+extern size_t __pthread_get_minstack (const pthread_attr_t *attr);
 
 /* Namespace save aliases.  */
 extern int __pthread_getschedparam (pthread_t thread_id, int *policy,

Modified: fsf/trunk/libc/nptl/sysdeps/pthread/gai_misc.h
==============================================================================
--- fsf/trunk/libc/nptl/sysdeps/pthread/gai_misc.h (original)
+++ fsf/trunk/libc/nptl/sysdeps/pthread/gai_misc.h Sat Dec 24 00:05:56 2011
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2006, 2007, 2008, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -97,7 +97,9 @@
   pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
 
   /* The helper thread needs only very little resources.  */
-  (void) pthread_attr_setstacksize (&attr, 4 * PTHREAD_STACK_MIN);
+  (void) pthread_attr_setstacksize (&attr,
+				    __pthread_get_minstack (&attr)
+				    + 4 * PTHREAD_STACK_MIN);
 
   /* Block all signals in the helper thread.  To do this thoroughly we
      temporarily have to block all signals here.  */

Modified: fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/mq_notify.c
==============================================================================
--- fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/mq_notify.c (original)
+++ fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/mq_notify.c Sat Dec 24 00:05:56 2011
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004, 2005, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2004, 2005, 2008, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contribute by Ulrich Drepper <drepper@xxxxxxxxxx>, 2004.
 
@@ -201,7 +201,7 @@
       (void) pthread_attr_init (&attr);
       (void) pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
       /* We do not need much stack space, the bare minimum will be enough.  */
-      (void) pthread_attr_setstacksize (&attr, PTHREAD_STACK_MIN);
+      (void) pthread_attr_setstacksize (&attr, __pthread_get_minstack (&attr));
 
       /* Temporarily block all signals so that the newly created
 	 thread inherits the mask.  */

Modified: fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/timer_routines.c
==============================================================================
--- fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/timer_routines.c (original)
+++ fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/timer_routines.c Sat Dec 24 00:05:56 2011
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2007, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@xxxxxxxxxx>, 2003.
 
@@ -165,7 +165,7 @@
      and should go away automatically when canceled.  */
   pthread_attr_t attr;
   (void) pthread_attr_init (&attr);
-  (void) pthread_attr_setstacksize (&attr, PTHREAD_STACK_MIN);
+  (void) pthread_attr_setstacksize (&attr, __pthread_get_minstack (&attr));
 
   /* Block all signals in the helper thread but SIGSETXID.  To do this
      thoroughly we temporarily have to block all signals here.  The

Modified: fsf/trunk/libc/sysdeps/i386/bits/byteswap.h
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/bits/byteswap.h (original)
+++ fsf/trunk/libc/sysdeps/i386/bits/byteswap.h Sat Dec 24 00:05:56 2011
@@ -1,5 +1,5 @@
 /* Macros to swap the order of bytes in integer values.
-   Copyright (C) 1997, 1998, 2000, 2002, 2003, 2006, 2007, 2008, 2010
+   Copyright (C) 1997, 1998, 2000, 2002, 2003, 2006, 2007, 2008, 2010, 2011
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -109,15 +109,15 @@
 
 #if defined __GNUC__ && __GNUC__ >= 2
 /* Swap bytes in 64 bit value.  */
-#define __bswap_constant_64(x) \
-     ((((x) & 0xff00000000000000ull) >> 56)				      \
-      | (((x) & 0x00ff000000000000ull) >> 40)				      \
-      | (((x) & 0x0000ff0000000000ull) >> 24)				      \
-      | (((x) & 0x000000ff00000000ull) >> 8)				      \
-      | (((x) & 0x00000000ff000000ull) << 8)				      \
-      | (((x) & 0x0000000000ff0000ull) << 24)				      \
-      | (((x) & 0x000000000000ff00ull) << 40)				      \
-      | (((x) & 0x00000000000000ffull) << 56))
+# define __bswap_constant_64(x) \
+     (__extension__ ((((x) & 0xff00000000000000ull) >> 56)		      \
+		     | (((x) & 0x00ff000000000000ull) >> 40)		      \
+		     | (((x) & 0x0000ff0000000000ull) >> 24)		      \
+		     | (((x) & 0x000000ff00000000ull) >> 8)		      \
+		     | (((x) & 0x00000000ff000000ull) << 8)		      \
+		     | (((x) & 0x0000000000ff0000ull) << 24)		      \
+		     | (((x) & 0x000000000000ff00ull) << 40)		      \
+		     | (((x) & 0x00000000000000ffull) << 56)))
 
 # define __bswap_64(x) \
      (__extension__							      \

Modified: fsf/trunk/libc/sysdeps/i386/fpu/bits/fenv.h
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/fpu/bits/fenv.h (original)
+++ fsf/trunk/libc/sysdeps/i386/fpu/bits/fenv.h Sat Dec 24 00:05:56 2011
@@ -90,7 +90,7 @@
 #endif
 
 
-#if defined __SSE_MATH__ && defined __USE_EXTERN_INLINES
+#ifdef __USE_EXTERN_INLINES
 __BEGIN_DECLS
 
 /* Optimized versions.  */
@@ -106,7 +106,12 @@
 	  /* One example of a invalid operation is 0.0 / 0.0.  */
 	  float __f = 0.0;
 
+# ifdef __SSE_MATH__
 	  __asm__ __volatile__ ("divss %0, %0 " : : "x" (__f));
+# else
+	  __asm__ __volatile__ ("fdiv %%st, %%st(0); fwait"
+				: "=t" (__f) : "0" (__f));
+# endif
 	  (void) &__f;
 	}
       if ((FE_DIVBYZERO & __excepts) != 0)
@@ -114,7 +119,12 @@
 	  float __f = 1.0;
 	  float __g = 0.0;
 
+# ifdef __SSE_MATH__
 	  __asm__ __volatile__ ("divss %1, %0" : : "x" (__f), "x" (__g));
+# else
+	  __asm__ __volatile__ ("fdivp %%st(1), %%st; fwait"
+				: "=t" (__f) : "0" (__f), "u" (__g) : "st(1)");
+# endif
 	  (void) &__f;
 	}
 

Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S Sat Dec 24 00:05:56 2011
@@ -54,7 +54,6 @@
 
 	PUSH	(%edi)
 	mov	%edx, %edi
-
 	PUSH	(%esi)
 	lea	16(%ecx), %esi
 
@@ -220,7 +219,6 @@
 	jnz	L(Shl4LoopExit)
 
 	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%edx)
 	movaps	28(%ecx), %xmm2
 
@@ -228,6 +226,19 @@
 	lea	16(%edx), %edx
 	pmovmskb %xmm0, %eax
 	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm1
+
+	test	%eax, %eax
+	jnz	L(Shl4LoopExit)
+
+	palignr	$4, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	28(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
 	movaps	%xmm2, %xmm3
 
 	test	%eax, %eax
@@ -236,33 +247,16 @@
 	palignr	$4, %xmm1, %xmm2
 	movaps	%xmm2, (%edx)
 	movaps	28(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
 
 	test	%eax, %eax
 	jnz	L(Shl4LoopExit)
 
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$4, %xmm3, %xmm2
 	movaps	%xmm2, (%edx)
 	lea	28(%ecx), %ecx
 	lea	16(%edx), %edx
@@ -305,14 +299,13 @@
 	jmp	L(Shl4LoopStart)
 
 L(Shl4LoopExit):
-	movaps	(%edx), %xmm6
-	psrldq	$12, %xmm6
-	palignr	$4, %xmm1, %xmm6
-	movaps	%xmm6, (%edx)
+	movlpd	(%ecx), %xmm0
+	movl	8(%ecx), %esi
+	movlpd	%xmm0, (%edx)
+	movl	%esi, 8(%edx)
+	POP	(%esi)
 	add	$12, %edx
 	add	$12, %ecx
-
-	POP	(%esi)
 	test	%al, %al
 	jz	L(ExitHigh)
 	test	$0x01, %al
@@ -337,7 +330,6 @@
 	jnz	L(Shl8LoopExit)
 
 	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%edx)
 	movaps	24(%ecx), %xmm2
 
@@ -345,6 +337,19 @@
 	lea	16(%edx), %edx
 	pmovmskb %xmm0, %eax
 	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm1
+
+	test	%eax, %eax
+	jnz	L(Shl8LoopExit)
+
+	palignr	$8, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	24(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
 	movaps	%xmm2, %xmm3
 
 	test	%eax, %eax
@@ -353,33 +358,16 @@
 	palignr	$8, %xmm1, %xmm2
 	movaps	%xmm2, (%edx)
 	movaps	24(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
 
 	test	%eax, %eax
 	jnz	L(Shl8LoopExit)
 
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$8, %xmm3, %xmm2
 	movaps	%xmm2, (%edx)
 	lea	24(%ecx), %ecx
 	lea	16(%edx), %edx
@@ -422,14 +410,11 @@
 	jmp	L(Shl8LoopStart)
 
 L(Shl8LoopExit):
-	movaps	(%edx), %xmm6
-	psrldq	$8, %xmm6
-	palignr	$8, %xmm1, %xmm6
-	movaps	%xmm6, (%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	POP	(%esi)
 	add	$8, %edx
 	add	$8, %ecx
-
-	POP	(%esi)
 	test	%al, %al
 	jz	L(ExitHigh)
 	test	$0x01, %al
@@ -454,7 +439,6 @@
 	jnz	L(Shl12LoopExit)
 
 	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%edx)
 	movaps	20(%ecx), %xmm2
 
@@ -462,6 +446,19 @@
 	lea	16(%edx), %edx
 	pmovmskb %xmm0, %eax
 	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm1
+
+	test	%eax, %eax
+	jnz	L(Shl12LoopExit)
+
+	palignr	$12, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	20(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
 	movaps	%xmm2, %xmm3
 
 	test	%eax, %eax
@@ -470,33 +467,16 @@
 	palignr	$12, %xmm1, %xmm2
 	movaps	%xmm2, (%edx)
 	movaps	20(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
 
 	test	%eax, %eax
 	jnz	L(Shl12LoopExit)
 
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$12, %xmm3, %xmm2
 	movaps	%xmm2, (%edx)
 	lea	20(%ecx), %ecx
 	lea	16(%edx), %edx
@@ -539,11 +519,9 @@
 	jmp	L(Shl12LoopStart)
 
 L(Shl12LoopExit):
-	movaps	(%edx), %xmm6
-	psrldq	$4, %xmm6
+	movl	(%ecx), %esi
+	movl	%esi, (%edx)
 	mov	$4, %esi
-	palignr	$12, %xmm1, %xmm6
-	movaps	%xmm6, (%edx)
 
 	.p2align 4
 L(CopyFrom1To16Bytes):
@@ -555,6 +533,7 @@
 	jz	L(ExitHigh)
 	test	$0x01, %al
 	jnz	L(Exit4)
+L(Exit8):
 	movlpd	(%ecx), %xmm0
 	movlpd	%xmm0, (%edx)
 	movl	%edi, %eax
@@ -564,6 +543,7 @@
 L(ExitHigh):
 	test	$0x01, %ah
 	jnz	L(Exit12)
+L(Exit16):
 	movdqu	(%ecx), %xmm0
 	movdqu	%xmm0, (%edx)
 	movl	%edi, %eax

Modified: fsf/trunk/libc/sysdeps/ia64/bits/byteswap.h
==============================================================================
--- fsf/trunk/libc/sysdeps/ia64/bits/byteswap.h (original)
+++ fsf/trunk/libc/sysdeps/ia64/bits/byteswap.h Sat Dec 24 00:05:56 2011
@@ -1,5 +1,6 @@
 /* Macros to swap the order of bytes in integer values.
-   Copyright (C) 1997,1998,2000,2002,2003,2008 Free Software Foundation, Inc.
+   Copyright (C) 1997,1998,2000,2002,2003,2008,2011
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -77,7 +78,30 @@
 
 
 /* Swap bytes in 64 bit value.  */
-#define __bswap_constant_64(x) \
+#if defined __GNUC__ && __GNUC__ >= 2
+# define __bswap_constant_64(x) \
+     (__extension__ ((((x) & 0xff00000000000000ul) >> 56)		      \
+		     | (((x) & 0x00ff000000000000ul) >>  40)		      \
+		     | (((x) & 0x0000ff0000000000ul) >> 24)		      \
+		     | (((x) & 0x000000ff00000000ul) >> 8)		      \
+		     | (((x) & 0x00000000ff000000ul) << 8)		      \
+		     | (((x) & 0x0000000000ff0000ul) << 24)		      \
+		     | (((x) & 0x000000000000ff00ul) << 40)		      \
+		     | (((x) & 0x00000000000000fful) << 56)))
+
+# define __bswap_64(x) \
+     (__extension__							      \
+      ({ register unsigned long int __v, __x = (x);			      \
+	 if (__builtin_constant_p (x))					      \
+	   __v = __bswap_constant_64 (__x);				      \
+	 else								      \
+	   __asm__ __volatile__ ("mux1 %0 = %1, @rev ;;"		      \
+				 : "=r" (__v)				      \
+				 : "r" ((unsigned long int) (__x)));	      \
+	 __v; }))
+
+#else
+# define __bswap_constant_64(x) \
      ((((x) & 0xff00000000000000ul) >> 56)				      \
       | (((x) & 0x00ff000000000000ul) >>  40)				      \
       | (((x) & 0x0000ff0000000000ul) >> 24)				      \
@@ -87,19 +111,6 @@
       | (((x) & 0x000000000000ff00ul) << 40)				      \
       | (((x) & 0x00000000000000fful) << 56))
 
-#if defined __GNUC__ && __GNUC__ >= 2
-# define __bswap_64(x) \
-     (__extension__							      \
-      ({ register unsigned long int __v, __x = (x);			      \
-	 if (__builtin_constant_p (x))					      \
-	   __v = __bswap_constant_64 (__x);				      \
-	 else								      \
-	   __asm__ __volatile__ ("mux1 %0 = %1, @rev ;;"		      \
-				 : "=r" (__v)				      \
-				 : "r" ((unsigned long int) (__x)));	      \
-         __v; }))
-
-#else
 static __inline unsigned long int
 __bswap_64 (unsigned long int __bsx)
 {

Modified: fsf/trunk/libc/sysdeps/s390/bits/byteswap.h
==============================================================================
--- fsf/trunk/libc/sysdeps/s390/bits/byteswap.h (original)
+++ fsf/trunk/libc/sysdeps/s390/bits/byteswap.h Sat Dec 24 00:05:56 2011
@@ -1,5 +1,5 @@
 /* Macros to swap the order of bytes in integer values.  s390 version.
-   Copyright (C) 2000, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+   Copyright (C) 2000-2003, 2008, 2011 Free Software Foundation, Inc.
    Contributed by Martin Schwidefsky (schwidefsky@xxxxxxxxxx).
    This file is part of the GNU C Library.
 
@@ -35,31 +35,31 @@
 # if __WORDSIZE == 64
 #  define __bswap_16(x) \
      (__extension__							      \
-      ({ unsigned short int __v, __x = (x);	                              \
+      ({ unsigned short int __v, __x = (x);				      \
 	 if (__builtin_constant_p (x))					      \
 	   __v = __bswap_constant_16 (__x);				      \
 	 else {								      \
-           unsigned short int __tmp = (unsigned short int) (__x);             \
-           __asm__ __volatile__ (                                             \
-              "lrvh %0,%1"                                                    \
-              : "=&d" (__v) : "m" (__tmp) );                                  \
-         }                                                                    \
+	   unsigned short int __tmp = (unsigned short int) (__x);             \
+	   __asm__ __volatile__ (                                             \
+	      "lrvh %0,%1"                                                    \
+	      : "=&d" (__v) : "m" (__tmp) );                                  \
+	 }                                                                    \
 	 __v; }))
 # else
 #  define __bswap_16(x) \
      (__extension__							      \
-      ({ unsigned short int __v, __x = (x);	                              \
+      ({ unsigned short int __v, __x = (x);				      \
 	 if (__builtin_constant_p (x))					      \
 	   __v = __bswap_constant_16 (__x);				      \
 	 else {								      \
-           unsigned short int __tmp = (unsigned short int) (__x);             \
-           __asm__ __volatile__ (                                             \
-              "sr   %0,%0\n"                                                  \
-              "la   1,%1\n"                                                   \
-              "icm  %0,2,1(1)\n"                                              \
-              "ic   %0,0(1)"                                                  \
-              : "=&d" (__v) : "m" (__tmp) : "1");                             \
-         }                                                                    \
+	   unsigned short int __tmp = (unsigned short int) (__x);             \
+	   __asm__ __volatile__ (                                             \
+	      "sr   %0,%0\n"                                                  \
+	      "la   1,%1\n"                                                   \
+	      "icm  %0,2,1(1)\n"                                              \
+	      "ic   %0,0(1)"                                                  \
+	      : "=&d" (__v) : "m" (__tmp) : "1");                             \
+	 }                                                                    \
 	 __v; }))
 # endif
 #else
@@ -80,32 +80,32 @@
 # if __WORDSIZE == 64
 #  define __bswap_32(x) \
      (__extension__							      \
-      ({ unsigned int __v, __x = (x);				              \
+      ({ unsigned int __v, __x = (x);					      \
 	 if (__builtin_constant_p (x))					      \
 	   __v = __bswap_constant_32 (__x);				      \
 	 else {								      \
-           unsigned int __tmp = (unsigned int) (__x);                         \
-           __asm__ __volatile__ (                                             \
-              "lrv   %0,%1"                                                   \
-              : "=&d" (__v) : "m" (__tmp));                                   \
-         }                                                                    \
+	   unsigned int __tmp = (unsigned int) (__x);                         \
+	   __asm__ __volatile__ (                                             \
+	      "lrv   %0,%1"                                                   \
+	      : "=&d" (__v) : "m" (__tmp));                                   \
+	 }                                                                    \
 	 __v; }))
 # else
 #  define __bswap_32(x) \
      (__extension__							      \
-      ({ unsigned int __v, __x = (x);			                      \
+      ({ unsigned int __v, __x = (x);					      \
 	 if (__builtin_constant_p (x))					      \
 	   __v = __bswap_constant_32 (__x);				      \
 	 else {								      \
-           unsigned int __tmp = (unsigned int) (__x);                         \
-           __asm__ __volatile__ (                                             \
-              "la    1,%1\n"                                                  \
-              "icm   %0,8,3(1)\n"                                             \
-              "icm   %0,4,2(1)\n"                                             \
-              "icm   %0,2,1(1)\n"                                             \
-              "ic    %0,0(1)"                                                 \
-              : "=&d" (__v) : "m" (__tmp) : "1");                             \
-         }                                                                    \
+	   unsigned int __tmp = (unsigned int) (__x);                         \
+	   __asm__ __volatile__ (                                             \
+	      "la    1,%1\n"                                                  \
+	      "icm   %0,8,3(1)\n"                                             \
+	      "icm   %0,4,2(1)\n"                                             \
+	      "icm   %0,2,1(1)\n"                                             \
+	      "ic    %0,0(1)"                                                 \
+	      : "=&d" (__v) : "m" (__tmp) : "1");                             \
+	 }                                                                    \
 	 __v; }))
 # endif
 #else
@@ -117,37 +117,51 @@
 #endif
 
 /* Swap bytes in 64 bit value.  */
-#define __bswap_constant_64(x) \
-     ((((x)&0xff00000000000000) >> 56) | (((x)&0x00ff000000000000) >> 40) |  \
-      (((x)&0x0000ff0000000000) >> 24) | (((x)&0x000000ff00000000) >>  8) |  \
-      (((x)&0x00000000ff000000) <<  8) | (((x)&0x0000000000ff0000) << 24) |  \
-      (((x)&0x000000000000ff00) << 40) | (((x)&0x00000000000000ff) << 56))
+#if defined __GNUC__ && __GNUC__ >= 2
+# define __bswap_constant_64(x) \
+     (__extension__ ((((x) & 0xff00000000000000ul) >> 56)		      \
+		     | (((x) & 0x00ff000000000000ul) >>  40)		      \
+		     | (((x) & 0x0000ff0000000000ul) >> 24)		      \
+		     | (((x) & 0x000000ff00000000ul) >> 8)		      \
+		     | (((x) & 0x00000000ff000000ul) << 8)		      \
+		     | (((x) & 0x0000000000ff0000ul) << 24)		      \
+		     | (((x) & 0x000000000000ff00ul) << 40)		      \
+		     | (((x) & 0x00000000000000fful) << 56)))
 
-#if defined __GNUC__ && __GNUC__ >= 2
 # if __WORDSIZE == 64
 #  define __bswap_64(x) \
      (__extension__							      \
-      ({ unsigned long __w, __x = (x);				              \
+      ({ unsigned long __w, __x = (x);					      \
 	 if (__builtin_constant_p (x))					      \
 	   __w = __bswap_constant_64 (__x);				      \
 	 else {								      \
-           unsigned long __tmp = (unsigned long) (__x);                       \
-           __asm__ __volatile__ (                                             \
-              "lrvg  %0,%1"                                                   \
-              : "=&d" (__w) : "m" (__tmp));                                   \
-         }                                                                    \
+	   unsigned long __tmp = (unsigned long) (__x);                       \
+	   __asm__ __volatile__ (                                             \
+	      "lrvg  %0,%1"                                                   \
+	      : "=&d" (__w) : "m" (__tmp));                                   \
+	 }                                                                    \
 	 __w; }))
 # else
 #  define __bswap_64(x) \
      __extension__					\
        ({ union { unsigned long long int __ll;		\
-	          unsigned long int __l[2]; } __w, __r;	\
-          __w.__ll = (x);				\
-          __r.__l[0] = __bswap_32 (__w.__l[1]);		\
-          __r.__l[1] = __bswap_32 (__w.__l[0]);		\
-          __r.__ll; })
+		  unsigned long int __l[2]; } __w, __r;	\
+	  __w.__ll = (x);				\
+	  __r.__l[0] = __bswap_32 (__w.__l[1]);		\
+	  __r.__l[1] = __bswap_32 (__w.__l[0]);		\
+	  __r.__ll; })
 # endif
 #else
+# define __bswap_constant_64(x) \
+     ((((x) & 0xff00000000000000ul) >> 56)				      \
+      | (((x) & 0x00ff000000000000ul) >>  40)				      \
+      | (((x) & 0x0000ff0000000000ul) >> 24)				      \
+      | (((x) & 0x000000ff00000000ul) >> 8)				      \
+      | (((x) & 0x00000000ff000000ul) << 8)				      \
+      | (((x) & 0x0000000000ff0000ul) << 24)				      \
+      | (((x) & 0x000000000000ff00ul) << 40)				      \
+      | (((x) & 0x00000000000000fful) << 56))
+
 static __inline unsigned long long int
 __bswap_64 (unsigned long long int __bsx)
 {

Modified: fsf/trunk/libc/sysdeps/x86_64/bits/byteswap.h
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/bits/byteswap.h (original)
+++ fsf/trunk/libc/sysdeps/x86_64/bits/byteswap.h Sat Dec 24 00:05:56 2011
@@ -1,5 +1,5 @@
 /* Macros to swap the order of bytes in integer values.
-   Copyright (C) 1997, 1998, 2000, 2002, 2003, 2007, 2008, 2010
+   Copyright (C) 1997, 1998, 2000, 2002, 2003, 2007, 2008, 2010, 2011
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -99,14 +99,14 @@
 #if defined __GNUC__ && __GNUC__ >= 2
 /* Swap bytes in 64 bit value.  */
 # define __bswap_constant_64(x) \
-     ((((x) & 0xff00000000000000ull) >> 56)				      \
-      | (((x) & 0x00ff000000000000ull) >> 40)				      \
-      | (((x) & 0x0000ff0000000000ull) >> 24)				      \
-      | (((x) & 0x000000ff00000000ull) >> 8)				      \
-      | (((x) & 0x00000000ff000000ull) << 8)				      \
-      | (((x) & 0x0000000000ff0000ull) << 24)				      \
-      | (((x) & 0x000000000000ff00ull) << 40)				      \
-      | (((x) & 0x00000000000000ffull) << 56))
+     (__extension__ ((((x) & 0xff00000000000000ull) >> 56)		      \
+		     | (((x) & 0x00ff000000000000ull) >> 40)		      \
+		     | (((x) & 0x0000ff0000000000ull) >> 24)		      \
+		     | (((x) & 0x000000ff00000000ull) >> 8)		      \
+		     | (((x) & 0x00000000ff000000ull) << 8)		      \
+		     | (((x) & 0x0000000000ff0000ull) << 24)		      \
+		     | (((x) & 0x000000000000ff00ull) << 40)		      \
+		     | (((x) & 0x00000000000000ffull) << 56)))
 
 # if __WORDSIZE == 64
 #  define __bswap_64(x) \

Modified: fsf/trunk/libc/sysdeps/x86_64/dl-machine.h
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/dl-machine.h (original)
+++ fsf/trunk/libc/sysdeps/x86_64/dl-machine.h Sat Dec 24 00:05:56 2011
@@ -98,7 +98,7 @@
       /* The GOT entries for functions in the PLT have not yet been filled
 	 in.  Their initial contents will arrange when called to push an
 	 offset into the .rel.plt section, push _GLOBAL_OFFSET_TABLE_[1],
-	 and then jump to _GLOBAL_OFFSET_TABLE[2].  */
+	 and then jump to _GLOBAL_OFFSET_TABLE_[2].  */
       got = (Elf64_Addr *) D_PTR (l, l_info[DT_PLTGOT]);
       /* If a library is prelinked but we have to relocate anyway,
 	 we have to be able to undo the prelinking of .got.plt.
@@ -214,7 +214,7 @@
 /* The x86-64 never uses Elf64_Rel relocations.  */
 #define ELF_MACHINE_NO_REL 1
 
-/* We define an initialization functions.  This is called very early in
+/* We define an initialization function.  This is called very early in
    _dl_sysdep_start.  */
 #define DL_PLATFORM_INIT dl_platform_init ()
 
@@ -234,8 +234,8 @@
   return *reloc_addr = value;
 }
 
-/* Return the final value of a plt relocation.  On x86-64 the
-   JUMP_SLOT relocation ignores the addend. */
+/* Return the final value of a PLT relocation.  On x86-64 the
+   JUMP_SLOT relocation ignores the addend.  */
 static inline Elf64_Addr
 elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
 		       Elf64_Addr value)

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strcpy-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strcpy-ssse3.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strcpy-ssse3.S Sat Dec 24 00:05:56 2011
@@ -29,6 +29,7 @@
 
 	.section .text.ssse3,"ax",@progbits
 ENTRY (STRCPY)
+
 	mov	%rsi, %rcx
 #  ifdef USE_AS_STRNCPY
 	mov	%rdx, %r8
@@ -39,7 +40,7 @@
 	jz	L(Exit0)
 	cmp	$8, %r8
 	jbe	L(StrncpyExit8Bytes)
-#  endif
+# endif
 	cmpb	$0, (%rcx)
 	jz	L(Exit1)
 	cmpb	$0, 1(%rcx)
@@ -56,10 +57,10 @@
 	jz	L(Exit7)
 	cmpb	$0, 7(%rcx)
 	jz	L(Exit8)
-#  ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
 	cmp	$16, %r8
 	jb	L(StrncpyExit15Bytes)
-#  endif
+# endif
 	cmpb	$0, 8(%rcx)
 	jz	L(Exit9)
 	cmpb	$0, 9(%rcx)
@@ -74,10 +75,10 @@
 	jz	L(Exit14)
 	cmpb	$0, 14(%rcx)
 	jz	L(Exit15)
-#  ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
 	cmp	$16, %r8
 	je	L(Exit16)
-#  endif
+# endif
 	cmpb	$0, 15(%rcx)
 	jz	L(Exit16)
 # endif
@@ -87,25 +88,15 @@
 	sub	$16, %r8
 	and	$0xf, %rsi
 
-/* add 16 bytes rcx_shift to r8 */
+/* Add rcx_offset (rsi, masked to 0..15 above) to r8, which was just reduced by 16.  */
+
 	add	%rsi, %r8
 # endif
 	lea	16(%rcx), %rsi
-/* Now:
-	rsi	= alignment_16(rcx) + rcx_shift + 16;
-	rcx_shift = rcx - alignment_16(rcx)
-*/
 	and	$-16, %rsi
-/* Now:
-	rsi	= alignment_16(rcx) + 16
-*/
 	pxor	%xmm0, %xmm0
 	mov	(%rcx), %r9
 	mov	%r9, (%rdx)
-/*
-	look	if there is zero symbol in next 16 bytes of string
-	from	rsi to rsi + 15 and form mask in xmm0
-*/
 	pcmpeqb	(%rsi), %xmm0
 	mov	8(%rcx), %r9
 	mov	%r9, 8(%rdx)
@@ -115,10 +106,6 @@
 	pmovmskb %xmm0, %rax
 	sub	%rcx, %rsi
 
-/* rsi = 16 - rcx_shift */
-
-/* rax = 0: there isn't end of string from position rsi to rsi+15 */
-
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
@@ -128,16 +115,8 @@
 
 	mov	%rdx, %rax
 	lea	16(%rdx), %rdx
-/* Now:
-	rdx	= rdx + 16 = alignment_16(rdx) + rdx_shift + 16
-*/
 	and	$-16, %rdx
-
-/* Now: rdx = alignment_16(rdx) + 16 */
-
 	sub	%rdx, %rax
-
-/* Now: rax = rdx_shift - 16 */
 
 # ifdef USE_AS_STRNCPY
 	add	%rax, %rsi
@@ -150,22 +129,11 @@
 L(ContinueCopy):
 # endif
 	sub	%rax, %rcx
-/* Now:
-	case	rcx_shift >= rdx_shift:
-	rcx	= alignment_16(rcx) + (rcx_shift  - rdx_shift) + 16
-	case	rcx_shift < rdx_shift:
-	rcx	= alignment_16(rcx) + (16 + rcx_shift  - rdx_shift)
-*/
 	mov	%rcx, %rax
 	and	$0xf, %rax
-/* Now:
-	case	rcx_shift >= rdx_shift: rax = rcx_shift  - rdx_shift
-	case	rcx_shift < rdx_shift: rax = (16 + rcx_shift  - rdx_shift)
-	rax	can be 0, 1,	..., 15
-*/
 	mov	$0, %rsi
 
-/* case: rcx_shift == rdx_shift */
+/* case: rcx_offset == rdx_offset */
 
 	jz	L(Align16Both)
 
@@ -282,10 +250,11 @@
 	sub	%rcx, %rax
 	sub	%rax, %rdx
 # ifdef USE_AS_STRNCPY
-	lea	48+64(%r8, %rax), %r8
+	lea	112(%r8, %rax), %r8
 # endif
 	mov	$-0x40, %rsi
 
+	.p2align 4
 L(Aligned64Loop):
 	movaps	(%rcx), %xmm2
 	movaps	%xmm2, %xmm4
@@ -366,7 +335,6 @@
 	jnz	L(Shl1LoopExit)
 
 	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	31(%rcx), %xmm2
 
@@ -374,7 +342,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit1Case2OrCase3)
@@ -382,16 +350,30 @@
 	test	%rax, %rax
 	jnz	L(Shl1LoopExit)
 
+	palignr	$1, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	31(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit1Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl1LoopExit)
+
 	palignr	$1, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	31(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit1Case2OrCase3)
@@ -399,25 +381,7 @@
 	test	%rax, %rax
 	jnz	L(Shl1LoopExit)
 
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	31(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit1Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl1LoopExit)
-
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$1, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	31(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -432,6 +396,8 @@
 # endif
 	movaps	-1(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl1LoopStart):
 	movaps	15(%rcx), %xmm2
 	movaps	31(%rcx), %xmm3
@@ -465,11 +431,9 @@
 	jmp	L(Shl1LoopStart)
 
 L(Shl1LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$15, %xmm6
+	movdqu	-1(%rcx), %xmm1
 	mov	$15, %rsi
-	palignr	$1, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	movdqu	%xmm1, -1(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -488,7 +452,6 @@
 	jnz	L(Shl2LoopExit)
 
 	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	30(%rcx), %xmm2
 
@@ -496,7 +459,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit2Case2OrCase3)
@@ -504,16 +467,30 @@
 	test	%rax, %rax
 	jnz	L(Shl2LoopExit)
 
+	palignr	$2, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	30(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit2Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl2LoopExit)
+
 	palignr	$2, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	30(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit2Case2OrCase3)
@@ -521,25 +498,7 @@
 	test	%rax, %rax
 	jnz	L(Shl2LoopExit)
 
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	30(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit2Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl2LoopExit)
-
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$2, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	30(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -554,6 +513,8 @@
 # endif
 	movaps	-2(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl2LoopStart):
 	movaps	14(%rcx), %xmm2
 	movaps	30(%rcx), %xmm3
@@ -587,11 +548,9 @@
 	jmp	L(Shl2LoopStart)
 
 L(Shl2LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$14, %xmm6
+	movdqu	-2(%rcx), %xmm1
 	mov	$14, %rsi
-	palignr	$2, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	movdqu	%xmm1, -2(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -610,7 +569,6 @@
 	jnz	L(Shl3LoopExit)
 
 	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	29(%rcx), %xmm2
 
@@ -618,7 +576,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit3Case2OrCase3)
@@ -626,16 +584,30 @@
 	test	%rax, %rax
 	jnz	L(Shl3LoopExit)
 
+	palignr	$3, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	29(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit3Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl3LoopExit)
+
 	palignr	$3, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	29(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit3Case2OrCase3)
@@ -643,25 +615,7 @@
 	test	%rax, %rax
 	jnz	L(Shl3LoopExit)
 
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	29(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit3Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl3LoopExit)
-
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$3, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	29(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -676,6 +630,8 @@
 # endif
 	movaps	-3(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl3LoopStart):
 	movaps	13(%rcx), %xmm2
 	movaps	29(%rcx), %xmm3
@@ -709,11 +665,9 @@
 	jmp	L(Shl3LoopStart)
 
 L(Shl3LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$13, %xmm6
+	movdqu	-3(%rcx), %xmm1
 	mov	$13, %rsi
-	palignr	$3, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	movdqu	%xmm1, -3(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -732,7 +686,6 @@
 	jnz	L(Shl4LoopExit)
 
 	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	28(%rcx), %xmm2
 
@@ -740,7 +693,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit4Case2OrCase3)
@@ -748,16 +701,30 @@
 	test	%rax, %rax
 	jnz	L(Shl4LoopExit)
 
+	palignr	$4, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	28(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit4Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl4LoopExit)
+
 	palignr	$4, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	28(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit4Case2OrCase3)
@@ -765,25 +732,7 @@
 	test	%rax, %rax
 	jnz	L(Shl4LoopExit)
 
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	28(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit4Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$4, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	28(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -798,6 +747,8 @@
 # endif
 	movaps	-4(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl4LoopStart):
 	movaps	12(%rcx), %xmm2
 	movaps	28(%rcx), %xmm3
@@ -831,11 +782,9 @@
 	jmp	L(Shl4LoopStart)
 
 L(Shl4LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$12, %xmm6
+	movdqu	-4(%rcx), %xmm1
 	mov	$12, %rsi
-	palignr	$4, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	movdqu	%xmm1, -4(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -854,7 +803,6 @@
 	jnz	L(Shl5LoopExit)
 
 	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	27(%rcx), %xmm2
 
@@ -862,7 +810,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit5Case2OrCase3)
@@ -870,16 +818,30 @@
 	test	%rax, %rax
 	jnz	L(Shl5LoopExit)
 
+	palignr	$5, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	27(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit5Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl5LoopExit)
+
 	palignr	$5, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	27(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit5Case2OrCase3)
@@ -887,25 +849,7 @@
 	test	%rax, %rax
 	jnz	L(Shl5LoopExit)
 
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	27(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit5Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl5LoopExit)
-
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$5, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	27(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -920,6 +864,8 @@
 # endif
 	movaps	-5(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl5LoopStart):
 	movaps	11(%rcx), %xmm2
 	movaps	27(%rcx), %xmm3
@@ -953,11 +899,9 @@
 	jmp	L(Shl5LoopStart)
 
 L(Shl5LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$11, %xmm6
+	movdqu	-5(%rcx), %xmm1
 	mov	$11, %rsi
-	palignr	$5, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	movdqu	%xmm1, -5(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -976,7 +920,6 @@
 	jnz	L(Shl6LoopExit)
 
 	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	26(%rcx), %xmm2
 
@@ -984,7 +927,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit6Case2OrCase3)
@@ -992,16 +935,30 @@
 	test	%rax, %rax
 	jnz	L(Shl6LoopExit)
 
+	palignr	$6, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	26(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit6Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl6LoopExit)
+
 	palignr	$6, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	26(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit6Case2OrCase3)
@@ -1009,25 +966,7 @@
 	test	%rax, %rax
 	jnz	L(Shl6LoopExit)
 
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	26(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit6Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl6LoopExit)
-
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$6, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	26(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -1042,6 +981,8 @@
 # endif
 	movaps	-6(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl6LoopStart):
 	movaps	10(%rcx), %xmm2
 	movaps	26(%rcx), %xmm3
@@ -1075,11 +1016,11 @@
 	jmp	L(Shl6LoopStart)
 
 L(Shl6LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$10, %xmm6
+	mov	(%rcx), %r9
+	mov	6(%rcx), %esi
+	mov	%r9, (%rdx)
+	mov	%esi, 6(%rdx)
 	mov	$10, %rsi
-	palignr	$6, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -1098,7 +1039,6 @@
 	jnz	L(Shl7LoopExit)
 
 	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	25(%rcx), %xmm2
 
@@ -1106,7 +1046,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit7Case2OrCase3)
@@ -1114,16 +1054,30 @@
 	test	%rax, %rax
 	jnz	L(Shl7LoopExit)
 
+	palignr	$7, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	25(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit7Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl7LoopExit)
+
 	palignr	$7, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	25(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit7Case2OrCase3)
@@ -1131,25 +1085,7 @@
 	test	%rax, %rax
 	jnz	L(Shl7LoopExit)
 
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	25(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit7Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl7LoopExit)
-
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$7, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	25(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -1164,6 +1100,8 @@
 # endif
 	movaps	-7(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl7LoopStart):
 	movaps	9(%rcx), %xmm2
 	movaps	25(%rcx), %xmm3
@@ -1197,11 +1135,11 @@
 	jmp	L(Shl7LoopStart)
 
 L(Shl7LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$9, %xmm6
+	mov	(%rcx), %r9
+	mov	5(%rcx), %esi
+	mov	%r9, (%rdx)
+	mov	%esi, 5(%rdx)
 	mov	$9, %rsi
-	palignr	$7, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -1220,7 +1158,6 @@
 	jnz	L(Shl8LoopExit)
 
 	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	24(%rcx), %xmm2
 
@@ -1228,7 +1165,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit8Case2OrCase3)
@@ -1236,16 +1173,30 @@
 	test	%rax, %rax
 	jnz	L(Shl8LoopExit)
 
+	palignr	$8, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	24(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit8Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl8LoopExit)
+
 	palignr	$8, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	24(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit8Case2OrCase3)
@@ -1253,25 +1204,7 @@
 	test	%rax, %rax
 	jnz	L(Shl8LoopExit)
 
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	24(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit8Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$8, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	24(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -1286,6 +1219,8 @@
 # endif
 	movaps	-8(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl8LoopStart):
 	movaps	8(%rcx), %xmm2
 	movaps	24(%rcx), %xmm3
@@ -1319,11 +1254,9 @@
 	jmp	L(Shl8LoopStart)
 
 L(Shl8LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$8, %xmm6
+	mov	(%rcx), %r9
 	mov	$8, %rsi
-	palignr	$8, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9, (%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -1342,7 +1275,6 @@
 	jnz	L(Shl9LoopExit)
 
 	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	23(%rcx), %xmm2
 
@@ -1350,7 +1282,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit9Case2OrCase3)
@@ -1358,16 +1290,30 @@
 	test	%rax, %rax
 	jnz	L(Shl9LoopExit)
 
+	palignr	$9, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	23(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit9Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl9LoopExit)
+
 	palignr	$9, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	23(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit9Case2OrCase3)
@@ -1375,25 +1321,7 @@
 	test	%rax, %rax
 	jnz	L(Shl9LoopExit)
 
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	23(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit9Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl9LoopExit)
-
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$9, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	23(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -1408,6 +1336,8 @@
 # endif
 	movaps	-9(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl9LoopStart):
 	movaps	7(%rcx), %xmm2
 	movaps	23(%rcx), %xmm3
@@ -1441,11 +1371,9 @@
 	jmp	L(Shl9LoopStart)
 
 L(Shl9LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$7, %xmm6
+	mov	-1(%rcx), %r9
 	mov	$7, %rsi
-	palignr	$9, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9, -1(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -1464,7 +1392,6 @@
 	jnz	L(Shl10LoopExit)
 
 	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	22(%rcx), %xmm2
 
@@ -1472,7 +1399,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit10Case2OrCase3)
@@ -1480,16 +1407,30 @@
 	test	%rax, %rax
 	jnz	L(Shl10LoopExit)
 
+	palignr	$10, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	22(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit10Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl10LoopExit)
+
 	palignr	$10, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	22(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit10Case2OrCase3)
@@ -1497,25 +1438,7 @@
 	test	%rax, %rax
 	jnz	L(Shl10LoopExit)
 
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	22(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit10Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl10LoopExit)
-
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$10, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	22(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -1530,6 +1453,8 @@
 # endif
 	movaps	-10(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl10LoopStart):
 	movaps	6(%rcx), %xmm2
 	movaps	22(%rcx), %xmm3
@@ -1563,11 +1488,9 @@
 	jmp	L(Shl10LoopStart)
 
 L(Shl10LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$6, %xmm6
+	mov	-2(%rcx), %r9
 	mov	$6, %rsi
-	palignr	$10, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9, -2(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -1586,7 +1509,6 @@
 	jnz	L(Shl11LoopExit)
 
 	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	21(%rcx), %xmm2
 
@@ -1594,7 +1516,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit11Case2OrCase3)
@@ -1602,16 +1524,30 @@
 	test	%rax, %rax
 	jnz	L(Shl11LoopExit)
 
+	palignr	$11, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	21(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit11Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl11LoopExit)
+
 	palignr	$11, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	21(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit11Case2OrCase3)
@@ -1619,25 +1555,7 @@
 	test	%rax, %rax
 	jnz	L(Shl11LoopExit)
 
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	21(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit11Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl11LoopExit)
-
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$11, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	21(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -1652,6 +1570,8 @@
 # endif
 	movaps	-11(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl11LoopStart):
 	movaps	5(%rcx), %xmm2
 	movaps	21(%rcx), %xmm3
@@ -1685,11 +1605,9 @@
 	jmp	L(Shl11LoopStart)
 
 L(Shl11LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$5, %xmm6
+	mov	-3(%rcx), %r9
 	mov	$5, %rsi
-	palignr	$11, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9, -3(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -1708,7 +1626,6 @@
 	jnz	L(Shl12LoopExit)
 
 	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	20(%rcx), %xmm2
 
@@ -1716,7 +1633,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit12Case2OrCase3)
@@ -1724,16 +1641,30 @@
 	test	%rax, %rax
 	jnz	L(Shl12LoopExit)
 
+	palignr	$12, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	20(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit12Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl12LoopExit)
+
 	palignr	$12, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	20(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit12Case2OrCase3)
@@ -1741,25 +1672,7 @@
 	test	%rax, %rax
 	jnz	L(Shl12LoopExit)
 
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	20(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit12Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$12, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	20(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -1774,6 +1687,8 @@
 # endif
 	movaps	-12(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl12LoopStart):
 	movaps	4(%rcx), %xmm2
 	movaps	20(%rcx), %xmm3
@@ -1807,11 +1722,9 @@
 	jmp	L(Shl12LoopStart)
 
 L(Shl12LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$4, %xmm6
+	mov	(%rcx), %r9d
 	mov	$4, %rsi
-	palignr	$12, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9d, (%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -1830,7 +1743,6 @@
 	jnz	L(Shl13LoopExit)
 
 	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	19(%rcx), %xmm2
 
@@ -1838,7 +1750,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit13Case2OrCase3)
@@ -1846,16 +1758,30 @@
 	test	%rax, %rax
 	jnz	L(Shl13LoopExit)
 
+	palignr	$13, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	19(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit13Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl13LoopExit)
+
 	palignr	$13, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	19(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit13Case2OrCase3)
@@ -1863,25 +1789,7 @@
 	test	%rax, %rax
 	jnz	L(Shl13LoopExit)
 
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	19(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit13Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl13LoopExit)
-
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$13, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	19(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -1896,6 +1804,8 @@
 # endif
 	movaps	-13(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl13LoopStart):
 	movaps	3(%rcx), %xmm2
 	movaps	19(%rcx), %xmm3
@@ -1929,11 +1839,9 @@
 	jmp	L(Shl13LoopStart)
 
 L(Shl13LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$3, %xmm6
+	mov	-1(%rcx), %r9d
 	mov	$3, %rsi
-	palignr	$13, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9d, -1(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -1952,7 +1860,6 @@
 	jnz	L(Shl14LoopExit)
 
 	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	18(%rcx), %xmm2
 
@@ -1960,7 +1867,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit14Case2OrCase3)
@@ -1968,16 +1875,30 @@
 	test	%rax, %rax
 	jnz	L(Shl14LoopExit)
 
+	palignr	$14, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	18(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit14Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl14LoopExit)
+
 	palignr	$14, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	18(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit14Case2OrCase3)
@@ -1985,25 +1906,7 @@
 	test	%rax, %rax
 	jnz	L(Shl14LoopExit)
 
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	18(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit14Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl14LoopExit)
-
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$14, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	18(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -2018,6 +1921,8 @@
 # endif
 	movaps	-14(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl14LoopStart):
 	movaps	2(%rcx), %xmm2
 	movaps	18(%rcx), %xmm3
@@ -2051,11 +1956,9 @@
 	jmp	L(Shl14LoopStart)
 
 L(Shl14LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$2, %xmm6
+	mov	-2(%rcx), %r9d
 	mov	$2, %rsi
-	palignr	$14, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9d, -2(%rdx)
 	jmp	L(CopyFrom1To16Bytes)
 
 	.p2align 4
@@ -2074,7 +1977,6 @@
 	jnz	L(Shl15LoopExit)
 
 	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
 	movaps	%xmm2, (%rdx)
 	movaps	17(%rcx), %xmm2
 
@@ -2082,7 +1984,7 @@
 	lea	16(%rdx), %rdx
 	pmovmskb %xmm0, %rax
 	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+	movaps	%xmm2, %xmm1
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit15Case2OrCase3)
@@ -2090,16 +1992,30 @@
 	test	%rax, %rax
 	jnz	L(Shl15LoopExit)
 
+	palignr	$15, %xmm3, %xmm2
+	movaps	%xmm2, (%rdx)
+	movaps	17(%rcx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
+	movaps	%xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+	sub	$16, %r8
+	jbe	L(StrncpyExit15Case2OrCase3)
+# endif
+	test	%rax, %rax
+	jnz	L(Shl15LoopExit)
+
 	palignr	$15, %xmm1, %xmm2
 	movaps	%xmm2, (%rdx)
 	movaps	17(%rcx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%rdx), %rdx
+	pmovmskb %xmm0, %rax
+	lea	16(%rcx), %rcx
 # ifdef USE_AS_STRNCPY
 	sub	$16, %r8
 	jbe	L(StrncpyExit15Case2OrCase3)
@@ -2107,25 +2023,7 @@
 	test	%rax, %rax
 	jnz	L(Shl15LoopExit)
 
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%rdx)
-	movaps	17(%rcx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%rdx), %rdx
-	pmovmskb %xmm0, %rax
-	lea	16(%rcx), %rcx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %r8
-	jbe	L(StrncpyExit15Case2OrCase3)
-# endif
-	test	%rax, %rax
-	jnz	L(Shl15LoopExit)
-
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
+	palignr	$15, %xmm3, %xmm2
 	movaps	%xmm2, (%rdx)
 	lea	17(%rcx), %rcx
 	lea	16(%rdx), %rdx
@@ -2140,6 +2038,8 @@
 # endif
 	movaps	-15(%rcx), %xmm1
 
+/* 64 bytes loop */
+	.p2align 4
 L(Shl15LoopStart):
 	movaps	1(%rcx), %xmm2
 	movaps	17(%rcx), %xmm3
@@ -2173,16 +2073,15 @@
 	jmp	L(Shl15LoopStart)
 
 L(Shl15LoopExit):
-	movaps	(%rdx), %xmm6
-	psrldq	$1, %xmm6
+	mov	-3(%rcx), %r9d
 	mov	$1, %rsi
-	palignr	$15, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9d, -3(%rdx)
 # ifdef USE_AS_STRCAT
 	jmp	L(CopyFrom1To16Bytes)
 # endif
 
 # ifndef USE_AS_STRCAT
+
 	.p2align 4
 L(CopyFrom1To16Bytes):
 #  ifdef USE_AS_STRNCPY
@@ -2463,7 +2362,7 @@
 #   ifdef USE_AS_STPCPY
 	cmpb	$1, (%rax)
 	sbb	$-1, %rax
-#  endif
+#   endif
 #  endif
 	ret
 
@@ -2485,7 +2384,7 @@
 #   ifdef USE_AS_STPCPY
 	cmpb	$1, (%rax)
 	sbb	$-1, %rax
-#   endif
+#  endif
 #  endif
 	ret
 
@@ -2507,7 +2406,7 @@
 #   ifdef USE_AS_STPCPY
 	cmpb	$1, (%rax)
 	sbb	$-1, %rax
-#   endif
+#  endif
 #  endif
 	ret
 
@@ -2617,7 +2516,7 @@
 #   ifdef USE_AS_STPCPY
 	cmpb	$1, (%rax)
 	sbb	$-1, %rax
-#   endif
+#  endif
 #  endif
 	ret
 
@@ -2955,11 +2854,10 @@
 	ret
 
 #  endif
-
-# endif
-
-# ifdef USE_AS_STRNCPY
-
+# endif
+
+# ifdef USE_AS_STRNCPY
+	.p2align 4
 L(StrncpyLeaveCase2OrCase3):
 	test	%rax, %rax
 	jnz	L(Aligned64LeaveCase2)
@@ -3014,710 +2912,639 @@
 	lea	-16(%r8), %r8
 	jmp	L(CopyFrom1To16BytesCase2)
 /*--------------------------------------------------*/
+	.p2align 4
 L(StrncpyExit1Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$15, %xmm6
+	movdqu	-1(%rcx), %xmm0
+	movdqu	%xmm0, -1(%rdx)
 	mov	$15, %rsi
-	palignr	$1, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit2Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$14, %xmm6
+	movdqu	-2(%rcx), %xmm0
+	movdqu	%xmm0, -2(%rdx)
 	mov	$14, %rsi
-	palignr	$2, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit3Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$13, %xmm6
+	movdqu	-3(%rcx), %xmm0
+	movdqu	%xmm0, -3(%rdx)
 	mov	$13, %rsi
-	palignr	$3, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit4Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$12, %xmm6
+	movdqu	-4(%rcx), %xmm0
+	movdqu	%xmm0, -4(%rdx)
 	mov	$12, %rsi
-	palignr	$4, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit5Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$11, %xmm6
+	movdqu	-5(%rcx), %xmm0
+	movdqu	%xmm0, -5(%rdx)
 	mov	$11, %rsi
-	palignr	$5, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit6Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$10, %xmm6
+	mov	(%rcx), %rsi
+	mov	6(%rcx), %r9d
+	mov	%r9d, 6(%rdx)
+	mov	%rsi, (%rdx)
+	test	%rax, %rax
 	mov	$10, %rsi
-	palignr	$6, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
-	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit7Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$9, %xmm6
+	mov	(%rcx), %rsi
+	mov	5(%rcx), %r9d
+	mov	%r9d, 5(%rdx)
+	mov	%rsi, (%rdx)
+	test	%rax, %rax
 	mov	$9, %rsi
-	palignr	$7, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
-	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit8Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$8, %xmm6
+	mov	(%rcx), %r9
 	mov	$8, %rsi
-	palignr	$8, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9, (%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit9Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$7, %xmm6
+	mov	-1(%rcx), %r9
 	mov	$7, %rsi
-	palignr	$9, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9, -1(%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit10Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$6, %xmm6
+	mov	-2(%rcx), %r9
 	mov	$6, %rsi
-	palignr	$10, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9, -2(%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit11Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$5, %xmm6
+	mov	-3(%rcx), %r9
 	mov	$5, %rsi
-	palignr	$11, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9, -3(%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit12Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$4, %xmm6
+	mov	(%rcx), %r9d
 	mov	$4, %rsi
-	palignr	$12, %xmm1, %xmm6
-	movaps	%xmm6, (%rdx)
+	mov	%r9d, (%rdx)
 	test	%rax, %rax
 	jnz	L(CopyFrom1To16BytesCase2)
 	jmp	L(CopyFrom1To16BytesCase3)
 
+	.p2align 4
 L(StrncpyExit13Case2OrCase3):
-	movaps	(%rdx), %xmm6
-	psrldq	$3, %xmm6
+	mov	-1(%rcx), %r9d
 	mov	$3, %rsi
-	palignr	$13, %xmm1, %xmm6

[... 1029 lines stripped ...]
_______________________________________________
Commits mailing list
Commits@xxxxxxxxxx
http://eglibc.org/cgi-bin/mailman/listinfo/commits