[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[commits] r5439 - in /fsf/trunk/libc: ./ dlfcn/ elf/ io/ localedata/ localedata/locales/ nptl/ nptl/sysdeps/unix/sysv/linux/bits/ stdl...



Author: eglibc
Date: Sat Mar  8 00:05:47 2008
New Revision: 5439

Log:
Import glibc-mainline for 2008-03-08

Added:
    fsf/trunk/libc/stdlib/tens_in_limb.c
    fsf/trunk/libc/sysdeps/x86_64/rtld-memset.c
Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/dlfcn/dlinfo.c
    fsf/trunk/libc/elf/Versions
    fsf/trunk/libc/elf/dl-iteratephdr.c
    fsf/trunk/libc/elf/dl-sysdep.c
    fsf/trunk/libc/elf/dl-tls.c
    fsf/trunk/libc/elf/rtld.c
    fsf/trunk/libc/io/tst-fchownat.c
    fsf/trunk/libc/localedata/ChangeLog
    fsf/trunk/libc/localedata/locales/es_CR
    fsf/trunk/libc/nptl/ChangeLog
    fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/bits/posix_opt.h
    fsf/trunk/libc/stdlib/Makefile
    fsf/trunk/libc/stdlib/strtod_l.c
    fsf/trunk/libc/sysdeps/generic/ldsodefs.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/posix_opt.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-osinfo.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-sysdep.c
    fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-sysdep.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/fpathconf.c
    fsf/trunk/libc/sysdeps/unix/sysv/linux/pathconf.c
    fsf/trunk/libc/sysdeps/unix/sysv/linux/pathconf.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/sysconf.c
    fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c
    fsf/trunk/libc/sysdeps/x86_64/memset.S

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Sat Mar  8 00:05:47 2008
@@ -1,3 +1,61 @@
+2008-03-07  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	[BZ #5786]
+	* elf/dl-sysdep.c: Undefine ROUND after use.
+	* sysdeps/generic/ldsodefs.h [HAVE_DL_DISCOVER_OSVERSION]
+	(struct rtld_global_ro): Add _dl_discover_osversion element.
+	* elf/rtld.c (rtld_global_ro): Initialize _dl_discover_osversion.
+	* sysdeps/unix/sysv/linux/dl-osinfo.h: Move _dl_discover_osversion
+	to ...
+	* sysdeps/unix/sysv/linux/dl-sysdep.c: ...here.
+	* sysdeps/unix/sysv/linux/dl-sysdep.h: Declare _dl_discover_osversion
+	if necessary.
+	* sysdeps/unix/sysv/linux/sysconf.c: Handle _SC_ARG_MAX here.
+
+	* sysdeps/generic/ldsodefs.h (struct rtld_global_ro): Add
+	_dl_tls_get_addr_soft element.
+	* elf/rtld.c (rtld_global_ro): Initialize _dl_tls_get_addr_soft.
+	* elf/Versions (ld): Don't export _dl_tls_get_addr_soft.
+	* dlfcn/dlinfo.c (dlinfo_doit): Access dl_tls_get_addr_soft through
+	GLRO.
+	* elf/dl-iteratephdr.c (__dl_iterate_phdr): Likewise.
+	* elf/dl-tls.c (_dl_tls_get_addr_soft): Use attribute_hidden instead
+	of internal_function.
+
+	* stdlib/Makefile (aux): Add tens_in_limb.
+	* stdlib/strtod_l.c: Move _tens_in_limb definition to...
+	* stdlib/tens_in_limb.c: ...here.  New file.
+
+	[BZ #5778]
+	* sysdeps/unix/sysv/linux/pathconf.h: Declare
+	__statfs_chown_restricted.
+	* sysdeps/unix/sysv/linux/fpathconf.c: Call __statfs_chown_restricted
+	for _PC_CHOWN_RESTRICTED.
+	* sysdeps/unix/sysv/linux/pathconf.c: Likewise.
+	Implement __statfs_chown_restricted.
+	* sysdeps/unix/sysv/linux/bits/posix_opt.h: Change
+	_POSIX_CHOWN_RESTRICTED value to zero.
+	* io/tst-fchownat.c: Correctly handle _POSIX_CHOWN_RESTRICTED
+	defined to zero.
+
+	* sysdeps/x86_64/rtld-memset.c: New file.
+
+2008-02-26  Harsha Jagasia  <harsha.jagasia@xxxxxxx>
+
+	* sysdeps/x86_64/cacheinfo.c (NOT_USED_RIGHT_NOW): Remove ifdef guards.
+
+	* sysdeps/x86_64/memset.S: Rewrite non-SSE code path as tuned for AMD
+	Barcelona machine.  Make default fall through branch of
+	__x86_64_preferred_memory_instruction check as the integer code path.
+
+2007-10-15  H.J. Lu  <hongjiu.lu@xxxxxxxxx>
+
+	* sysdeps/x86_64/cacheinfo.c
+	(__x86_64_preferred_memory_instruction): New variable.
+	(init_cacheinfo): Initialize __x86_64_preferred_memory_instruction.
+
+	* sysdeps/x86_64/memset.S: Rewrite.
+
 2008-03-04  Jakub Jelinek  <jakub@xxxxxxxxxx>
 
 	* include/stdio.h (__asprintf_chk, __dprintf_chk,
@@ -309,7 +367,7 @@
 	* stdlib/Makefile: Add rules to build and run tst-makecontext2.
 	* stdlib/tst-makecontext2.c: New test.
 
-008-01-08  Jakub Jelinek  <jakub@xxxxxxxxxx>
+2008-01-08  Jakub Jelinek  <jakub@xxxxxxxxxx>
 
 	* iconv/loop.c (UPDATE_PARAMS): Define to empty statement if not
 	defined.
@@ -392,7 +450,7 @@
 
 2007-12-17  Ulrich Drepper  <drepper@xxxxxxxxxx>
 
-	* malloc/malloc.c (public_cALLOc): For arena other than
+	* malloc/malloc.c (public_cALLOc): For arenas other than
 	main_arena, count all bytes inside the mprotect_size range of the
 	heap as uninitialized.
 

Modified: fsf/trunk/libc/dlfcn/dlinfo.c
==============================================================================
--- fsf/trunk/libc/dlfcn/dlinfo.c (original)
+++ fsf/trunk/libc/dlfcn/dlinfo.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* dlinfo -- Get information from the dynamic linker.
-   Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2006, 2007, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -101,7 +101,7 @@
       {
 	void *data = NULL;
 	if (l->l_tls_modid != 0)
-	  data = _dl_tls_get_addr_soft (l);
+	  data = GLRO(dl_tls_get_addr_soft) (l);
 	*(void **) args->arg = data;
 	break;
       }

Modified: fsf/trunk/libc/elf/Versions
==============================================================================
--- fsf/trunk/libc/elf/Versions (original)
+++ fsf/trunk/libc/elf/Versions Sat Mar  8 00:05:47 2008
@@ -57,7 +57,6 @@
     _dl_allocate_tls; _dl_deallocate_tls;
     _dl_get_tls_static_info; _dl_allocate_tls_init;
     _dl_tls_setup; _dl_rtld_di_serinfo;
-    _dl_tls_get_addr_soft;
     _dl_make_stack_executable;
     # Only here for gdb while a better method is developed.
     _dl_debug_state;

Modified: fsf/trunk/libc/elf/dl-iteratephdr.c
==============================================================================
--- fsf/trunk/libc/elf/dl-iteratephdr.c (original)
+++ fsf/trunk/libc/elf/dl-iteratephdr.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Get loaded objects program headers.
-   Copyright (C) 2001,2002,2003,2004,2006,2007 Free Software Foundation, Inc.
+   Copyright (C) 2001-2004,2006,2007,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Jakub Jelinek <jakub@xxxxxxxxxx>, 2001.
 
@@ -72,7 +72,7 @@
       info.dlpi_tls_data = NULL;
       info.dlpi_tls_modid = l->l_tls_modid;
       if (info.dlpi_tls_modid != 0)
-	info.dlpi_tls_data = _dl_tls_get_addr_soft (l);
+	info.dlpi_tls_data = GLRO(dl_tls_get_addr_soft) (l);
       ret = callback (&info, sizeof (struct dl_phdr_info), data);
       if (ret)
 	break;

Modified: fsf/trunk/libc/elf/dl-sysdep.c
==============================================================================
--- fsf/trunk/libc/elf/dl-sysdep.c (original)
+++ fsf/trunk/libc/elf/dl-sysdep.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Operating system support for run-time dynamic linker.  Generic Unix version.
-   Copyright (C) 1995-1998, 2000-2006, 2007 Free Software Foundation, Inc.
+   Copyright (C) 1995-1998, 2000-2007, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -398,6 +398,7 @@
 		  }
 		note = ((const void *) (note + 1)
 			+ ROUND (note->vendorlen) + ROUND (note->datalen));
+#undef ROUND
 	      }
 	    if (dsocaps != NULL)
 	      break;

Modified: fsf/trunk/libc/elf/dl-tls.c
==============================================================================
--- fsf/trunk/libc/elf/dl-tls.c (original)
+++ fsf/trunk/libc/elf/dl-tls.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Thread-local storage handling in the ELF dynamic linker.  Generic version.
-   Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc.
+   Copyright (C) 2002,2003,2004,2005,2006,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -758,7 +758,6 @@
 /* Look up the module's TLS block as for __tls_get_addr,
    but never touch anything.  Return null if it's not allocated yet.  */
 void *
-internal_function
 _dl_tls_get_addr_soft (struct link_map *l)
 {
   if (__builtin_expect (l->l_tls_modid == 0, 0))

Modified: fsf/trunk/libc/elf/rtld.c
==============================================================================
--- fsf/trunk/libc/elf/rtld.c (original)
+++ fsf/trunk/libc/elf/rtld.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Run time dynamic linker.
-   Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
+   Copyright (C) 1995-2006, 2007, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -161,7 +161,11 @@
     ._dl_lookup_symbol_x = _dl_lookup_symbol_x,
     ._dl_check_caller = _dl_check_caller,
     ._dl_open = _dl_open,
-    ._dl_close = _dl_close
+    ._dl_close = _dl_close,
+    ._dl_tls_get_addr_soft = _dl_tls_get_addr_soft,
+#ifdef HAVE_DL_DISCOVER_OSVERSION
+    ._dl_discover_osversion = _dl_discover_osversion
+#endif
   };
 /* If we would use strong_alias here the compiler would see a
    non-hidden definition.  This would undo the effect of the previous

Modified: fsf/trunk/libc/io/tst-fchownat.c
==============================================================================
--- fsf/trunk/libc/io/tst-fchownat.c (original)
+++ fsf/trunk/libc/io/tst-fchownat.c Sat Mar  8 00:05:47 2008
@@ -19,14 +19,17 @@
 static void
 prepare (void)
 {
-#if _POSIX_CHOWN_RESTRICTED > 0
-  uid_t uid = getuid ();
-  if (uid != 0)
+#if _POSIX_CHOWN_RESTRICTED == 0
+  if (pathconf (test_dir, _PC_CHOWN_RESTRICTED) != 0)
+#endif
     {
-      puts ("need root privileges");
-      exit (0);
+      uid_t uid = getuid ();
+      if (uid != 0)
+	{
+	  puts ("need root privileges");
+	  exit (0);
+	}
     }
-#endif
 
   size_t test_dir_len = strlen (test_dir);
   static const char dir_name[] = "/tst-fchownat.XXXXXX";

Modified: fsf/trunk/libc/localedata/ChangeLog
==============================================================================
--- fsf/trunk/libc/localedata/ChangeLog (original)
+++ fsf/trunk/libc/localedata/ChangeLog Sat Mar  8 00:05:47 2008
@@ -1,3 +1,8 @@
+2008-03-07  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	[BZ #5777]
+	* locales/es_CR: Define am/pm format.
+
 2008-03-04  Ulrich Drepper  <drepper@xxxxxxxxxx>
 
 	* SUPPORTED (SUPPORTED-LOCALES): Add shs_CA.UTF-8.

Modified: fsf/trunk/libc/localedata/locales/es_CR
==============================================================================
--- fsf/trunk/libc/localedata/locales/es_CR (original)
+++ fsf/trunk/libc/localedata/locales/es_CR Sat Mar  8 00:05:47 2008
@@ -108,8 +108,8 @@
 d_t_fmt "<U0025><U0061><U0020><U0025><U0064><U0020><U0025><U0062><U0020><U0025><U0059><U0020><U0025><U0054><U0020><U0025><U005A>"
 d_fmt   "<U0025><U0064><U002F><U0025><U006D><U002F><U0025><U0079>"
 t_fmt   "<U0025><U0054>"
-am_pm   "";""
-t_fmt_ampm ""
+am_pm   "<U0061><U002E><U006D><U002E>";"<U0070><U002E><U006D><U002E>"
+t_fmt_ampm "<U0025><U0049><U003A><U0025><U004D><U003A><U0025><U0053><U0020><U0025><U0070>"
 date_fmt	"<U0025><U0061><U0020><U0025><U0062><U0020><U0025><U0065>/
 <U0020><U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053><U0020>/
 <U0025><U005A><U0020><U0025><U0059>"

Modified: fsf/trunk/libc/nptl/ChangeLog
==============================================================================
--- fsf/trunk/libc/nptl/ChangeLog (original)
+++ fsf/trunk/libc/nptl/ChangeLog Sat Mar  8 00:05:47 2008
@@ -1,3 +1,9 @@
+2008-03-07  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	[BZ #5778]
+	* sysdeps/unix/sysv/linux/bits/posix_opt.h: Change
+	_POSIX_CHOWN_RESTRICTED value to zero.
+
 2008-01-31  Roland McGrath  <roland@xxxxxxxxxx>
 
 	* Makefile (omit-deps): Variable removed.

Modified: fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/bits/posix_opt.h
==============================================================================
--- fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/bits/posix_opt.h (original)
+++ fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/bits/posix_opt.h Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Define POSIX options for Linux.
-   Copyright (C) 1996-2004, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1996-2004, 2006, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -47,8 +47,8 @@
 /* Setting of memory protections is supported.  */
 #define	_POSIX_MEMORY_PROTECTION	200112L
 
-/* Only root can change owner of file.  */
-#define	_POSIX_CHOWN_RESTRICTED	1
+/* Some filesystems allow all users to change file ownership.  */
+#define	_POSIX_CHOWN_RESTRICTED	0
 
 /* `c_cc' member of 'struct termios' structure can be disabled by
    using the value _POSIX_VDISABLE.  */

Modified: fsf/trunk/libc/stdlib/Makefile
==============================================================================
--- fsf/trunk/libc/stdlib/Makefile (original)
+++ fsf/trunk/libc/stdlib/Makefile Sat Mar  8 00:05:47 2008
@@ -52,7 +52,7 @@
 	rpmatch strfmon strfmon_l getsubopt xpg_basename fmtmsg		      \
 	strtoimax strtoumax wcstoimax wcstoumax				      \
 	getcontext setcontext makecontext swapcontext
-aux =	grouping groupingwc
+aux =	grouping groupingwc tens_in_limb
 
 # These routines will be omitted from the libc shared object.
 # Instead the static object files will be included in a special archive

Modified: fsf/trunk/libc/stdlib/strtod_l.c
==============================================================================
--- fsf/trunk/libc/stdlib/strtod_l.c (original)
+++ fsf/trunk/libc/stdlib/strtod_l.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Convert string representing a number to float value, using given locale.
-   Copyright (C) 1997,1998,2002,2004,2005,2006,2007
+   Copyright (C) 1997,1998,2002,2004,2005,2006,2007,2008
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@xxxxxxxxxx>, 1997.
@@ -148,23 +148,7 @@
 # error "mp_limb_t size " BITS_PER_MP_LIMB "not accounted for"
 #endif
 
-
-/* Local data structure.  */
-static const mp_limb_t _tens_in_limb[MAX_DIG_PER_LIMB + 1] =
-{    0,                   10,                   100,
-     1000,                10000,                100000L,
-     1000000L,            10000000L,            100000000L,
-     1000000000L
-#if BITS_PER_MP_LIMB > 32
-	        ,	  10000000000ULL,       100000000000ULL,
-     1000000000000ULL,    10000000000000ULL,    100000000000000ULL,
-     1000000000000000ULL, 10000000000000000ULL, 100000000000000000ULL,
-     1000000000000000000ULL, 10000000000000000000ULL
-#endif
-#if BITS_PER_MP_LIMB > 64
-  #error "Need to expand tens_in_limb table to" MAX_DIG_PER_LIMB
-#endif
-};
+extern const mp_limb_t _tens_in_limb[MAX_DIG_PER_LIMB + 1];
 
 #ifndef	howmany
 #define	howmany(x,y)		(((x)+((y)-1))/(y))

Added: fsf/trunk/libc/stdlib/tens_in_limb.c
==============================================================================
--- fsf/trunk/libc/stdlib/tens_in_limb.c (added)
+++ fsf/trunk/libc/stdlib/tens_in_limb.c Sat Mar  8 00:05:47 2008
@@ -1,0 +1,31 @@
+#include <gmp.h>
+
+
+/* Definitions according to limb size used.  */
+#if	BITS_PER_MP_LIMB == 32
+# define MAX_DIG_PER_LIMB	9
+# define MAX_FAC_PER_LIMB	1000000000UL
+#elif	BITS_PER_MP_LIMB == 64
+# define MAX_DIG_PER_LIMB	19
+# define MAX_FAC_PER_LIMB	10000000000000000000ULL
+#else
+# error "mp_limb_t size " BITS_PER_MP_LIMB "not accounted for"
+#endif
+
+
+/* Shared data structure; declared extern in strtod_l.c.  */
+const mp_limb_t _tens_in_limb[MAX_DIG_PER_LIMB + 1] =
+{    0,                   10,                   100,
+     1000,                10000,                100000L,
+     1000000L,            10000000L,            100000000L,
+     1000000000L
+#if BITS_PER_MP_LIMB > 32
+	        ,	  10000000000ULL,       100000000000ULL,
+     1000000000000ULL,    10000000000000ULL,    100000000000000ULL,
+     1000000000000000ULL, 10000000000000000ULL, 100000000000000000ULL,
+     1000000000000000000ULL, 10000000000000000000ULL
+#endif
+#if BITS_PER_MP_LIMB > 64
+  #error "Need to expand tens_in_limb table to" MAX_DIG_PER_LIMB
+#endif
+};

Modified: fsf/trunk/libc/sysdeps/generic/ldsodefs.h
==============================================================================
--- fsf/trunk/libc/sysdeps/generic/ldsodefs.h (original)
+++ fsf/trunk/libc/sysdeps/generic/ldsodefs.h Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Run-time dynamic linker data structures for loaded ELF shared objects.
-   Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
+   Copyright (C) 1995-2006, 2007, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -655,6 +655,10 @@
   void *(*_dl_open) (const char *file, int mode, const void *caller_dlopen,
 		     Lmid_t nsid, int argc, char *argv[], char *env[]);
   void (*_dl_close) (void *map);
+  void *(*_dl_tls_get_addr_soft) (struct link_map *);
+#ifdef HAVE_DL_DISCOVER_OSVERSION
+  int (*_dl_discover_osversion) (void);
+#endif
 
   /* List of auditing interfaces.  */
   struct audit_ifaces *_dl_audit;
@@ -1069,7 +1073,7 @@
 
 /* Look up the module's TLS block as for __tls_get_addr,
    but never touch anything.  Return null if it's not allocated yet.  */
-extern void *_dl_tls_get_addr_soft (struct link_map *l) internal_function;
+extern void *_dl_tls_get_addr_soft (struct link_map *l) attribute_hidden;
 
 extern int _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr)
      internal_function attribute_hidden;

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/posix_opt.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/posix_opt.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/posix_opt.h Sat Mar  8 00:05:47 2008
@@ -1,5 +1,6 @@
 /* Define POSIX options for Linux.
-   Copyright (C) 1996,1997,1999,2000,2002,2003 Free Software Foundation, Inc.
+   Copyright (C) 1996,1997,1999,2000,2002,2003,2008
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -51,8 +52,8 @@
 /* Setting of memory protections is supported.  */
 #define	_POSIX_MEMORY_PROTECTION	200112L
 
-/* Only root can change owner of file.  */
-#define	_POSIX_CHOWN_RESTRICTED	1
+/* Some filesystems allow all users to change file ownership.  */
+#define	_POSIX_CHOWN_RESTRICTED	0
 
 /* `c_cc' member of 'struct termios' structure can be disabled by
    using the value _POSIX_VDISABLE.  */

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-osinfo.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-osinfo.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-osinfo.h Sat Mar  8 00:05:47 2008
@@ -1,6 +1,5 @@
 /* Operating system specific code for generic dynamic loader functions.  Linux.
-   Copyright (C) 2000,2001,2002,2004,2005,2006,2007
-	Free Software Foundation, Inc.
+   Copyright (C) 2000-2002,2004-2007,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,9 +17,6 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#include <string.h>
-#include <fcntl.h>
-#include <sys/utsname.h>
 #include <kernel-features.h>
 #include <dl-sysdep.h>
 #include <stdint.h>
@@ -40,97 +36,6 @@
   _exit (1);
 }
 #endif
-
-static inline int __attribute__ ((always_inline))
-_dl_discover_osversion (void)
-{
-#if (defined NEED_DL_SYSINFO || defined NEED_DL_SYSINFO_DSO) && defined SHARED
-  if (GLRO(dl_sysinfo_map) != NULL)
-    {
-      /* If the kernel-supplied DSO contains a note indicating the kernel's
-	 version, we don't need to call uname or parse any strings.  */
-
-      static const struct
-      {
-	ElfW(Nhdr) hdr;
-	char vendor[8];
-      } expected_note = { { sizeof "Linux", sizeof (ElfW(Word)), 0 }, "Linux" };
-      const ElfW(Phdr) *const phdr = GLRO(dl_sysinfo_map)->l_phdr;
-      const ElfW(Word) phnum = GLRO(dl_sysinfo_map)->l_phnum;
-      for (uint_fast16_t i = 0; i < phnum; ++i)
-	if (phdr[i].p_type == PT_NOTE)
-	  {
-	    const ElfW(Addr) start = (phdr[i].p_vaddr
-				      + GLRO(dl_sysinfo_map)->l_addr);
-	    const ElfW(Nhdr) *note = (const void *) start;
-	    while ((ElfW(Addr)) (note + 1) - start < phdr[i].p_memsz)
-	      {
-		if (!memcmp (note, &expected_note, sizeof expected_note))
-		  return *(const ElfW(Word) *) ((const void *) note
-						+ sizeof expected_note);
-#define ROUND(len) (((len) + sizeof note->n_type - 1) & -sizeof note->n_type)
-		note = ((const void *) (note + 1)
-			+ ROUND (note->n_namesz) + ROUND (note->n_descsz));
-#undef ROUND
-	      }
-	  }
-    }
-#endif
-
-  char bufmem[64];
-  char *buf = bufmem;
-  unsigned int version;
-  int parts;
-  char *cp;
-  struct utsname uts;
-
-  /* Try the uname system call.  */
-  if (__uname (&uts))
-    {
-      /* This was not successful.  Now try reading the /proc filesystem.  */
-      int fd = __open ("/proc/sys/kernel/osrelease", O_RDONLY);
-      if (fd < 0)
-	return -1;
-      ssize_t reslen = __read (fd, bufmem, sizeof (bufmem));
-      __close (fd);
-      if (reslen <= 0)
-	/* This also didn't work.  We give up since we cannot
-	   make sure the library can actually work.  */
-	return -1;
-      buf[MIN (reslen, (ssize_t) sizeof (bufmem) - 1)] = '\0';
-    }
-  else
-    buf = uts.release;
-
-  /* Now convert it into a number.  The string consists of at most
-     three parts.  */
-  version = 0;
-  parts = 0;
-  cp = buf;
-  while ((*cp >= '0') && (*cp <= '9'))
-    {
-      unsigned int here = *cp++ - '0';
-
-      while ((*cp >= '0') && (*cp <= '9'))
-	{
-	  here *= 10;
-	  here += *cp++ - '0';
-	}
-
-      ++parts;
-      version <<= 8;
-      version |= here;
-
-      if (*cp++ != '.' || parts == 3)
-	/* Another part following?  */
-	break;
-    }
-
-  if (parts < 3)
-    version <<= 8 * (3 - parts);
-
-  return version;
-}
 
 #define DL_SYSDEP_OSCHECK(FATAL)					      \
   do {									      \

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-sysdep.c
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-sysdep.c (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-sysdep.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Dynamic linker system dependencies for Linux.
-   Copyright (C) 1995,1997,2001,2004,2005,2006 Free Software Foundation, Inc.
+   Copyright (C) 1995,1997,2001,2004,2005,2006, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,10 @@
 /* Linux needs some special initialization, but otherwise uses
    the generic dynamic linker system interface code.  */
 
+#include <string.h>
+#include <fcntl.h>
 #include <unistd.h>
+#include <sys/utsname.h>
 #include <ldsodefs.h>
 #include <kernel-features.h>
 
@@ -54,3 +57,96 @@
 }
 
 #include <elf/dl-sysdep.c>
+
+
+int
+attribute_hidden
+_dl_discover_osversion (void)
+{
+#if (defined NEED_DL_SYSINFO || defined NEED_DL_SYSINFO_DSO) && defined SHARED
+  if (GLRO(dl_sysinfo_map) != NULL)
+    {
+      /* If the kernel-supplied DSO contains a note indicating the kernel's
+	 version, we don't need to call uname or parse any strings.  */
+
+      static const struct
+      {
+	ElfW(Nhdr) hdr;
+	char vendor[8];
+      } expected_note = { { sizeof "Linux", sizeof (ElfW(Word)), 0 }, "Linux" };
+      const ElfW(Phdr) *const phdr = GLRO(dl_sysinfo_map)->l_phdr;
+      const ElfW(Word) phnum = GLRO(dl_sysinfo_map)->l_phnum;
+      for (uint_fast16_t i = 0; i < phnum; ++i)
+	if (phdr[i].p_type == PT_NOTE)
+	  {
+	    const ElfW(Addr) start = (phdr[i].p_vaddr
+				      + GLRO(dl_sysinfo_map)->l_addr);
+	    const ElfW(Nhdr) *note = (const void *) start;
+	    while ((ElfW(Addr)) (note + 1) - start < phdr[i].p_memsz)
+	      {
+		if (!memcmp (note, &expected_note, sizeof expected_note))
+		  return *(const ElfW(Word) *) ((const void *) note
+						+ sizeof expected_note);
+#define ROUND(len) (((len) + sizeof note->n_type - 1) & -sizeof note->n_type)
+		note = ((const void *) (note + 1)
+			+ ROUND (note->n_namesz) + ROUND (note->n_descsz));
+#undef ROUND
+	      }
+	  }
+    }
+#endif
+
+  char bufmem[64];
+  char *buf = bufmem;
+  unsigned int version;
+  int parts;
+  char *cp;
+  struct utsname uts;
+
+  /* Try the uname system call.  */
+  if (__uname (&uts))
+    {
+      /* This was not successful.  Now try reading the /proc filesystem.  */
+      int fd = __open ("/proc/sys/kernel/osrelease", O_RDONLY);
+      if (fd < 0)
+	return -1;
+      ssize_t reslen = __read (fd, bufmem, sizeof (bufmem));
+      __close (fd);
+      if (reslen <= 0)
+	/* This also didn't work.  We give up since we cannot
+	   make sure the library can actually work.  */
+	return -1;
+      buf[MIN (reslen, (ssize_t) sizeof (bufmem) - 1)] = '\0';
+    }
+  else
+    buf = uts.release;
+
+  /* Now convert it into a number.  The string consists of at most
+     three parts.  */
+  version = 0;
+  parts = 0;
+  cp = buf;
+  while ((*cp >= '0') && (*cp <= '9'))
+    {
+      unsigned int here = *cp++ - '0';
+
+      while ((*cp >= '0') && (*cp <= '9'))
+	{
+	  here *= 10;
+	  here += *cp++ - '0';
+	}
+
+      ++parts;
+      version <<= 8;
+      version |= here;
+
+      if (*cp++ != '.' || parts == 3)
+	/* Another part following?  */
+	break;
+    }
+
+  if (parts < 3)
+    version <<= 8 * (3 - parts);
+
+  return version;
+}

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-sysdep.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-sysdep.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/dl-sysdep.h Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* System-specific settings for dynamic linker code.  Linux version.
-   Copyright (C) 2005 Free Software Foundation, Inc.
+   Copyright (C) 2005, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -25,3 +25,12 @@
    we aren't making direct use of it.  So enable this across the board.  */
 
 #define NEED_DL_SYSINFO_DSO	1
+
+
+/* The _dl_discover_osversion function is so far only needed in sysconf
+   to check for kernel 2.6.23 or later.  */
+#if !defined ASSEMBLER && __LINUX_KERNEL_VERSION < 0x020617
+/* Get version of the OS.  */
+extern int _dl_discover_osversion (void) attribute_hidden;
+# define HAVE_DL_DISCOVER_OSVERSION	1
+#endif

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/fpathconf.c
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/fpathconf.c (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/fpathconf.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Get file-specific information about descriptor FD.  Linux version.
-   Copyright (C) 1991,1995,1996,1998-2002,2003 Free Software Foundation, Inc.
+   Copyright (C) 1991,1995,1996,1998-2003,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -45,6 +45,9 @@
     case _PC_2_SYMLINKS:
       return __statfs_symlinks (__fstatfs (fd, &fsbuf), &fsbuf);
 
+    case _PC_CHOWN_RESTRICTED:
+      return __statfs_chown_restricted (__fstatfs (fd, &fsbuf), &fsbuf);
+
     default:
       return posix_fpathconf (fd, name);
     }

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/pathconf.c
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/pathconf.c (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/pathconf.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Get file-specific information about a file.  Linux version.
-   Copyright (C) 1991,1995,1996,1998-2002,2003 Free Software Foundation, Inc.
+   Copyright (C) 1991,1995,1996,1998-2003,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,8 +19,10 @@
 
 #include <unistd.h>
 #include <errno.h>
+
 #include "pathconf.h"
 #include "linux_fsinfo.h"
+#include <not-cancel.h>
 
 static long int posix_pathconf (const char *file, int name);
 
@@ -45,6 +47,9 @@
 
     case _PC_2_SYMLINKS:
       return __statfs_symlinks (__statfs (file, &fsbuf), &fsbuf);
+
+    case _PC_CHOWN_RESTRICTED:
+      return __statfs_chown_restricted (__statfs (file, &fsbuf), &fsbuf);
 
     default:
       return posix_pathconf (file, name);
@@ -179,3 +184,44 @@
       return 1;
     }
 }
+
+
+/* Used like: return __statfs_chown_restricted (__statfs (name, &buf), &buf);*/
+long int
+__statfs_chown_restricted (int result, const struct statfs *fsbuf)
+{
+  if (result < 0)
+    {
+      if (errno == ENOSYS)
+	/* Not possible, return the default value.  */
+	return 1;
+
+      /* Some error occurred.  */
+      return -1;
+    }
+
+  int fd;
+  long int retval = 1;
+  switch (fsbuf->f_type)
+    {
+    case XFS_SUPER_MAGIC:
+      /* Read the value from /proc/sys/fs/xfs/restrict_chown.  If we cannot
+	 read it default to assume the restriction is in place.  */
+      fd = open_not_cancel_2 ("/proc/sys/fs/xfs/restrict_chown", O_RDONLY);
+      if (fd != -1)
+	{
+	  char buf[2];
+	  if (TEMP_FAILURE_RETRY (read_not_cancel (fd, buf, 2)) == 2
+	      && buf[0] >= '0' && buf[0] <= '1')
+	    retval = buf[0] - '0';
+
+	  close_not_cancel_no_status (fd);
+	}
+      break;
+
+    default:
+      break;
+    }
+
+  return retval;
+}

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/pathconf.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/pathconf.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/pathconf.h Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Common parts of Linux implementation of pathconf and fpathconf.
-   Copyright (C) 1991,1995,1996,1998-2002,2003 Free Software Foundation, Inc.
+   Copyright (C) 1991,1995,1996,1998-2003,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,13 +22,18 @@
 #include <sys/statfs.h>
 
 
-/* Used like: return statfs_link_max (__statfs (name, &buf), &buf); */
+/* Used like: return __statfs_link_max (__statfs (name, &buf), &buf); */
 extern long int __statfs_link_max (int result, const struct statfs *fsbuf);
 
 
-/* Used like: return statfs_filesize_max (__statfs (name, &buf), &buf); */
+/* Used like: return __statfs_filesize_max (__statfs (name, &buf), &buf); */
 extern long int __statfs_filesize_max (int result, const struct statfs *fsbuf);
 
 
-/* Used like: return statfs_link_max (__statfs (name, &buf), &buf); */
+/* Used like: return __statfs_symlinks (__statfs (name, &buf), &buf); */
 extern long int __statfs_symlinks (int result, const struct statfs *fsbuf);
+
+
+/* Used like: return __statfs_chown_restricted (__statfs (name, &buf), &buf);*/
+extern long int __statfs_chown_restricted (int result,
+					   const struct statfs *fsbuf);

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/sysconf.c
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/sysconf.c (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/sysconf.c Sat Mar  8 00:05:47 2008
@@ -1,5 +1,5 @@
 /* Get file-specific information about a file.  Linux version.
-   Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2006, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,9 @@
 #include <sysdep.h>
 #include <time.h>
 #include <unistd.h>
+#include <sys/resource.h>
 #include <not-cancel.h>
+#include <ldsodefs.h>
 
 static long int posix_sysconf (int name);
 
@@ -69,6 +71,22 @@
 	return _POSIX_VERSION;
       }
 #endif
+
+    case _SC_ARG_MAX:
+#if __LINUX_KERNEL_VERSION < 0x020617
+      /* Determine whether this is a kernel 2.6.23 or later.  Only
+	 then do we have an argument limit determined by the stack
+	 size.  */
+      if (GLRO(dl_discover_osversion) () >= 0x020617)
+#endif
+	{
+	  /* Use getrlimit to get the stack limit.  */
+	  struct rlimit rlimit;
+	  if (__getrlimit (RLIMIT_STACK, &rlimit) == 0)
+	    return MAX (ARG_MAX, rlimit.rlim_cur / 4);
+	}
+
+      return ARG_MAX;
 
     case _SC_NGROUPS_MAX:
       /* Try to read the information from the /proc/sys/kernel/ngroups_max

Modified: fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c Sat Mar  8 00:05:47 2008
@@ -405,13 +405,10 @@
 /* Shared cache size for use in memory and string routines, typically
    L2 or L3 size.  */
 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
-#ifdef NOT_USED_RIGHT_NOW
 long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024;
-#endif
 /* PREFETCHW support flag for use in memory and string routines.  */
 int __x86_64_prefetchw attribute_hidden;
 
-#ifdef NOT_USED_RIGHT_NOW
 /* Instructions preferred for memory and string routines.
 
   0: Regular instructions
@@ -421,7 +418,6 @@
 
   */
 int __x86_64_preferred_memory_instruction attribute_hidden;
-#endif
 
 
 static void
@@ -464,14 +460,12 @@
 		    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
 		    : "0" (1));
 
-#ifdef NOT_USED_RIGHT_NOW
-      /* Intel prefers SSSE3 instructions for memory/string rountines
+      /* Intel prefers SSSE3 instructions for memory/string routines
 	 if they are avaiable.  */
       if ((ecx & 0x200))
 	__x86_64_preferred_memory_instruction = 3;
       else
 	__x86_64_preferred_memory_instruction = 2;
-#endif
 
       /* Figure out the number of logical threads that share the
 	 highest cache level.  */
@@ -577,8 +571,6 @@
   if (shared > 0)
     {
       __x86_64_shared_cache_size_half = shared / 2;
-#ifdef NOT_USED_RIGHT_NOW
       __x86_64_shared_cache_size = shared;
-#endif
     }
 }

Modified: fsf/trunk/libc/sysdeps/x86_64/memset.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/memset.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/memset.S Sat Mar  8 00:05:47 2008
@@ -1,8 +1,7 @@
 /* memset/bzero -- set memory area to CH/0
    Optimized version for x86-64.
-   Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+   Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Andreas Jaeger <aj@xxxxxxx>.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -20,13 +19,9 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
-#include "asm-syntax.h"
-#include "bp-sym.h"
-#include "bp-asm.h"
-
-/* This is somehow experimental and could made dependend on the cache
-   size.  */
-#define LARGE $120000
+
+#define __STOS_LOWER_BOUNDARY	$8192
+#define __STOS_UPPER_BOUNDARY	$65536
 
         .text
 #ifndef NOT_IN_libc
@@ -46,89 +41,1291 @@
 #endif
 ENTRY (memset)
 L(memset_entry):
-	cmp	$0x7,%rdx	/* Check for small length.  */
-	mov	%rdi,%rcx	/* Save ptr as return value.  */
-	jbe	7f
-
-	/* Populate 8 bit data to full 64-bit.  */
-	movabs	$0x0101010101010101,%r8
-	movzbl	%sil,%eax
-	imul	%rax,%r8
-	test	$0x7,%edi	/* Check for alignment.  */
-	je	2f
+	cmp    $0x1,%rdx
+	mov    %rdi,%rax	/* memset returns the dest address.  */
+	jne    L(ck2)
+	mov    %sil,(%rdi)
+	retq
+L(ck2):
+	mov    $0x101010101010101,%r9
+	mov    %rdx,%r8
+	movzbq %sil,%rdx
+	imul   %r9,%rdx
+L(now_dw_aligned):
+	cmp    $0x90,%r8
+	jg     L(ck_mem_ops_method)
+L(now_dw_aligned_small):
+	lea    L(setPxQx)(%rip),%r11
+	add    %r8,%rdi
+#ifndef PIC
+	jmpq   *(%r11,%r8,8)
+#else
+	movslq (%r11,%r8,4),%rcx
+	lea    (%rcx,%r11,1),%r11
+	jmpq   *%r11
+#endif
+
+L(Got0):
+	retq
+
+	.pushsection .rodata
+	.balign     16
+#ifndef PIC
+L(setPxQx):
+	.quad       L(Got0), L(P1Q0), L(P2Q0), L(P3Q0)
+	.quad       L(P4Q0), L(P5Q0), L(P6Q0), L(P7Q0)
+	.quad       L(P0Q1), L(P1Q1), L(P2Q1), L(P3Q1)
+	.quad       L(P4Q1), L(P5Q1), L(P6Q1), L(P7Q1)
+	.quad       L(P0Q2), L(P1Q2), L(P2Q2), L(P3Q2)
+	.quad       L(P4Q2), L(P5Q2), L(P6Q2), L(P7Q2)
+	.quad       L(P0Q3), L(P1Q3), L(P2Q3), L(P3Q3)
+	.quad       L(P4Q3), L(P5Q3), L(P6Q3), L(P7Q3)
+	.quad       L(P0Q4), L(P1Q4), L(P2Q4), L(P3Q4)
+	.quad       L(P4Q4), L(P5Q4), L(P6Q4), L(P7Q4)
+	.quad       L(P0Q5), L(P1Q5), L(P2Q5), L(P3Q5)
+	.quad       L(P4Q5), L(P5Q5), L(P6Q5), L(P7Q5)
+	.quad       L(P0Q6), L(P1Q6), L(P2Q6), L(P3Q6)
+	.quad       L(P4Q6), L(P5Q6), L(P6Q6), L(P7Q6)
+	.quad       L(P0Q7), L(P1Q7), L(P2Q7), L(P3Q7)
+	.quad       L(P4Q7), L(P5Q7), L(P6Q7), L(P7Q7)
+	.quad       L(P0Q8), L(P1Q8), L(P2Q8), L(P3Q8)
+	.quad       L(P4Q8), L(P5Q8), L(P6Q8), L(P7Q8)
+	.quad       L(P0Q9), L(P1Q9), L(P2Q9), L(P3Q9)
+	.quad       L(P4Q9), L(P5Q9), L(P6Q9), L(P7Q9)
+	.quad       L(P0QA), L(P1QA), L(P2QA), L(P3QA)
+	.quad       L(P4QA), L(P5QA), L(P6QA), L(P7QA)
+	.quad       L(P0QB), L(P1QB), L(P2QB), L(P3QB)
+	.quad       L(P4QB), L(P5QB), L(P6QB), L(P7QB)
+	.quad       L(P0QC), L(P1QC), L(P2QC), L(P3QC)
+	.quad       L(P4QC), L(P5QC), L(P6QC), L(P7QC)
+	.quad       L(P0QD), L(P1QD), L(P2QD), L(P3QD)
+	.quad       L(P4QD), L(P5QD), L(P6QD), L(P7QD)
+	.quad       L(P0QE), L(P1QE), L(P2QE), L(P3QE)
+	.quad       L(P4QE), L(P5QE), L(P6QE), L(P7QE)
+	.quad       L(P0QF), L(P1QF), L(P2QF), L(P3QF)
+	.quad       L(P4QF), L(P5QF), L(P6QF), L(P7QF)
+	.quad       L(P0QG), L(P1QG), L(P2QG), L(P3QG)
+	.quad       L(P4QG), L(P5QG), L(P6QG), L(P7QG)
+	.quad       L(P0QH), L(P1QH), L(P2QH), L(P3QH)
+	.quad       L(P4QH), L(P5QH), L(P6QH), L(P7QH)
+	.quad       L(P0QI)
+# ifdef USE_EXTRA_TABLE
+	.quad       L(P1QI), L(P2QI), L(P3QI), L(P4QI)
+	.quad       L(P5QI), L(P6QI), L(P7QI)
+# endif
+#else
+L(setPxQx):
+	.int       L(Got0)-L(setPxQx)
+	.int       L(P1Q0)-L(setPxQx)
+	.int       L(P2Q0)-L(setPxQx)
+	.int       L(P3Q0)-L(setPxQx)
+	.int       L(P4Q0)-L(setPxQx)
+	.int       L(P5Q0)-L(setPxQx)
+	.int       L(P6Q0)-L(setPxQx)
+	.int       L(P7Q0)-L(setPxQx)
+
+	.int       L(P0Q1)-L(setPxQx)
+	.int       L(P1Q1)-L(setPxQx)
+	.int       L(P2Q1)-L(setPxQx)
+	.int       L(P3Q1)-L(setPxQx)
+	.int       L(P4Q1)-L(setPxQx)
+	.int       L(P5Q1)-L(setPxQx)
+	.int       L(P6Q1)-L(setPxQx)
+	.int       L(P7Q1)-L(setPxQx)
+
+	.int       L(P0Q2)-L(setPxQx)
+	.int       L(P1Q2)-L(setPxQx)
+	.int       L(P2Q2)-L(setPxQx)
+	.int       L(P3Q2)-L(setPxQx)
+	.int       L(P4Q2)-L(setPxQx)
+	.int       L(P5Q2)-L(setPxQx)
+	.int       L(P6Q2)-L(setPxQx)
+	.int       L(P7Q2)-L(setPxQx)
+
+	.int       L(P0Q3)-L(setPxQx)
+	.int       L(P1Q3)-L(setPxQx)
+	.int       L(P2Q3)-L(setPxQx)
+	.int       L(P3Q3)-L(setPxQx)
+	.int       L(P4Q3)-L(setPxQx)
+	.int       L(P5Q3)-L(setPxQx)
+	.int       L(P6Q3)-L(setPxQx)
+	.int       L(P7Q3)-L(setPxQx)
+
+	.int       L(P0Q4)-L(setPxQx)
+	.int       L(P1Q4)-L(setPxQx)
+	.int       L(P2Q4)-L(setPxQx)
+	.int       L(P3Q4)-L(setPxQx)
+	.int       L(P4Q4)-L(setPxQx)
+	.int       L(P5Q4)-L(setPxQx)
+	.int       L(P6Q4)-L(setPxQx)
+	.int       L(P7Q4)-L(setPxQx)
+
+	.int       L(P0Q5)-L(setPxQx)
+	.int       L(P1Q5)-L(setPxQx)
+	.int       L(P2Q5)-L(setPxQx)
+	.int       L(P3Q5)-L(setPxQx)
+	.int       L(P4Q5)-L(setPxQx)
+	.int       L(P5Q5)-L(setPxQx)
+	.int       L(P6Q5)-L(setPxQx)
+	.int       L(P7Q5)-L(setPxQx)
+
+	.int       L(P0Q6)-L(setPxQx)
+	.int       L(P1Q6)-L(setPxQx)
+	.int       L(P2Q6)-L(setPxQx)
+	.int       L(P3Q6)-L(setPxQx)
+	.int       L(P4Q6)-L(setPxQx)
+	.int       L(P5Q6)-L(setPxQx)
+	.int       L(P6Q6)-L(setPxQx)
+	.int       L(P7Q6)-L(setPxQx)
+
+	.int       L(P0Q7)-L(setPxQx)
+	.int       L(P1Q7)-L(setPxQx)
+	.int       L(P2Q7)-L(setPxQx)
+	.int       L(P3Q7)-L(setPxQx)
+	.int       L(P4Q7)-L(setPxQx)
+	.int       L(P5Q7)-L(setPxQx)
+	.int       L(P6Q7)-L(setPxQx)
+	.int       L(P7Q7)-L(setPxQx)
+
+	.int       L(P0Q8)-L(setPxQx)
+	.int       L(P1Q8)-L(setPxQx)
+	.int       L(P2Q8)-L(setPxQx)
+	.int       L(P3Q8)-L(setPxQx)
+	.int       L(P4Q8)-L(setPxQx)
+	.int       L(P5Q8)-L(setPxQx)
+	.int       L(P6Q8)-L(setPxQx)
+	.int       L(P7Q8)-L(setPxQx)
+
+	.int       L(P0Q9)-L(setPxQx)
+	.int       L(P1Q9)-L(setPxQx)
+	.int       L(P2Q9)-L(setPxQx)
+	.int       L(P3Q9)-L(setPxQx)
+	.int       L(P4Q9)-L(setPxQx)
+	.int       L(P5Q9)-L(setPxQx)
+	.int       L(P6Q9)-L(setPxQx)
+	.int       L(P7Q9)-L(setPxQx)
+
+	.int       L(P0QA)-L(setPxQx)
+	.int       L(P1QA)-L(setPxQx)
+	.int       L(P2QA)-L(setPxQx)
+	.int       L(P3QA)-L(setPxQx)
+	.int       L(P4QA)-L(setPxQx)
+	.int       L(P5QA)-L(setPxQx)
+	.int       L(P6QA)-L(setPxQx)
+	.int       L(P7QA)-L(setPxQx)
+
+	.int       L(P0QB)-L(setPxQx)
+	.int       L(P1QB)-L(setPxQx)
+	.int       L(P2QB)-L(setPxQx)
+	.int       L(P3QB)-L(setPxQx)
+	.int       L(P4QB)-L(setPxQx)
+	.int       L(P5QB)-L(setPxQx)
+	.int       L(P6QB)-L(setPxQx)
+	.int       L(P7QB)-L(setPxQx)
+
+	.int       L(P0QC)-L(setPxQx)
+	.int       L(P1QC)-L(setPxQx)
+	.int       L(P2QC)-L(setPxQx)
+	.int       L(P3QC)-L(setPxQx)
+	.int       L(P4QC)-L(setPxQx)
+	.int       L(P5QC)-L(setPxQx)
+	.int       L(P6QC)-L(setPxQx)
+	.int       L(P7QC)-L(setPxQx)
+
+	.int       L(P0QD)-L(setPxQx)
+	.int       L(P1QD)-L(setPxQx)
+	.int       L(P2QD)-L(setPxQx)
+	.int       L(P3QD)-L(setPxQx)
+	.int       L(P4QD)-L(setPxQx)
+	.int       L(P5QD)-L(setPxQx)
+	.int       L(P6QD)-L(setPxQx)
+	.int       L(P7QD)-L(setPxQx)
+
+	.int       L(P0QE)-L(setPxQx)
+	.int       L(P1QE)-L(setPxQx)
+	.int       L(P2QE)-L(setPxQx)
+	.int       L(P3QE)-L(setPxQx)
+	.int       L(P4QE)-L(setPxQx)
+	.int       L(P5QE)-L(setPxQx)
+	.int       L(P6QE)-L(setPxQx)
+	.int       L(P7QE)-L(setPxQx)
+
+	.int       L(P0QF)-L(setPxQx)
+	.int       L(P1QF)-L(setPxQx)
+	.int       L(P2QF)-L(setPxQx)
+	.int       L(P3QF)-L(setPxQx)
+	.int       L(P4QF)-L(setPxQx)
+	.int       L(P5QF)-L(setPxQx)
+	.int       L(P6QF)-L(setPxQx)
+	.int       L(P7QF)-L(setPxQx)
+
+	.int       L(P0QG)-L(setPxQx)
+	.int       L(P1QG)-L(setPxQx)
+	.int       L(P2QG)-L(setPxQx)
+	.int       L(P3QG)-L(setPxQx)
+	.int       L(P4QG)-L(setPxQx)
+	.int       L(P5QG)-L(setPxQx)
+	.int       L(P6QG)-L(setPxQx)
+	.int       L(P7QG)-L(setPxQx)
+
+	.int       L(P0QH)-L(setPxQx)
+	.int       L(P1QH)-L(setPxQx)
+	.int       L(P2QH)-L(setPxQx)
+	.int       L(P3QH)-L(setPxQx)
+	.int       L(P4QH)-L(setPxQx)
+	.int       L(P5QH)-L(setPxQx)
+	.int       L(P6QH)-L(setPxQx)
+	.int       L(P7QH)-L(setPxQx)
+
+	.int       L(P0QI)-L(setPxQx)
+# ifdef USE_EXTRA_TABLE
+	.int       L(P1QI)-L(setPxQx)
+	.int       L(P2QI)-L(setPxQx)
+	.int       L(P3QI)-L(setPxQx)
+	.int       L(P4QI)-L(setPxQx)
+	.int       L(P5QI)-L(setPxQx)
+	.int       L(P6QI)-L(setPxQx)
+	.int       L(P7QI)-L(setPxQx)
+# endif
+#endif
+	.popsection
+
+	.balign     16
+#ifdef USE_EXTRA_TABLE
+L(P1QI): mov    %rdx,-0x91(%rdi)
+#endif
+L(P1QH): mov    %rdx,-0x89(%rdi)
+L(P1QG): mov    %rdx,-0x81(%rdi)
+#		   .balign     16
+L(P1QF): mov    %rdx,-0x79(%rdi)
+L(P1QE): mov    %rdx,-0x71(%rdi)
+L(P1QD): mov    %rdx,-0x69(%rdi)
+L(P1QC): mov    %rdx,-0x61(%rdi)
+L(P1QB): mov    %rdx,-0x59(%rdi)
+L(P1QA): mov    %rdx,-0x51(%rdi)
+L(P1Q9): mov    %rdx,-0x49(%rdi)
+L(P1Q8): mov    %rdx,-0x41(%rdi)
+L(P1Q7): mov    %rdx,-0x39(%rdi)
+L(P1Q6): mov    %rdx,-0x31(%rdi)
+L(P1Q5): mov    %rdx,-0x29(%rdi)
+L(P1Q4): mov    %rdx,-0x21(%rdi)
+L(P1Q3): mov    %rdx,-0x19(%rdi)
+L(P1Q2): mov    %rdx,-0x11(%rdi)
+L(P1Q1): mov    %rdx,-0x9(%rdi)
+L(P1Q0): mov    %dl,-0x1(%rdi)
+		retq
+
+	.balign     16
+L(P0QI): mov    %rdx,-0x90(%rdi)
+L(P0QH): mov    %rdx,-0x88(%rdi)
+#		   .balign     16
+L(P0QG): mov    %rdx,-0x80(%rdi)
+L(P0QF): mov    %rdx,-0x78(%rdi)
+L(P0QE): mov    %rdx,-0x70(%rdi)
+L(P0QD): mov    %rdx,-0x68(%rdi)
+L(P0QC): mov    %rdx,-0x60(%rdi)
+L(P0QB): mov    %rdx,-0x58(%rdi)
+L(P0QA): mov    %rdx,-0x50(%rdi)
+L(P0Q9): mov    %rdx,-0x48(%rdi)
+L(P0Q8): mov    %rdx,-0x40(%rdi)
+L(P0Q7): mov    %rdx,-0x38(%rdi)
+L(P0Q6): mov    %rdx,-0x30(%rdi)
+L(P0Q5): mov    %rdx,-0x28(%rdi)
+L(P0Q4): mov    %rdx,-0x20(%rdi)
+L(P0Q3): mov    %rdx,-0x18(%rdi)
+L(P0Q2): mov    %rdx,-0x10(%rdi)
+L(P0Q1): mov    %rdx,-0x8(%rdi)
+L(P0Q0): retq
+
+
+	.balign     16
+#ifdef USE_EXTRA_TABLE
+L(P2QI): mov    %rdx,-0x92(%rdi)
+#endif
+L(P2QH): mov    %rdx,-0x8a(%rdi)
+L(P2QG): mov    %rdx,-0x82(%rdi)
+#		   .balign     16
+L(P2QF): mov    %rdx,-0x7a(%rdi)
+L(P2QE): mov    %rdx,-0x72(%rdi)
+L(P2QD): mov    %rdx,-0x6a(%rdi)
+L(P2QC): mov    %rdx,-0x62(%rdi)
+L(P2QB): mov    %rdx,-0x5a(%rdi)
+L(P2QA): mov    %rdx,-0x52(%rdi)
+L(P2Q9): mov    %rdx,-0x4a(%rdi)
+L(P2Q8): mov    %rdx,-0x42(%rdi)
+L(P2Q7): mov    %rdx,-0x3a(%rdi)
+L(P2Q6): mov    %rdx,-0x32(%rdi)
+L(P2Q5): mov    %rdx,-0x2a(%rdi)
+L(P2Q4): mov    %rdx,-0x22(%rdi)
+L(P2Q3): mov    %rdx,-0x1a(%rdi)
+L(P2Q2): mov    %rdx,-0x12(%rdi)
+L(P2Q1): mov    %rdx,-0xa(%rdi)
+L(P2Q0): mov    %dx,-0x2(%rdi)
+		retq
+
+	.balign     16
+#ifdef USE_EXTRA_TABLE
+L(P3QI): mov    %rdx,-0x93(%rdi)
+#endif
+L(P3QH): mov    %rdx,-0x8b(%rdi)
+L(P3QG): mov    %rdx,-0x83(%rdi)
+#		   .balign     16
+L(P3QF): mov    %rdx,-0x7b(%rdi)
+L(P3QE): mov    %rdx,-0x73(%rdi)
+L(P3QD): mov    %rdx,-0x6b(%rdi)
+L(P3QC): mov    %rdx,-0x63(%rdi)
+L(P3QB): mov    %rdx,-0x5b(%rdi)
+L(P3QA): mov    %rdx,-0x53(%rdi)
+L(P3Q9): mov    %rdx,-0x4b(%rdi)
+L(P3Q8): mov    %rdx,-0x43(%rdi)
+L(P3Q7): mov    %rdx,-0x3b(%rdi)
+L(P3Q6): mov    %rdx,-0x33(%rdi)
+L(P3Q5): mov    %rdx,-0x2b(%rdi)
+L(P3Q4): mov    %rdx,-0x23(%rdi)
+L(P3Q3): mov    %rdx,-0x1b(%rdi)
+L(P3Q2): mov    %rdx,-0x13(%rdi)
+L(P3Q1): mov    %rdx,-0xb(%rdi)
+L(P3Q0): mov    %dx,-0x3(%rdi)
+		mov    %dl,-0x1(%rdi)
+		retq
+
+	.balign     16
+#ifdef USE_EXTRA_TABLE
+L(P4QI): mov    %rdx,-0x94(%rdi)
+#endif
+L(P4QH): mov    %rdx,-0x8c(%rdi)
+L(P4QG): mov    %rdx,-0x84(%rdi)
+#		   .balign     16
+L(P4QF): mov    %rdx,-0x7c(%rdi)
+L(P4QE): mov    %rdx,-0x74(%rdi)
+L(P4QD): mov    %rdx,-0x6c(%rdi)
+L(P4QC): mov    %rdx,-0x64(%rdi)
+L(P4QB): mov    %rdx,-0x5c(%rdi)
+L(P4QA): mov    %rdx,-0x54(%rdi)
+L(P4Q9): mov    %rdx,-0x4c(%rdi)
+L(P4Q8): mov    %rdx,-0x44(%rdi)
+L(P4Q7): mov    %rdx,-0x3c(%rdi)
+L(P4Q6): mov    %rdx,-0x34(%rdi)
+L(P4Q5): mov    %rdx,-0x2c(%rdi)
+L(P4Q4): mov    %rdx,-0x24(%rdi)
+L(P4Q3): mov    %rdx,-0x1c(%rdi)
+L(P4Q2): mov    %rdx,-0x14(%rdi)
+L(P4Q1): mov    %rdx,-0xc(%rdi)
+L(P4Q0): mov    %edx,-0x4(%rdi)
+		retq
+
+	.balign     16
+#if defined(USE_EXTRA_TABLE)
+L(P5QI): mov    %rdx,-0x95(%rdi)
+#endif
+L(P5QH): mov    %rdx,-0x8d(%rdi)
+L(P5QG): mov    %rdx,-0x85(%rdi)
+#		   .balign     16
+L(P5QF): mov    %rdx,-0x7d(%rdi)
+L(P5QE): mov    %rdx,-0x75(%rdi)
+L(P5QD): mov    %rdx,-0x6d(%rdi)
+L(P5QC): mov    %rdx,-0x65(%rdi)
+L(P5QB): mov    %rdx,-0x5d(%rdi)
+L(P5QA): mov    %rdx,-0x55(%rdi)
+L(P5Q9): mov    %rdx,-0x4d(%rdi)
+L(P5Q8): mov    %rdx,-0x45(%rdi)
+L(P5Q7): mov    %rdx,-0x3d(%rdi)
+L(P5Q6): mov    %rdx,-0x35(%rdi)
+L(P5Q5): mov    %rdx,-0x2d(%rdi)
+L(P5Q4): mov    %rdx,-0x25(%rdi)
+L(P5Q3): mov    %rdx,-0x1d(%rdi)
+L(P5Q2): mov    %rdx,-0x15(%rdi)
+L(P5Q1): mov    %rdx,-0xd(%rdi)
+L(P5Q0): mov    %edx,-0x5(%rdi)
+		mov    %dl,-0x1(%rdi)
+		retq
+
+	.balign     16
+#ifdef USE_EXTRA_TABLE
+L(P6QI): mov    %rdx,-0x96(%rdi)
+#endif
+L(P6QH): mov    %rdx,-0x8e(%rdi)
+L(P6QG): mov    %rdx,-0x86(%rdi)
+#		   .balign     16
+L(P6QF): mov    %rdx,-0x7e(%rdi)
+L(P6QE): mov    %rdx,-0x76(%rdi)
+L(P6QD): mov    %rdx,-0x6e(%rdi)
+L(P6QC): mov    %rdx,-0x66(%rdi)
+L(P6QB): mov    %rdx,-0x5e(%rdi)
+L(P6QA): mov    %rdx,-0x56(%rdi)
+L(P6Q9): mov    %rdx,-0x4e(%rdi)
+L(P6Q8): mov    %rdx,-0x46(%rdi)
+L(P6Q7): mov    %rdx,-0x3e(%rdi)
+L(P6Q6): mov    %rdx,-0x36(%rdi)
+L(P6Q5): mov    %rdx,-0x2e(%rdi)
+L(P6Q4): mov    %rdx,-0x26(%rdi)
+L(P6Q3): mov    %rdx,-0x1e(%rdi)
+L(P6Q2): mov    %rdx,-0x16(%rdi)
+L(P6Q1): mov    %rdx,-0xe(%rdi)
+L(P6Q0): mov    %edx,-0x6(%rdi)
+		mov    %dx,-0x2(%rdi)
+		retq
+
+	.balign     16
+#ifdef USE_EXTRA_TABLE
+L(P7QI): mov    %rdx,-0x97(%rdi)
+#endif
+L(P7QH): mov    %rdx,-0x8f(%rdi)
+L(P7QG): mov    %rdx,-0x87(%rdi)
+#		   .balign     16
+L(P7QF): mov    %rdx,-0x7f(%rdi)
+L(P7QE): mov    %rdx,-0x77(%rdi)
+L(P7QD): mov    %rdx,-0x6f(%rdi)
+L(P7QC): mov    %rdx,-0x67(%rdi)
+L(P7QB): mov    %rdx,-0x5f(%rdi)
+L(P7QA): mov    %rdx,-0x57(%rdi)
+L(P7Q9): mov    %rdx,-0x4f(%rdi)
+L(P7Q8): mov    %rdx,-0x47(%rdi)
+L(P7Q7): mov    %rdx,-0x3f(%rdi)
+L(P7Q6): mov    %rdx,-0x37(%rdi)
+L(P7Q5): mov    %rdx,-0x2f(%rdi)
+L(P7Q4): mov    %rdx,-0x27(%rdi)
+L(P7Q3): mov    %rdx,-0x1f(%rdi)
+L(P7Q2): mov    %rdx,-0x17(%rdi)
+L(P7Q1): mov    %rdx,-0xf(%rdi)
+L(P7Q0): mov    %edx,-0x7(%rdi)
+		mov    %dx,-0x3(%rdi)
+		mov    %dl,-0x1(%rdi)
+		retq
+
+	.balign     16
+L(ck_mem_ops_method):
+
+# align to 16 byte boundary first
+	#test $0xf,%rdi
+	#jz L(aligned_now)
+	 lea    L(AliPxQx)(%rip),%r11
+	 mov    $0x10,%r10
+	 mov    %rdi,%r9
+	 and    $0xf,%r9
+	 sub    %r9,%r10
+	 and    $0xf,%r10
+	 add    %r10,%rdi
+	 sub    %r10,%r8
+#ifndef PIC
+	jmpq   *(%r11,%r10,8)
+#else
+	movslq (%r11,%r10,4),%rcx
+	lea    (%rcx,%r11,1),%r11
+	jmpq   *%r11
+#endif
+
+	.pushsection .rodata
+	.balign     16
+#ifndef PIC
+L(AliPxQx):
+	.quad       L(aligned_now), L(A1Q0), L(A2Q0), L(A3Q0)
+	.quad	    L(A4Q0), L(A5Q0), L(A6Q0), L(A7Q0)
+	.quad       L(A0Q1), L(A1Q1), L(A2Q1), L(A3Q1)
+	.quad       L(A4Q1), L(A5Q1), L(A6Q1), L(A7Q1)
+#else
+L(AliPxQx):
+	.int       L(aligned_now)-L(AliPxQx)
+	.int       L(A1Q0)-L(AliPxQx)
+	.int       L(A2Q0)-L(AliPxQx)
+	.int       L(A3Q0)-L(AliPxQx)
+	.int       L(A4Q0)-L(AliPxQx)
+	.int       L(A5Q0)-L(AliPxQx)
+	.int       L(A6Q0)-L(AliPxQx)
+	.int       L(A7Q0)-L(AliPxQx)
+
+	.int       L(A0Q1)-L(AliPxQx)
+	.int       L(A1Q1)-L(AliPxQx)
+	.int       L(A2Q1)-L(AliPxQx)
+	.int       L(A3Q1)-L(AliPxQx)
+	.int       L(A4Q1)-L(AliPxQx)
+	.int       L(A5Q1)-L(AliPxQx)
+	.int       L(A6Q1)-L(AliPxQx)
+	.int       L(A7Q1)-L(AliPxQx)
+#endif
+	.popsection
+
+	.balign     16
+L(A5Q1):    mov    %dl,-0xd(%rdi)
+L(A4Q1):    mov    %edx,-0xc(%rdi)
+L(A0Q1):    mov    %rdx,-0x8(%rdi)
+L(A0Q0):    jmp     L(aligned_now)
+
+	.balign     16
+L(A1Q1):   mov    %dl,-0x9(%rdi)
+	mov    %rdx,-0x8(%rdi)
+	jmp    L(aligned_now)
+
+	.balign     16
+L(A1Q0):   mov    %dl,-0x1(%rdi)
+	jmp    L(aligned_now)
+
+	.balign     16
+L(A3Q1):    mov    %dl,-0xb(%rdi)
+L(A2Q1):    mov    %dx,-0xa(%rdi)
+	mov    %rdx,-0x8(%rdi)
+	jmp    L(aligned_now)
+
+	.balign     16
+L(A3Q0):    mov    %dl,-0x3(%rdi)
+L(A2Q0):    mov    %dx,-0x2(%rdi)
+	jmp    L(aligned_now)
+
+	.balign     16
+L(A5Q0):    mov    %dl,-0x5(%rdi)
+L(A4Q0):    mov    %edx,-0x4(%rdi)
+	jmp    L(aligned_now)
+
+	.balign     16
+L(A7Q1):    mov    %dl,-0xf(%rdi)
+L(A6Q1):    mov    %dx,-0xe(%rdi)
+	mov    %edx,-0xc(%rdi)
+	mov    %rdx,-0x8(%rdi)
+	jmp    L(aligned_now)
+
+	.balign     16
+L(A7Q0):    mov    %dl,-0x7(%rdi)
+L(A6Q0):    mov    %dx,-0x6(%rdi)
+	mov    %edx,-0x4(%rdi)
+	jmp    L(aligned_now)
+
+	.balign     16
+L(aligned_now):
+
+	 cmpl   $0x1,__x86_64_preferred_memory_instruction(%rip)
+	 jg     L(SSE_pre)
+
+L(8byte_move_try):
+	cmpq	__STOS_LOWER_BOUNDARY,%r8
+	jae	L(8byte_stos_try)
+
+	.balign     16
+L(8byte_move):
+	movq	%r8,%rcx
+	shrq	$7,%rcx
+	jz	L(8byte_move_skip)
 
 	.p2align 4
-1:	/* Align ptr to 8 byte.  */
-	mov	%sil,(%rcx)
-	dec	%rdx
-	inc	%rcx
-	test	$0x7,%ecx
-	jne	1b
-
-2:	/* Check for really large regions.  */
-	mov	%rdx,%rax
-	shr	$0x6,%rax
-	je	4f
-	cmp	LARGE, %rdx
-	jae	11f
-
-	.p2align 4
-3:	/* Copy 64 bytes.  */
-	mov	%r8,(%rcx)
-	mov	%r8,0x8(%rcx)
-	mov	%r8,0x10(%rcx)
-	mov	%r8,0x18(%rcx)
-	mov	%r8,0x20(%rcx)
-	mov	%r8,0x28(%rcx)
-	mov	%r8,0x30(%rcx)
-	mov	%r8,0x38(%rcx)
-	add	$0x40,%rcx
-	dec	%rax
-	jne	3b
-
-4:	/* Copy final bytes.  */
-	and	$0x3f,%edx
-	mov	%rdx,%rax
-	shr	$0x3,%rax
-	je	6f
-
-5:	/* First in chunks of 8 bytes.  */
-	mov	%r8,(%rcx)
-	add	$0x8,%rcx
-	dec	%rax
-	jne	5b
-6:
-	and	$0x7,%edx
-7:
-	test	%rdx,%rdx
-	je	9f
-8:	/* And finally as bytes (up to 7).  */
-	mov	%sil,(%rcx)
-	inc	%rcx
-	dec	%rdx
-	jne	8b
-9:
-	/* Load result (only if used as memset).  */
-	mov	%rdi,%rax	/* start address of destination is result */
-	retq
-
-	.p2align 4
-11:	/* Copy 64 bytes without polluting the cache.  */
-	/* We could use	movntdq    %xmm0,(%rcx) here to further
-	   speed up for large cases but let's not use XMM registers.  */
-	movnti	%r8,(%rcx)
-	movnti  %r8,0x8(%rcx)
-	movnti  %r8,0x10(%rcx)
-	movnti  %r8,0x18(%rcx)
-	movnti  %r8,0x20(%rcx)
-	movnti  %r8,0x28(%rcx)
-	movnti  %r8,0x30(%rcx)
-	movnti  %r8,0x38(%rcx)
-	add	$0x40,%rcx
-	dec	%rax
-	jne	11b
+
+L(8byte_move_loop):
+	decq	%rcx
+
+	movq	%rdx,    (%rdi)
+	movq	%rdx,  8 (%rdi)
+	movq	%rdx, 16 (%rdi)
+	movq	%rdx, 24 (%rdi)
+	movq	%rdx, 32 (%rdi)
+	movq	%rdx, 40 (%rdi)
+	movq	%rdx, 48 (%rdi)
+	movq	%rdx, 56 (%rdi)
+	movq	%rdx, 64 (%rdi)
+	movq	%rdx, 72 (%rdi)
+	movq	%rdx, 80 (%rdi)
+	movq	%rdx, 88 (%rdi)
+	movq	%rdx, 96 (%rdi)
+	movq	%rdx, 104 (%rdi)
+	movq	%rdx, 112 (%rdi)
+	movq	%rdx, 120 (%rdi)
+
+	leaq	128 (%rdi),%rdi
+
+	jnz     L(8byte_move_loop)
+
+L(8byte_move_skip):
+	andl	$127,%r8d
+	lea    	(%rdi,%r8,1),%rdi
+	lea    	L(setPxQx)(%rip),%r11
+
+#ifndef PIC
+	jmpq   	*(%r11,%r8,8) # old scheme retained for non-PIC
+#else
+	movslq	(%r11,%r8,4),%rcx
+	lea    	(%rcx,%r11,1),%r11
+	jmpq   	*%r11
+#endif
+
+	.balign     16
+L(8byte_stos_try):
+	mov    __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size
+	cmpq	%r8,%r9		// calculate the lesser of remaining
+	cmovaq	%r8,%r9		// bytes and largest cache size
+	jbe	L(8byte_stos)
+
+L(8byte_move_reuse_try):
+	cmp	__STOS_UPPER_BOUNDARY,%r8
+	jae	L(8byte_move)
+
+	.balign     16
+L(8byte_stos):
+	movq	%r9,%rcx
+	andq	$-8,%r9
+
+	shrq	$3,%rcx
+	jz	L(8byte_stos_skip)
+
+	xchgq	%rax,%rdx
+
+	rep
+	stosq
+
+	xchgq	%rax,%rdx
+
+L(8byte_stos_skip):
+	subq	%r9,%r8
+	ja	L(8byte_nt_move)
+
+	andl	$7,%r8d
+	lea    	(%rdi,%r8,1),%rdi
+	lea    	L(setPxQx)(%rip),%r11
+#ifndef PIC
+	jmpq   	*(%r11,%r8,8) # old scheme retained for non-PIC
+#else
+	movslq	(%r11,%r8,4),%rcx
+	lea	(%rcx,%r11,1),%r11
+	jmpq   	*%r11
+#endif
+
+	.balign     16
+L(8byte_nt_move):
+	movq	%r8,%rcx
+	shrq	$7,%rcx
+	jz      L(8byte_nt_move_skip)
+
+	.balign     16
+L(8byte_nt_move_loop):
+	decq	%rcx
+
+	movntiq	%rdx,     (%rdi)
+	movntiq	%rdx,   8 (%rdi)
+	movntiq	%rdx,  16 (%rdi)
+	movntiq	%rdx,  24 (%rdi)
+	movntiq	%rdx,  32 (%rdi)
+	movntiq	%rdx,  40 (%rdi)
+	movntiq	%rdx,  48 (%rdi)
+	movntiq	%rdx,  56 (%rdi)
+	movntiq	%rdx,  64 (%rdi)
+	movntiq	%rdx,  72 (%rdi)
+	movntiq	%rdx,  80 (%rdi)
+	movntiq	%rdx,  88 (%rdi)
+	movntiq	%rdx,  96 (%rdi)
+	movntiq	%rdx, 104 (%rdi)
+	movntiq	%rdx, 112 (%rdi)
+	movntiq	%rdx, 120 (%rdi)
+
+	leaq	128 (%rdi),%rdi
+
+	jnz     L(8byte_nt_move_loop)
+
 	sfence
-	jmp	4b
+
+L(8byte_nt_move_skip):
+	andl	$127,%r8d
+
+	lea    	(%rdi,%r8,1),%rdi
+	lea    	L(setPxQx)(%rip),%r11
+#ifndef PIC
+	jmpq   	*(%r11,%r8,8) # old scheme retained for non-PIC
+#else
+	movslq	(%r11,%r8,4),%rcx
+	lea    	(%rcx,%r11,1),%r11
+	jmpq   	*%r11
+#endif
+
+L(SSE_pre):
+	 # fill RegXMM0 with the pattern
+	 movd   %rdx,%xmm0
+	 punpcklqdq %xmm0,%xmm0
+
+	 lea    L(SSExDx)(%rip),%r9        # for later after the alignment
+	 cmp    $0xb0,%r8 # 176
+	 jge    L(byte32sse2_pre)
+
+	 add    %r8,%rdi
+#ifndef PIC
+	 jmpq   *(%r9,%r8,8)
+#else
+	 movslq    (%r9,%r8,4),%rcx
+	 lea    (%rcx,%r9,1),%r9
+	 jmpq   *%r9
+#endif
+
+L(SSE0QB):  movdqa %xmm0,-0xb0(%rdi)
+L(SSE0QA):  movdqa %xmm0,-0xa0(%rdi)
+L(SSE0Q9):  movdqa %xmm0,-0x90(%rdi)
+L(SSE0Q8):  movdqa %xmm0,-0x80(%rdi)
+L(SSE0Q7):  movdqa %xmm0,-0x70(%rdi)
+L(SSE0Q6):  movdqa %xmm0,-0x60(%rdi)
+L(SSE0Q5):  movdqa %xmm0,-0x50(%rdi)
+L(SSE0Q4):  movdqa %xmm0,-0x40(%rdi)
+L(SSE0Q3):  movdqa %xmm0,-0x30(%rdi)
+L(SSE0Q2):  movdqa %xmm0,-0x20(%rdi)
+L(SSE0Q1):  movdqa %xmm0,-0x10(%rdi)
+L(SSE0Q0):  retq
+
+L(SSE1QB):  movdqa %xmm0,-0xb1(%rdi)
+L(SSE1QA):  movdqa %xmm0,-0xa1(%rdi)
+L(SSE1Q9):  movdqa %xmm0,-0x91(%rdi)
+L(SSE1Q8):  movdqa %xmm0,-0x81(%rdi)
+L(SSE1Q7):  movdqa %xmm0,-0x71(%rdi)
+L(SSE1Q6):  movdqa %xmm0,-0x61(%rdi)
+L(SSE1Q5):  movdqa %xmm0,-0x51(%rdi)
+L(SSE1Q4):  movdqa %xmm0,-0x41(%rdi)
+L(SSE1Q3):  movdqa %xmm0,-0x31(%rdi)
+L(SSE1Q2):  movdqa %xmm0,-0x21(%rdi)
+L(SSE1Q1):  movdqa %xmm0,-0x11(%rdi)
+L(SSE1Q0):  mov    %dl,-0x1(%rdi)
+	retq
+
+L(SSE2QB):  movdqa %xmm0,-0xb2(%rdi)
+L(SSE2QA):  movdqa %xmm0,-0xa2(%rdi)
+L(SSE2Q9):  movdqa %xmm0,-0x92(%rdi)
+L(SSE2Q8):  movdqa %xmm0,-0x82(%rdi)
+L(SSE2Q7):  movdqa %xmm0,-0x72(%rdi)
+L(SSE2Q6):  movdqa %xmm0,-0x62(%rdi)
+L(SSE2Q5):  movdqa %xmm0,-0x52(%rdi)
+L(SSE2Q4):  movdqa %xmm0,-0x42(%rdi)
+L(SSE2Q3):  movdqa %xmm0,-0x32(%rdi)
+L(SSE2Q2):  movdqa %xmm0,-0x22(%rdi)
+L(SSE2Q1):  movdqa %xmm0,-0x12(%rdi)
+L(SSE2Q0):  mov    %dx,-0x2(%rdi)
+	retq
+
+L(SSE3QB):  movdqa %xmm0,-0xb3(%rdi)
+L(SSE3QA):  movdqa %xmm0,-0xa3(%rdi)
+L(SSE3Q9):  movdqa %xmm0,-0x93(%rdi)
+L(SSE3Q8):  movdqa %xmm0,-0x83(%rdi)
+L(SSE3Q7):  movdqa %xmm0,-0x73(%rdi)
+L(SSE3Q6):  movdqa %xmm0,-0x63(%rdi)
+L(SSE3Q5):  movdqa %xmm0,-0x53(%rdi)
+L(SSE3Q4):  movdqa %xmm0,-0x43(%rdi)
+L(SSE3Q3):  movdqa %xmm0,-0x33(%rdi)
+L(SSE3Q2):  movdqa %xmm0,-0x23(%rdi)
+L(SSE3Q1):  movdqa %xmm0,-0x13(%rdi)
+L(SSE3Q0):  mov    %dx,-0x3(%rdi)
+	mov    %dl,-0x1(%rdi)
+	retq
+
+L(SSE4QB):  movdqa %xmm0,-0xb4(%rdi)
+L(SSE4QA):  movdqa %xmm0,-0xa4(%rdi)
+L(SSE4Q9):  movdqa %xmm0,-0x94(%rdi)
+L(SSE4Q8):  movdqa %xmm0,-0x84(%rdi)
+L(SSE4Q7):  movdqa %xmm0,-0x74(%rdi)
+L(SSE4Q6):  movdqa %xmm0,-0x64(%rdi)
+L(SSE4Q5):  movdqa %xmm0,-0x54(%rdi)
+L(SSE4Q4):  movdqa %xmm0,-0x44(%rdi)
+L(SSE4Q3):  movdqa %xmm0,-0x34(%rdi)
+L(SSE4Q2):  movdqa %xmm0,-0x24(%rdi)
+L(SSE4Q1):  movdqa %xmm0,-0x14(%rdi)
+L(SSE4Q0):  mov    %edx,-0x4(%rdi)
+	retq
+
+L(SSE5QB):  movdqa %xmm0,-0xb5(%rdi)
+L(SSE5QA):  movdqa %xmm0,-0xa5(%rdi)
+L(SSE5Q9):  movdqa %xmm0,-0x95(%rdi)
+L(SSE5Q8):  movdqa %xmm0,-0x85(%rdi)
+L(SSE5Q7):  movdqa %xmm0,-0x75(%rdi)
+L(SSE5Q6):  movdqa %xmm0,-0x65(%rdi)
+L(SSE5Q5):  movdqa %xmm0,-0x55(%rdi)
+L(SSE5Q4):  movdqa %xmm0,-0x45(%rdi)
+L(SSE5Q3):  movdqa %xmm0,-0x35(%rdi)
+L(SSE5Q2):  movdqa %xmm0,-0x25(%rdi)
+L(SSE5Q1):  movdqa %xmm0,-0x15(%rdi)
+L(SSE5Q0):  mov    %edx,-0x5(%rdi)
+	mov    %dl,-0x1(%rdi)
+	retq
+
+
+L(SSE6QB):  movdqa %xmm0,-0xb6(%rdi)
+L(SSE6QA):  movdqa %xmm0,-0xa6(%rdi)
+L(SSE6Q9):  movdqa %xmm0,-0x96(%rdi)
+L(SSE6Q8):  movdqa %xmm0,-0x86(%rdi)
+L(SSE6Q7):  movdqa %xmm0,-0x76(%rdi)
+L(SSE6Q6):  movdqa %xmm0,-0x66(%rdi)
+L(SSE6Q5):  movdqa %xmm0,-0x56(%rdi)
+L(SSE6Q4):  movdqa %xmm0,-0x46(%rdi)
+L(SSE6Q3):  movdqa %xmm0,-0x36(%rdi)
+L(SSE6Q2):  movdqa %xmm0,-0x26(%rdi)
+L(SSE6Q1):  movdqa %xmm0,-0x16(%rdi)
+L(SSE6Q0):  mov    %edx,-0x6(%rdi)
+	mov    %dx,-0x2(%rdi)
+	retq
+
+L(SSE7QB):  movdqa %xmm0,-0xb7(%rdi)
+L(SSE7QA):  movdqa %xmm0,-0xa7(%rdi)
+L(SSE7Q9):  movdqa %xmm0,-0x97(%rdi)
+L(SSE7Q8):  movdqa %xmm0,-0x87(%rdi)
+L(SSE7Q7):  movdqa %xmm0,-0x77(%rdi)
+L(SSE7Q6):  movdqa %xmm0,-0x67(%rdi)
+L(SSE7Q5):  movdqa %xmm0,-0x57(%rdi)
+L(SSE7Q4):  movdqa %xmm0,-0x47(%rdi)
+L(SSE7Q3):  movdqa %xmm0,-0x37(%rdi)
+L(SSE7Q2):  movdqa %xmm0,-0x27(%rdi)
+L(SSE7Q1):  movdqa %xmm0,-0x17(%rdi)
+L(SSE7Q0):  mov    %edx,-0x7(%rdi)
+	mov    %dx,-0x3(%rdi)
+	mov    %dl,-0x1(%rdi)
+	retq
+
+L(SSE8QB):  movdqa %xmm0,-0xb8(%rdi)
+L(SSE8QA):  movdqa %xmm0,-0xa8(%rdi)
+L(SSE8Q9):  movdqa %xmm0,-0x98(%rdi)
+L(SSE8Q8):  movdqa %xmm0,-0x88(%rdi)
+L(SSE8Q7):  movdqa %xmm0,-0x78(%rdi)
+L(SSE8Q6):  movdqa %xmm0,-0x68(%rdi)
+L(SSE8Q5):  movdqa %xmm0,-0x58(%rdi)
+L(SSE8Q4):  movdqa %xmm0,-0x48(%rdi)
+L(SSE8Q3):  movdqa %xmm0,-0x38(%rdi)
+L(SSE8Q2):  movdqa %xmm0,-0x28(%rdi)
+L(SSE8Q1):  movdqa %xmm0,-0x18(%rdi)
+L(SSE8Q0):  mov    %rdx,-0x8(%rdi)
+	retq
+
+L(SSE9QB):  movdqa %xmm0,-0xb9(%rdi)
+L(SSE9QA):  movdqa %xmm0,-0xa9(%rdi)
+L(SSE9Q9):  movdqa %xmm0,-0x99(%rdi)
+L(SSE9Q8):  movdqa %xmm0,-0x89(%rdi)
+L(SSE9Q7):  movdqa %xmm0,-0x79(%rdi)
+L(SSE9Q6):  movdqa %xmm0,-0x69(%rdi)
+L(SSE9Q5):  movdqa %xmm0,-0x59(%rdi)
+L(SSE9Q4):  movdqa %xmm0,-0x49(%rdi)
+L(SSE9Q3):  movdqa %xmm0,-0x39(%rdi)
+L(SSE9Q2):  movdqa %xmm0,-0x29(%rdi)
+L(SSE9Q1):  movdqa %xmm0,-0x19(%rdi)
+L(SSE9Q0):  mov    %rdx,-0x9(%rdi)
+	mov    %dl,-0x1(%rdi)
+	retq
+
+L(SSE10QB): movdqa %xmm0,-0xba(%rdi)
+L(SSE10QA): movdqa %xmm0,-0xaa(%rdi)
+L(SSE10Q9): movdqa %xmm0,-0x9a(%rdi)
+L(SSE10Q8): movdqa %xmm0,-0x8a(%rdi)
+L(SSE10Q7): movdqa %xmm0,-0x7a(%rdi)
+L(SSE10Q6): movdqa %xmm0,-0x6a(%rdi)
+L(SSE10Q5): movdqa %xmm0,-0x5a(%rdi)
+L(SSE10Q4): movdqa %xmm0,-0x4a(%rdi)
+L(SSE10Q3): movdqa %xmm0,-0x3a(%rdi)
+L(SSE10Q2): movdqa %xmm0,-0x2a(%rdi)
+L(SSE10Q1): movdqa %xmm0,-0x1a(%rdi)
+L(SSE10Q0): mov    %rdx,-0xa(%rdi)
+	mov    %dx,-0x2(%rdi)
+	retq
+
+L(SSE11QB): movdqa %xmm0,-0xbb(%rdi)
+L(SSE11QA): movdqa %xmm0,-0xab(%rdi)
+L(SSE11Q9): movdqa %xmm0,-0x9b(%rdi)
+L(SSE11Q8): movdqa %xmm0,-0x8b(%rdi)
+L(SSE11Q7): movdqa %xmm0,-0x7b(%rdi)
+L(SSE11Q6): movdqa %xmm0,-0x6b(%rdi)
+L(SSE11Q5): movdqa %xmm0,-0x5b(%rdi)
+L(SSE11Q4): movdqa %xmm0,-0x4b(%rdi)
+L(SSE11Q3): movdqa %xmm0,-0x3b(%rdi)
+L(SSE11Q2): movdqa %xmm0,-0x2b(%rdi)
+L(SSE11Q1): movdqa %xmm0,-0x1b(%rdi)
+L(SSE11Q0): mov    %rdx,-0xb(%rdi)
+	mov    %dx,-0x3(%rdi)
+	mov    %dl,-0x1(%rdi)
+	retq
+
+L(SSE12QB): movdqa %xmm0,-0xbc(%rdi)
+L(SSE12QA): movdqa %xmm0,-0xac(%rdi)
+L(SSE12Q9): movdqa %xmm0,-0x9c(%rdi)
+L(SSE12Q8): movdqa %xmm0,-0x8c(%rdi)
+L(SSE12Q7): movdqa %xmm0,-0x7c(%rdi)
+L(SSE12Q6): movdqa %xmm0,-0x6c(%rdi)
+L(SSE12Q5): movdqa %xmm0,-0x5c(%rdi)
+L(SSE12Q4): movdqa %xmm0,-0x4c(%rdi)
+L(SSE12Q3): movdqa %xmm0,-0x3c(%rdi)
+L(SSE12Q2): movdqa %xmm0,-0x2c(%rdi)
+L(SSE12Q1): movdqa %xmm0,-0x1c(%rdi)
+L(SSE12Q0): mov    %rdx,-0xc(%rdi)
+	mov    %edx,-0x4(%rdi)
+	retq
+
+L(SSE13QB): movdqa %xmm0,-0xbd(%rdi)
+L(SSE13QA): movdqa %xmm0,-0xad(%rdi)
+L(SSE13Q9): movdqa %xmm0,-0x9d(%rdi)
+L(SSE13Q8): movdqa %xmm0,-0x8d(%rdi)
+L(SSE13Q7): movdqa %xmm0,-0x7d(%rdi)
+L(SSE13Q6): movdqa %xmm0,-0x6d(%rdi)
+L(SSE13Q5): movdqa %xmm0,-0x5d(%rdi)
+L(SSE13Q4): movdqa %xmm0,-0x4d(%rdi)
+L(SSE13Q3): movdqa %xmm0,-0x3d(%rdi)
+L(SSE13Q2): movdqa %xmm0,-0x2d(%rdi)
+L(SSE13Q1): movdqa %xmm0,-0x1d(%rdi)
+L(SSE13Q0): mov    %rdx,-0xd(%rdi)
+	mov    %edx,-0x5(%rdi)
+	mov    %dl,-0x1(%rdi)
+	retq
+
+L(SSE14QB): movdqa %xmm0,-0xbe(%rdi)
+L(SSE14QA): movdqa %xmm0,-0xae(%rdi)
+L(SSE14Q9): movdqa %xmm0,-0x9e(%rdi)
+L(SSE14Q8): movdqa %xmm0,-0x8e(%rdi)
+L(SSE14Q7): movdqa %xmm0,-0x7e(%rdi)
+L(SSE14Q6): movdqa %xmm0,-0x6e(%rdi)
+L(SSE14Q5): movdqa %xmm0,-0x5e(%rdi)
+L(SSE14Q4): movdqa %xmm0,-0x4e(%rdi)
+L(SSE14Q3): movdqa %xmm0,-0x3e(%rdi)
+L(SSE14Q2): movdqa %xmm0,-0x2e(%rdi)
+L(SSE14Q1): movdqa %xmm0,-0x1e(%rdi)
+L(SSE14Q0): mov    %rdx,-0xe(%rdi)
+	mov    %edx,-0x6(%rdi)
+	mov    %dx,-0x2(%rdi)
+	retq
+
+L(SSE15QB): movdqa %xmm0,-0xbf(%rdi)
+L(SSE15QA): movdqa %xmm0,-0xaf(%rdi)
+L(SSE15Q9): movdqa %xmm0,-0x9f(%rdi)
+L(SSE15Q8): movdqa %xmm0,-0x8f(%rdi)
+L(SSE15Q7): movdqa %xmm0,-0x7f(%rdi)
+L(SSE15Q6): movdqa %xmm0,-0x6f(%rdi)
+L(SSE15Q5): movdqa %xmm0,-0x5f(%rdi)
+L(SSE15Q4): movdqa %xmm0,-0x4f(%rdi)
+L(SSE15Q3): movdqa %xmm0,-0x3f(%rdi)
+L(SSE15Q2): movdqa %xmm0,-0x2f(%rdi)
+L(SSE15Q1): movdqa %xmm0,-0x1f(%rdi)
+L(SSE15Q0): mov    %rdx,-0xf(%rdi)
+	mov    %edx,-0x7(%rdi)
+	mov    %dx,-0x3(%rdi)
+	mov    %dl,-0x1(%rdi)
+	retq
+
+	.balign     16	/* Start the entry point on a 16-byte boundary.  */
+L(byte32sse2_pre):	/* Pick the cached or the non-temporal 128-byte store loop based on the byte count in %r8.  */
+
+	mov    __x86_64_shared_cache_size(%rip),%r9d  # The largest cache size
+	cmp    %r9,%r8	/* The 32-bit load above zero-extends into %r9, so this is a full 64-bit compare of %r8 against it.  */
+	jg     L(sse2_nt_move_pre)	/* Taken when %r8 > %r9.  NOTE(review): jg is a signed test, so %r8 >= 2^63 would count as smaller -- confirm such counts cannot reach here.  */
+	#jmp    L(byte32sse2)	/* Left disabled: execution falls through to L(byte32sse2) directly below.  */
+	.balign     16	/* Start the loop head on a 16-byte boundary.  */
+L(byte32sse2):	/* Loop: write 128 bytes of %xmm0 per pass with aligned stores, then dispatch the remainder through L(SSExDx).  */
+	lea    -0x80(%r8),%r8 # 128
+	cmp    $0x80,%r8   # 128
+	movdqa %xmm0,(%rdi)	/* movdqa and lea do not modify flags, so the cmp result above is still live at the jge below.  */
+	movdqa %xmm0,0x10(%rdi)
+	movdqa %xmm0,0x20(%rdi)
+	movdqa %xmm0,0x30(%rdi)
+	movdqa %xmm0,0x40(%rdi)
+	movdqa %xmm0,0x50(%rdi)
+	movdqa %xmm0,0x60(%rdi)
+	movdqa %xmm0,0x70(%rdi)
+
+	lea    0x80(%rdi),%rdi	/* Step the destination past the 128 bytes just written.  */
+	jge    L(byte32sse2)	/* Go again while the reduced %r8 is still >= 128 (signed test).  NOTE(review): assumes entry with %r8 >= 128 so the index below is non-negative -- confirm against callers.  */
+	lea    L(SSExDx)(%rip),%r11	/* %r11 = base of the tail dispatch table.  */
+	add    %r8,%rdi	/* Move %rdi to the end of the remaining %r8 bytes; the tail code stores at negative offsets from it.  */
+#ifndef PIC
+	jmpq   *(%r11,%r8,8)	/* Non-PIC: entries are 8-byte absolute addresses, indexed by %r8.  */
+#else
+	movslq    (%r11,%r8,4),%rcx	/* PIC: entries are 32-bit offsets from the table base; sign-extend entry %r8.  */
+	lea   (%rcx,%r11,1),%r11	/* Target address = offset + table base.  */
+	jmpq   *%r11	/* Enter the tail code selected by the remaining byte count.  */
+#endif
+
+	.balign     16	/* Start the entry point on a 16-byte boundary.  */
+L(sse2_nt_move_pre):	/* Within this view, reached from L(byte32sse2_pre) when %r8 exceeds the cache size held in %r9.  */
+	cmp    $0x0,%r9	/* Is the cache size value zero?  */
+	je     L(byte32sse2)	/* Yes: use the ordinary cached loop.  NOTE(review): zero presumably means the size is unknown or not yet initialised -- confirm.  */
+	jmp    L(sse2_nt_move)	/* No: use the non-temporal store loop.  */
+
+	.balign     16	/* Start the loop head on a 16-byte boundary.  */
+L(sse2_nt_move):	/* Same 128-bytes-per-pass loop as L(byte32sse2), but using movntdq (stores with a non-temporal hint).  */
+	lea    -0x80(%r8),%r8	/* %r8 -= 128; lea leaves the flags alone.  */
+	cmp    $0x80,%r8	/* Flags for the jge below: is the reduced count still >= 128?  */
+
+	movntdq %xmm0,(%rdi)	/* The stores and the lea below do not modify flags, so the cmp result stays live.  */
+	movntdq %xmm0,0x10(%rdi)
+	movntdq %xmm0,0x20(%rdi)
+	movntdq %xmm0,0x30(%rdi)
+	movntdq %xmm0,0x40(%rdi)
+	movntdq %xmm0,0x50(%rdi)
+	movntdq %xmm0,0x60(%rdi)
+	movntdq %xmm0,0x70(%rdi)
+
+	lea    0x80(%rdi),%rdi	/* Step the destination past the 128 bytes just written.  */
+	jge    L(sse2_nt_move)	/* Go again while the reduced %r8 is still >= 128 (signed test).  */
+	lea    L(SSExDx)(%rip),%r11	/* %r11 = base of the tail dispatch table.  */
+	sfence	/* Order the non-temporal stores above before the ordinary tail stores that follow.  */
+	add    %r8,%rdi	/* Move %rdi to the end of the remaining %r8 bytes; the tail code stores at negative offsets from it.  */
+#ifndef PIC
+	jmpq   *(%r11,%r8,8)	/* Non-PIC: entries are 8-byte absolute addresses, indexed by %r8.  */
+#else
+	movslq (%r11,%r8,4),%rcx	/* PIC: entries are 32-bit offsets from the table base; sign-extend entry %r8.  */
+	lea   (%rcx,%r11,1),%r11	/* Target address = offset + table base.  */
+	jmpq   *%r11	/* Enter the tail code selected by the remaining byte count.  */
+#endif
+
+	.pushsection .rodata	/* Place the dispatch table in .rodata; the .popsection at the end restores the previous section.  */
+	.balign     16
+#ifndef PIC	/* Non-PIC build: the table holds 8-byte absolute code addresses.  */
+L(SSExDx):	/* 192 entries; entry i is L(SSE<i mod 16>Q<i / 16>), i.e. i/16 16-byte stores followed by an i%16-byte remainder.  */
+	.quad       L(SSE0Q0), L(SSE1Q0), L(SSE2Q0), L(SSE3Q0)	/* Indices 0-15: remainder only.  */
+	.quad       L(SSE4Q0), L(SSE5Q0), L(SSE6Q0), L(SSE7Q0)
+	.quad       L(SSE8Q0), L(SSE9Q0), L(SSE10Q0), L(SSE11Q0)
+	.quad       L(SSE12Q0), L(SSE13Q0), L(SSE14Q0), L(SSE15Q0)
+	.quad       L(SSE0Q1), L(SSE1Q1), L(SSE2Q1), L(SSE3Q1)	/* Indices 16-31: one 16-byte store plus remainder; later groups follow the same pattern.  */
+	.quad       L(SSE4Q1), L(SSE5Q1), L(SSE6Q1), L(SSE7Q1)
+	.quad       L(SSE8Q1), L(SSE9Q1), L(SSE10Q1), L(SSE11Q1)
+	.quad       L(SSE12Q1), L(SSE13Q1), L(SSE14Q1), L(SSE15Q1)
+	.quad       L(SSE0Q2), L(SSE1Q2), L(SSE2Q2), L(SSE3Q2)
+	.quad       L(SSE4Q2), L(SSE5Q2), L(SSE6Q2), L(SSE7Q2)
+	.quad       L(SSE8Q2), L(SSE9Q2), L(SSE10Q2), L(SSE11Q2)
+	.quad       L(SSE12Q2), L(SSE13Q2), L(SSE14Q2), L(SSE15Q2)
+	.quad       L(SSE0Q3), L(SSE1Q3), L(SSE2Q3), L(SSE3Q3)
+	.quad       L(SSE4Q3), L(SSE5Q3), L(SSE6Q3), L(SSE7Q3)
+	.quad       L(SSE8Q3), L(SSE9Q3), L(SSE10Q3), L(SSE11Q3)
+	.quad       L(SSE12Q3), L(SSE13Q3), L(SSE14Q3), L(SSE15Q3)
+	.quad       L(SSE0Q4), L(SSE1Q4), L(SSE2Q4), L(SSE3Q4)
+	.quad       L(SSE4Q4), L(SSE5Q4), L(SSE6Q4), L(SSE7Q4)
+	.quad       L(SSE8Q4), L(SSE9Q4), L(SSE10Q4), L(SSE11Q4)
+	.quad       L(SSE12Q4), L(SSE13Q4), L(SSE14Q4), L(SSE15Q4)
+	.quad       L(SSE0Q5), L(SSE1Q5), L(SSE2Q5), L(SSE3Q5)
+	.quad       L(SSE4Q5), L(SSE5Q5), L(SSE6Q5), L(SSE7Q5)
+	.quad       L(SSE8Q5), L(SSE9Q5), L(SSE10Q5), L(SSE11Q5)
+	.quad       L(SSE12Q5), L(SSE13Q5), L(SSE14Q5), L(SSE15Q5)
+	.quad       L(SSE0Q6), L(SSE1Q6), L(SSE2Q6), L(SSE3Q6)
+	.quad       L(SSE4Q6), L(SSE5Q6), L(SSE6Q6), L(SSE7Q6)
+	.quad       L(SSE8Q6), L(SSE9Q6), L(SSE10Q6), L(SSE11Q6)
+	.quad       L(SSE12Q6), L(SSE13Q6), L(SSE14Q6), L(SSE15Q6)
+	.quad       L(SSE0Q7), L(SSE1Q7), L(SSE2Q7), L(SSE3Q7)
+	.quad       L(SSE4Q7), L(SSE5Q7), L(SSE6Q7), L(SSE7Q7)
+	.quad       L(SSE8Q7), L(SSE9Q7), L(SSE10Q7), L(SSE11Q7)
+	.quad       L(SSE12Q7), L(SSE13Q7), L(SSE14Q7), L(SSE15Q7)
+	.quad       L(SSE0Q8), L(SSE1Q8), L(SSE2Q8), L(SSE3Q8)
+	.quad       L(SSE4Q8), L(SSE5Q8), L(SSE6Q8), L(SSE7Q8)
+	.quad       L(SSE8Q8), L(SSE9Q8), L(SSE10Q8), L(SSE11Q8)
+	.quad       L(SSE12Q8), L(SSE13Q8), L(SSE14Q8), L(SSE15Q8)
+	.quad       L(SSE0Q9), L(SSE1Q9), L(SSE2Q9), L(SSE3Q9)
+	.quad       L(SSE4Q9), L(SSE5Q9), L(SSE6Q9), L(SSE7Q9)
+	.quad       L(SSE8Q9), L(SSE9Q9), L(SSE10Q9), L(SSE11Q9)
+	.quad       L(SSE12Q9), L(SSE13Q9), L(SSE14Q9), L(SSE15Q9)
+	.quad       L(SSE0QA), L(SSE1QA), L(SSE2QA), L(SSE3QA)
+	.quad       L(SSE4QA), L(SSE5QA), L(SSE6QA), L(SSE7QA)
+	.quad       L(SSE8QA), L(SSE9QA), L(SSE10QA), L(SSE11QA)
+	.quad       L(SSE12QA), L(SSE13QA), L(SSE14QA), L(SSE15QA)
+	.quad       L(SSE0QB), L(SSE1QB), L(SSE2QB), L(SSE3QB)	/* Indices 176-191: eleven 16-byte stores plus remainder.  */
+	.quad       L(SSE4QB), L(SSE5QB), L(SSE6QB), L(SSE7QB)
+	.quad       L(SSE8QB), L(SSE9QB), L(SSE10QB), L(SSE11QB)
+	.quad       L(SSE12QB), L(SSE13QB), L(SSE14QB), L(SSE15QB)
+#else	/* PIC build: the table holds 4-byte offsets from L(SSExDx) instead of absolute addresses.  */
+L(SSExDx):	/* Same 192-entry layout as the non-PIC table: entry i is L(SSE<i mod 16>Q<i / 16>) - L(SSExDx).  */
+	.int       L(SSE0Q0) -L(SSExDx)	/* Indices 0-15: remainder only.  */
+	.int       L(SSE1Q0) -L(SSExDx)
+	.int       L(SSE2Q0) -L(SSExDx)
+	.int       L(SSE3Q0) -L(SSExDx)
+	.int       L(SSE4Q0) -L(SSExDx)
+	.int       L(SSE5Q0) -L(SSExDx)
+	.int       L(SSE6Q0) -L(SSExDx)
+	.int       L(SSE7Q0) -L(SSExDx)
+
+	.int       L(SSE8Q0) -L(SSExDx)
+	.int       L(SSE9Q0) -L(SSExDx)
+	.int       L(SSE10Q0)-L(SSExDx)
+	.int       L(SSE11Q0)-L(SSExDx)
+	.int       L(SSE12Q0)-L(SSExDx)
+	.int       L(SSE13Q0)-L(SSExDx)
+	.int       L(SSE14Q0)-L(SSExDx)
+	.int       L(SSE15Q0)-L(SSExDx)
+
+	.int       L(SSE0Q1) -L(SSExDx)	/* Indices 16-31: one 16-byte store plus remainder; later groups follow the same pattern.  */
+	.int       L(SSE1Q1) -L(SSExDx)
+	.int       L(SSE2Q1) -L(SSExDx)
+	.int       L(SSE3Q1) -L(SSExDx)
+	.int       L(SSE4Q1) -L(SSExDx)
+	.int       L(SSE5Q1) -L(SSExDx)
+	.int       L(SSE6Q1) -L(SSExDx)
+	.int       L(SSE7Q1) -L(SSExDx)
+
+	.int       L(SSE8Q1) -L(SSExDx)
+	.int       L(SSE9Q1) -L(SSExDx)
+	.int       L(SSE10Q1)-L(SSExDx)
+	.int       L(SSE11Q1)-L(SSExDx)
+	.int       L(SSE12Q1)-L(SSExDx)
+	.int       L(SSE13Q1)-L(SSExDx)
+	.int       L(SSE14Q1)-L(SSExDx)
+	.int       L(SSE15Q1)-L(SSExDx)
+
+	.int       L(SSE0Q2) -L(SSExDx)
+	.int       L(SSE1Q2) -L(SSExDx)
+	.int       L(SSE2Q2) -L(SSExDx)
+	.int       L(SSE3Q2) -L(SSExDx)
+	.int       L(SSE4Q2) -L(SSExDx)
+	.int       L(SSE5Q2) -L(SSExDx)
+	.int       L(SSE6Q2) -L(SSExDx)
+	.int       L(SSE7Q2) -L(SSExDx)
+
+	.int       L(SSE8Q2) -L(SSExDx)
+	.int       L(SSE9Q2) -L(SSExDx)
+	.int       L(SSE10Q2)-L(SSExDx)
+	.int       L(SSE11Q2)-L(SSExDx)
+	.int       L(SSE12Q2)-L(SSExDx)
+	.int       L(SSE13Q2)-L(SSExDx)
+	.int       L(SSE14Q2)-L(SSExDx)
+	.int       L(SSE15Q2)-L(SSExDx)
+
+	.int       L(SSE0Q3) -L(SSExDx)
+	.int       L(SSE1Q3) -L(SSExDx)
+	.int       L(SSE2Q3) -L(SSExDx)
+	.int       L(SSE3Q3) -L(SSExDx)
+	.int       L(SSE4Q3) -L(SSExDx)
+	.int       L(SSE5Q3) -L(SSExDx)
+	.int       L(SSE6Q3) -L(SSExDx)
+	.int       L(SSE7Q3) -L(SSExDx)
+
+	.int       L(SSE8Q3) -L(SSExDx)
+	.int       L(SSE9Q3) -L(SSExDx)
+	.int       L(SSE10Q3)-L(SSExDx)
+	.int       L(SSE11Q3)-L(SSExDx)
+	.int       L(SSE12Q3)-L(SSExDx)
+	.int       L(SSE13Q3)-L(SSExDx)
+	.int       L(SSE14Q3)-L(SSExDx)
+	.int       L(SSE15Q3)-L(SSExDx)
+
+	.int       L(SSE0Q4) -L(SSExDx)
+	.int       L(SSE1Q4) -L(SSExDx)
+	.int       L(SSE2Q4) -L(SSExDx)
+	.int       L(SSE3Q4) -L(SSExDx)
+	.int       L(SSE4Q4) -L(SSExDx)
+	.int       L(SSE5Q4) -L(SSExDx)
+	.int       L(SSE6Q4) -L(SSExDx)
+	.int       L(SSE7Q4) -L(SSExDx)
+
+	.int       L(SSE8Q4) -L(SSExDx)
+	.int       L(SSE9Q4) -L(SSExDx)
+	.int       L(SSE10Q4)-L(SSExDx)
+	.int       L(SSE11Q4)-L(SSExDx)
+	.int       L(SSE12Q4)-L(SSExDx)
+	.int       L(SSE13Q4)-L(SSExDx)
+	.int       L(SSE14Q4)-L(SSExDx)
+	.int       L(SSE15Q4)-L(SSExDx)
+
+	.int       L(SSE0Q5) -L(SSExDx)
+	.int       L(SSE1Q5) -L(SSExDx)
+	.int       L(SSE2Q5) -L(SSExDx)
+	.int       L(SSE3Q5) -L(SSExDx)
+	.int       L(SSE4Q5) -L(SSExDx)
+	.int       L(SSE5Q5) -L(SSExDx)
+	.int       L(SSE6Q5) -L(SSExDx)
+	.int       L(SSE7Q5) -L(SSExDx)
+
+	.int       L(SSE8Q5) -L(SSExDx)
+	.int       L(SSE9Q5) -L(SSExDx)
+	.int       L(SSE10Q5)-L(SSExDx)
+	.int       L(SSE11Q5)-L(SSExDx)
+	.int       L(SSE12Q5)-L(SSExDx)
+	.int       L(SSE13Q5)-L(SSExDx)
+	.int       L(SSE14Q5)-L(SSExDx)
+	.int       L(SSE15Q5)-L(SSExDx)
+
+	.int       L(SSE0Q6) -L(SSExDx)
+	.int       L(SSE1Q6) -L(SSExDx)
+	.int       L(SSE2Q6) -L(SSExDx)
+	.int       L(SSE3Q6) -L(SSExDx)
+	.int       L(SSE4Q6) -L(SSExDx)
+	.int       L(SSE5Q6) -L(SSExDx)
+	.int       L(SSE6Q6) -L(SSExDx)
+	.int       L(SSE7Q6) -L(SSExDx)
+
+	.int       L(SSE8Q6) -L(SSExDx)
+	.int       L(SSE9Q6) -L(SSExDx)
+	.int       L(SSE10Q6)-L(SSExDx)
+	.int       L(SSE11Q6)-L(SSExDx)
+	.int       L(SSE12Q6)-L(SSExDx)
+	.int       L(SSE13Q6)-L(SSExDx)
+	.int       L(SSE14Q6)-L(SSExDx)
+	.int       L(SSE15Q6)-L(SSExDx)
+
+	.int       L(SSE0Q7) -L(SSExDx)
+	.int       L(SSE1Q7) -L(SSExDx)
+	.int       L(SSE2Q7) -L(SSExDx)
+	.int       L(SSE3Q7) -L(SSExDx)
+	.int       L(SSE4Q7) -L(SSExDx)
+	.int       L(SSE5Q7) -L(SSExDx)
+	.int       L(SSE6Q7) -L(SSExDx)
+	.int       L(SSE7Q7) -L(SSExDx)
+
+	.int       L(SSE8Q7) -L(SSExDx)
+	.int       L(SSE9Q7) -L(SSExDx)
+	.int       L(SSE10Q7)-L(SSExDx)
+	.int       L(SSE11Q7)-L(SSExDx)
+	.int       L(SSE12Q7)-L(SSExDx)
+	.int       L(SSE13Q7)-L(SSExDx)
+	.int       L(SSE14Q7)-L(SSExDx)
+	.int       L(SSE15Q7)-L(SSExDx)
+
+	.int       L(SSE0Q8) -L(SSExDx)
+	.int       L(SSE1Q8) -L(SSExDx)
+	.int       L(SSE2Q8) -L(SSExDx)
+	.int       L(SSE3Q8) -L(SSExDx)
+	.int       L(SSE4Q8) -L(SSExDx)
+	.int       L(SSE5Q8) -L(SSExDx)
+	.int       L(SSE6Q8) -L(SSExDx)
+	.int       L(SSE7Q8) -L(SSExDx)
+
+	.int       L(SSE8Q8) -L(SSExDx)
+	.int       L(SSE9Q8) -L(SSExDx)
+	.int       L(SSE10Q8)-L(SSExDx)
+	.int       L(SSE11Q8)-L(SSExDx)
+	.int       L(SSE12Q8)-L(SSExDx)
+	.int       L(SSE13Q8)-L(SSExDx)
+	.int       L(SSE14Q8)-L(SSExDx)
+	.int       L(SSE15Q8)-L(SSExDx)
+
+	.int       L(SSE0Q9) -L(SSExDx)
+	.int       L(SSE1Q9) -L(SSExDx)
+	.int       L(SSE2Q9) -L(SSExDx)
+	.int       L(SSE3Q9) -L(SSExDx)
+	.int       L(SSE4Q9) -L(SSExDx)
+	.int       L(SSE5Q9) -L(SSExDx)
+	.int       L(SSE6Q9) -L(SSExDx)
+	.int       L(SSE7Q9) -L(SSExDx)
+
+	.int       L(SSE8Q9) -L(SSExDx)
+	.int       L(SSE9Q9) -L(SSExDx)
+	.int       L(SSE10Q9)-L(SSExDx)
+	.int       L(SSE11Q9)-L(SSExDx)
+	.int       L(SSE12Q9)-L(SSExDx)
+	.int       L(SSE13Q9)-L(SSExDx)
+	.int       L(SSE14Q9)-L(SSExDx)
+	.int       L(SSE15Q9)-L(SSExDx)
+
+	.int       L(SSE0QA) -L(SSExDx)
+	.int       L(SSE1QA) -L(SSExDx)
+	.int       L(SSE2QA) -L(SSExDx)
+	.int       L(SSE3QA) -L(SSExDx)
+	.int       L(SSE4QA) -L(SSExDx)
+	.int       L(SSE5QA) -L(SSExDx)
+	.int       L(SSE6QA) -L(SSExDx)
+	.int       L(SSE7QA) -L(SSExDx)
+
+	.int       L(SSE8QA) -L(SSExDx)
+	.int       L(SSE9QA) -L(SSExDx)
+	.int       L(SSE10QA)-L(SSExDx)
+	.int       L(SSE11QA)-L(SSExDx)
+	.int       L(SSE12QA)-L(SSExDx)
+	.int       L(SSE13QA)-L(SSExDx)
+	.int       L(SSE14QA)-L(SSExDx)
+	.int       L(SSE15QA)-L(SSExDx)
+
+	.int       L(SSE0QB) -L(SSExDx)	/* Indices 176-191: eleven 16-byte stores plus remainder.  */
+	.int       L(SSE1QB) -L(SSExDx)
+	.int       L(SSE2QB) -L(SSExDx)
+	.int       L(SSE3QB) -L(SSExDx)
+	.int       L(SSE4QB) -L(SSExDx)
+	.int       L(SSE5QB) -L(SSExDx)
+	.int       L(SSE6QB) -L(SSExDx)
+	.int       L(SSE7QB) -L(SSExDx)
+
+	.int       L(SSE8QB) -L(SSExDx)
+	.int       L(SSE9QB) -L(SSExDx)
+	.int       L(SSE10QB)-L(SSExDx)
+	.int       L(SSE11QB)-L(SSExDx)
+	.int       L(SSE12QB)-L(SSExDx)
+	.int       L(SSE13QB)-L(SSExDx)
+	.int       L(SSE14QB)-L(SSExDx)
+	.int       L(SSE15QB)-L(SSExDx)
+#endif
+	.popsection	/* Return to the section that was active before the table.  */
 
 END (memset)
 libc_hidden_builtin_def (memset)

Added: fsf/trunk/libc/sysdeps/x86_64/rtld-memset.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/rtld-memset.c (added)
+++ fsf/trunk/libc/sysdeps/x86_64/rtld-memset.c Sat Mar  8 00:05:47 2008
@@ -1,0 +1,1 @@
+#include <string/memset.c>	/* Compile the C memset from string/memset.c under this file name.  NOTE(review): presumably so the dynamic linker (rtld) build avoids the x86_64 assembly memset -- confirm against the ChangeLog.  */