[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[commits] r3842 - in /fsf/trunk/libc: ./ include/ nptl/ nptl/sysdeps/unix/sysv/linux/i386/i486/ sysdeps/mach/hurd/ sysdeps/mach/hurd/b...



Author: eglibc
Date: Thu Oct 18 00:04:21 2007
New Revision: 3842

Log:
Import glibc-mainline for 2007-10-18

Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/NEWS
    fsf/trunk/libc/README
    fsf/trunk/libc/include/features.h
    fsf/trunk/libc/nptl/ChangeLog
    fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
    fsf/trunk/libc/sysdeps/mach/hurd/bits/fcntl.h
    fsf/trunk/libc/sysdeps/mach/hurd/fcntl.c
    fsf/trunk/libc/sysdeps/posix/getaddrinfo.c
    fsf/trunk/libc/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/i386/bits/fcntl.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/s390/bits/fcntl.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/sh/bits/fcntl.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h
    fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c
    fsf/trunk/libc/sysdeps/x86_64/memset.S
    fsf/trunk/libc/time/tzfile.c
    fsf/trunk/libc/time/tzset.c
    fsf/trunk/libc/version.h

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Thu Oct 18 00:04:21 2007
@@ -1,3 +1,60 @@
+2007-10-17  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+
+	* version.h (VERSION): Set to 2.7.90.
+
+2007-10-17  Jakub Jelinek  <jakub@xxxxxxxxxx>
+
+	* sysdeps/x86_64/memset.S (bzero): Renamed to __bzero.  Add
+	weak_alias.
+
+2007-10-17  Roland McGrath  <roland@xxxxxxxx>
+
+	* sysdeps/mach/hurd/bits/fcntl.h [__USE_GNU] (F_DUPFD_CLOEXEC): New.
+	* sysdeps/mach/hurd/fcntl.c (__libc_fcntl): Implement it.
+
+2007-10-17  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	* version.h (VERSION): Bump to 2.7.
+	* include/features.h (__GLIBC_MINOR__): Bump to 7.
+
+	[BZ #5186]
+	* time/tzset.c (__tz_convert): Don't force testing for a change of
+	TZ if not called from localtime.  But then also see whether the
+	file changed, in case __use_tzfile is set.
+
+	* sysdeps/unix/sysv/linux/i386/bits/fcntl.h: Define F_DUPFD_CLOEXEC.
+	* sysdeps/unix/sysv/linux/ia64/bits/fcntl.h: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h: Likewise.
+	* sysdeps/unix/sysv/linux/s390/bits/fcntl.h: Likewise.
+	* sysdeps/unix/sysv/linux/sh/bits/fcntl.h: Likewise.
+	* sysdeps/unix/sysv/linux/sparc/bits/fcntl.h: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h:  Likewise.
+	* sysdeps/unix/sysv/linux/alpha/bits/fcntl.h: Likewise.
+
+2007-10-17  Jakub Jelinek  <jakub@xxxxxxxxxx>
+
+	* sysdeps/posix/getaddrinfo.c (getaddrinfo): When sorting addresses
+	and admin selects to be able to replace the gai.conf file, lock
+	data structures around the qsort call.
+
+2007-10-17  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	* sysdeps/x86_64/cacheinfo.c: Comment out code added in support of
+	new memset.
+	* sysdeps/x86_64/memset.S: Revert to old version for now.  The cost is
+	too high for the improvements.  Implement bzero unconditionally for
+	use in libc.
+
+2007-10-17  Ulrich Drepper  <drepper@xxxxxxxxxx>
+	    Jakub Jelinek  <jakub@xxxxxxxxxx>
+
+	* time/tzfile.c (__tzfile_read): Read POSIX TZ string if available
+	even when time_t is 32-bit.
+	(__tzfile_compute): Override POSIX TZ string STD/DST zone names if
+	timezone data read by __tzfile_default.  Ensure __tzname[0] is
+	always set after the search.
+
 2007-10-16  Ulrich Drepper  <drepper@xxxxxxxxxx>
 
 	* time/tzfile.c (__tzfile_read): Help the compiler recognize
@@ -11,11 +68,16 @@
 	(__strftime_internal): ... new function.  Add tzset_called
 	argument, pass it down to recursive calls, don't call tzset ()
 	if already true, set to true after call to tzset ().
-						
+
 2007-10-16  Ulrich Drepper  <drepper@xxxxxxxxxx>
 
 	* time/tzfile.c (__tzfile_read): Take extra memory requested by caller
 	into account when copying TZ string.
+
+2007-10-16  Jakub Jelinek  <jakub@xxxxxxxxxx>
+
+	* time/tzfile.c (__tzfile_compute): For use_last case set i to
+	num_transitions rather than num_transitions - 1.
 
 2007-10-16  Jakub Jelinek  <jakub@xxxxxxxxxx>
 

Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Thu Oct 18 00:04:21 2007
@@ -1,9 +1,12 @@
-GNU C Library NEWS -- history of user-visible changes.  2007-10-5
+GNU C Library NEWS -- history of user-visible changes.  2007-10-17
 Copyright (C) 1992-2006, 2007 Free Software Foundation, Inc.
 See the end for copying conditions.
 
 Please send GNU C library bug reports via <http://sources.redhat.com/bugzilla/>
 using `glibc' in the "product" field.
+
+Version 2.8
+
 
 Version 2.7
 
@@ -19,7 +22,7 @@
 * PPC optimizations to math and string functions.
   Implemented by Steven Munroe.
 
-* New interfaces: mkostemp, mkostemp64.  Like mkstemp* but allow additonal
+* New interfaces: mkostemp, mkostemp64.  Like mkstemp* but allow additional
   options to be passed.  Implemented by Ulrich Drepper.
 
 * More CPU set manipulation functions.  Implemented by Ulrich Drepper.

Modified: fsf/trunk/libc/README
==============================================================================
--- fsf/trunk/libc/README (original)
+++ fsf/trunk/libc/README Thu Oct 18 00:04:21 2007
@@ -1,4 +1,4 @@
-This directory contains the version 2.6 release of the GNU C Library.
+This directory contains the version 2.7 release of the GNU C Library.
 
 The GNU C Library is the standard system C library for all GNU systems,
 and is an important part of what makes up a GNU system.  It provides the
@@ -52,7 +52,7 @@
 
 The code for other CPU configurations supported by volunteers outside of
 the core glibc maintenance effort is contained in the separate `ports'
-add-on.  You can find glibc-ports-2.6 distributed separately in the
+add-on.  You can find glibc-ports-2.7 distributed separately in the
 same place where you got the main glibc distribution files.
 Currently these configurations are known to work using the `ports' add-on:
 

Modified: fsf/trunk/libc/include/features.h
==============================================================================
--- fsf/trunk/libc/include/features.h (original)
+++ fsf/trunk/libc/include/features.h Thu Oct 18 00:04:21 2007
@@ -311,7 +311,7 @@
 /* Major and minor version number of the GNU C library package.  Use
    these macros to test for features in specific releases.  */
 #define	__GLIBC__	2
-#define	__GLIBC_MINOR__	6
+#define	__GLIBC_MINOR__	7
 
 #define __GLIBC_PREREQ(maj, min) \
 	((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min))

Modified: fsf/trunk/libc/nptl/ChangeLog
==============================================================================
--- fsf/trunk/libc/nptl/ChangeLog (original)
+++ fsf/trunk/libc/nptl/ChangeLog Thu Oct 18 00:04:21 2007
@@ -1,3 +1,8 @@
+2007-10-17  Jakub Jelinek  <jakub@xxxxxxxxxx>
+
+	* sysdeps/unix/sysv/linux/i386/i486/sem_post.S (__old_sem_post): New
+	routine instead of an alias to __new_sem_post.
+
 2007-10-15  Jakub Jelinek  <jakub@xxxxxxxxxx>
 
 	* init.c (__pthread_initialize_minimal): Initialize word to appease

Modified: fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
==============================================================================
--- fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S (original)
+++ fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S Thu Oct 18 00:04:21 2007
@@ -86,7 +86,26 @@
 	versioned_symbol(libpthread, __new_sem_post, sem_post, GLIBC_2_1)
 #if SHLIB_COMPAT(libpthread, GLIBC_2_0, GLIBC_2_1)
 	.global	__old_sem_post
-__old_sem_post = __new_sem_post
+	.type	__old_sem_post,@function
+__old_sem_post:
+	pushl	%ebx
+
+	movl	8(%esp), %ebx
+	LOCK
+	addl	$1, (%ebx)
+
+	movl	$SYS_futex, %eax
+	movl	$FUTEX_WAKE, %ecx
+	movl	$1, %edx
+	ENTER_KERNEL
+
+	testl	%eax, %eax
+	js	1b
+
+	xorl	%eax, %eax
+	popl	%ebx
+	ret
+	.size	__old_sem_post,.-__old_sem_post
 	compat_symbol(libpthread, __old_sem_post, sem_post, GLIBC_2_0)
 #endif
 

Modified: fsf/trunk/libc/sysdeps/mach/hurd/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/mach/hurd/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/mach/hurd/bits/fcntl.h Thu Oct 18 00:04:21 2007
@@ -162,6 +162,11 @@
 #define	F_GETLK		7	/* Get record locking info.  */
 #define	F_SETLK		8	/* Set record locking info (non-blocking).  */
 #define	F_SETLKW	9	/* Set record locking info (blocking).  */
+
+#ifdef __USE_GNU
+# define F_DUPFD_CLOEXEC 1030	/* Duplicate, set FD_CLOEXEC on new one.  */
+#endif
+
 
 /* File descriptor flags used with F_GETFD and F_SETFD.  */
 #define	FD_CLOEXEC	1	/* Close on exec.  */

Modified: fsf/trunk/libc/sysdeps/mach/hurd/fcntl.c
==============================================================================
--- fsf/trunk/libc/sysdeps/mach/hurd/fcntl.c (original)
+++ fsf/trunk/libc/sysdeps/mach/hurd/fcntl.c Thu Oct 18 00:04:21 2007
@@ -1,4 +1,4 @@
-/* Copyright (C) 1992-1997,1999,2000,2002 Free Software Foundation, Inc.
+/* Copyright (C) 1992-1997,1999,2000,2002,2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -50,6 +50,7 @@
       /* First the descriptor-based commands, which do no RPCs.  */
 
     case F_DUPFD:		/* Duplicate the file descriptor.  */
+    case F_DUPFD_CLOEXEC:
       {
 	struct hurd_fd *new;
 	io_t port, ctty;
@@ -63,6 +64,12 @@
 	flags = d->flags;
 	ctty = _hurd_port_get (&d->ctty, &ctty_ulink);
 	port = _hurd_port_locked_get (&d->port, &ulink); /* Unlocks D.  */
+
+	if (cmd == F_DUPFD_CLOEXEC)
+	  flags |= FD_CLOEXEC;
+	else
+	  /* Duplication clears the FD_CLOEXEC flag.  */
+	  flags &= ~FD_CLOEXEC;
 
 	/* Get a new file descriptor.  The third argument to __fcntl is the
 	   minimum file descriptor number for it.  */
@@ -82,8 +89,7 @@
 	    /* Install the ports and flags in the new descriptor.  */
 	    if (ctty != MACH_PORT_NULL)
 	      _hurd_port_set (&new->ctty, ctty);
-	    /* Duplication clears the FD_CLOEXEC flag.  */
-	    new->flags = flags & ~FD_CLOEXEC;
+	    new->flags = flags;
 	    _hurd_port_locked_set (&new->port, port); /* Unlocks NEW.  */
 	  }
 

Modified: fsf/trunk/libc/sysdeps/posix/getaddrinfo.c
==============================================================================
--- fsf/trunk/libc/sysdeps/posix/getaddrinfo.c (original)
+++ fsf/trunk/libc/sysdeps/posix/getaddrinfo.c Thu Oct 18 00:04:21 2007
@@ -1426,9 +1426,12 @@
 #define GAICONF_FNAME "/etc/gai.conf"
 
 
-/* Nozero if we are supposed to reload the config file automatically
+/* Non-zero if we are supposed to reload the config file automatically
    whenever it changed.  */
 static int gaiconf_reload_flag;
+
+/* Non-zero if gaiconf_reload_flag was ever set to true.  */
+static int gaiconf_reload_flag_ever_set;
 
 /* Last modification time.  */
 static struct timespec gaiconf_mtime;
@@ -1611,7 +1614,11 @@
 
 	    case 6:
 	      if (strcmp (cmd, "reload") == 0)
-		gaiconf_reload_flag = strcmp (val1, "yes") == 0;
+		{
+		  gaiconf_reload_flag = strcmp (val1, "yes") == 0;
+		  if (gaiconf_reload_flag)
+		    gaiconf_reload_flag_ever_set = 1;
+		}
 	      break;
 
 	    case 10:
@@ -1934,9 +1941,6 @@
       __libc_once_define (static, once);
       __typeof (once) old_once = once;
       __libc_once (once, gaiconf_init);
-      if (old_once && gaiconf_reload_flag)
-	gaiconf_reload ();
-
       /* Sort results according to RFC 3484.  */
       struct sort_result results[nresults];
       struct addrinfo *q;
@@ -2055,7 +2059,18 @@
 
       /* We got all the source addresses we can get, now sort using
 	 the information.  */
-      qsort (results, nresults, sizeof (results[0]), rfc3484_sort);
+      if (__builtin_expect (gaiconf_reload_flag_ever_set, 0))
+	{
+	  __libc_lock_define_initialized (static, lock);
+
+	  __libc_lock_lock (lock);
+	  if (old_once && gaiconf_reload_flag)
+	    gaiconf_reload ();
+	  qsort (results, nresults, sizeof (results[0]), rfc3484_sort);
+	  __libc_lock_unlock (lock);
+	}
+      else
+	qsort (results, nresults, sizeof (results[0]), rfc3484_sort);
 
       /* Queue the results up as they come out of sorting.  */
       q = p = results[0].dest_addr;

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h Thu Oct 18 00:04:21 2007
@@ -93,6 +93,8 @@
 # define F_SETLEASE	1024	/* Set a lease.	 */
 # define F_GETLEASE	1025	/* Enquire what lease is active.  */
 # define F_NOTIFY	1026	/* Request notfications on a directory.	 */
+# define F_DUPFD_CLOEXEC 1030	/* Duplicate file descriptor with
+				   close-on-exec set.  */
 #endif
 
 /* for F_[GET|SET]FD */

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/i386/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/i386/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/i386/bits/fcntl.h Thu Oct 18 00:04:21 2007
@@ -98,6 +98,8 @@
 # define F_SETLEASE	1024	/* Set a lease.	 */
 # define F_GETLEASE	1025	/* Enquire what lease is active.  */
 # define F_NOTIFY	1026	/* Request notfications on a directory.	 */
+# define F_DUPFD_CLOEXEC 1030	/* Duplicate file descriptor with
+				   close-on-exec set.  */
 #endif
 
 /* For F_[GET|SET]FD.  */

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h Thu Oct 18 00:04:21 2007
@@ -94,6 +94,8 @@
 # define F_SETLEASE	1024	/* Set a lease.	 */
 # define F_GETLEASE	1025	/* Enquire what lease is active.  */
 # define F_NOTIFY	1026	/* Request notfications on a directory.	 */
+# define F_DUPFD_CLOEXEC 1030	/* Duplicate file descriptor with
+				   close-on-exec set.  */
 #endif
 
 /* For F_[GET|SET]FD.  */

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h Thu Oct 18 00:04:21 2007
@@ -98,6 +98,8 @@
 # define F_SETLEASE	1024	/* Set a lease.	 */
 # define F_GETLEASE	1025	/* Enquire what lease is active.  */
 # define F_NOTIFY	1026	/* Request notfications on a directory.	 */
+# define F_DUPFD_CLOEXEC 1030	/* Duplicate file descriptor with
+				   close-on-exec set.  */
 #endif
 
 /* For F_[GET|SET]FD.  */

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/s390/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/s390/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/s390/bits/fcntl.h Thu Oct 18 00:04:21 2007
@@ -113,6 +113,8 @@
 # define F_SETLEASE	1024	/* Set a lease.	 */
 # define F_GETLEASE	1025	/* Enquire what lease is active.  */
 # define F_NOTIFY	1026	/* Request notfications on a directory.	 */
+# define F_DUPFD_CLOEXEC 1030	/* Duplicate file descriptor with
+				   close-on-exec set.  */
 #endif
 
 /* For F_[GET|SET]FD.  */

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/sh/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/sh/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/sh/bits/fcntl.h Thu Oct 18 00:04:21 2007
@@ -98,6 +98,8 @@
 # define F_SETLEASE	1024	/* Set a lease.	 */
 # define F_GETLEASE	1025	/* Enquire what lease is active.  */
 # define F_NOTIFY	1026	/* Request notfications on a directory.	 */
+# define F_DUPFD_CLOEXEC 1030	/* Duplicate file descriptor with
+				   close-on-exec set.  */
 #endif
 
 /* For F_[GET|SET]FD.  */

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h Thu Oct 18 00:04:21 2007
@@ -105,6 +105,8 @@
 # define F_SETLEASE     1024	/* Set a lease.  */
 # define F_GETLEASE     1025	/* Enquire what lease is active.  */
 # define F_NOTIFY       1026	/* Request notfications on a directory.  */
+# define F_DUPFD_CLOEXEC 1030	/* Duplicate file descriptor with
+				   close-on-exec set.  */
 #endif
 
 #if __WORDSIZE == 64

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h Thu Oct 18 00:04:21 2007
@@ -112,6 +112,8 @@
 # define F_SETLEASE	1024	/* Set a lease.	 */
 # define F_GETLEASE	1025	/* Enquire what lease is active.  */
 # define F_NOTIFY	1026	/* Request notfications on a directory.	 */
+# define F_DUPFD_CLOEXEC 1030	/* Duplicate file descriptor with
+				   close-on-exec set.  */
 #endif
 
 /* For F_[GET|SET]FD.  */

Modified: fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c Thu Oct 18 00:04:21 2007
@@ -404,10 +404,13 @@
 /* Shared cache size for use in memory and string routines, typically
    L2 or L3 size.  */
 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
+#ifdef NOT_USED_RIGHT_NOW
 long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024;
+#endif
 /* PREFETCHW support flag for use in memory and string routines.  */
 int __x86_64_prefetchw attribute_hidden;
 
+#ifdef NOT_USED_RIGHT_NOW
 /* Instructions preferred for memory and string routines.
 
   0: Regular instructions
@@ -417,6 +420,7 @@
 
   */
 int __x86_64_preferred_memory_instruction attribute_hidden;
+#endif
 
 
 static void
@@ -459,12 +463,14 @@
 		    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
 		    : "0" (1));
 
+#ifdef NOT_USED_RIGHT_NOW
       /* Intel prefers SSSE3 instructions for memory/string rountines
 	 if they are avaiable.  */
       if ((ecx & 0x200))
 	__x86_64_preferred_memory_instruction = 3;
       else
 	__x86_64_preferred_memory_instruction = 2;
+#endif
 
       /* Figure out the number of logical threads that share the
 	 highest cache level.  */
@@ -570,6 +576,8 @@
   if (shared > 0)
     {
       __x86_64_shared_cache_size_half = shared / 2;
+#ifdef NOT_USED_RIGHT_NOW
       __x86_64_shared_cache_size = shared;
+#endif
     }
 }

Modified: fsf/trunk/libc/sysdeps/x86_64/memset.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/memset.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/memset.S Thu Oct 18 00:04:21 2007
@@ -2,6 +2,7 @@
    Optimized version for x86-64.
    Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
+   Contributed by Andreas Jaeger <aj@xxxxxxx>.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -19,13 +20,23 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
 
-	.text
-ENTRY (bzero)
+/* This is somewhat experimental and could be made dependent on the cache
+   size.  */
+#define LARGE $120000
+
+        .text
+#ifndef NOT_IN_libc
+ENTRY(__bzero)
 	mov	%rsi,%rdx	/* Adjust parameter.  */
 	xorl	%esi,%esi	/* Fill with 0s.  */
 	jmp	L(memset_entry)
-END (bzero)
+END(__bzero)
+weak_alias (__bzero, bzero)
+#endif
 
 #if defined PIC && !defined NOT_IN_libc
 ENTRY (__memset_chk)
@@ -33,1233 +44,90 @@
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__memset_chk)
 #endif
-
 ENTRY (memset)
 L(memset_entry):
-	cmp    $0x1,%rdx
-	mov    %rdi,%rax	/* memset returns the dest address.  */
-	jne    L(ck2)
-	mov    %sil,(%rdi)
-	retq   $0x0
-L(ck2):
-	mov    $0x101010101010101,%r9
-	mov    %rdx,%r8
-	movzbq %sil,%rdx
-	imul   %r9,%rdx
-L(now_dw_aligned):
-	cmp    $0x90,%r8
-	jg     L(ck_mem_ops_method)
-L(now_dw_aligned_small):
-	lea    L(setPxQx)(%rip),%r11
-	add    %r8,%rdi
-#ifndef PIC
-	jmpq   *(%r11,%r8,8)
-#else
-	movslq (%r11,%r8,4),%rcx
-	lea    (%rcx,%r11,1),%r11
-	jmpq   *%r11
-#endif
+	cmp	$0x7,%rdx	/* Check for small length.  */
+	mov	%rdi,%rcx	/* Save ptr as return value.  */
+	jbe	7f
 
-L(Got0):
-	retq   $0x0
+	/* Populate 8 bit data to full 64-bit.  */
+	movabs	$0x0101010101010101,%r8
+	movzbl	%sil,%eax
+	imul	%rax,%r8
+	test	$0x7,%edi	/* Check for alignment.  */
+	je	2f
 
-	.pushsection .rodata
-	.balign     16
-#ifndef PIC
-L(setPxQx):
-	.quad       L(Got0), L(P1Q0), L(P2Q0), L(P3Q0)
-	.quad       L(P4Q0), L(P5Q0), L(P6Q0), L(P7Q0)
-	.quad       L(P0Q1), L(P1Q1), L(P2Q1), L(P3Q1)
-	.quad       L(P4Q1), L(P5Q1), L(P6Q1), L(P7Q1)
-	.quad       L(P0Q2), L(P1Q2), L(P2Q2), L(P3Q2)
-	.quad       L(P4Q2), L(P5Q2), L(P6Q2), L(P7Q2)
-	.quad       L(P0Q3), L(P1Q3), L(P2Q3), L(P3Q3)
-	.quad       L(P4Q3), L(P5Q3), L(P6Q3), L(P7Q3)
-	.quad       L(P0Q4), L(P1Q4), L(P2Q4), L(P3Q4)
-	.quad       L(P4Q4), L(P5Q4), L(P6Q4), L(P7Q4)
-	.quad       L(P0Q5), L(P1Q5), L(P2Q5), L(P3Q5)
-	.quad       L(P4Q5), L(P5Q5), L(P6Q5), L(P7Q5)
-	.quad       L(P0Q6), L(P1Q6), L(P2Q6), L(P3Q6)
-	.quad       L(P4Q6), L(P5Q6), L(P6Q6), L(P7Q6)
-	.quad       L(P0Q7), L(P1Q7), L(P2Q7), L(P3Q7)
-	.quad       L(P4Q7), L(P5Q7), L(P6Q7), L(P7Q7)
-	.quad       L(P0Q8), L(P1Q8), L(P2Q8), L(P3Q8)
-	.quad       L(P4Q8), L(P5Q8), L(P6Q8), L(P7Q8)
-	.quad       L(P0Q9), L(P1Q9), L(P2Q9), L(P3Q9)
-	.quad       L(P4Q9), L(P5Q9), L(P6Q9), L(P7Q9)
-	.quad       L(P0QA), L(P1QA), L(P2QA), L(P3QA)
-	.quad       L(P4QA), L(P5QA), L(P6QA), L(P7QA)
-	.quad       L(P0QB), L(P1QB), L(P2QB), L(P3QB)
-	.quad       L(P4QB), L(P5QB), L(P6QB), L(P7QB)
-	.quad       L(P0QC), L(P1QC), L(P2QC), L(P3QC)
-	.quad       L(P4QC), L(P5QC), L(P6QC), L(P7QC)
-	.quad       L(P0QD), L(P1QD), L(P2QD), L(P3QD)
-	.quad       L(P4QD), L(P5QD), L(P6QD), L(P7QD)
-	.quad       L(P0QE), L(P1QE), L(P2QE), L(P3QE)
-	.quad       L(P4QE), L(P5QE), L(P6QE), L(P7QE)
-	.quad       L(P0QF), L(P1QF), L(P2QF), L(P3QF)
-	.quad       L(P4QF), L(P5QF), L(P6QF), L(P7QF)
-	.quad       L(P0QG), L(P1QG), L(P2QG), L(P3QG)
-	.quad       L(P4QG), L(P5QG), L(P6QG), L(P7QG)
-	.quad       L(P0QH), L(P1QH), L(P2QH), L(P3QH)
-	.quad       L(P4QH), L(P5QH), L(P6QH), L(P7QH)
-	.quad       L(P0QI)
-# ifdef USE_EXTRA_TABLE
-	.quad       L(P1QI), L(P2QI), L(P3QI), L(P4QI)
-	.quad       L(P5QI), L(P6QI), L(P7QI)
-# endif
-#else
-L(setPxQx):
-	.int       L(Got0)-L(setPxQx)
-	.int       L(P1Q0)-L(setPxQx)
-	.int       L(P2Q0)-L(setPxQx)
-	.int       L(P3Q0)-L(setPxQx)
-	.int       L(P4Q0)-L(setPxQx)
-	.int       L(P5Q0)-L(setPxQx)
-	.int       L(P6Q0)-L(setPxQx)
-	.int       L(P7Q0)-L(setPxQx)
+	.p2align 4
+1:	/* Align ptr to 8 byte.  */
+	mov	%sil,(%rcx)
+	dec	%rdx
+	inc	%rcx
+	test	$0x7,%ecx
+	jne	1b
 
-	.int       L(P0Q1)-L(setPxQx)
-	.int       L(P1Q1)-L(setPxQx)
-	.int       L(P2Q1)-L(setPxQx)
-	.int       L(P3Q1)-L(setPxQx)
-	.int       L(P4Q1)-L(setPxQx)
-	.int       L(P5Q1)-L(setPxQx)
-	.int       L(P6Q1)-L(setPxQx)
-	.int       L(P7Q1)-L(setPxQx)
+2:	/* Check for really large regions.  */
+	mov	%rdx,%rax
+	shr	$0x6,%rax
+	je	4f
+	cmp	LARGE, %rdx
+	jae	11f
 
-	.int       L(P0Q2)-L(setPxQx)
-	.int       L(P1Q2)-L(setPxQx)
-	.int       L(P2Q2)-L(setPxQx)
-	.int       L(P3Q2)-L(setPxQx)
-	.int       L(P4Q2)-L(setPxQx)
-	.int       L(P5Q2)-L(setPxQx)
-	.int       L(P6Q2)-L(setPxQx)
-	.int       L(P7Q2)-L(setPxQx)
+	.p2align 4
+3:	/* Copy 64 bytes.  */
+	mov	%r8,(%rcx)
+	mov	%r8,0x8(%rcx)
+	mov	%r8,0x10(%rcx)
+	mov	%r8,0x18(%rcx)
+	mov	%r8,0x20(%rcx)
+	mov	%r8,0x28(%rcx)
+	mov	%r8,0x30(%rcx)
+	mov	%r8,0x38(%rcx)
+	add	$0x40,%rcx
+	dec	%rax
+	jne	3b
 
-	.int       L(P0Q3)-L(setPxQx)
-	.int       L(P1Q3)-L(setPxQx)
-	.int       L(P2Q3)-L(setPxQx)
-	.int       L(P3Q3)-L(setPxQx)
-	.int       L(P4Q3)-L(setPxQx)
-	.int       L(P5Q3)-L(setPxQx)
-	.int       L(P6Q3)-L(setPxQx)
-	.int       L(P7Q3)-L(setPxQx)
+4:	/* Copy final bytes.  */
+	and	$0x3f,%edx
+	mov	%rdx,%rax
+	shr	$0x3,%rax
+	je	6f
 
-	.int       L(P0Q4)-L(setPxQx)
-	.int       L(P1Q4)-L(setPxQx)
-	.int       L(P2Q4)-L(setPxQx)
-	.int       L(P3Q4)-L(setPxQx)
-	.int       L(P4Q4)-L(setPxQx)
-	.int       L(P5Q4)-L(setPxQx)
-	.int       L(P6Q4)-L(setPxQx)
-	.int       L(P7Q4)-L(setPxQx)
+5:	/* First in chunks of 8 bytes.  */
+	mov	%r8,(%rcx)
+	add	$0x8,%rcx
+	dec	%rax
+	jne	5b
+6:
+	and	$0x7,%edx
+7:
+	test	%rdx,%rdx
+	je	9f
+8:	/* And finally as bytes (up to 7).  */
+	mov	%sil,(%rcx)
+	inc	%rcx
+	dec	%rdx
+	jne	8b
+9:
+	/* Load result (only if used as memset).  */
+	mov	%rdi,%rax	/* start address of destination is result */
+	retq
 
-	.int       L(P0Q5)-L(setPxQx)
-	.int       L(P1Q5)-L(setPxQx)
-	.int       L(P2Q5)-L(setPxQx)
-	.int       L(P3Q5)-L(setPxQx)
-	.int       L(P4Q5)-L(setPxQx)
-	.int       L(P5Q5)-L(setPxQx)
-	.int       L(P6Q5)-L(setPxQx)
-	.int       L(P7Q5)-L(setPxQx)
-
-	.int       L(P0Q6)-L(setPxQx)
-	.int       L(P1Q6)-L(setPxQx)
-	.int       L(P2Q6)-L(setPxQx)
-	.int       L(P3Q6)-L(setPxQx)
-	.int       L(P4Q6)-L(setPxQx)
-	.int       L(P5Q6)-L(setPxQx)
-	.int       L(P6Q6)-L(setPxQx)
-	.int       L(P7Q6)-L(setPxQx)
-
-	.int       L(P0Q7)-L(setPxQx)
-	.int       L(P1Q7)-L(setPxQx)
-	.int       L(P2Q7)-L(setPxQx)
-	.int       L(P3Q7)-L(setPxQx)
-	.int       L(P4Q7)-L(setPxQx)
-	.int       L(P5Q7)-L(setPxQx)
-	.int       L(P6Q7)-L(setPxQx)
-	.int       L(P7Q7)-L(setPxQx)
-
-	.int       L(P0Q8)-L(setPxQx)
-	.int       L(P1Q8)-L(setPxQx)
-	.int       L(P2Q8)-L(setPxQx)
-	.int       L(P3Q8)-L(setPxQx)
-	.int       L(P4Q8)-L(setPxQx)
-	.int       L(P5Q8)-L(setPxQx)
-	.int       L(P6Q8)-L(setPxQx)
-	.int       L(P7Q8)-L(setPxQx)
-
-	.int       L(P0Q9)-L(setPxQx)
-	.int       L(P1Q9)-L(setPxQx)
-	.int       L(P2Q9)-L(setPxQx)
-	.int       L(P3Q9)-L(setPxQx)
-	.int       L(P4Q9)-L(setPxQx)
-	.int       L(P5Q9)-L(setPxQx)
-	.int       L(P6Q9)-L(setPxQx)
-	.int       L(P7Q9)-L(setPxQx)
-
-	.int       L(P0QA)-L(setPxQx)
-	.int       L(P1QA)-L(setPxQx)
-	.int       L(P2QA)-L(setPxQx)
-	.int       L(P3QA)-L(setPxQx)
-	.int       L(P4QA)-L(setPxQx)
-	.int       L(P5QA)-L(setPxQx)
-	.int       L(P6QA)-L(setPxQx)
-	.int       L(P7QA)-L(setPxQx)
-
-	.int       L(P0QB)-L(setPxQx)
-	.int       L(P1QB)-L(setPxQx)
-	.int       L(P2QB)-L(setPxQx)
-	.int       L(P3QB)-L(setPxQx)
-	.int       L(P4QB)-L(setPxQx)
-	.int       L(P5QB)-L(setPxQx)
-	.int       L(P6QB)-L(setPxQx)
-	.int       L(P7QB)-L(setPxQx)
-
-	.int       L(P0QC)-L(setPxQx)
-	.int       L(P1QC)-L(setPxQx)
-	.int       L(P2QC)-L(setPxQx)
-	.int       L(P3QC)-L(setPxQx)
-	.int       L(P4QC)-L(setPxQx)
-	.int       L(P5QC)-L(setPxQx)
-	.int       L(P6QC)-L(setPxQx)
-	.int       L(P7QC)-L(setPxQx)
-
-	.int       L(P0QD)-L(setPxQx)
-	.int       L(P1QD)-L(setPxQx)
-	.int       L(P2QD)-L(setPxQx)
-	.int       L(P3QD)-L(setPxQx)
-	.int       L(P4QD)-L(setPxQx)
-	.int       L(P5QD)-L(setPxQx)
-	.int       L(P6QD)-L(setPxQx)
-	.int       L(P7QD)-L(setPxQx)
-
-	.int       L(P0QE)-L(setPxQx)
-	.int       L(P1QE)-L(setPxQx)
-	.int       L(P2QE)-L(setPxQx)
-	.int       L(P3QE)-L(setPxQx)
-	.int       L(P4QE)-L(setPxQx)
-	.int       L(P5QE)-L(setPxQx)
-	.int       L(P6QE)-L(setPxQx)
-	.int       L(P7QE)-L(setPxQx)
-
-	.int       L(P0QF)-L(setPxQx)
-	.int       L(P1QF)-L(setPxQx)
-	.int       L(P2QF)-L(setPxQx)
-	.int       L(P3QF)-L(setPxQx)
-	.int       L(P4QF)-L(setPxQx)
-	.int       L(P5QF)-L(setPxQx)
-	.int       L(P6QF)-L(setPxQx)
-	.int       L(P7QF)-L(setPxQx)
-
-	.int       L(P0QG)-L(setPxQx)
-	.int       L(P1QG)-L(setPxQx)
-	.int       L(P2QG)-L(setPxQx)
-	.int       L(P3QG)-L(setPxQx)
-	.int       L(P4QG)-L(setPxQx)
-	.int       L(P5QG)-L(setPxQx)
-	.int       L(P6QG)-L(setPxQx)
-	.int       L(P7QG)-L(setPxQx)
-
-	.int       L(P0QH)-L(setPxQx)
-	.int       L(P1QH)-L(setPxQx)
-	.int       L(P2QH)-L(setPxQx)
-	.int       L(P3QH)-L(setPxQx)
-	.int       L(P4QH)-L(setPxQx)
-	.int       L(P5QH)-L(setPxQx)
-	.int       L(P6QH)-L(setPxQx)
-	.int       L(P7QH)-L(setPxQx)
-
-	.int       L(P0QI)-L(setPxQx)
-# ifdef USE_EXTRA_TABLE
-	.int       L(P1QI)-L(setPxQx)
-	.int       L(P2QI)-L(setPxQx)
-	.int       L(P3QI)-L(setPxQx)
-	.int       L(P4QI)-L(setPxQx)
-	.int       L(P5QI)-L(setPxQx)
-	.int       L(P6QI)-L(setPxQx)
-	.int       L(P7QI)-L(setPxQx)
-# endif
-#endif
-	.popsection
-
-	.balign     16
-#ifdef USE_EXTRA_TABLE
-L(P1QI): mov    %rdx,-0x91(%rdi)
-#endif
-L(P1QH): mov    %rdx,-0x89(%rdi)
-L(P1QG): mov    %rdx,-0x81(%rdi)
-#		   .balign     16
-L(P1QF): mov    %rdx,-0x79(%rdi)
-L(P1QE): mov    %rdx,-0x71(%rdi)
-L(P1QD): mov    %rdx,-0x69(%rdi)
-L(P1QC): mov    %rdx,-0x61(%rdi)
-L(P1QB): mov    %rdx,-0x59(%rdi)
-L(P1QA): mov    %rdx,-0x51(%rdi)
-L(P1Q9): mov    %rdx,-0x49(%rdi)
-L(P1Q8): mov    %rdx,-0x41(%rdi)
-L(P1Q7): mov    %rdx,-0x39(%rdi)
-L(P1Q6): mov    %rdx,-0x31(%rdi)
-L(P1Q5): mov    %rdx,-0x29(%rdi)
-L(P1Q4): mov    %rdx,-0x21(%rdi)
-L(P1Q3): mov    %rdx,-0x19(%rdi)
-L(P1Q2): mov    %rdx,-0x11(%rdi)
-L(P1Q1): mov    %rdx,-0x9(%rdi)
-L(P1Q0): mov    %dl,-0x1(%rdi)
-		retq   $0x0
-
-	.balign     16
-L(P0QI): mov    %rdx,-0x90(%rdi)
-L(P0QH): mov    %rdx,-0x88(%rdi)
-#		   .balign     16
-L(P0QG): mov    %rdx,-0x80(%rdi)
-L(P0QF): mov    %rdx,-0x78(%rdi)
-L(P0QE): mov    %rdx,-0x70(%rdi)
-L(P0QD): mov    %rdx,-0x68(%rdi)
-L(P0QC): mov    %rdx,-0x60(%rdi)
-L(P0QB): mov    %rdx,-0x58(%rdi)
-L(P0QA): mov    %rdx,-0x50(%rdi)
-L(P0Q9): mov    %rdx,-0x48(%rdi)
-L(P0Q8): mov    %rdx,-0x40(%rdi)
-L(P0Q7): mov    %rdx,-0x38(%rdi)
-L(P0Q6): mov    %rdx,-0x30(%rdi)
-L(P0Q5): mov    %rdx,-0x28(%rdi)
-L(P0Q4): mov    %rdx,-0x20(%rdi)
-L(P0Q3): mov    %rdx,-0x18(%rdi)
-L(P0Q2): mov    %rdx,-0x10(%rdi)
-L(P0Q1): mov    %rdx,-0x8(%rdi)
-L(P0Q0): retq   $0x0
-
-
-	.balign     16
-#ifdef USE_EXTRA_TABLE
-L(P2QI): mov    %rdx,-0x92(%rdi)
-#endif
-L(P2QH): mov    %rdx,-0x8a(%rdi)
-L(P2QG): mov    %rdx,-0x82(%rdi)
-#		   .balign     16
-L(P2QF): mov    %rdx,-0x7a(%rdi)
-L(P2QE): mov    %rdx,-0x72(%rdi)
-L(P2QD): mov    %rdx,-0x6a(%rdi)
-L(P2QC): mov    %rdx,-0x62(%rdi)
-L(P2QB): mov    %rdx,-0x5a(%rdi)
-L(P2QA): mov    %rdx,-0x52(%rdi)
-L(P2Q9): mov    %rdx,-0x4a(%rdi)
-L(P2Q8): mov    %rdx,-0x42(%rdi)
-L(P2Q7): mov    %rdx,-0x3a(%rdi)
-L(P2Q6): mov    %rdx,-0x32(%rdi)
-L(P2Q5): mov    %rdx,-0x2a(%rdi)
-L(P2Q4): mov    %rdx,-0x22(%rdi)
-L(P2Q3): mov    %rdx,-0x1a(%rdi)
-L(P2Q2): mov    %rdx,-0x12(%rdi)
-L(P2Q1): mov    %rdx,-0xa(%rdi)
-L(P2Q0): mov    %dx,-0x2(%rdi)
-		retq   $0x0
-
-	.balign     16
-#ifdef USE_EXTRA_TABLE
-L(P3QI): mov    %rdx,-0x93(%rdi)
-#endif
-L(P3QH): mov    %rdx,-0x8b(%rdi)
-L(P3QG): mov    %rdx,-0x83(%rdi)
-#		   .balign     16
-L(P3QF): mov    %rdx,-0x7b(%rdi)
-L(P3QE): mov    %rdx,-0x73(%rdi)
-L(P3QD): mov    %rdx,-0x6b(%rdi)
-L(P3QC): mov    %rdx,-0x63(%rdi)
-L(P3QB): mov    %rdx,-0x5b(%rdi)
-L(P3QA): mov    %rdx,-0x53(%rdi)
-L(P3Q9): mov    %rdx,-0x4b(%rdi)
-L(P3Q8): mov    %rdx,-0x43(%rdi)
-L(P3Q7): mov    %rdx,-0x3b(%rdi)
-L(P3Q6): mov    %rdx,-0x33(%rdi)
-L(P3Q5): mov    %rdx,-0x2b(%rdi)
-L(P3Q4): mov    %rdx,-0x23(%rdi)
-L(P3Q3): mov    %rdx,-0x1b(%rdi)
-L(P3Q2): mov    %rdx,-0x13(%rdi)
-L(P3Q1): mov    %rdx,-0xb(%rdi)
-L(P3Q0): mov    %dx,-0x3(%rdi)
-		mov    %dl,-0x1(%rdi)
-		retq   $0x0
-
-	.balign     16
-#ifdef USE_EXTRA_TABLE
-L(P4QI): mov    %rdx,-0x94(%rdi)
-#endif
-L(P4QH): mov    %rdx,-0x8c(%rdi)
-L(P4QG): mov    %rdx,-0x84(%rdi)
-#		   .balign     16
-L(P4QF): mov    %rdx,-0x7c(%rdi)
-L(P4QE): mov    %rdx,-0x74(%rdi)
-L(P4QD): mov    %rdx,-0x6c(%rdi)
-L(P4QC): mov    %rdx,-0x64(%rdi)
-L(P4QB): mov    %rdx,-0x5c(%rdi)
-L(P4QA): mov    %rdx,-0x54(%rdi)
-L(P4Q9): mov    %rdx,-0x4c(%rdi)
-L(P4Q8): mov    %rdx,-0x44(%rdi)
-L(P4Q7): mov    %rdx,-0x3c(%rdi)
-L(P4Q6): mov    %rdx,-0x34(%rdi)
-L(P4Q5): mov    %rdx,-0x2c(%rdi)
-L(P4Q4): mov    %rdx,-0x24(%rdi)
-L(P4Q3): mov    %rdx,-0x1c(%rdi)
-L(P4Q2): mov    %rdx,-0x14(%rdi)
-L(P4Q1): mov    %rdx,-0xc(%rdi)
-L(P4Q0): mov    %edx,-0x4(%rdi)
-		retq   $0x0
-
-	.balign     16
-#if defined(USE_EXTRA_TABLE)
-L(P5QI): mov    %rdx,-0x95(%rdi)
-#endif
-L(P5QH): mov    %rdx,-0x8d(%rdi)
-L(P5QG): mov    %rdx,-0x85(%rdi)
-#		   .balign     16
-L(P5QF): mov    %rdx,-0x7d(%rdi)
-L(P5QE): mov    %rdx,-0x75(%rdi)
-L(P5QD): mov    %rdx,-0x6d(%rdi)
-L(P5QC): mov    %rdx,-0x65(%rdi)
-L(P5QB): mov    %rdx,-0x5d(%rdi)
-L(P5QA): mov    %rdx,-0x55(%rdi)
-L(P5Q9): mov    %rdx,-0x4d(%rdi)
-L(P5Q8): mov    %rdx,-0x45(%rdi)
-L(P5Q7): mov    %rdx,-0x3d(%rdi)
-L(P5Q6): mov    %rdx,-0x35(%rdi)
-L(P5Q5): mov    %rdx,-0x2d(%rdi)
-L(P5Q4): mov    %rdx,-0x25(%rdi)
-L(P5Q3): mov    %rdx,-0x1d(%rdi)
-L(P5Q2): mov    %rdx,-0x15(%rdi)
-L(P5Q1): mov    %rdx,-0xd(%rdi)
-L(P5Q0): mov    %edx,-0x5(%rdi)
-		mov    %dl,-0x1(%rdi)
-		retq   $0x0
-
-	.balign     16
-#ifdef USE_EXTRA_TABLE
-L(P6QI): mov    %rdx,-0x96(%rdi)
-#endif
-L(P6QH): mov    %rdx,-0x8e(%rdi)
-L(P6QG): mov    %rdx,-0x86(%rdi)
-#		   .balign     16
-L(P6QF): mov    %rdx,-0x7e(%rdi)
-L(P6QE): mov    %rdx,-0x76(%rdi)
-L(P6QD): mov    %rdx,-0x6e(%rdi)
-L(P6QC): mov    %rdx,-0x66(%rdi)
-L(P6QB): mov    %rdx,-0x5e(%rdi)
-L(P6QA): mov    %rdx,-0x56(%rdi)
-L(P6Q9): mov    %rdx,-0x4e(%rdi)
-L(P6Q8): mov    %rdx,-0x46(%rdi)
-L(P6Q7): mov    %rdx,-0x3e(%rdi)
-L(P6Q6): mov    %rdx,-0x36(%rdi)
-L(P6Q5): mov    %rdx,-0x2e(%rdi)
-L(P6Q4): mov    %rdx,-0x26(%rdi)
-L(P6Q3): mov    %rdx,-0x1e(%rdi)
-L(P6Q2): mov    %rdx,-0x16(%rdi)
-L(P6Q1): mov    %rdx,-0xe(%rdi)
-L(P6Q0): mov    %edx,-0x6(%rdi)
-		mov    %dx,-0x2(%rdi)
-		retq   $0x0
-
-	.balign     16
-#ifdef USE_EXTRA_TABLE
-L(P7QI): mov    %rdx,-0x97(%rdi)
-#endif
-L(P7QH): mov    %rdx,-0x8f(%rdi)
-L(P7QG): mov    %rdx,-0x87(%rdi)
-#		   .balign     16
-L(P7QF): mov    %rdx,-0x7f(%rdi)
-L(P7QE): mov    %rdx,-0x77(%rdi)
-L(P7QD): mov    %rdx,-0x6f(%rdi)
-L(P7QC): mov    %rdx,-0x67(%rdi)
-L(P7QB): mov    %rdx,-0x5f(%rdi)
-L(P7QA): mov    %rdx,-0x57(%rdi)
-L(P7Q9): mov    %rdx,-0x4f(%rdi)
-L(P7Q8): mov    %rdx,-0x47(%rdi)
-L(P7Q7): mov    %rdx,-0x3f(%rdi)
-L(P7Q6): mov    %rdx,-0x37(%rdi)
-L(P7Q5): mov    %rdx,-0x2f(%rdi)
-L(P7Q4): mov    %rdx,-0x27(%rdi)
-L(P7Q3): mov    %rdx,-0x1f(%rdi)
-L(P7Q2): mov    %rdx,-0x17(%rdi)
-L(P7Q1): mov    %rdx,-0xf(%rdi)
-L(P7Q0): mov    %edx,-0x7(%rdi)
-		mov    %dx,-0x3(%rdi)
-		mov    %dl,-0x1(%rdi)
-		retq   $0x0
-
-	.balign     16
-L(ck_mem_ops_method):
-
-# align to 16 byte boundary first
-	#test $0xf,%rdi
-	#jz L(aligned_now)
-	 lea    L(AliPxQx)(%rip),%r11
-	 mov    $0x10,%r10
-	 mov    %rdi,%r9
-	 and    $0xf,%r9
-	 sub    %r9,%r10
-	 and    $0xf,%r10
-	 add    %r10,%rdi
-	 sub    %r10,%r8
-#ifndef PIC
-	jmpq   *(%r11,%r10,8)
-#else
-	movslq (%r11,%r10,4),%rcx
-	lea    (%rcx,%r11,1),%r11
-	jmpq   *%r11
-#endif
-
-	.pushsection .rodata
-	.balign     16
-#ifndef PIC
-L(AliPxQx):
-	.quad       L(aligned_now), L(A1Q0), L(A2Q0), L(A3Q0)
-	.quad	    L(A4Q0), L(A5Q0), L(A6Q0), L(A7Q0)
-	.quad       L(A0Q1), L(A1Q1), L(A2Q1), L(A3Q1)
-	.quad       L(A4Q1), L(A5Q1), L(A6Q1), L(A7Q1)
-#else
-L(AliPxQx):
-	.int       L(aligned_now)-L(AliPxQx)
-	.int       L(A1Q0)-L(AliPxQx)
-	.int       L(A2Q0)-L(AliPxQx)
-	.int       L(A3Q0)-L(AliPxQx)
-	.int       L(A4Q0)-L(AliPxQx)
-	.int       L(A5Q0)-L(AliPxQx)
-	.int       L(A6Q0)-L(AliPxQx)
-	.int       L(A7Q0)-L(AliPxQx)
-
-	.int       L(A0Q1)-L(AliPxQx)
-	.int       L(A1Q1)-L(AliPxQx)
-	.int       L(A2Q1)-L(AliPxQx)
-	.int       L(A3Q1)-L(AliPxQx)
-	.int       L(A4Q1)-L(AliPxQx)
-	.int       L(A5Q1)-L(AliPxQx)
-	.int       L(A6Q1)-L(AliPxQx)
-	.int       L(A7Q1)-L(AliPxQx)
-#endif
-	.popsection
-
-	.balign     16
-L(A5Q1):    mov    %dl,-0xd(%rdi)
-L(A4Q1):    mov    %edx,-0xc(%rdi)
-L(A0Q1):    mov    %rdx,-0x8(%rdi)
-L(A0Q0):    jmp     L(aligned_now)
-
-	.balign     16
-L(A1Q1):   mov    %dl,-0x9(%rdi)
-	mov    %rdx,-0x8(%rdi)
-	jmp    L(aligned_now)
-
-	.balign     16
-L(A1Q0):   mov    %dl,-0x1(%rdi)
-	jmp    L(aligned_now)
-
-	.balign     16
-L(A3Q1):    mov    %dl,-0xb(%rdi)
-L(A2Q1):    mov    %dx,-0xa(%rdi)
-	mov    %rdx,-0x8(%rdi)
-	jmp    L(aligned_now)
-
-	.balign     16
-L(A3Q0):    mov    %dl,-0x3(%rdi)
-L(A2Q0):    mov    %dx,-0x2(%rdi)
-	jmp    L(aligned_now)
-
-	.balign     16
-L(A5Q0):    mov    %dl,-0x5(%rdi)
-L(A4Q0):    mov    %edx,-0x4(%rdi)
-	jmp    L(aligned_now)
-
-	.balign     16
-L(A7Q1):    mov    %dl,-0xf(%rdi)
-L(A6Q1):    mov    %dx,-0xe(%rdi)
-	mov    %edx,-0xc(%rdi)
-	mov    %rdx,-0x8(%rdi)
-	jmp    L(aligned_now)
-
-	.balign     16
-L(A7Q0):    mov    %dl,-0x7(%rdi)
-L(A6Q0):    mov    %dx,-0x6(%rdi)
-	mov    %edx,-0x4(%rdi)
-	jmp    L(aligned_now)
-
-	.balign     16
-L(aligned_now):
-
-	 cmpl   $0x1,__x86_64_preferred_memory_instruction(%rip)
-	 jle     L(Loop8byte_pre)
-
-	 # fill RegXMM0 with the pattern
-	 movd   %rdx,%xmm0
-	 punpcklqdq %xmm0,%xmm0
-
-  #cmp ecx,256
-  #jmp L(byte32sse2)
-  #cmp ecx,512  ; 256 or 512 doesn't matter much. Changing to 256 would reduce code size
-  #cmp RegLen,128  ; 256 or 512 doesn't matter much. Changing to 256 would reduce code size
-
-	 lea    L(SSExDx)(%rip),%r9        # for later after the alignment
-	 cmp    $0xb0,%r8 # 176
-	 jge    L(byte32sse2_pre)
-
-	 add    %r8,%rdi
-#ifndef PIC
-	 jmpq   *(%r9,%r8,8)
-#else
-	 movslq    (%r9,%r8,4),%rcx
-	 lea    (%rcx,%r9,1),%r9
-	 jmpq   *%r9
-#endif
-
-L(SSE0QB):  movdqa %xmm0,-0xb0(%rdi)
-L(SSE0QA):  movdqa %xmm0,-0xa0(%rdi)
-L(SSE0Q9):  movdqa %xmm0,-0x90(%rdi)
-L(SSE0Q8):  movdqa %xmm0,-0x80(%rdi)
-L(SSE0Q7):  movdqa %xmm0,-0x70(%rdi)
-L(SSE0Q6):  movdqa %xmm0,-0x60(%rdi)
-L(SSE0Q5):  movdqa %xmm0,-0x50(%rdi)
-L(SSE0Q4):  movdqa %xmm0,-0x40(%rdi)
-L(SSE0Q3):  movdqa %xmm0,-0x30(%rdi)
-L(SSE0Q2):  movdqa %xmm0,-0x20(%rdi)
-L(SSE0Q1):  movdqa %xmm0,-0x10(%rdi)
-L(SSE0Q0):  retq   $0x0
-
-L(SSE1QB):  movdqa %xmm0,-0xb1(%rdi)
-L(SSE1QA):  movdqa %xmm0,-0xa1(%rdi)
-L(SSE1Q9):  movdqa %xmm0,-0x91(%rdi)
-L(SSE1Q8):  movdqa %xmm0,-0x81(%rdi)
-L(SSE1Q7):  movdqa %xmm0,-0x71(%rdi)
-L(SSE1Q6):  movdqa %xmm0,-0x61(%rdi)
-L(SSE1Q5):  movdqa %xmm0,-0x51(%rdi)
-L(SSE1Q4):  movdqa %xmm0,-0x41(%rdi)
-L(SSE1Q3):  movdqa %xmm0,-0x31(%rdi)
-L(SSE1Q2):  movdqa %xmm0,-0x21(%rdi)
-L(SSE1Q1):  movdqa %xmm0,-0x11(%rdi)
-L(SSE1Q0):  mov    %dl,-0x1(%rdi)
-	retq   $0x0
-
-L(SSE2QB):  movdqa %xmm0,-0xb2(%rdi)
-L(SSE2QA):  movdqa %xmm0,-0xa2(%rdi)
-L(SSE2Q9):  movdqa %xmm0,-0x92(%rdi)
-L(SSE2Q8):  movdqa %xmm0,-0x82(%rdi)
-L(SSE2Q7):  movdqa %xmm0,-0x72(%rdi)
-L(SSE2Q6):  movdqa %xmm0,-0x62(%rdi)
-L(SSE2Q5):  movdqa %xmm0,-0x52(%rdi)
-L(SSE2Q4):  movdqa %xmm0,-0x42(%rdi)
-L(SSE2Q3):  movdqa %xmm0,-0x32(%rdi)
-L(SSE2Q2):  movdqa %xmm0,-0x22(%rdi)
-L(SSE2Q1):  movdqa %xmm0,-0x12(%rdi)
-L(SSE2Q0):  mov    %dx,-0x2(%rdi)
-	retq   $0x0
-
-L(SSE3QB):  movdqa %xmm0,-0xb3(%rdi)
-L(SSE3QA):  movdqa %xmm0,-0xa3(%rdi)
-L(SSE3Q9):  movdqa %xmm0,-0x93(%rdi)
-L(SSE3Q8):  movdqa %xmm0,-0x83(%rdi)
-L(SSE3Q7):  movdqa %xmm0,-0x73(%rdi)
-L(SSE3Q6):  movdqa %xmm0,-0x63(%rdi)
-L(SSE3Q5):  movdqa %xmm0,-0x53(%rdi)
-L(SSE3Q4):  movdqa %xmm0,-0x43(%rdi)
-L(SSE3Q3):  movdqa %xmm0,-0x33(%rdi)
-L(SSE3Q2):  movdqa %xmm0,-0x23(%rdi)
-L(SSE3Q1):  movdqa %xmm0,-0x13(%rdi)
-L(SSE3Q0):  mov    %dx,-0x3(%rdi)
-	mov    %dl,-0x1(%rdi)
-	retq   $0x0
-
-L(SSE4QB):  movdqa %xmm0,-0xb4(%rdi)
-L(SSE4QA):  movdqa %xmm0,-0xa4(%rdi)
-L(SSE4Q9):  movdqa %xmm0,-0x94(%rdi)
-L(SSE4Q8):  movdqa %xmm0,-0x84(%rdi)
-L(SSE4Q7):  movdqa %xmm0,-0x74(%rdi)
-L(SSE4Q6):  movdqa %xmm0,-0x64(%rdi)
-L(SSE4Q5):  movdqa %xmm0,-0x54(%rdi)
-L(SSE4Q4):  movdqa %xmm0,-0x44(%rdi)
-L(SSE4Q3):  movdqa %xmm0,-0x34(%rdi)
-L(SSE4Q2):  movdqa %xmm0,-0x24(%rdi)
-L(SSE4Q1):  movdqa %xmm0,-0x14(%rdi)
-L(SSE4Q0):  mov    %edx,-0x4(%rdi)
-	retq   $0x0
-
-L(SSE5QB):  movdqa %xmm0,-0xb5(%rdi)
-L(SSE5QA):  movdqa %xmm0,-0xa5(%rdi)
-L(SSE5Q9):  movdqa %xmm0,-0x95(%rdi)
-L(SSE5Q8):  movdqa %xmm0,-0x85(%rdi)
-L(SSE5Q7):  movdqa %xmm0,-0x75(%rdi)
-L(SSE5Q6):  movdqa %xmm0,-0x65(%rdi)
-L(SSE5Q5):  movdqa %xmm0,-0x55(%rdi)
-L(SSE5Q4):  movdqa %xmm0,-0x45(%rdi)
-L(SSE5Q3):  movdqa %xmm0,-0x35(%rdi)
-L(SSE5Q2):  movdqa %xmm0,-0x25(%rdi)
-L(SSE5Q1):  movdqa %xmm0,-0x15(%rdi)
-L(SSE5Q0):  mov    %edx,-0x5(%rdi)
-	mov    %dl,-0x1(%rdi)
-	retq   $0x0
-
-
-L(SSE6QB):  movdqa %xmm0,-0xb6(%rdi)
-L(SSE6QA):  movdqa %xmm0,-0xa6(%rdi)
-L(SSE6Q9):  movdqa %xmm0,-0x96(%rdi)
-L(SSE6Q8):  movdqa %xmm0,-0x86(%rdi)
-L(SSE6Q7):  movdqa %xmm0,-0x76(%rdi)
-L(SSE6Q6):  movdqa %xmm0,-0x66(%rdi)
-L(SSE6Q5):  movdqa %xmm0,-0x56(%rdi)
-L(SSE6Q4):  movdqa %xmm0,-0x46(%rdi)
-L(SSE6Q3):  movdqa %xmm0,-0x36(%rdi)
-L(SSE6Q2):  movdqa %xmm0,-0x26(%rdi)
-L(SSE6Q1):  movdqa %xmm0,-0x16(%rdi)
-L(SSE6Q0):  mov    %edx,-0x6(%rdi)
-	mov    %dx,-0x2(%rdi)
-	retq   $0x0
-
-L(SSE7QB):  movdqa %xmm0,-0xb7(%rdi)
-L(SSE7QA):  movdqa %xmm0,-0xa7(%rdi)
-L(SSE7Q9):  movdqa %xmm0,-0x97(%rdi)
-L(SSE7Q8):  movdqa %xmm0,-0x87(%rdi)
-L(SSE7Q7):  movdqa %xmm0,-0x77(%rdi)
-L(SSE7Q6):  movdqa %xmm0,-0x67(%rdi)
-L(SSE7Q5):  movdqa %xmm0,-0x57(%rdi)
-L(SSE7Q4):  movdqa %xmm0,-0x47(%rdi)
-L(SSE7Q3):  movdqa %xmm0,-0x37(%rdi)
-L(SSE7Q2):  movdqa %xmm0,-0x27(%rdi)
-L(SSE7Q1):  movdqa %xmm0,-0x17(%rdi)
-L(SSE7Q0):  mov    %edx,-0x7(%rdi)
-	mov    %dx,-0x3(%rdi)
-	mov    %dl,-0x1(%rdi)
-	retq   $0x0
-
-L(SSE8QB):  movdqa %xmm0,-0xb8(%rdi)
-L(SSE8QA):  movdqa %xmm0,-0xa8(%rdi)
-L(SSE8Q9):  movdqa %xmm0,-0x98(%rdi)
-L(SSE8Q8):  movdqa %xmm0,-0x88(%rdi)
-L(SSE8Q7):  movdqa %xmm0,-0x78(%rdi)
-L(SSE8Q6):  movdqa %xmm0,-0x68(%rdi)
-L(SSE8Q5):  movdqa %xmm0,-0x58(%rdi)
-L(SSE8Q4):  movdqa %xmm0,-0x48(%rdi)
-L(SSE8Q3):  movdqa %xmm0,-0x38(%rdi)
-L(SSE8Q2):  movdqa %xmm0,-0x28(%rdi)
-L(SSE8Q1):  movdqa %xmm0,-0x18(%rdi)
-L(SSE8Q0):  mov    %rdx,-0x8(%rdi)
-	retq   $0x0
-
-L(SSE9QB):  movdqa %xmm0,-0xb9(%rdi)
-L(SSE9QA):  movdqa %xmm0,-0xa9(%rdi)
-L(SSE9Q9):  movdqa %xmm0,-0x99(%rdi)
-L(SSE9Q8):  movdqa %xmm0,-0x89(%rdi)
-L(SSE9Q7):  movdqa %xmm0,-0x79(%rdi)
-L(SSE9Q6):  movdqa %xmm0,-0x69(%rdi)
-L(SSE9Q5):  movdqa %xmm0,-0x59(%rdi)
-L(SSE9Q4):  movdqa %xmm0,-0x49(%rdi)
-L(SSE9Q3):  movdqa %xmm0,-0x39(%rdi)
-L(SSE9Q2):  movdqa %xmm0,-0x29(%rdi)
-L(SSE9Q1):  movdqa %xmm0,-0x19(%rdi)
-L(SSE9Q0):  mov    %rdx,-0x9(%rdi)
-	mov    %dl,-0x1(%rdi)
-	retq   $0x0
-
-L(SSE10QB): movdqa %xmm0,-0xba(%rdi)
-L(SSE10QA): movdqa %xmm0,-0xaa(%rdi)
-L(SSE10Q9): movdqa %xmm0,-0x9a(%rdi)
-L(SSE10Q8): movdqa %xmm0,-0x8a(%rdi)
-L(SSE10Q7): movdqa %xmm0,-0x7a(%rdi)
-L(SSE10Q6): movdqa %xmm0,-0x6a(%rdi)
-L(SSE10Q5): movdqa %xmm0,-0x5a(%rdi)
-L(SSE10Q4): movdqa %xmm0,-0x4a(%rdi)
-L(SSE10Q3): movdqa %xmm0,-0x3a(%rdi)
-L(SSE10Q2): movdqa %xmm0,-0x2a(%rdi)
-L(SSE10Q1): movdqa %xmm0,-0x1a(%rdi)
-L(SSE10Q0): mov    %rdx,-0xa(%rdi)
-	mov    %dx,-0x2(%rdi)
-	retq   $0x0
-
-L(SSE11QB): movdqa %xmm0,-0xbb(%rdi)
-L(SSE11QA): movdqa %xmm0,-0xab(%rdi)
-L(SSE11Q9): movdqa %xmm0,-0x9b(%rdi)
-L(SSE11Q8): movdqa %xmm0,-0x8b(%rdi)
-L(SSE11Q7): movdqa %xmm0,-0x7b(%rdi)
-L(SSE11Q6): movdqa %xmm0,-0x6b(%rdi)
-L(SSE11Q5): movdqa %xmm0,-0x5b(%rdi)
-L(SSE11Q4): movdqa %xmm0,-0x4b(%rdi)
-L(SSE11Q3): movdqa %xmm0,-0x3b(%rdi)
-L(SSE11Q2): movdqa %xmm0,-0x2b(%rdi)
-L(SSE11Q1): movdqa %xmm0,-0x1b(%rdi)
-L(SSE11Q0): mov    %rdx,-0xb(%rdi)
-	mov    %dx,-0x3(%rdi)
-	mov    %dl,-0x1(%rdi)
-	retq   $0x0
-
-L(SSE12QB): movdqa %xmm0,-0xbc(%rdi)
-L(SSE12QA): movdqa %xmm0,-0xac(%rdi)
-L(SSE12Q9): movdqa %xmm0,-0x9c(%rdi)
-L(SSE12Q8): movdqa %xmm0,-0x8c(%rdi)
-L(SSE12Q7): movdqa %xmm0,-0x7c(%rdi)
-L(SSE12Q6): movdqa %xmm0,-0x6c(%rdi)
-L(SSE12Q5): movdqa %xmm0,-0x5c(%rdi)
-L(SSE12Q4): movdqa %xmm0,-0x4c(%rdi)
-L(SSE12Q3): movdqa %xmm0,-0x3c(%rdi)
-L(SSE12Q2): movdqa %xmm0,-0x2c(%rdi)
-L(SSE12Q1): movdqa %xmm0,-0x1c(%rdi)
-L(SSE12Q0): mov    %rdx,-0xc(%rdi)
-	mov    %edx,-0x4(%rdi)
-	retq   $0x0
-
-L(SSE13QB): movdqa %xmm0,-0xbd(%rdi)
-L(SSE13QA): movdqa %xmm0,-0xad(%rdi)
-L(SSE13Q9): movdqa %xmm0,-0x9d(%rdi)
-L(SSE13Q8): movdqa %xmm0,-0x8d(%rdi)
-L(SSE13Q7): movdqa %xmm0,-0x7d(%rdi)
-L(SSE13Q6): movdqa %xmm0,-0x6d(%rdi)
-L(SSE13Q5): movdqa %xmm0,-0x5d(%rdi)
-L(SSE13Q4): movdqa %xmm0,-0x4d(%rdi)
-L(SSE13Q3): movdqa %xmm0,-0x3d(%rdi)
-L(SSE13Q2): movdqa %xmm0,-0x2d(%rdi)
-L(SSE13Q1): movdqa %xmm0,-0x1d(%rdi)
-L(SSE13Q0): mov    %rdx,-0xd(%rdi)
-	mov    %edx,-0x5(%rdi)
-	mov    %dl,-0x1(%rdi)
-	retq   $0x0
-
-L(SSE14QB): movdqa %xmm0,-0xbe(%rdi)
-L(SSE14QA): movdqa %xmm0,-0xae(%rdi)
-L(SSE14Q9): movdqa %xmm0,-0x9e(%rdi)
-L(SSE14Q8): movdqa %xmm0,-0x8e(%rdi)
-L(SSE14Q7): movdqa %xmm0,-0x7e(%rdi)
-L(SSE14Q6): movdqa %xmm0,-0x6e(%rdi)
-L(SSE14Q5): movdqa %xmm0,-0x5e(%rdi)
-L(SSE14Q4): movdqa %xmm0,-0x4e(%rdi)
-L(SSE14Q3): movdqa %xmm0,-0x3e(%rdi)
-L(SSE14Q2): movdqa %xmm0,-0x2e(%rdi)
-L(SSE14Q1): movdqa %xmm0,-0x1e(%rdi)
-L(SSE14Q0): mov    %rdx,-0xe(%rdi)
-	mov    %edx,-0x6(%rdi)
-	mov    %dx,-0x2(%rdi)
-	retq   $0x0
-
-L(SSE15QB): movdqa %xmm0,-0xbf(%rdi)
-L(SSE15QA): movdqa %xmm0,-0xaf(%rdi)
-L(SSE15Q9): movdqa %xmm0,-0x9f(%rdi)
-L(SSE15Q8): movdqa %xmm0,-0x8f(%rdi)
-L(SSE15Q7): movdqa %xmm0,-0x7f(%rdi)
-L(SSE15Q6): movdqa %xmm0,-0x6f(%rdi)
-L(SSE15Q5): movdqa %xmm0,-0x5f(%rdi)
-L(SSE15Q4): movdqa %xmm0,-0x4f(%rdi)
-L(SSE15Q3): movdqa %xmm0,-0x3f(%rdi)
-L(SSE15Q2): movdqa %xmm0,-0x2f(%rdi)
-L(SSE15Q1): movdqa %xmm0,-0x1f(%rdi)
-L(SSE15Q0): mov    %rdx,-0xf(%rdi)
-	mov    %edx,-0x7(%rdi)
-	mov    %dx,-0x3(%rdi)
-	mov    %dl,-0x1(%rdi)
-	retq   $0x0
-
-	.balign     16
-L(byte32sse2_pre):
-
-	mov    __x86_64_shared_cache_size(%rip),%r9d  # The largest cache size
-	cmp    %r9,%r8
-	jg     L(sse2_nt_move_pre)
-	#jmp    L(byte32sse2)
-	.balign     16
-L(byte32sse2):
-	lea    -0x80(%r8),%r8 # 128
-	cmp    $0x80,%r8   # 128
-	movdqa %xmm0,(%rdi)
-	movdqa %xmm0,0x10(%rdi)
-	movdqa %xmm0,0x20(%rdi)
-	movdqa %xmm0,0x30(%rdi)
-	movdqa %xmm0,0x40(%rdi)
-	movdqa %xmm0,0x50(%rdi)
-	movdqa %xmm0,0x60(%rdi)
-	movdqa %xmm0,0x70(%rdi)
-
-	lea    0x80(%rdi),%rdi
-	jge    L(byte32sse2)
-	lea    L(SSExDx)(%rip),%r11
-	add    %r8,%rdi
-#ifndef PIC
-	jmpq   *(%r11,%r8,8)
-#else
-	movslq    (%r11,%r8,4),%rcx
-	lea   (%rcx,%r11,1),%r11
-	jmpq   *%r11
-#endif
-
-	.balign     16
-L(sse2_nt_move_pre):
-	cmp    $0x0,%r9
-	je     L(byte32sse2)
-	jmp    L(sse2_nt_move)
-
-	.balign     16
-L(sse2_nt_move):
-	lea    -0x80(%r8),%r8
-	cmp    $0x80,%r8
-
-	movntdq %xmm0,(%rdi)
-	movntdq %xmm0,0x10(%rdi)
-	movntdq %xmm0,0x20(%rdi)
-	movntdq %xmm0,0x30(%rdi)
-	movntdq %xmm0,0x40(%rdi)
-	movntdq %xmm0,0x50(%rdi)
-	movntdq %xmm0,0x60(%rdi)
-	movntdq %xmm0,0x70(%rdi)
-
-	lea    0x80(%rdi),%rdi
-	jge    L(sse2_nt_move)
-	lea    L(SSExDx)(%rip),%r11
-	sfence
-	add    %r8,%rdi
-#ifndef PIC
-	jmpq   *(%r11,%r8,8)
-#else
-	movslq (%r11,%r8,4),%rcx
-	lea   (%rcx,%r11,1),%r11
-	jmpq   *%r11
-#endif
-
-	.balign     16
-L(Loop8byte_pre):
-	mov    __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size
-
-L(top_8byte_loop):
-	cmp    %r9,%r8
-	jg     L(Loop8byte_nt_move_pre)
-
-	.balign     16
-L(Loop8byte):
-	lea    -0x80(%r8),%r8
-	cmp    $0x80,%r8   # 64
-	mov    %rdx,(%rdi)
-	mov    %rdx,0x8(%rdi)
-	mov    %rdx,0x10(%rdi)
-	mov    %rdx,0x18(%rdi)
-	mov    %rdx,0x20(%rdi)
-	mov    %rdx,0x28(%rdi)
-	mov    %rdx,0x30(%rdi)
-	mov    %rdx,0x38(%rdi)
-	mov    %rdx,0x40(%rdi)
-	mov    %rdx,0x48(%rdi)
-	mov    %rdx,0x50(%rdi)
-	mov    %rdx,0x58(%rdi)
-	mov    %rdx,0x60(%rdi)
-	mov    %rdx,0x68(%rdi)
-	mov    %rdx,0x70(%rdi)
-	mov    %rdx,0x78(%rdi)
-	lea    0x80(%rdi),%rdi
-	jg     L(Loop8byte)
-	lea    (%rdi,%r8,1),%rdi
-	lea    L(setPxQx)(%rip),%r11
-#ifndef PIC
-	jmpq   *(%r11,%r8,8)
-#else
-	movslq    (%r11,%r8,4),%rcx
-	lea    (%rcx,%r11,1),%r11
-	jmpq   *%r11
-#endif
-
-	.balign     16
-L(Loop8byte_nt_move_pre):
-	cmp    $0x0,%r9
-	je     L(Loop8byte)
-	jmp    L(Loop8byte_nt_move)
-
-	.balign     16
-L(Loop8byte_nt_move):
-	movnti %rdx,(%rdi)
-	movnti %rdx,0x8(%rdi)
-	lea    -0x40(%r8),%r8
-	movnti %rdx,0x10(%rdi)
-	movnti %rdx,0x18(%rdi)
-	movnti %rdx,0x20(%rdi)
-	cmp    $0x40,%r8  # 64
-	movnti %rdx,0x28(%rdi)
-	movnti %rdx,0x30(%rdi)
-	movnti %rdx,0x38(%rdi)
-	lea    0x40(%rdi),%rdi
-	jge    L(Loop8byte_nt_move)
-	lea    L(setPxQx)(%rip),%r11
-	lea    (%rdi,%r8,1),%rdi
-	sfence
-#ifndef PIC
-	jmpq   *(%r11,%r8,8)
-#else
-	movslq    (%r11,%r8,4),%rcx
-	lea    (%rcx,%r11,1),%r11
-	jmpq   *%r11
-#endif
-
-	.pushsection .rodata
-	.balign     16
-#ifndef PIC
-L(SSExDx):
-	.quad       L(SSE0Q0), L(SSE1Q0), L(SSE2Q0), L(SSE3Q0)
-	.quad       L(SSE4Q0), L(SSE5Q0), L(SSE6Q0), L(SSE7Q0)
-	.quad       L(SSE8Q0), L(SSE9Q0), L(SSE10Q0), L(SSE11Q0)
-	.quad       L(SSE12Q0), L(SSE13Q0), L(SSE14Q0), L(SSE15Q0)
-	.quad       L(SSE0Q1), L(SSE1Q1), L(SSE2Q1), L(SSE3Q1)
-	.quad       L(SSE4Q1), L(SSE5Q1), L(SSE6Q1), L(SSE7Q1)
-	.quad       L(SSE8Q1), L(SSE9Q1), L(SSE10Q1), L(SSE11Q1)
-	.quad       L(SSE12Q1), L(SSE13Q1), L(SSE14Q1), L(SSE15Q1)
-	.quad       L(SSE0Q2), L(SSE1Q2), L(SSE2Q2), L(SSE3Q2)
-	.quad       L(SSE4Q2), L(SSE5Q2), L(SSE6Q2), L(SSE7Q2)
-	.quad       L(SSE8Q2), L(SSE9Q2), L(SSE10Q2), L(SSE11Q2)
-	.quad       L(SSE12Q2), L(SSE13Q2), L(SSE14Q2), L(SSE15Q2)
-	.quad       L(SSE0Q3), L(SSE1Q3), L(SSE2Q3), L(SSE3Q3)
-	.quad       L(SSE4Q3), L(SSE5Q3), L(SSE6Q3), L(SSE7Q3)
-	.quad       L(SSE8Q3), L(SSE9Q3), L(SSE10Q3), L(SSE11Q3)
-	.quad       L(SSE12Q3), L(SSE13Q3), L(SSE14Q3), L(SSE15Q3)
-	.quad       L(SSE0Q4), L(SSE1Q4), L(SSE2Q4), L(SSE3Q4)
-	.quad       L(SSE4Q4), L(SSE5Q4), L(SSE6Q4), L(SSE7Q4)
-	.quad       L(SSE8Q4), L(SSE9Q4), L(SSE10Q4), L(SSE11Q4)
-	.quad       L(SSE12Q4), L(SSE13Q4), L(SSE14Q4), L(SSE15Q4)
-	.quad       L(SSE0Q5), L(SSE1Q5), L(SSE2Q5), L(SSE3Q5)
-	.quad       L(SSE4Q5), L(SSE5Q5), L(SSE6Q5), L(SSE7Q5)
-	.quad       L(SSE8Q5), L(SSE9Q5), L(SSE10Q5), L(SSE11Q5)
-	.quad       L(SSE12Q5), L(SSE13Q5), L(SSE14Q5), L(SSE15Q5)
-	.quad       L(SSE0Q6), L(SSE1Q6), L(SSE2Q6), L(SSE3Q6)
-	.quad       L(SSE4Q6), L(SSE5Q6), L(SSE6Q6), L(SSE7Q6)
-	.quad       L(SSE8Q6), L(SSE9Q6), L(SSE10Q6), L(SSE11Q6)
-	.quad       L(SSE12Q6), L(SSE13Q6), L(SSE14Q6), L(SSE15Q6)
-	.quad       L(SSE0Q7), L(SSE1Q7), L(SSE2Q7), L(SSE3Q7)
-	.quad       L(SSE4Q7), L(SSE5Q7), L(SSE6Q7), L(SSE7Q7)
-	.quad       L(SSE8Q7), L(SSE9Q7), L(SSE10Q7), L(SSE11Q7)
-	.quad       L(SSE12Q7), L(SSE13Q7), L(SSE14Q7), L(SSE15Q7)
-	.quad       L(SSE0Q8), L(SSE1Q8), L(SSE2Q8), L(SSE3Q8)
-	.quad       L(SSE4Q8), L(SSE5Q8), L(SSE6Q8), L(SSE7Q8)
-	.quad       L(SSE8Q8), L(SSE9Q8), L(SSE10Q8), L(SSE11Q8)
-	.quad       L(SSE12Q8), L(SSE13Q8), L(SSE14Q8), L(SSE15Q8)
-	.quad       L(SSE0Q9), L(SSE1Q9), L(SSE2Q9), L(SSE3Q9)
-	.quad       L(SSE4Q9), L(SSE5Q9), L(SSE6Q9), L(SSE7Q9)
-	.quad       L(SSE8Q9), L(SSE9Q9), L(SSE10Q9), L(SSE11Q9)
-	.quad       L(SSE12Q9), L(SSE13Q9), L(SSE14Q9), L(SSE15Q9)
-	.quad       L(SSE0QA), L(SSE1QA), L(SSE2QA), L(SSE3QA)
-	.quad       L(SSE4QA), L(SSE5QA), L(SSE6QA), L(SSE7QA)
-	.quad       L(SSE8QA), L(SSE9QA), L(SSE10QA), L(SSE11QA)
-	.quad       L(SSE12QA), L(SSE13QA), L(SSE14QA), L(SSE15QA)
-	.quad       L(SSE0QB), L(SSE1QB), L(SSE2QB), L(SSE3QB)
-	.quad       L(SSE4QB), L(SSE5QB), L(SSE6QB), L(SSE7QB)
-	.quad       L(SSE8QB), L(SSE9QB), L(SSE10QB), L(SSE11QB)
-	.quad       L(SSE12QB), L(SSE13QB), L(SSE14QB), L(SSE15QB)
-#else
-L(SSExDx):
-	.int       L(SSE0Q0) -L(SSExDx)
-	.int       L(SSE1Q0) -L(SSExDx)
-	.int       L(SSE2Q0) -L(SSExDx)
-	.int       L(SSE3Q0) -L(SSExDx)
-	.int       L(SSE4Q0) -L(SSExDx)
-	.int       L(SSE5Q0) -L(SSExDx)
-	.int       L(SSE6Q0) -L(SSExDx)
-	.int       L(SSE7Q0) -L(SSExDx)
-
-	.int       L(SSE8Q0) -L(SSExDx)
-	.int       L(SSE9Q0) -L(SSExDx)
-	.int       L(SSE10Q0)-L(SSExDx)
-	.int       L(SSE11Q0)-L(SSExDx)
-	.int       L(SSE12Q0)-L(SSExDx)
-	.int       L(SSE13Q0)-L(SSExDx)
-	.int       L(SSE14Q0)-L(SSExDx)
-	.int       L(SSE15Q0)-L(SSExDx)
-
-	.int       L(SSE0Q1) -L(SSExDx)
-	.int       L(SSE1Q1) -L(SSExDx)
-	.int       L(SSE2Q1) -L(SSExDx)
-	.int       L(SSE3Q1) -L(SSExDx)
-	.int       L(SSE4Q1) -L(SSExDx)
-	.int       L(SSE5Q1) -L(SSExDx)
-	.int       L(SSE6Q1) -L(SSExDx)
-	.int       L(SSE7Q1) -L(SSExDx)
-
-	.int       L(SSE8Q1) -L(SSExDx)
-	.int       L(SSE9Q1) -L(SSExDx)
-	.int       L(SSE10Q1)-L(SSExDx)
-	.int       L(SSE11Q1)-L(SSExDx)
-	.int       L(SSE12Q1)-L(SSExDx)
-	.int       L(SSE13Q1)-L(SSExDx)
-	.int       L(SSE14Q1)-L(SSExDx)
-	.int       L(SSE15Q1)-L(SSExDx)
-
-	.int       L(SSE0Q2) -L(SSExDx)
-	.int       L(SSE1Q2) -L(SSExDx)
-	.int       L(SSE2Q2) -L(SSExDx)
-	.int       L(SSE3Q2) -L(SSExDx)
-	.int       L(SSE4Q2) -L(SSExDx)
-	.int       L(SSE5Q2) -L(SSExDx)
-	.int       L(SSE6Q2) -L(SSExDx)
-	.int       L(SSE7Q2) -L(SSExDx)
-
-	.int       L(SSE8Q2) -L(SSExDx)
-	.int       L(SSE9Q2) -L(SSExDx)
-	.int       L(SSE10Q2)-L(SSExDx)
-	.int       L(SSE11Q2)-L(SSExDx)
-	.int       L(SSE12Q2)-L(SSExDx)
-	.int       L(SSE13Q2)-L(SSExDx)
-	.int       L(SSE14Q2)-L(SSExDx)
-	.int       L(SSE15Q2)-L(SSExDx)
-
-	.int       L(SSE0Q3) -L(SSExDx)
-	.int       L(SSE1Q3) -L(SSExDx)
-	.int       L(SSE2Q3) -L(SSExDx)
-	.int       L(SSE3Q3) -L(SSExDx)
-	.int       L(SSE4Q3) -L(SSExDx)
-	.int       L(SSE5Q3) -L(SSExDx)
-	.int       L(SSE6Q3) -L(SSExDx)
-	.int       L(SSE7Q3) -L(SSExDx)
-
-	.int       L(SSE8Q3) -L(SSExDx)
-	.int       L(SSE9Q3) -L(SSExDx)
-	.int       L(SSE10Q3)-L(SSExDx)
-	.int       L(SSE11Q3)-L(SSExDx)
-	.int       L(SSE12Q3)-L(SSExDx)
-	.int       L(SSE13Q3)-L(SSExDx)
-	.int       L(SSE14Q3)-L(SSExDx)
-	.int       L(SSE15Q3)-L(SSExDx)
-
-	.int       L(SSE0Q4) -L(SSExDx)
-	.int       L(SSE1Q4) -L(SSExDx)
-	.int       L(SSE2Q4) -L(SSExDx)
-	.int       L(SSE3Q4) -L(SSExDx)
-	.int       L(SSE4Q4) -L(SSExDx)
-	.int       L(SSE5Q4) -L(SSExDx)
-	.int       L(SSE6Q4) -L(SSExDx)
-	.int       L(SSE7Q4) -L(SSExDx)
-
-	.int       L(SSE8Q4) -L(SSExDx)
-	.int       L(SSE9Q4) -L(SSExDx)
-	.int       L(SSE10Q4)-L(SSExDx)
-	.int       L(SSE11Q4)-L(SSExDx)
-	.int       L(SSE12Q4)-L(SSExDx)
-	.int       L(SSE13Q4)-L(SSExDx)
-	.int       L(SSE14Q4)-L(SSExDx)
-	.int       L(SSE15Q4)-L(SSExDx)
-
-	.int       L(SSE0Q5) -L(SSExDx)
-	.int       L(SSE1Q5) -L(SSExDx)
-	.int       L(SSE2Q5) -L(SSExDx)
-	.int       L(SSE3Q5) -L(SSExDx)
-	.int       L(SSE4Q5) -L(SSExDx)
-	.int       L(SSE5Q5) -L(SSExDx)
-	.int       L(SSE6Q5) -L(SSExDx)
-	.int       L(SSE7Q5) -L(SSExDx)
-
-	.int       L(SSE8Q5) -L(SSExDx)
-	.int       L(SSE9Q5) -L(SSExDx)
-	.int       L(SSE10Q5)-L(SSExDx)
-	.int       L(SSE11Q5)-L(SSExDx)
-	.int       L(SSE12Q5)-L(SSExDx)
-	.int       L(SSE13Q5)-L(SSExDx)
-	.int       L(SSE14Q5)-L(SSExDx)
-	.int       L(SSE15Q5)-L(SSExDx)
-
-	.int       L(SSE0Q6) -L(SSExDx)
-	.int       L(SSE1Q6) -L(SSExDx)
-	.int       L(SSE2Q6) -L(SSExDx)
-	.int       L(SSE3Q6) -L(SSExDx)
-	.int       L(SSE4Q6) -L(SSExDx)
-	.int       L(SSE5Q6) -L(SSExDx)
-	.int       L(SSE6Q6) -L(SSExDx)
-	.int       L(SSE7Q6) -L(SSExDx)
-
-	.int       L(SSE8Q6) -L(SSExDx)
-	.int       L(SSE9Q6) -L(SSExDx)
-	.int       L(SSE10Q6)-L(SSExDx)
-	.int       L(SSE11Q6)-L(SSExDx)
-	.int       L(SSE12Q6)-L(SSExDx)
-	.int       L(SSE13Q6)-L(SSExDx)
-	.int       L(SSE14Q6)-L(SSExDx)
-	.int       L(SSE15Q6)-L(SSExDx)
-
-	.int       L(SSE0Q7) -L(SSExDx)
-	.int       L(SSE1Q7) -L(SSExDx)
-	.int       L(SSE2Q7) -L(SSExDx)
-	.int       L(SSE3Q7) -L(SSExDx)
-	.int       L(SSE4Q7) -L(SSExDx)
-	.int       L(SSE5Q7) -L(SSExDx)
-	.int       L(SSE6Q7) -L(SSExDx)
-	.int       L(SSE7Q7) -L(SSExDx)
-
-	.int       L(SSE8Q7) -L(SSExDx)
-	.int       L(SSE9Q7) -L(SSExDx)
-	.int       L(SSE10Q7)-L(SSExDx)
-	.int       L(SSE11Q7)-L(SSExDx)
-	.int       L(SSE12Q7)-L(SSExDx)
-	.int       L(SSE13Q7)-L(SSExDx)
-	.int       L(SSE14Q7)-L(SSExDx)
-	.int       L(SSE15Q7)-L(SSExDx)
-
-	.int       L(SSE0Q8) -L(SSExDx)
-	.int       L(SSE1Q8) -L(SSExDx)
-	.int       L(SSE2Q8) -L(SSExDx)
-	.int       L(SSE3Q8) -L(SSExDx)
-	.int       L(SSE4Q8) -L(SSExDx)
-	.int       L(SSE5Q8) -L(SSExDx)
-	.int       L(SSE6Q8) -L(SSExDx)
-	.int       L(SSE7Q8) -L(SSExDx)
-
-	.int       L(SSE8Q8) -L(SSExDx)
-	.int       L(SSE9Q8) -L(SSExDx)
-	.int       L(SSE10Q8)-L(SSExDx)
-	.int       L(SSE11Q8)-L(SSExDx)
-	.int       L(SSE12Q8)-L(SSExDx)
-	.int       L(SSE13Q8)-L(SSExDx)
-	.int       L(SSE14Q8)-L(SSExDx)
-	.int       L(SSE15Q8)-L(SSExDx)
-
-	.int       L(SSE0Q9) -L(SSExDx)
-	.int       L(SSE1Q9) -L(SSExDx)
-	.int       L(SSE2Q9) -L(SSExDx)
-	.int       L(SSE3Q9) -L(SSExDx)
-	.int       L(SSE4Q9) -L(SSExDx)
-	.int       L(SSE5Q9) -L(SSExDx)
-	.int       L(SSE6Q9) -L(SSExDx)
-	.int       L(SSE7Q9) -L(SSExDx)
-
-	.int       L(SSE8Q9) -L(SSExDx)
-	.int       L(SSE9Q9) -L(SSExDx)
-	.int       L(SSE10Q9)-L(SSExDx)
-	.int       L(SSE11Q9)-L(SSExDx)
-	.int       L(SSE12Q9)-L(SSExDx)
-	.int       L(SSE13Q9)-L(SSExDx)
-	.int       L(SSE14Q9)-L(SSExDx)
-	.int       L(SSE15Q9)-L(SSExDx)
-
-	.int       L(SSE0QA) -L(SSExDx)
-	.int       L(SSE1QA) -L(SSExDx)
-	.int       L(SSE2QA) -L(SSExDx)
-	.int       L(SSE3QA) -L(SSExDx)
-	.int       L(SSE4QA) -L(SSExDx)
-	.int       L(SSE5QA) -L(SSExDx)
-	.int       L(SSE6QA) -L(SSExDx)
-	.int       L(SSE7QA) -L(SSExDx)
-
-	.int       L(SSE8QA) -L(SSExDx)
-	.int       L(SSE9QA) -L(SSExDx)
-	.int       L(SSE10QA)-L(SSExDx)
-	.int       L(SSE11QA)-L(SSExDx)
-	.int       L(SSE12QA)-L(SSExDx)
-	.int       L(SSE13QA)-L(SSExDx)
-	.int       L(SSE14QA)-L(SSExDx)
-	.int       L(SSE15QA)-L(SSExDx)
-
-	.int       L(SSE0QB) -L(SSExDx)
-	.int       L(SSE1QB) -L(SSExDx)
-	.int       L(SSE2QB) -L(SSExDx)
-	.int       L(SSE3QB) -L(SSExDx)
-	.int       L(SSE4QB) -L(SSExDx)
-	.int       L(SSE5QB) -L(SSExDx)
-	.int       L(SSE6QB) -L(SSExDx)
-	.int       L(SSE7QB) -L(SSExDx)
-
-	.int       L(SSE8QB) -L(SSExDx)
-	.int       L(SSE9QB) -L(SSExDx)
-	.int       L(SSE10QB)-L(SSExDx)
-	.int       L(SSE11QB)-L(SSExDx)
-	.int       L(SSE12QB)-L(SSExDx)
-	.int       L(SSE13QB)-L(SSExDx)
-	.int       L(SSE14QB)-L(SSExDx)
-	.int       L(SSE15QB)-L(SSExDx)
-#endif
-	.popsection
+	.p2align 4
+11:	/* Copy 64 bytes without polluting the cache.  */
+	/* We could use	movntdq    %xmm0,(%rcx) here to further
+	   speed up for large cases but let's not use XMM registers.  */
+	movnti	%r8,(%rcx)
+	movnti  %r8,0x8(%rcx)
+	movnti  %r8,0x10(%rcx)
+	movnti  %r8,0x18(%rcx)
+	movnti  %r8,0x20(%rcx)
+	movnti  %r8,0x28(%rcx)
+	movnti  %r8,0x30(%rcx)
+	movnti  %r8,0x38(%rcx)
+	add	$0x40,%rcx
+	dec	%rax
+	jne	11b
+	jmp	4b
 
 END (memset)
 libc_hidden_builtin_def (memset)

Modified: fsf/trunk/libc/time/tzfile.c
==============================================================================
--- fsf/trunk/libc/time/tzfile.c (original)
+++ fsf/trunk/libc/time/tzfile.c Thu Oct 18 00:04:21 2007
@@ -371,14 +371,52 @@
     types[i++].isgmt = 0;
 
   /* Read the POSIX TZ-style information if possible.  */
-  if (tzspec != NULL)
+  if (sizeof (time_t) == 8 && tzspec != NULL)
     {
       /* Skip over the newline first.  */
       if (getc_unlocked (f) != '\n'
-	  || fread_unlocked (tzspec, 1, tzspec_len - 1, f) != tzspec_len - 1)
+	  || (fread_unlocked (tzspec, 1, tzspec_len - 1, f)
+	      != tzspec_len - 1))
 	tzspec = NULL;
       else
 	tzspec[tzspec_len - 1] = '\0';
+    }
+  else if (sizeof (time_t) == 4 && tzhead.tzh_version != '\0')
+    {
+      /* Get the TZ string.  */
+      if (__builtin_expect (fread_unlocked ((void *) &tzhead, sizeof (tzhead),
+					    1, f) != 1, 0)
+	  || (memcmp (tzhead.tzh_magic, TZ_MAGIC, sizeof (tzhead.tzh_magic))
+	      != 0))
+	goto lose;
+
+      size_t num_transitions2 = (size_t) decode (tzhead.tzh_timecnt);
+      size_t num_types2 = (size_t) decode (tzhead.tzh_typecnt);
+      size_t chars2 = (size_t) decode (tzhead.tzh_charcnt);
+      size_t num_leaps2 = (size_t) decode (tzhead.tzh_leapcnt);
+      size_t num_isstd2 = (size_t) decode (tzhead.tzh_ttisstdcnt);
+      size_t num_isgmt2 = (size_t) decode (tzhead.tzh_ttisgmtcnt);
+
+      /* Position the stream before the second header.  */
+      size_t to_skip = (num_transitions2 * (8 + 1)
+			+ num_types2 * 6
+			+ chars2
+			+ num_leaps2 * 12
+			+ num_isstd2
+			+ num_isgmt2);
+      off_t off;
+      if (fseek (f, to_skip, SEEK_CUR) != 0
+	  || (off = ftello (f)) < 0
+	  || st.st_size < off + 2)
+	goto lose;
+
+      tzspec_len = st.st_size - off - 1;
+      char *tzstr = alloca (tzspec_len);
+      if (getc_unlocked (f) != '\n'
+	  || (fread_unlocked (tzstr, 1, tzspec_len - 1, f) != tzspec_len - 1))
+	goto lose;
+      tzstr[tzspec_len - 1] = '\0';
+      tzspec = __tzstring (tzstr);
     }
 
   fclose (f);
@@ -561,7 +599,7 @@
       __tzname[0] = NULL;
       __tzname[1] = NULL;
 
-      if (num_transitions == 0 || timer < transitions[0])
+      if (__builtin_expect (num_transitions == 0 || timer < transitions[0], 0))
 	{
 	  /* TIMER is before any transition (or there are no transitions).
 	     Choose the first non-DST type
@@ -591,9 +629,9 @@
 		  ++j;
 	    }
 	}
-      else if (timer >= transitions[num_transitions - 1])
+      else if (__builtin_expect (timer >= transitions[num_transitions - 1], 0))
 	{
-	  if (tzspec == NULL)
+	  if (__builtin_expect (tzspec == NULL, 0))
 	    {
 	    use_last:
 	      i = num_transitions;
@@ -605,11 +643,21 @@
 
 	  /* Convert to broken down structure.  If this fails do not
 	     use the string.  */
-	  if (! __offtime (&timer, 0, tp))
+	  if (__builtin_expect (! __offtime (&timer, 0, tp), 0))
 	    goto use_last;
 
 	  /* Use the rules from the TZ string to compute the change.  */
 	  __tz_compute (timer, tp, 1);
+
+	  /* If tzspec comes from posixrules loaded by __tzfile_default,
+	     override the STD and DST zone names with the ones user
+	     requested in TZ envvar.  */
+	  if (__builtin_expect (zone_names == (char *) &leaps[num_leaps], 0))
+	    {
+	      assert (num_types == 2);
+	      __tzname[0] = __tzstring (zone_names);
+	      __tzname[1] = __tzstring (&zone_names[strlen (zone_names) + 1]);
+	    }
 
 	  *leap_correct = 0L;
 	  *leap_hit = 0;
@@ -688,6 +736,9 @@
 	      ++j;
 	    }
 
+	  if (__builtin_expect (__tzname[0] == NULL, 0))
+	    __tzname[0] = __tzname[1];
+
 	  i = type_idxs[i - 1];
 	}
 

Modified: fsf/trunk/libc/time/tzset.c
==============================================================================
--- fsf/trunk/libc/time/tzset.c (original)
+++ fsf/trunk/libc/time/tzset.c Thu Oct 18 00:04:21 2007
@@ -401,7 +401,7 @@
   if (tz && *tz == ':')
     ++tz;
 
-  /* Check whether the value changes since the last run.  */
+  /* Check whether the value changed since the last run.  */
   if (old_tz != NULL && tz != NULL && strcmp (tz, old_tz) == 0)
     /* No change, simply return.  */
     return;
@@ -606,9 +606,8 @@
 
   /* Update internal database according to current TZ setting.
      POSIX.1 8.3.7.2 says that localtime_r is not required to set tzname.
-     This is a good idea since this allows at least a bit more parallelism.
-     By analogy we apply the same rule to gmtime_r.  */
-  tzset_internal (tp == &_tmbuf, 0);
+     This is a good idea since this allows at least a bit more parallelism.  */
+  tzset_internal (tp == &_tmbuf && use_localtime, 1);
 
   if (__use_tzfile)
     __tzfile_compute (*timer, use_localtime, &leap_correction,

Modified: fsf/trunk/libc/version.h
==============================================================================
--- fsf/trunk/libc/version.h (original)
+++ fsf/trunk/libc/version.h Thu Oct 18 00:04:21 2007
@@ -1,4 +1,4 @@
 /* This file just defines the current version number of libc.  */
 
 #define RELEASE "development"
-#define VERSION "2.6.90"
+#define VERSION "2.7.90"