[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[commits] r9924 - in /fsf/trunk/libc: ./ elf/ libio/ malloc/ sysdeps/i386/i686/multiarch/ sysdeps/x86_64/



Author: eglibc
Date: Thu Feb 25 00:03:40 2010
New Revision: 9924

Log:
Import glibc-mainline for 2010-02-25

Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/elf/dl-load.c
    fsf/trunk/libc/libio/iovdprintf.c
    fsf/trunk/libc/malloc/malloc.c
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
    fsf/trunk/libc/sysdeps/i386/i686/multiarch/memset-sse2.S
    fsf/trunk/libc/sysdeps/x86_64/Implies

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Thu Feb 25 00:03:40 2010
@@ -1,3 +1,46 @@
+2010-02-24  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	* sysdeps/x86_64/Implies: Add ieee754/dbl-64/wordsize-64 entry.
+
+2010-02-24  H.J. Lu  <hongjiu.lu@xxxxxxxxx>
+
+	* sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
+	(bk_write_less32bytes_2): Renamed to ...
+	(bk_write_less48bytes): This.
+	Use unsigned conditional jumps.
+	Correct unwind info.
+	Use add/sub instead of lea if possible.
+	(shl_0_gobble_cache_loop_tail): Removed.
+	(large_page): Properly adjust ECX.
+
+	* sysdeps/i386/i686/multiarch/memcpy-ssse3.S: Use unsigned
+	conditional jumps.
+	Correct unwind info.
+
+	* sysdeps/i386/i686/multiarch/memset-sse2-rep.S: Remove redundant
+	punpcklbw.
+	Use unsigned conditional jumps.
+	(128bytesormore_nt): Renamed to ...
+	(128bytesormore_endof_L1): This.
+	Use add instead of lea if possible.
+	Correct unwind info.
+	* sysdeps/i386/i686/multiarch/memset-sse2.S: Remove redundant
+	punpcklbw.
+	Use unsigned conditional jumps.
+	Use add instead of lea if possible.
+	Correct unwind info.
+
+2010-02-24  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	[BZ #11319]
+	* libio/iovdprintf.c (_IO_vdprintf): Explicitly flush stream before
+	undoing the stream because _IO_FINISH doesn't report failures.
+
+	[BZ #5553]
+	* malloc/malloc.c (public_vALLOc): Set ar_ptr when trying main_arena.
+	(public_pVALLOc): Likewise.
+	Patch by Petr Baudis.
+
 2010-02-22  Jim Meyering  <meyering@xxxxxxxxxx>
 
 	* manual/math.texi (BSD Random): Fix a typo: s/are/is/

Modified: fsf/trunk/libc/elf/dl-load.c
==============================================================================
--- fsf/trunk/libc/elf/dl-load.c (original)
+++ fsf/trunk/libc/elf/dl-load.c Thu Feb 25 00:03:40 2010
@@ -1,5 +1,5 @@
 /* Map in a shared object's segments from the file.
-   Copyright (C) 1995-2005, 2006, 2007, 2009 Free Software Foundation, Inc.
+   Copyright (C) 1995-2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -313,7 +313,7 @@
 expand_dynamic_string_token (struct link_map *l, const char *s)
 {
   /* We make two runs over the string.  First we determine how large the
-     resulting string is and then we copy it over.  Since this is now
+     resulting string is and then we copy it over.  Since this is no
      frequently executed operation we are looking here not for performance
      but rather for code size.  */
   size_t cnt;
@@ -391,7 +391,7 @@
       size_t len = strlen (cp);
 
       /* `strsep' can pass an empty string.  This has to be
-         interpreted as `use the current directory'. */
+	 interpreted as `use the current directory'. */
       if (len == 0)
 	{
 	  static const char curwd[] = "./";
@@ -1519,7 +1519,7 @@
 /* Print search path.  */
 static void
 print_search_path (struct r_search_path_elem **list,
-                   const char *what, const char *name)
+		   const char *what, const char *name)
 {
   char buf[max_dirnamelen + max_capstrlen];
   int first = 1;
@@ -2044,7 +2044,7 @@
       fd = -1;
 
       /* When the object has the RUNPATH information we don't use any
-         RPATHs.  */
+	 RPATHs.  */
       if (loader == NULL || loader->l_info[DT_RUNPATH] == NULL)
 	{
 	  /* This is the executable's map (if there is one).  Make sure that
@@ -2067,7 +2067,7 @@
 	      }
 
 	  /* If dynamically linked, try the DT_RPATH of the executable
-             itself.  NB: we do this for lookups in any namespace.  */
+	     itself.  NB: we do this for lookups in any namespace.  */
 	  if (fd == -1 && !did_main_map
 	      && main_map != NULL && main_map->l_type != lt_loaded
 	      && cache_rpath (main_map, &main_map->l_rpath_dirs, DT_RPATH,
@@ -2164,7 +2164,7 @@
 
       /* Add another newline when we are tracing the library loading.  */
       if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_LIBS, 0))
-        _dl_debug_printf ("\n");
+	_dl_debug_printf ("\n");
     }
   else
     {

Modified: fsf/trunk/libc/libio/iovdprintf.c
==============================================================================
--- fsf/trunk/libc/libio/iovdprintf.c (original)
+++ fsf/trunk/libc/libio/iovdprintf.c Thu Feb 25 00:03:40 2010
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995, 1997-2000, 2001, 2002, 2003, 2006
+/* Copyright (C) 1995, 1997-2000, 2001, 2002, 2003, 2006, 2010
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -60,6 +60,9 @@
 
   done = INTUSE(_IO_vfprintf) (&tmpfil.file, format, arg);
 
+  if (done != EOF && _IO_do_flush (&tmpfil.file) == EOF)
+    done = EOF;
+
   _IO_FINISH (&tmpfil.file);
 
   return done;

Modified: fsf/trunk/libc/malloc/malloc.c
==============================================================================
--- fsf/trunk/libc/malloc/malloc.c (original)
+++ fsf/trunk/libc/malloc/malloc.c Thu Feb 25 00:03:40 2010
@@ -1,5 +1,5 @@
 /* Malloc implementation for multiple threads without lock contention.
-   Copyright (C) 1996-2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+   Copyright (C) 1996-2009, 2010 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Wolfram Gloger <wg@xxxxxxxxx>
    and Doug Lea <dl@xxxxxxxxxxxxx>, 2001.
@@ -3933,9 +3933,10 @@
   if(!p) {
     /* Maybe the failure is due to running out of mmapped areas. */
     if(ar_ptr != &main_arena) {
-      (void)mutex_lock(&main_arena.mutex);
-      p = _int_memalign(&main_arena, pagesz, bytes);
-      (void)mutex_unlock(&main_arena.mutex);
+      ar_ptr = &main_arena;
+      (void)mutex_lock(&ar_ptr->mutex);
+      p = _int_memalign(ar_ptr, pagesz, bytes);
+      (void)mutex_unlock(&ar_ptr->mutex);
     } else {
 #if USE_ARENAS
       /* ... or sbrk() has failed and there is still a chance to mmap() */
@@ -3978,9 +3979,10 @@
   if(!p) {
     /* Maybe the failure is due to running out of mmapped areas. */
     if(ar_ptr != &main_arena) {
-      (void)mutex_lock(&main_arena.mutex);
-      p = _int_memalign(&main_arena, pagesz, rounded_bytes);
-      (void)mutex_unlock(&main_arena.mutex);
+      ar_ptr = &main_arena;
+      (void)mutex_lock(&ar_ptr->mutex);
+      p = _int_memalign(ar_ptr, pagesz, rounded_bytes);
+      (void)mutex_unlock(&ar_ptr->mutex);
     } else {
 #if USE_ARENAS
       /* ... or sbrk() has failed and there is still a chance to mmap() */

Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S Thu Feb 25 00:03:40 2010
@@ -127,10 +127,8 @@
 	cmp	%eax, %edx
 	jb	L(copy_forward)
 	je	L(fwd_write_0bytes)
-	cmp	$32, %ecx
-	jge	L(memmove_bwd)
-	jmp	L(bk_write_less32bytes_2)
-L(memmove_bwd):
+	cmp	$48, %ecx
+	jb	L(bk_write_less48bytes)
 	add	%ecx, %eax
 	cmp	%eax, %edx
 	movl	SRC(%esp), %eax
@@ -139,12 +137,12 @@
 L(copy_forward):
 #endif
 	cmp	$48, %ecx
-	jge	L(48bytesormore)
+	jae	L(48bytesormore)
 
 L(fwd_write_less32bytes):
 #ifndef USE_AS_MEMMOVE
 	cmp	%dl, %al
-	jl	L(bk_write)
+	jb	L(bk_write)
 #endif
 	add	%ecx, %edx
 	add	%ecx, %eax
@@ -162,6 +160,7 @@
 	movl	%edx, %edi
 	and	$-16, %edx
 	PUSH (%esi)
+	cfi_remember_state
 	add	$16, %edx
 	movl	%edi, %esi
 	sub	%edx, %edi
@@ -181,7 +180,7 @@
 #endif
 
 	mov	%eax, %edi
-	jge	L(large_page)
+	jae	L(large_page)
 	and	$0xf, %edi
 	jz	L(shl_0)
 
@@ -201,7 +200,7 @@
 	movdqa	%xmm0, (%edx, %edi)
 	movdqa	%xmm1, 16(%edx, %edi)
 	lea	32(%edi), %edi
-	jl	L(shl_0_end)
+	jb	L(shl_0_end)
 
 	movdqa	(%eax, %edi), %xmm0
 	movdqa	16(%eax, %edi), %xmm1
@@ -209,7 +208,7 @@
 	movdqa	%xmm0, (%edx, %edi)
 	movdqa	%xmm1, 16(%edx, %edi)
 	lea	32(%edi), %edi
-	jl	L(shl_0_end)
+	jb	L(shl_0_end)
 
 	movdqa	(%eax, %edi), %xmm0
 	movdqa	16(%eax, %edi), %xmm1
@@ -217,7 +216,7 @@
 	movdqa	%xmm0, (%edx, %edi)
 	movdqa	%xmm1, 16(%edx, %edi)
 	lea	32(%edi), %edi
-	jl	L(shl_0_end)
+	jb	L(shl_0_end)
 
 	movdqa	(%eax, %edi), %xmm0
 	movdqa	16(%eax, %edi), %xmm1
@@ -234,6 +233,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 L(shl_0_gobble):
 
 #ifdef DATA_CACHE_SIZE_HALF
@@ -251,8 +252,8 @@
 	shr	$3, %esi
 	sub	%esi, %edi
 	cmp	%edi, %ecx
-	jge	L(shl_0_gobble_mem_start)
-	lea	-128(%ecx), %ecx
+	jae	L(shl_0_gobble_mem_start)
+	sub	$128, %ecx
 	ALIGN (4)
 L(shl_0_gobble_cache_loop):
 	movdqa	(%eax), %xmm0
@@ -275,11 +276,10 @@
 	movaps	%xmm7, 0x70(%edx)
 	lea	0x80(%edx), %edx
 
-	jge	L(shl_0_gobble_cache_loop)
-L(shl_0_gobble_cache_loop_tail):
-	cmp	$-0x40, %ecx
-	lea	0x80(%ecx), %ecx
-	jl	L(shl_0_cache_less_64bytes)
+	jae	L(shl_0_gobble_cache_loop)
+	add	$0x80, %ecx
+	cmp	$0x40, %ecx
+	jb	L(shl_0_cache_less_64bytes)
 
 	movdqa	(%eax), %xmm0
 	sub	$0x40, %ecx
@@ -297,7 +297,7 @@
 	add	$0x40, %edx
 L(shl_0_cache_less_64bytes):
 	cmp	$0x20, %ecx
-	jl	L(shl_0_cache_less_32bytes)
+	jb	L(shl_0_cache_less_32bytes)
 	movdqa	(%eax), %xmm0
 	sub	$0x20, %ecx
 	movdqa	0x10(%eax), %xmm1
@@ -307,7 +307,7 @@
 	add	$0x20, %edx
 L(shl_0_cache_less_32bytes):
 	cmp	$0x10, %ecx
-	jl	L(shl_0_cache_less_16bytes)
+	jb	L(shl_0_cache_less_16bytes)
 	sub	$0x10, %ecx
 	movdqa	(%eax), %xmm0
 	add	$0x10, %eax
@@ -320,12 +320,13 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
 
-
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_0_gobble_mem_start):
 	cmp	%al, %dl
 	je	L(copy_page_by_rep)
-	lea	-128(%ecx), %ecx
+	sub	$128, %ecx
 L(shl_0_gobble_mem_loop):
 	prefetchnta 0x1c0(%eax)
 	prefetchnta 0x280(%eax)
@@ -352,10 +353,10 @@
 	movaps	%xmm7, 0x70(%edx)
 	lea	0x80(%edx), %edx
 
-	jge	L(shl_0_gobble_mem_loop)
-	cmp	$-0x40, %ecx
-	lea	0x80(%ecx), %ecx
-	jl	L(shl_0_mem_less_64bytes)
+	jae	L(shl_0_gobble_mem_loop)
+	add	$0x80, %ecx
+	cmp	$0x40, %ecx
+	jb	L(shl_0_mem_less_64bytes)
 
 	movdqa	(%eax), %xmm0
 	sub	$0x40, %ecx
@@ -373,7 +374,7 @@
 	add	$0x40, %edx
 L(shl_0_mem_less_64bytes):
 	cmp	$0x20, %ecx
-	jl	L(shl_0_mem_less_32bytes)
+	jb	L(shl_0_mem_less_32bytes)
 	movdqa	(%eax), %xmm0
 	sub	$0x20, %ecx
 	movdqa	0x10(%eax), %xmm1
@@ -383,7 +384,7 @@
 	add	$0x20, %edx
 L(shl_0_mem_less_32bytes):
 	cmp	$0x10, %ecx
-	jl	L(shl_0_mem_less_16bytes)
+	jb	L(shl_0_mem_less_16bytes)
 	sub	$0x10, %ecx
 	movdqa	(%eax), %xmm0
 	add	$0x10, %eax
@@ -396,14 +397,15 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
 
-
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_1):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-1(%eax), %eax
+	sub	$1, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_1_loop):
@@ -418,7 +420,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_1_end)
+	jb	L(shl_1_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -433,20 +435,22 @@
 	jae	L(shl_1_loop)
 
 L(shl_1_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	1(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_2):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-2(%eax), %eax
+	sub	$2, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_2_loop):
@@ -461,7 +465,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_2_end)
+	jb	L(shl_2_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -476,20 +480,22 @@
 	jae	L(shl_2_loop)
 
 L(shl_2_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	2(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_3):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-3(%eax), %eax
+	sub	$3, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_3_loop):
@@ -504,7 +510,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_3_end)
+	jb	L(shl_3_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -519,20 +525,22 @@
 	jae	L(shl_3_loop)
 
 L(shl_3_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	3(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_4):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-4(%eax), %eax
+	sub	$4, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_4_loop):
@@ -547,7 +555,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_4_end)
+	jb	L(shl_4_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -562,20 +570,22 @@
 	jae	L(shl_4_loop)
 
 L(shl_4_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	4(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_5):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-5(%eax), %eax
+	sub	$5, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_5_loop):
@@ -590,7 +600,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_5_end)
+	jb	L(shl_5_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -605,21 +615,22 @@
 	jae	L(shl_5_loop)
 
 L(shl_5_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	5(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
-
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_6):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-6(%eax), %eax
+	sub	$6, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_6_loop):
@@ -634,7 +645,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_6_end)
+	jb	L(shl_6_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -649,20 +660,22 @@
 	jae	L(shl_6_loop)
 
 L(shl_6_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	6(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_7):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-7(%eax), %eax
+	sub	$7, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_7_loop):
@@ -677,7 +690,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_7_end)
+	jb	L(shl_7_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -692,20 +705,22 @@
 	jae	L(shl_7_loop)
 
 L(shl_7_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	7(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_8):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-8(%eax), %eax
+	sub	$8, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_8_loop):
@@ -720,7 +735,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_8_end)
+	jb	L(shl_8_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -735,20 +750,22 @@
 	jae	L(shl_8_loop)
 
 L(shl_8_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	8(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_9):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-9(%eax), %eax
+	sub	$9, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_9_loop):
@@ -763,7 +780,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_9_end)
+	jb	L(shl_9_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -778,20 +795,22 @@
 	jae	L(shl_9_loop)
 
 L(shl_9_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	9(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_10):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-10(%eax), %eax
+	sub	$10, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_10_loop):
@@ -806,7 +825,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_10_end)
+	jb	L(shl_10_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -821,20 +840,22 @@
 	jae	L(shl_10_loop)
 
 L(shl_10_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	10(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_11):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-11(%eax), %eax
+	sub	$11, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_11_loop):
@@ -849,7 +870,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_11_end)
+	jb	L(shl_11_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -864,20 +885,22 @@
 	jae	L(shl_11_loop)
 
 L(shl_11_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	11(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_12):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-12(%eax), %eax
+	sub	$12, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_12_loop):
@@ -892,7 +915,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_12_end)
+	jb	L(shl_12_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -907,20 +930,22 @@
 	jae	L(shl_12_loop)
 
 L(shl_12_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	12(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_13):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-13(%eax), %eax
+	sub	$13, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_13_loop):
@@ -935,7 +960,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_13_end)
+	jb	L(shl_13_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -950,20 +975,22 @@
 	jae	L(shl_13_loop)
 
 L(shl_13_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	13(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_14):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-14(%eax), %eax
+	sub	$14, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_14_loop):
@@ -978,7 +1005,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_14_end)
+	jb	L(shl_14_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -993,21 +1020,22 @@
 	jae	L(shl_14_loop)
 
 L(shl_14_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	14(%edi, %eax), %eax
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
-
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_15):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	lea	-15(%eax), %eax
+	sub	$15, %eax
 	movaps	(%eax), %xmm1
 	xor	%edi, %edi
-	lea	-32(%ecx), %ecx
+	sub	$32, %ecx
 	movdqu	%xmm0, (%esi)
 	POP (%esi)
 L(shl_15_loop):
@@ -1022,7 +1050,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_15_end)
+	jb	L(shl_15_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -1037,7 +1065,7 @@
 	jae	L(shl_15_loop)
 
 L(shl_15_end):
-	lea	32(%ecx), %ecx
+	add	$32, %ecx
 	add	%ecx, %edi
 	add	%edi, %edx
 	lea	15(%edi, %eax), %eax
@@ -1241,20 +1269,23 @@
 	movl	DEST(%esp), %eax
 # endif
 #endif
-	RETURN
-
+	RETURN_END
+
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(large_page):
 	movdqu	(%eax), %xmm1
-	lea	16(%eax), %eax
 	movdqu	%xmm0, (%esi)
 	movntdq	%xmm1, (%edx)
-	lea	16(%edx), %edx
+	add	$0x10, %eax
+	add	$0x10, %edx
+	sub	$0x10, %ecx
 	cmp	%al, %dl
 	je	L(copy_page_by_rep)
 L(large_page_loop_init):
 	POP (%esi)
-	lea	-0x90(%ecx), %ecx
+	sub	$0x80, %ecx
 	POP (%edi)
 L(large_page_loop):
 	prefetchnta	0x1c0(%eax)
@@ -1280,9 +1311,9 @@
 	movntdq	%xmm7, 0x70(%edx)
 	lea	0x80(%edx), %edx
 	jae	L(large_page_loop)
-	cmp	$-0x40, %ecx
-	lea	0x80(%ecx), %ecx
-	jl	L(large_page_less_64bytes)
+	add	$0x80, %ecx
+	cmp	$0x40, %ecx
+	jb	L(large_page_less_64bytes)
 
 	movdqu	(%eax), %xmm0
 	movdqu	0x10(%eax), %xmm1
@@ -1298,7 +1329,7 @@
 	sub	$0x40, %ecx
 L(large_page_less_64bytes):
 	cmp	$32, %ecx
-	jl	L(large_page_less_32bytes)
+	jb	L(large_page_less_32bytes)
 	movdqu	(%eax), %xmm0
 	movdqu	0x10(%eax), %xmm1
 	lea	0x20(%eax), %eax
@@ -1312,6 +1343,8 @@
 	sfence
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(copy_page_by_rep):
 	mov	%eax, %esi
@@ -1658,18 +1691,18 @@
 L(copy_backward):
 	PUSH (%esi)
 	movl	%eax, %esi
-	lea	(%ecx,%edx,1),%edx
-	lea	(%ecx,%esi,1),%esi
+	add	%ecx, %edx
+	add	%ecx, %esi
 	testl	$0x3, %edx
 	jnz	L(bk_align)
 
 L(bk_aligned_4):
 	cmp	$64, %ecx
-	jge	L(bk_write_more64bytes)
+	jae	L(bk_write_more64bytes)
 
 L(bk_write_64bytesless):
 	cmp	$32, %ecx
-	jl	L(bk_write_less32bytes)
+	jb	L(bk_write_less32bytes)
 
 L(bk_write_more32bytes):
 	/* Copy 32 bytes at a time.  */
@@ -1698,13 +1731,14 @@
 	sub	%ecx, %edx
 	sub	%ecx, %eax
 	POP (%esi)
-L(bk_write_less32bytes_2):
+L(bk_write_less48bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
 
+	CFI_PUSH (%esi)
 	ALIGN (4)
 L(bk_align):
 	cmp	$8, %ecx
-	jle	L(bk_write_less32bytes)
+	jbe	L(bk_write_less32bytes)
 	testl	$1, %edx
 	/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
 	   then (EDX & 2) must be != 0.  */
@@ -1760,7 +1794,7 @@
 
 L(bk_ssse3_cpy_pre):
 	cmp	$64, %ecx
-	jl	L(bk_write_more32bytes)
+	jb	L(bk_write_more32bytes)
 
 L(bk_ssse3_cpy):
 	sub	$64, %esi
@@ -1775,7 +1809,7 @@
 	movdqu	(%esi), %xmm0
 	movdqa	%xmm0, (%edx)
 	cmp	$64, %ecx
-	jge	L(bk_ssse3_cpy)
+	jae	L(bk_ssse3_cpy)
 	jmp	L(bk_write_64bytesless)
 
 #endif

Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3.S Thu Feb 25 00:03:40 2010
@@ -128,7 +128,7 @@
 	jb	L(copy_forward)
 	je	L(fwd_write_0bytes)
 	cmp	$32, %ecx
-	jge	L(memmove_bwd)
+	jae	L(memmove_bwd)
 	jmp	L(bk_write_less32bytes_2)
 L(memmove_bwd):
 	add	%ecx, %eax
@@ -139,12 +139,12 @@
 L(copy_forward):
 #endif
 	cmp	$48, %ecx
-	jge	L(48bytesormore)
+	jae	L(48bytesormore)
 
 L(fwd_write_less32bytes):
 #ifndef USE_AS_MEMMOVE
 	cmp	%dl, %al
-	jl	L(bk_write)
+	jb	L(bk_write)
 #endif
 	add	%ecx, %edx
 	add	%ecx, %eax
@@ -162,6 +162,7 @@
 	movl	%edx, %edi
 	and	$-16, %edx
 	PUSH (%esi)
+	cfi_remember_state
 	add	$16, %edx
 	movl	%edi, %esi
 	sub	%edx, %edi
@@ -181,12 +182,14 @@
 #endif
 
 	mov	%eax, %edi
-	jge	L(large_page)
+	jae	L(large_page)
 	and	$0xf, %edi
 	jz	L(shl_0)
 
 	BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_0):
 	movdqu	%xmm0, (%esi)
@@ -202,7 +205,7 @@
 	movdqa	%xmm0, (%edx, %edi)
 	movdqa	%xmm1, 16(%edx, %edi)
 	lea	32(%edi), %edi
-	jl	L(shl_0_end)
+	jb	L(shl_0_end)
 
 	movdqa	(%eax, %edi), %xmm0
 	movdqa	16(%eax, %edi), %xmm1
@@ -210,7 +213,7 @@
 	movdqa	%xmm0, (%edx, %edi)
 	movdqa	%xmm1, 16(%edx, %edi)
 	lea	32(%edi), %edi
-	jl	L(shl_0_end)
+	jb	L(shl_0_end)
 
 	movdqa	(%eax, %edi), %xmm0
 	movdqa	16(%eax, %edi), %xmm1
@@ -218,7 +221,7 @@
 	movdqa	%xmm0, (%edx, %edi)
 	movdqa	%xmm1, 16(%edx, %edi)
 	lea	32(%edi), %edi
-	jl	L(shl_0_end)
+	jb	L(shl_0_end)
 
 	movdqa	(%eax, %edi), %xmm0
 	movdqa	16(%eax, %edi), %xmm1
@@ -234,6 +237,7 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
 
+	CFI_PUSH (%edi)
 L(shl_0_gobble):
 
 #ifdef DATA_CACHE_SIZE_HALF
@@ -250,7 +254,7 @@
 
 	POP (%edi)
 	lea	-128(%ecx), %ecx
-	jge	L(shl_0_gobble_mem_loop)
+	jae	L(shl_0_gobble_mem_loop)
 L(shl_0_gobble_cache_loop):
 	movdqa	(%eax), %xmm0
 	movdqa	0x10(%eax), %xmm1
@@ -272,8 +276,7 @@
 	movdqa	%xmm7, 0x70(%edx)
 	lea	0x80(%edx), %edx
 
-	jge	L(shl_0_gobble_cache_loop)
-L(shl_0_gobble_cache_loop_tail):
+	jae	L(shl_0_gobble_cache_loop)
 	cmp	$-0x40, %ecx
 	lea	0x80(%ecx), %ecx
 	jl	L(shl_0_cache_less_64bytes)
@@ -294,7 +297,7 @@
 	add	$0x40, %edx
 L(shl_0_cache_less_64bytes):
 	cmp	$0x20, %ecx
-	jl	L(shl_0_cache_less_32bytes)
+	jb	L(shl_0_cache_less_32bytes)
 	movdqa	(%eax), %xmm0
 	sub	$0x20, %ecx
 	movdqa	0x10(%eax), %xmm1
@@ -304,7 +307,7 @@
 	add	$0x20, %edx
 L(shl_0_cache_less_32bytes):
 	cmp	$0x10, %ecx
-	jl	L(shl_0_cache_less_16bytes)
+	jb	L(shl_0_cache_less_16bytes)
 	sub	$0x10, %ecx
 	movdqa	(%eax), %xmm0
 	add	$0x10, %eax
@@ -342,7 +345,7 @@
 	movdqa	%xmm7, 0x70(%edx)
 	lea	0x80(%edx), %edx
 
-	jge	L(shl_0_gobble_mem_loop)
+	jae	L(shl_0_gobble_mem_loop)
 	cmp	$-0x40, %ecx
 	lea	0x80(%ecx), %ecx
 	jl	L(shl_0_mem_less_64bytes)
@@ -363,7 +366,7 @@
 	add	$0x40, %edx
 L(shl_0_mem_less_64bytes):
 	cmp	$0x20, %ecx
-	jl	L(shl_0_mem_less_32bytes)
+	jb	L(shl_0_mem_less_32bytes)
 	movdqa	(%eax), %xmm0
 	sub	$0x20, %ecx
 	movdqa	0x10(%eax), %xmm1
@@ -373,7 +376,7 @@
 	add	$0x20, %edx
 L(shl_0_mem_less_32bytes):
 	cmp	$0x10, %ecx
-	jl	L(shl_0_mem_less_16bytes)
+	jb	L(shl_0_mem_less_16bytes)
 	sub	$0x10, %ecx
 	movdqa	(%eax), %xmm0
 	add	$0x10, %eax
@@ -384,7 +387,8 @@
 	add	%ecx, %eax
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
 
-
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_1):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -406,7 +410,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_1_end)
+	jb	L(shl_1_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -428,6 +432,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_2):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -449,7 +455,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_2_end)
+	jb	L(shl_2_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -471,6 +477,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_3):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -492,7 +500,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_3_end)
+	jb	L(shl_3_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -514,6 +522,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_4):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -535,7 +545,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_4_end)
+	jb	L(shl_4_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -557,6 +567,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_5):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -578,7 +590,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_5_end)
+	jb	L(shl_5_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -600,7 +612,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
-
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_6):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -622,7 +635,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_6_end)
+	jb	L(shl_6_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -644,6 +657,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_7):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -665,7 +680,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_7_end)
+	jb	L(shl_7_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -687,6 +702,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_8):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -708,7 +725,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_8_end)
+	jb	L(shl_8_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -730,6 +747,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_9):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -751,7 +770,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_9_end)
+	jb	L(shl_9_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -773,6 +792,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_10):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -794,7 +815,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_10_end)
+	jb	L(shl_10_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -816,6 +837,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_11):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -837,7 +860,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_11_end)
+	jb	L(shl_11_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -859,6 +882,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_12):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -880,7 +905,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_12_end)
+	jb	L(shl_12_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -902,6 +927,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_13):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -923,7 +950,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_13_end)
+	jb	L(shl_13_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -945,6 +972,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_14):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -966,7 +995,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_14_end)
+	jb	L(shl_14_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -988,7 +1017,8 @@
 	POP (%edi)
 	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
 
-
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(shl_15):
 	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -1010,7 +1040,7 @@
 	movdqa	%xmm2, -32(%edx, %edi)
 	movdqa	%xmm3, -16(%edx, %edi)
 
-	jl	L(shl_15_end)
+	jb	L(shl_15_end)
 
 	movdqa	16(%eax, %edi), %xmm2
 	sub	$32, %ecx
@@ -1229,8 +1259,10 @@
 	movl	DEST(%esp), %eax
 # endif
 #endif
-	RETURN
-
+	RETURN_END
+
+	cfi_restore_state
+	cfi_remember_state
 	ALIGN (4)
 L(large_page):
 	movdqu	(%eax), %xmm1
@@ -1281,7 +1313,7 @@
 	sub	$0x40, %ecx
 L(large_page_less_64bytes):
 	cmp	$32, %ecx
-	jl	L(large_page_less_32bytes)
+	jb	L(large_page_less_32bytes)
 	movdqu	(%eax), %xmm0
 	movdqu	0x10(%eax), %xmm1
 	lea	0x20(%eax), %eax
@@ -1617,11 +1649,11 @@
 
 L(bk_aligned_4):
 	cmp	$64, %ecx
-	jge	L(bk_write_more64bytes)
+	jae	L(bk_write_more64bytes)
 
 L(bk_write_64bytesless):
 	cmp	$32, %ecx
-	jl	L(bk_write_less32bytes)
+	jb	L(bk_write_less32bytes)
 
 L(bk_write_more32bytes):
 	/* Copy 32 bytes at a time.  */
@@ -1653,10 +1685,11 @@
 L(bk_write_less32bytes_2):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
 
+	CFI_PUSH (%esi)
 	ALIGN (4)
 L(bk_align):
 	cmp	$8, %ecx
-	jle	L(bk_write_less32bytes)
+	jbe	L(bk_write_less32bytes)
 	testl	$1, %edx
 	/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
 	   then (EDX & 2) must be != 0.  */
@@ -1712,7 +1745,7 @@
 
 L(bk_ssse3_cpy_pre):
 	cmp	$64, %ecx
-	jl	L(bk_write_more32bytes)
+	jb	L(bk_write_more32bytes)
 
 L(bk_ssse3_cpy):
 	sub	$64, %esi
@@ -1727,7 +1760,7 @@
 	movdqu	(%esi), %xmm0
 	movdqa	%xmm0, (%edx)
 	cmp	$64, %ecx
-	jge	L(bk_ssse3_cpy)
+	jae	L(bk_ssse3_cpy)
 	jmp	L(bk_write_64bytesless)
 
 #endif

Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/memset-sse2-rep.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/memset-sse2-rep.S Thu Feb 25 00:03:40 2010
@@ -243,7 +243,6 @@
 	pxor	%xmm0, %xmm0
 #else
 	movd	%eax, %xmm0
-	punpcklbw %xmm0, %xmm0
 	pshufd	$0, %xmm0, %xmm0
 #endif
 	testl	$0xf, %edx
@@ -261,7 +260,7 @@
 	ALIGN (4)
 L(aligned_16):
 	cmp	$128, %ecx
-	jge	L(128bytesormore)
+	jae	L(128bytesormore)
 
 L(aligned_16_less128bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
@@ -293,7 +292,7 @@
  * fast string will prefetch and combine data efficiently.
  */
 	cmp	%edi, %ecx
-	jae	L(128bytesormore_nt)
+	jae	L(128bytesormore_endof_L1)
 	subl	$128, %ecx
 L(128bytesormore_normal):
 	sub	$128, %ecx
@@ -306,7 +305,7 @@
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jl	L(128bytesless_normal)
+	jb	L(128bytesless_normal)
 
 
 	sub	$128, %ecx
@@ -319,15 +318,16 @@
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jge	L(128bytesormore_normal)
+	jae	L(128bytesormore_normal)
 
 L(128bytesless_normal):
 	POP (%edi)
-	lea	128(%ecx), %ecx
+	add	$128, %ecx
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
 
-	ALIGN (4)
-L(128bytesormore_nt):
+	CFI_PUSH (%edi)
+	ALIGN (4)
+L(128bytesormore_endof_L1):
 	mov	%edx, %edi
 	mov	%ecx, %edx
 	shr	$2, %ecx

Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/memset-sse2.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/memset-sse2.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/memset-sse2.S Thu Feb 25 00:03:40 2010
@@ -243,7 +243,6 @@
 	pxor	%xmm0, %xmm0
 #else
 	movd	%eax, %xmm0
-	punpcklbw %xmm0, %xmm0
 	pshufd	$0, %xmm0, %xmm0
 #endif
 	testl	$0xf, %edx
@@ -261,7 +260,7 @@
 	ALIGN (4)
 L(aligned_16):
 	cmp	$128, %ecx
-	jge	L(128bytesormore)
+	jae	L(128bytesormore)
 
 L(aligned_16_less128bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
@@ -287,14 +286,17 @@
 
 #ifdef DATA_CACHE_SIZE
 	POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
 	cmp	$DATA_CACHE_SIZE, %ecx
 #else
 # ifdef SHARED
+#  define RESTORE_EBX_STATE
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
 # else
 	POP (%ebx)
+#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
 	cmp	__x86_data_cache_size, %ecx
 # endif
 #endif
@@ -312,7 +314,7 @@
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jl	L(128bytesless_normal)
+	jb	L(128bytesless_normal)
 
 
 	sub	$128, %ecx
@@ -325,10 +327,10 @@
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jge	L(128bytesormore_normal)
+	jae	L(128bytesormore_normal)
 
 L(128bytesless_normal):
-	lea	128(%ecx), %ecx
+	add	$128, %ecx
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
 
 	ALIGN (4)
@@ -346,11 +348,12 @@
 	movaps	%xmm0, 0x70(%edx)
 	add	$128, %edx
 	cmp	$128, %ecx
-	jge	L(128bytes_L2_normal)
+	jae	L(128bytes_L2_normal)
 
 L(128bytesless_L2_normal):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
 
+	RESTORE_EBX_STATE
 L(128bytesormore_nt_start):
 	sub	%ebx, %ecx
 	ALIGN (4)
@@ -368,7 +371,7 @@
 	movdqa	%xmm0, 0x70(%edx)
 	add	$0x80, %edx
 	cmp	$0x80, %ebx
-	jge	L(128bytesormore_shared_cache_loop)
+	jae	L(128bytesormore_shared_cache_loop)
 	cmp	$0x80, %ecx
 	jb	L(shared_cache_loop_end)
 	ALIGN (4)
@@ -384,7 +387,7 @@
 	movntdq	%xmm0, 0x70(%edx)
 	add	$0x80, %edx
 	cmp	$0x80, %ecx
-	jge	L(128bytesormore_nt)
+	jae	L(128bytesormore_nt)
 	sfence
 L(shared_cache_loop_end):
 #if defined DATA_CACHE_SIZE || !defined SHARED

Modified: fsf/trunk/libc/sysdeps/x86_64/Implies
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/Implies (original)
+++ fsf/trunk/libc/sysdeps/x86_64/Implies Thu Feb 25 00:03:40 2010
@@ -1,4 +1,5 @@
 wordsize-64
 ieee754/ldbl-96
+ieee754/dbl-64/wordsize-64
 ieee754/dbl-64
 ieee754/flt-32