
[commits] r11583 - in /fsf/trunk/libc: ./ elf/ nptl/ nptl/sysdeps/pthread/ posix/ sysdeps/powerpc/powerpc32/ppca2/ sysdeps/powerpc/pow...

Author: eglibc
Date: Wed Sep 22 00:03:13 2010
New Revision: 11583

Log:
Import glibc-mainline for 2010-09-22

Added:
    fsf/trunk/libc/sysdeps/powerpc/powerpc32/ppca2/
    fsf/trunk/libc/sysdeps/powerpc/powerpc32/ppca2/memcpy.S
    fsf/trunk/libc/sysdeps/powerpc/powerpc64/ppca2/
    fsf/trunk/libc/sysdeps/powerpc/powerpc64/ppca2/memcpy.S
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/ppca2/
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/ppca2/fpu/
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/ppca2/fpu/Implies
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/ppca2/
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/ppca2/fpu/
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/ppca2/fpu/Implies
Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/elf/dl-close.c
    fsf/trunk/libc/elf/dl-libc.c
    fsf/trunk/libc/nptl/ChangeLog
    fsf/trunk/libc/nptl/sysdeps/pthread/pthread.h
    fsf/trunk/libc/posix/unistd.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/getdents.c

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Wed Sep 22 00:03:13 2010
@@ -1,3 +1,29 @@
+2010-09-15  Michael B. Brutman  <brutman@xxxxxxxxxx>
+
+	* sysdeps/powerpc/powerpc32/ppca2/memcpy.S: New file.
+	* sysdeps/powerpc/powerpc64/ppca2/memcpy.S: New file.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/ppca2/fpu/Implies: New
+	submachine.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/ppca2/fpu/Implies:
+	Likewise.
+
+2010-09-15  Joseph Myers  <joseph@xxxxxxxxxxxxxxxx>
+
+	* sysdeps/unix/sysv/linux/getdents.c (__GETDENTS): When
+	implementing getdents64 using getdents syscall, set d_type if
+	__ASSUME_GETDENTS32_D_TYPE.
+
+2010-09-16  Andreas Schwab  <schwab@xxxxxxxxxx>
+
+	* elf/dl-close.c (free_slotinfo, free_mem): Move to...
+	* elf/dl-libc.c (free_slotinfo, free_mem): ... here.
+
+2010-09-21  Ulrich Drepper  <drepper@xxxxxxxxxx>
+
+	[BZ #12037]
+	* posix/unistd.h: Undo change of feature selection for ftruncate from
+	2010-01-11.
+
 2010-09-20  Ulrich Drepper  <drepper@xxxxxxxxxx>
 
 	* sysdeps/x86_64/strcmp.S: Fix another typo in x86-64 strncasecmp limit

Modified: fsf/trunk/libc/elf/dl-close.c
==============================================================================
--- fsf/trunk/libc/elf/dl-close.c (original)
+++ fsf/trunk/libc/elf/dl-close.c Wed Sep 22 00:03:13 2010
@@ -748,77 +748,3 @@
 
   __rtld_lock_unlock_recursive (GL(dl_load_lock));
 }
-
-
-static bool __libc_freeres_fn_section
-free_slotinfo (struct dtv_slotinfo_list **elemp)
-{
-  size_t cnt;
-
-  if (*elemp == NULL)
-    /* Nothing here, all is removed (or there never was anything).  */
-    return true;
-
-  if (!free_slotinfo (&(*elemp)->next))
-    /* We cannot free the entry.  */
-    return false;
-
-  /* That cleared our next pointer for us.  */
-
-  for (cnt = 0; cnt < (*elemp)->len; ++cnt)
-    if ((*elemp)->slotinfo[cnt].map != NULL)
-      /* Still used.  */
-      return false;
-
-  /* We can remove the list element.  */
-  free (*elemp);
-  *elemp = NULL;
-
-  return true;
-}
-
-
-libc_freeres_fn (free_mem)
-{
-  for (Lmid_t nsid = 0; nsid < GL(dl_nns); ++nsid)
-    if (__builtin_expect (GL(dl_ns)[nsid]._ns_global_scope_alloc, 0) != 0
-	&& (GL(dl_ns)[nsid]._ns_main_searchlist->r_nlist
-	    // XXX Check whether we need NS-specific initial_searchlist
-	    == GLRO(dl_initial_searchlist).r_nlist))
-      {
-	/* All objects dynamically loaded by the program are unloaded.  Free
-	   the memory allocated for the global scope variable.  */
-	struct link_map **old = GL(dl_ns)[nsid]._ns_main_searchlist->r_list;
-
-	/* Put the old map in.  */
-	GL(dl_ns)[nsid]._ns_main_searchlist->r_list
-	  // XXX Check whether we need NS-specific initial_searchlist
-	  = GLRO(dl_initial_searchlist).r_list;
-	/* Signal that the original map is used.  */
-	GL(dl_ns)[nsid]._ns_global_scope_alloc = 0;
-
-	/* Now free the old map.  */
-	free (old);
-      }
-
-  if (USE___THREAD || GL(dl_tls_dtv_slotinfo_list) != NULL)
-    {
-      /* Free the memory allocated for the dtv slotinfo array.  We can do
-	 this only if all modules which used this memory are unloaded.  */
-#ifdef SHARED
-      if (GL(dl_initial_dtv) == NULL)
-	/* There was no initial TLS setup; it was set up later and
-	   used the normal malloc.  */
-	free_slotinfo (&GL(dl_tls_dtv_slotinfo_list));
-      else
-#endif
-	/* The first element of the list does not have to be deallocated.
-	   It was allocated in the dynamic linker (i.e., with a different
-	   malloc), and in the static library it's in .bss space.  */
-	free_slotinfo (&GL(dl_tls_dtv_slotinfo_list)->next);
-    }
-
-  void *scope_free_list = GL(dl_scope_free_list);
-  GL(dl_scope_free_list) = NULL;
-  free (scope_free_list);
-}

Modified: fsf/trunk/libc/elf/dl-libc.c
==============================================================================
--- fsf/trunk/libc/elf/dl-libc.c (original)
+++ fsf/trunk/libc/elf/dl-libc.c Wed Sep 22 00:03:13 2010
@@ -1,5 +1,5 @@
 /* Handle loading and unloading shared objects for internal libc purposes.
-   Copyright (C) 1999-2002,2004,2005,2006,2009 Free Software Foundation, Inc.
+   Copyright (C) 1999-2002,2004-2006,2009,2010 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Zack Weinberg <zack@xxxxxxxxxxxxxxxxx>, 1999.
 
@@ -219,6 +219,34 @@
   return dlerror_run (do_dlclose, map);
 }
 libc_hidden_def (__libc_dlclose)
+
+
+static bool __libc_freeres_fn_section
+free_slotinfo (struct dtv_slotinfo_list **elemp)
+{
+  size_t cnt;
+
+  if (*elemp == NULL)
+    /* Nothing here, all is removed (or there never was anything).  */
+    return true;
+
+  if (!free_slotinfo (&(*elemp)->next))
+    /* We cannot free the entry.  */
+    return false;
+
+  /* That cleared our next pointer for us.  */
+
+  for (cnt = 0; cnt < (*elemp)->len; ++cnt)
+    if ((*elemp)->slotinfo[cnt].map != NULL)
+      /* Still used.  */
+      return false;
+
+  /* We can remove the list element.  */
+  free (*elemp);
+  *elemp = NULL;
+
+  return true;
+}
 
 
 libc_freeres_fn (free_mem)
@@ -235,20 +263,63 @@
       free (old);
     }
 
-  /* Remove all additional names added to the objects.  */
   for (Lmid_t ns = 0; ns < GL(dl_nns); ++ns)
-    for (l = GL(dl_ns)[ns]._ns_loaded; l != NULL; l = l->l_next)
-      {
-	struct libname_list *lnp = l->l_libname->next;
-
-	l->l_libname->next = NULL;
-
-	while (lnp != NULL)
-	  {
-	    struct libname_list *old = lnp;
-	    lnp = lnp->next;
-	    if (! old->dont_free)
-	    free (old);
-	  }
-      }
-}
+    {
+      /* Remove all additional names added to the objects.  */
+      for (l = GL(dl_ns)[ns]._ns_loaded; l != NULL; l = l->l_next)
+	{
+	  struct libname_list *lnp = l->l_libname->next;
+
+	  l->l_libname->next = NULL;
+
+	  while (lnp != NULL)
+	    {
+	      struct libname_list *old = lnp;
+	      lnp = lnp->next;
+	      if (! old->dont_free)
+		free (old);
+	    }
+	}
+
+      if (__builtin_expect (GL(dl_ns)[ns]._ns_global_scope_alloc, 0) != 0
+	  && (GL(dl_ns)[ns]._ns_main_searchlist->r_nlist
+	      // XXX Check whether we need NS-specific initial_searchlist
+	      == GLRO(dl_initial_searchlist).r_nlist))
+	{
+	  /* All objects dynamically loaded by the program are unloaded.  Free
+	     the memory allocated for the global scope variable.  */
+	  struct link_map **old = GL(dl_ns)[ns]._ns_main_searchlist->r_list;
+
+	  /* Put the old map in.  */
+	  GL(dl_ns)[ns]._ns_main_searchlist->r_list
+	    // XXX Check whether we need NS-specific initial_searchlist
+	    = GLRO(dl_initial_searchlist).r_list;
+	  /* Signal that the original map is used.  */
+	  GL(dl_ns)[ns]._ns_global_scope_alloc = 0;
+
+	  /* Now free the old map.  */
+	  free (old);
+	}
+    }
+
+  if (USE___THREAD || GL(dl_tls_dtv_slotinfo_list) != NULL)
+    {
+      /* Free the memory allocated for the dtv slotinfo array.  We can do
+	 this only if all modules which used this memory are unloaded.  */
+#ifdef SHARED
+      if (GL(dl_initial_dtv) == NULL)
+	/* There was no initial TLS setup; it was set up later and
+	   used the normal malloc.  */
+	free_slotinfo (&GL(dl_tls_dtv_slotinfo_list));
+      else
+#endif
+	/* The first element of the list does not have to be deallocated.
+	   It was allocated in the dynamic linker (i.e., with a different
+	   malloc), and in the static library it's in .bss space.  */
+	free_slotinfo (&GL(dl_tls_dtv_slotinfo_list)->next);
+    }
+
+  void *scope_free_list = GL(dl_scope_free_list);
+  GL(dl_scope_free_list) = NULL;
+  free (scope_free_list);
+}

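The free_slotinfo routine moved in above frees the dtv slotinfo list back to front, and only succeeds when every entry is already unused. A minimal standalone sketch of the same pattern, using hypothetical names (node, slot, free_list) rather than the real dtv_slotinfo_list internals:

  #include <stdbool.h>
  #include <stdlib.h>

  struct node
  {
    struct node *next;
    size_t len;
    void *slot[8];              /* stand-in for the slotinfo array */
  };

  /* Free the list tail-first, refusing if any entry is still live.
     Mirrors the shape of free_slotinfo above.  */
  static bool
  free_list (struct node **elemp)
  {
    if (*elemp == NULL)
      return true;              /* nothing left to free */

    if (!free_list (&(*elemp)->next))
      return false;             /* a later element is still in use */

    for (size_t cnt = 0; cnt < (*elemp)->len; ++cnt)
      if ((*elemp)->slot[cnt] != NULL)
        return false;           /* this element is still in use */

    free (*elemp);
    *elemp = NULL;
    return true;
  }

Freeing the tail first means a live entry anywhere in the list leaves every earlier element untouched, which is exactly what a __libc_freeres-time teardown wants.
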
Modified: fsf/trunk/libc/nptl/ChangeLog
==============================================================================
--- fsf/trunk/libc/nptl/ChangeLog (original)
+++ fsf/trunk/libc/nptl/ChangeLog Wed Sep 22 00:03:13 2010
@@ -1,3 +1,9 @@
+2010-09-21  Andreas Schwab  <schwab@xxxxxxxxxx>
+
+	* sysdeps/pthread/pthread.h (pthread_cleanup_push)
+	[!__EXCEPTIONS]: Mangle local variable not_first_call.
+	(pthread_cleanup_push_defer_np): Likewise.
+
 2010-09-03  Ulrich Drepper  <drepper@xxxxxxxxxx>
 
 	* sysdeps/pthread/allocalim.h (__libc_use_alloca): Expect blocks are

Modified: fsf/trunk/libc/nptl/sysdeps/pthread/pthread.h
==============================================================================
--- fsf/trunk/libc/nptl/sysdeps/pthread/pthread.h (original)
+++ fsf/trunk/libc/nptl/sysdeps/pthread/pthread.h Wed Sep 22 00:03:13 2010
@@ -650,9 +650,9 @@
     __pthread_unwind_buf_t __cancel_buf;				      \
     void (*__cancel_routine) (void *) = (routine);			      \
     void *__cancel_arg = (arg);						      \
-    int not_first_call = __sigsetjmp ((struct __jmp_buf_tag *) (void *)	      \
-				      __cancel_buf.__cancel_jmp_buf, 0);      \
-    if (__builtin_expect (not_first_call, 0))				      \
+    int __not_first_call = __sigsetjmp ((struct __jmp_buf_tag *) (void *)     \
+					__cancel_buf.__cancel_jmp_buf, 0);    \
+    if (__builtin_expect (__not_first_call, 0))				      \
       {									      \
 	__cancel_routine (__cancel_arg);				      \
 	__pthread_unwind_next (&__cancel_buf);				      \
@@ -685,9 +685,9 @@
     __pthread_unwind_buf_t __cancel_buf;				      \
     void (*__cancel_routine) (void *) = (routine);			      \
     void *__cancel_arg = (arg);						      \
-    int not_first_call = __sigsetjmp ((struct __jmp_buf_tag *) (void *)	      \
-				      __cancel_buf.__cancel_jmp_buf, 0);      \
-    if (__builtin_expect (not_first_call, 0))				      \
+    int __not_first_call = __sigsetjmp ((struct __jmp_buf_tag *) (void *)     \
+					__cancel_buf.__cancel_jmp_buf, 0);    \
+    if (__builtin_expect (__not_first_call, 0))				      \
       {									      \
 	__cancel_routine (__cancel_arg);				      \
 	__pthread_unwind_next (&__cancel_buf);				      \

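The renames above matter because pthread_cleanup_push expands textually into application code, and only reserved identifiers (here, the __ prefix) may be introduced there. A sketch of the silent clash the unprefixed name could cause (hypothetical user code, not part of this commit):

  #include <pthread.h>

  static void
  cleanup (void *arg)
  {
    (void) arg;                 /* release the resource */
  }

  void
  f (void *resource)
  {
    int not_first_call = 42;    /* legal: the name is not reserved */
    pthread_cleanup_push (cleanup, resource);
    /* Between push and pop we are inside the macro's block.  With the
       old expansion, `not_first_call' here named the macro's internal
       __sigsetjmp result (normally 0), shadowing the variable above.
       After the fix only the reserved `__not_first_call' is visible.  */
    if (not_first_call == 42)
      not_first_call = 0;       /* with the old macro this never ran */
    pthread_cleanup_pop (1);
  }
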
Modified: fsf/trunk/libc/posix/unistd.h
==============================================================================
--- fsf/trunk/libc/posix/unistd.h (original)
+++ fsf/trunk/libc/posix/unistd.h Wed Sep 22 00:03:13 2010
@@ -1020,6 +1020,9 @@
      __THROW __nonnull ((1)) __wur;
 # endif
 
+#endif /* Use BSD || X/Open Unix || POSIX 2008.  */
+
+#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED || defined __USE_XOPEN2K
 
 /* Truncate the file FD is open on to LENGTH bytes.  */
 # ifndef __USE_FILE_OFFSET64

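For BZ #12037 the hunk above restores ftruncate to the BSD/XPG/POSIX.1-2001 feature block instead of leaving it under the guard introduced on 2010-01-11. Assuming the usual features.h mapping, where _POSIX_C_SOURCE >= 200112L turns on __USE_XOPEN2K, a strictly conforming program like this sketch sees the declaration again:

  /* Builds with: gcc -std=c99 -c demo.c  */
  #define _POSIX_C_SOURCE 200112L
  #include <fcntl.h>
  #include <unistd.h>

  int
  empty_file (const char *path)
  {
    int fd = open (path, O_WRONLY);
    if (fd < 0)
      return -1;
    int rc = ftruncate (fd, 0); /* declared again at this feature level */
    close (fd);
    return rc;
  }
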
Added: fsf/trunk/libc/sysdeps/powerpc/powerpc32/ppca2/memcpy.S
==============================================================================
--- fsf/trunk/libc/sysdeps/powerpc/powerpc32/ppca2/memcpy.S (added)
+++ fsf/trunk/libc/sysdeps/powerpc/powerpc32/ppca2/memcpy.S Wed Sep 22 00:03:13 2010
@@ -1,0 +1,511 @@
+/* Optimized memcpy implementation for PowerPC A2.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Michael Brutman <brutman@xxxxxxxxxx>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+#define PREFETCH_AHEAD 4        /* number of cache lines to prefetch ahead of SRC  */
+#define ZERO_AHEAD 2            /* number of cache lines to zero ahead on DST  */
+
+	.machine  ppca2
+EALIGN (BP_SYM (memcpy), 5, 0)
+	CALL_MCOUNT
+
+	dcbt    0,r4            /* Prefetch ONE SRC cacheline  */
+	cmplwi  cr1,r5,16       /* is size < 16 ?  */
+	mr      r6,r3           /* Copy dest reg to r6; */
+	blt+    cr1,L(shortcopy)
+
+
+	/* Big copy (16 bytes or more)
+
+	   Figure out how far to the nearest quadword boundary, or if we are
+	   on one already.
+
+	   r3 - return value (always)
+	   r4 - current source addr
+	   r5 - copy length
+	   r6 - current dest addr
+	*/
+
+	neg     r8,r3           /* LS 4 bits = # bytes to 16-byte dest bdry  */
+	clrlwi  r8,r8,32-4      /* align to 16-byte boundary  */
+	sub     r7,r4,r3        /* compute offset to src from dest */
+	cmplwi  cr0,r8,0        /* Were we aligned on a 16 byte bdy? */
+	beq+    L(dst_aligned)
+
+
+
+	/* Destination is not aligned on quadword boundary.  Get us to one.
+
+	   r3 - return value (always)
+	   r4 - current source addr
+	   r5 - copy length
+	   r6 - current dest addr
+	   r7 - offset to src from dest
+	   r8 - number of bytes to quadword boundary
+	*/
+
+	mtcrf   0x01,r8         /* put #bytes to boundary into cr7  */
+	subf    r5,r8,r5        /* adjust remaining len */
+
+	bf      cr7*4+3,1f
+	lbzx    r0,r7,r6        /* copy 1 byte addr */
+	stb     r0,0(r6)
+	addi    r6,r6,1
+1:
+	bf      cr7*4+2,2f
+	lhzx    r0,r7,r6        /* copy 2 byte addr */
+	sth     r0,0(r6)
+	addi    r6,r6,2
+2:
+	bf      cr7*4+1,4f
+	lwzx    r0,r7,r6        /* copy 4 byte addr */
+	stw     r0,0(r6)
+	addi    r6,r6,4
+4:
+	bf      cr7*4+0,8f
+	lfdx    r0,r7,r6        /* copy 8 byte addr */
+	stfd    r0,0(r6)
+	addi    r6,r6,8
+8:
+	add     r4,r7,r6        /* update src addr */
+
+
+
+	/* Dest is quadword aligned now.
+
+	   Lots of decisions to make.  If we are copying less than a cache
+	   line we won't be here long.  If we are not on a cache line
+	   boundary we need to get there.  And then we need to figure out
+	   how many cache lines ahead to pre-touch.
+
+	   r3 - return value (always)
+	   r4 - current source addr
+	   r5 - copy length
+	   r6 - current dest addr
+	*/
+
+
+	.align  4
+L(dst_aligned):
+
+
+#ifdef SHARED
+	mflr    r0
+/* Establish GOT addressability so we can load __cache_line_size from
+   static storage; the value was set from the aux vector at startup.  */
+	bcl     20,31,1f
+1:
+	mflr    r9
+	addis   r9,r9,__cache_line_size-1b@ha
+	lwz     r9,__cache_line_size-1b@l(r9)
+	mtlr    r0
+#else
+/* Load __cache_line_size from static storage; the value was set
+   from the aux vector at startup.  */
+	lis     r9,__cache_line_size@ha
+	lwz     r9,__cache_line_size@l(r9)
+#endif
+
+	cmplwi  cr5, r9, 0
+	bne+    cr5,L(cachelineset)
+	li      r9,64
+
+
+
+L(cachelineset):
+
+	addi   r10,r9,-1
+
+	cmpw   cr5,r5,r10       /* Less than a cacheline to go? */
+
+	neg     r7,r6           /* How far to next cacheline bdy? */
+
+	addi    r6,r6,-8        /* prepare for stdu  */
+	cmpwi   cr0,r9,128
+	addi    r4,r4,-8        /* prepare for ldu  */
+
+
+	ble+    cr5,L(lessthancacheline)
+
+	beq-    cr0,L(big_lines) /* 128 byte line code */
+
+
+
+
+	/* More than a cacheline left to go, and using 64 byte cachelines */
+
+	clrlwi  r7,r7,32-6      /* How far to next cacheline bdy? */
+
+	cmplwi  cr6,r7,0        /* Are we on a cacheline bdy already? */
+
+	/* Reduce total len by what it takes to get to the next cache line */
+	subf    r5,r7,r5
+	srwi    r7,r7,4         /* How many qws to get to the line bdy? */
+
+	/* How many full cache lines to copy after getting to a line bdy? */
+	srwi    r10,r5,6
+
+	cmplwi  r10,0           /* If no full cache lines to copy ... */
+	li      r11,0           /* number of cache lines to copy with prefetch  */
+	beq     L(nocacheprefetch)
+
+
+	/* We are here because we have at least one full cache line to copy,
+	   and therefore some pre-touching to do. */
+
+	cmplwi  r10,PREFETCH_AHEAD
+	li      r12,64+8        /* prefetch distance  */
+	ble     L(lessthanmaxprefetch)
+
+	/* We can only do so much pre-fetching.  R11 will have the count of
+	   lines left to prefetch after the initial batch of prefetches
+	   are executed. */
+
+	subi    r11,r10,PREFETCH_AHEAD
+	li      r10,PREFETCH_AHEAD
+
+L(lessthanmaxprefetch):
+	mtctr   r10
+
+	/* At this point r10/ctr hold the number of lines to prefetch in this
+	   initial batch, and r11 holds any remainder. */
+
+L(prefetchSRC):
+	dcbt    r12,r4
+	addi    r12,r12,64
+	bdnz    L(prefetchSRC)
+
+
+	/* Prefetching is done, or was not needed.
+
+	   cr6 - are we on a cacheline boundary already?
+	   r7  - number of quadwords to the next cacheline boundary
+	*/
+
+L(nocacheprefetch):
+	mtctr   r7
+
+	cmplwi  cr1,r5,64   /* Less than a cache line to copy? */
+
+	/* How many bytes are left after we copy whatever full
+	   cache lines we can get? */
+	clrlwi  r5,r5,32-6
+
+	beq     cr6,L(cachelinealigned)
+
+
+	/* Copy quadwords up to the next cacheline boundary */
+
+L(aligntocacheline):
+	lfd     fp9,0x08(r4)
+	lfdu    fp10,0x10(r4)
+	stfd    fp9,0x08(r6)
+	stfdu   fp10,0x10(r6)
+	bdnz    L(aligntocacheline)
+
+
+	.align 4
+L(cachelinealigned):            /* copy whole cache lines  */
+
+	blt-    cr1,L(lessthancacheline) /* size <64  */
+
+L(outerloop):
+	cmpwi   r11,0
+	mtctr   r11
+	beq-    L(endloop)
+
+	li      r11,64*ZERO_AHEAD +8    /* DCBZ dist  */
+
+	.align  4
+	/* Copy whole cachelines, optimized by prefetching SRC cacheline  */
+L(loop):                        /* Copy aligned body  */
+	dcbt    r12,r4          /* PREFETCH SOURCE some cache lines ahead  */
+	lfd     fp9,  0x08(r4)
+	dcbz    r11,r6
+	lfd     fp10, 0x10(r4)
+	lfd     fp11, 0x18(r4)
+	lfd     fp12, 0x20(r4)
+	stfd    fp9,  0x08(r6)
+	stfd    fp10, 0x10(r6)
+	stfd    fp11, 0x18(r6)
+	stfd    fp12, 0x20(r6)
+	lfd     fp9,  0x28(r4)
+	lfd     fp10, 0x30(r4)
+	lfd     fp11, 0x38(r4)
+	lfdu    fp12, 0x40(r4)
+	stfd    fp9,  0x28(r6)
+	stfd    fp10, 0x30(r6)
+	stfd    fp11, 0x38(r6)
+	stfdu   fp12, 0x40(r6)
+
+	bdnz    L(loop)
+
+
+L(endloop):
+	cmpwi   r10,0
+	beq-    L(endloop2)
+	mtctr   r10
+
+L(loop2):                       /* Copy aligned body  */
+	lfd     fp9,  0x08(r4)
+	lfd     fp10, 0x10(r4)
+	lfd     fp11, 0x18(r4)
+	lfd     fp12, 0x20(r4)
+	stfd    fp9,  0x08(r6)
+	stfd    fp10, 0x10(r6)
+	stfd    fp11, 0x18(r6)
+	stfd    fp12, 0x20(r6)
+	lfd     fp9,  0x28(r4)
+	lfd     fp10, 0x30(r4)
+	lfd     fp11, 0x38(r4)
+	lfdu    fp12, 0x40(r4)
+	stfd    fp9,  0x28(r6)
+	stfd    fp10, 0x30(r6)
+	stfd    fp11, 0x38(r6)
+	stfdu   fp12, 0x40(r6)
+
+	bdnz    L(loop2)
+L(endloop2):
+
+
+	.align  4
+L(lessthancacheline):           /* Was there less than a cache line to do?  */
+	cmplwi  cr0,r5,16
+	srwi    r7,r5,4         /* divide size by 16  */
+	blt-    L(do_lt16)
+	mtctr   r7
+
+L(copy_remaining):
+	lfd     fp9,  0x08(r4)
+	lfdu    fp10, 0x10(r4)
+	stfd    fp9,  0x08(r6)
+	stfdu   fp10, 0x10(r6)
+	bdnz    L(copy_remaining)
+
+L(do_lt16):                     /* less than 16 ?  */
+	cmplwi  cr0,r5,0        /* copy remaining bytes (0-15)  */
+	beqlr+                  /* nothing left to copy  */
+	addi    r4,r4,8
+	addi    r6,r6,8
+
+L(shortcopy):                   /* SIMPLE COPY to handle size <= 15 bytes  */
+	mtcrf   0x01,r5
+	sub     r7,r4,r6
+	bf-     cr7*4+0,8f
+	lfdx    fp9,r7,r6       /* copy 8 byte  */
+	stfd    fp9,0(r6)
+	addi    r6,r6,8
+8:
+	bf      cr7*4+1,4f
+	lwzx    r0,r7,r6        /* copy 4 byte  */
+	stw     r0,0(r6)
+	addi    r6,r6,4
+4:
+	bf      cr7*4+2,2f
+	lhzx    r0,r7,r6        /* copy 2 byte  */
+	sth     r0,0(r6)
+	addi    r6,r6,2
+2:
+	bf      cr7*4+3,1f
+	lbzx    r0,r7,r6        /* copy 1 byte  */
+	stb     r0,0(r6)
+1:
+	blr
+
+
+
+
+
+	/* Similar to above, but for use with 128 byte lines. */
+
+
+L(big_lines):
+
+	clrlwi  r7,r7,32-7      /* How far to next cacheline bdy? */
+
+	cmplwi  cr6,r7,0        /* Are we on a cacheline bdy already? */
+
+	/* Reduce total len by what it takes to get to the next cache line */
+	subf    r5,r7,r5
+	srwi    r7,r7,4         /* How many qw to get to the line bdy? */
+
+	/* How many full cache lines to copy after getting to a line bdy? */
+	srwi    r10,r5,7
+
+	cmplwi  r10,0           /* If no full cache lines to copy ... */
+	li      r11,0           /* number of cache lines to copy with prefetch  */
+	beq     L(nocacheprefetch_128)
+
+
+	/* We are here because we have at least one full cache line to copy,
+	   and therefore some pre-touching to do. */
+
+	cmplwi  r10,PREFETCH_AHEAD
+	li      r12,128+8       /* prefetch distance  */
+	ble     L(lessthanmaxprefetch_128)
+
+	/* We can only do so much pre-fetching.  R11 will have the count of
+	   lines left to prefetch after the initial batch of prefetches
+	   are executed. */
+
+	subi    r11,r10,PREFETCH_AHEAD
+	li      r10,PREFETCH_AHEAD
+
+L(lessthanmaxprefetch_128):
+	mtctr   r10
+
+	/* At this point r10/ctr hold the number of lines to prefetch in this
+	   initial batch, and r11 holds any remainder. */
+
+L(prefetchSRC_128):
+	dcbt    r12,r4
+	addi    r12,r12,128
+	bdnz    L(prefetchSRC_128)
+
+
+	/* Prefetching is done, or was not needed.
+
+	   cr6 - are we on a cacheline boundary already?
+	   r7  - number of quadwords to the next cacheline boundary
+	*/
+
+L(nocacheprefetch_128):
+	mtctr   r7
+
+	cmplwi  cr1,r5,128  /* Less than a cache line to copy? */
+
+	/* How many bytes are left after we copy whatever full
+	   cache lines we can get? */
+	clrlwi  r5,r5,32-7
+
+	beq     cr6,L(cachelinealigned_128)
+
+
+	/* Copy quadwords up to the next cacheline boundary */
+
+L(aligntocacheline_128):
+	lfd     fp9,0x08(r4)
+	lfdu    fp10,0x10(r4)
+	stfd    fp9,0x08(r6)
+	stfdu   fp10,0x10(r6)
+	bdnz    L(aligntocacheline_128)
+
+
+L(cachelinealigned_128):        /* copy whole cache lines  */
+
+	blt-    cr1,L(lessthancacheline) /* size <128  */
+
+L(outerloop_128):
+	cmpwi   r11,0
+	mtctr   r11
+	beq-    L(endloop_128)
+
+	li      r11,128*ZERO_AHEAD +8    /* DCBZ dist  */
+
+	.align  4
+	/* Copy whole cachelines, optimized by prefetching SRC cacheline  */
+L(loop_128):                    /* Copy aligned body  */
+	dcbt    r12,r4          /* PREFETCH SOURCE some cache lines ahead  */
+	lfd     fp9,  0x08(r4)
+	dcbz    r11,r6
+	lfd     fp10, 0x10(r4)
+	lfd     fp11, 0x18(r4)
+	lfd     fp12, 0x20(r4)
+	stfd    fp9,  0x08(r6)
+	stfd    fp10, 0x10(r6)
+	stfd    fp11, 0x18(r6)
+	stfd    fp12, 0x20(r6)
+	lfd     fp9,  0x28(r4)
+	lfd     fp10, 0x30(r4)
+	lfd     fp11, 0x38(r4)
+	lfd     fp12, 0x40(r4)
+	stfd    fp9,  0x28(r6)
+	stfd    fp10, 0x30(r6)
+	stfd    fp11, 0x38(r6)
+	stfd    fp12, 0x40(r6)
+	lfd     fp9,  0x48(r4)
+	lfd     fp10, 0x50(r4)
+	lfd     fp11, 0x58(r4)
+	lfd     fp12, 0x60(r4)
+	stfd    fp9,  0x48(r6)
+	stfd    fp10, 0x50(r6)
+	stfd    fp11, 0x58(r6)
+	stfd    fp12, 0x60(r6)
+	lfd     fp9,  0x68(r4)
+	lfd     fp10, 0x70(r4)
+	lfd     fp11, 0x78(r4)
+	lfdu    fp12, 0x80(r4)
+	stfd    fp9,  0x68(r6)
+	stfd    fp10, 0x70(r6)
+	stfd    fp11, 0x78(r6)
+	stfdu   fp12, 0x80(r6)
+
+	bdnz    L(loop_128)
+
+
+L(endloop_128):
+	cmpwi   r10,0
+	beq-    L(endloop2_128)
+	mtctr   r10
+
+L(loop2_128):                   /* Copy aligned body  */
+	lfd     fp9,  0x08(r4)
+	lfd     fp10, 0x10(r4)
+	lfd     fp11, 0x18(r4)
+	lfd     fp12, 0x20(r4)
+	stfd    fp9,  0x08(r6)
+	stfd    fp10, 0x10(r6)
+	stfd    fp11, 0x18(r6)
+	stfd    fp12, 0x20(r6)
+	lfd     fp9,  0x28(r4)
+	lfd     fp10, 0x30(r4)
+	lfd     fp11, 0x38(r4)
+	lfd     fp12, 0x40(r4)
+	stfd    fp9,  0x28(r6)
+	stfd    fp10, 0x30(r6)
+	stfd    fp11, 0x38(r6)
+	stfd    fp12, 0x40(r6)
+	lfd     fp9,  0x48(r4)
+	lfd     fp10, 0x50(r4)
+	lfd     fp11, 0x58(r4)
+	lfd     fp12, 0x60(r4)
+	stfd    fp9,  0x48(r6)
+	stfd    fp10, 0x50(r6)
+	stfd    fp11, 0x58(r6)
+	stfd    fp12, 0x60(r6)
+	lfd     fp9,  0x68(r4)
+	lfd     fp10, 0x70(r4)
+	lfd     fp11, 0x78(r4)
+	lfdu    fp12, 0x80(r4)
+	stfd    fp9,  0x68(r6)
+	stfd    fp10, 0x70(r6)
+	stfd    fp11, 0x78(r6)
+	stfdu   fp12, 0x80(r6)
+	bdnz    L(loop2_128)
+L(endloop2_128):
+
+	b       L(lessthancacheline)
+
+
+END (BP_SYM (memcpy))
+libc_hidden_builtin_def (memcpy)

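Condensed from the block comments in the routine above, the copy strategy is roughly the following C. This is an illustrative sketch only: the real code keeps everything in registers, uses dcbt/dcbz rather than builtins, and unrolls the line copy with floating-point loads and stores.

  #include <stdint.h>
  #include <string.h>

  /* Rough shape of the A2 memcpy; LINE is 64 or 128 (a power of two).  */
  static void *
  memcpy_a2_sketch (void *dst, const void *src, size_t len, size_t line)
  {
    char *d = dst;
    const char *s = src;

    if (len >= 16)
      {
        /* 1. Copy 1/2/4/8 bytes until DST is 16-byte aligned.  */
        size_t head = (-(uintptr_t) d) & 15;
        memcpy (d, s, head);
        d += head; s += head; len -= head;
      }

    if (len >= line)
      {
        /* 2. Copy quadwords until DST sits on a cache-line boundary.  */
        size_t to_line = (-(uintptr_t) d) & (line - 1);
        memcpy (d, s, to_line);
        d += to_line; s += to_line; len -= to_line;

        /* 3. Main loop: pre-touch SRC a few lines ahead (dcbt), zero
           the DST line about to be overwritten (dcbz), then copy a
           whole line at a time.  */
        while (len >= line)
          {
            __builtin_prefetch (s + 4 * line);  /* PREFETCH_AHEAD = 4 */
            memcpy (d, s, line);                /* unrolled 8-byte copies */
            d += line; s += line; len -= line;
          }
      }

    /* 4. Tail: 16-byte chunks, then the final 8/4/2/1 bytes.  */
    memcpy (d, s, len);
    return dst;
  }
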
Added: fsf/trunk/libc/sysdeps/powerpc/powerpc64/ppca2/memcpy.S
==============================================================================
--- fsf/trunk/libc/sysdeps/powerpc/powerpc64/ppca2/memcpy.S (added)
+++ fsf/trunk/libc/sysdeps/powerpc/powerpc64/ppca2/memcpy.S Wed Sep 22 00:03:13 2010
@@ -1,0 +1,501 @@
+/* Optimized memcpy implementation for PowerPC A2.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Michael Brutman <brutman@xxxxxxxxxx>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+#define PREFETCH_AHEAD 4        /* number of cache lines to prefetch ahead of SRC  */
+#define ZERO_AHEAD 2            /* number of cache lines to zero ahead on DST  */
+
+	.section        ".toc","aw"
+.LC0:
+	.tc __cache_line_size[TC],__cache_line_size
+	.section        ".text"
+	.align 2
+
+
+	.machine  ppca2
+EALIGN (BP_SYM (memcpy), 5, 0)
+	CALL_MCOUNT 3
+
+	dcbt    0,r4            /* Prefetch ONE SRC cacheline  */
+	cmpldi  cr1,r5,16       /* is size < 16 ?  */
+	mr      r6,r3           /* Copy dest reg to r6; */
+	blt+    cr1,L(shortcopy)
+
+
+	/* Big copy (16 bytes or more)
+
+	   Figure out how far to the nearest quadword boundary, or if we are
+	   on one already.  Also get the cache line size.
+
+	   r3 - return value (always)
+	   r4 - current source addr
+	   r5 - copy length
+	   r6 - current dest addr
+	*/
+
+	neg     r8,r3           /* LS 4 bits = # bytes to 16-byte dest bdry  */
+	ld      r9,.LC0@toc(r2) /* Get cache line size (part 1) */
+	clrldi  r8,r8,64-4      /* align to 16-byte boundary  */
+	sub     r7,r4,r3        /* compute offset to src from dest */
+	lwz     r9,0(r9)        /* Get cache line size (part 2) */
+	cmpldi  cr0,r8,0        /* Were we aligned on a 16 byte bdy? */
+	addi    r10,r9,-1       /* Cache line mask */
+	beq+    L(dst_aligned)
+
+
+
+	/* Destination is not aligned on quadword boundary.  Get us to one.
+
+	   r3 - return value (always)
+	   r4 - current source addr
+	   r5 - copy length
+	   r6 - current dest addr
+	   r7 - offset to src from dest
+	   r8 - number of bytes to quadword boundary
+	*/
+
+	mtcrf   0x01,r8         /* put #bytes to boundary into cr7  */
+	subf    r5,r8,r5        /* adjust remaining len */
+
+	bf      cr7*4+3,1f
+	lbzx    r0,r7,r6        /* copy 1 byte addr */
+	stb     r0,0(r6)
+	addi    r6,r6,1
+1:
+	bf      cr7*4+2,2f
+	lhzx    r0,r7,r6        /* copy 2 byte addr */
+	sth     r0,0(r6)
+	addi    r6,r6,2
+2:
+	bf      cr7*4+1,4f
+	lwzx    r0,r7,r6        /* copy 4 byte addr */
+	stw     r0,0(r6)
+	addi    r6,r6,4
+4:
+	bf      cr7*4+0,8f
+	ldx     r0,r7,r6        /* copy 8 byte addr */
+	std     r0,0(r6)
+	addi    r6,r6,8
+8:
+	add     r4,r7,r6        /* update src addr */
+
+
+
+	/* Dest is quadword aligned now.
+
+	   Lots of decisions to make.  If we are copying less than a cache
+	   line we won't be here long.  If we are not on a cache line
+	   boundary we need to get there.  And then we need to figure out
+	   how many cache lines ahead to pre-touch.
+
+	   r3 - return value (always)
+	   r4 - current source addr
+	   r5 - copy length
+	   r6 - current dest addr
+	*/
+
+
+	.align 4
+L(dst_aligned):
+
+
+	cmpd   cr5,r5,r10       /* Less than a cacheline to go? */
+
+	neg     r7,r6           /* How far to next cacheline bdy? */
+
+	addi    r6,r6,-8        /* prepare for stdu  */
+	cmpdi   cr0,r9,128
+	addi    r4,r4,-8        /* prepare for ldu  */
+
+
+	ble+    cr5,L(lessthancacheline)
+
+	beq-    cr0,L(big_lines) /* 128 byte line code */
+
+
+
+	/* More than a cacheline left to go, and using 64 byte cachelines */
+
+	clrldi  r7,r7,64-6      /* How far to next cacheline bdy? */
+
+	cmpldi  cr6,r7,0        /* Are we on a cacheline bdy already? */
+
+	/* Reduce total len by what it takes to get to the next cache line */
+	subf    r5,r7,r5
+	srdi    r7,r7,4         /* How many qws to get to the line bdy? */
+
+	/* How many full cache lines to copy after getting to a line bdy? */
+	srdi    r10,r5,6
+
+	cmpldi  r10,0           /* If no full cache lines to copy ... */
+	li      r11,0           /* number of cache lines to copy with prefetch  */
+	beq     L(nocacheprefetch)
+
+
+	/* We are here because we have at least one full cache line to copy,
+	   and therefore some pre-touching to do. */
+
+	cmpldi  r10,PREFETCH_AHEAD
+	li      r12,64+8        /* prefetch distance  */
+	ble     L(lessthanmaxprefetch)
+
+	/* We can only do so much pre-fetching.  R11 will have the count of
+	   lines left to prefetch after the initial batch of prefetches
+	   are executed. */
+
+	subi    r11,r10,PREFETCH_AHEAD
+	li      r10,PREFETCH_AHEAD
+
+L(lessthanmaxprefetch):
+	mtctr   r10
+
+	/* At this point r10/ctr hold the number of lines to prefetch in this
+	   initial batch, and r11 holds any remainder. */
+
+L(prefetchSRC):
+	dcbt    r12,r4
+	addi    r12,r12,64
+	bdnz    L(prefetchSRC)
+
+
+	/* Prefetching is done, or was not needed.
+
+	   cr6 - are we on a cacheline boundary already?
+	   r7  - number of quadwords to the next cacheline boundary
+	*/
+
+L(nocacheprefetch):
+	mtctr   r7
+
+	cmpldi  cr1,r5,64   /* Less than a cache line to copy? */
+
+	/* How many bytes are left after we copy whatever full
+	   cache lines we can get? */
+	clrldi  r5,r5,64-6
+
+	beq     cr6,L(cachelinealigned)
+
+
+	/* Copy quadwords up to the next cacheline boundary */
+
+L(aligntocacheline):
+	ld      r9,0x08(r4)
+	ld      r7,0x10(r4)
+	addi    r4,r4,0x10
+	std     r9,0x08(r6)
+	stdu    r7,0x10(r6)
+	bdnz    L(aligntocacheline)
+
+
+	.align 4
+L(cachelinealigned):            /* copy whole cache lines  */
+
+	blt-    cr1,L(lessthancacheline) /* size <64  */
+
+L(outerloop):
+	cmpdi   r11,0
+	mtctr   r11
+	beq-    L(endloop)
+
+	li      r11,64*ZERO_AHEAD +8    /* DCBZ dist  */
+
+	.align  4
+	/* Copy whole cachelines, optimized by prefetching SRC cacheline  */
+L(loop):                        /* Copy aligned body  */
+	dcbt    r12,r4          /* PREFETCH SOURCE some cache lines ahead  */
+	ld      r9, 0x08(r4)
+	dcbz    r11,r6
+	ld      r7, 0x10(r4)
+	ld      r8, 0x18(r4)
+	ld      r0, 0x20(r4)
+	std     r9, 0x08(r6)
+	std     r7, 0x10(r6)
+	std     r8, 0x18(r6)
+	std     r0, 0x20(r6)
+	ld      r9, 0x28(r4)
+	ld      r7, 0x30(r4)
+	ld      r8, 0x38(r4)
+	ld      r0, 0x40(r4)
+	addi    r4, r4,0x40
+	std     r9, 0x28(r6)
+	std     r7, 0x30(r6)
+	std     r8, 0x38(r6)
+	stdu    r0, 0x40(r6)
+
+	bdnz    L(loop)
+
+
+L(endloop):
+	cmpdi   r10,0
+	beq-    L(endloop2)
+	mtctr   r10
+
+L(loop2):                       /* Copy aligned body  */
+	ld      r9, 0x08(r4)
+	ld      r7, 0x10(r4)
+	ld      r8, 0x18(r4)
+	ld      r0, 0x20(r4)
+	std     r9, 0x08(r6)
+	std     r7, 0x10(r6)
+	std     r8, 0x18(r6)
+	std     r0, 0x20(r6)
+	ld      r9, 0x28(r4)
+	ld      r7, 0x30(r4)
+	ld      r8, 0x38(r4)
+	ld      r0, 0x40(r4)
+	addi    r4, r4,0x40
+	std     r9, 0x28(r6)
+	std     r7, 0x30(r6)
+	std     r8, 0x38(r6)
+	stdu    r0, 0x40(r6)
+
+	bdnz    L(loop2)
+L(endloop2):
+
+
+	.align 4
+L(lessthancacheline):           /* Was there less than a cache line to do?  */
+	cmpldi  cr0,r5,16
+	srdi    r7,r5,4         /* divide size by 16  */
+	blt-    L(do_lt16)
+	mtctr   r7
+
+L(copy_remaining):
+	ld      r8,0x08(r4)
+	ld      r7,0x10(r4)
+	addi    r4,r4,0x10
+	std     r8,0x08(r6)
+	stdu    r7,0x10(r6)
+	bdnz    L(copy_remaining)
+
+L(do_lt16):                     /* less than 16 ?  */
+	cmpldi  cr0,r5,0        /* copy remaining bytes (0-15)  */
+	beqlr+                  /* nothing left to copy  */
+	addi    r4,r4,8
+	addi    r6,r6,8
+
+L(shortcopy):                   /* SIMPLE COPY to handle size <= 15 bytes  */
+	mtcrf   0x01,r5
+	sub     r7,r4,r6
+	bf-     cr7*4+0,8f
+	ldx     r0,r7,r6        /* copy 8 byte  */
+	std     r0,0(r6)
+	addi    r6,r6,8
+8:
+	bf      cr7*4+1,4f
+	lwzx    r0,r7,r6        /* copy 4 byte  */
+	stw     r0,0(r6)
+	addi    r6,r6,4
+4:
+	bf      cr7*4+2,2f
+	lhzx    r0,r7,r6        /* copy 2 byte  */
+	sth     r0,0(r6)
+	addi    r6,r6,2
+2:
+	bf      cr7*4+3,1f
+	lbzx    r0,r7,r6        /* copy 1 byte  */
+	stb     r0,0(r6)
+1:
+	blr
+
+
+
+
+
+	/* Similar to above, but for use with 128 byte lines. */
+
+
+L(big_lines):
+
+	clrldi  r7,r7,64-7      /* How far to next cacheline bdy? */
+
+	cmpldi  cr6,r7,0        /* Are we on a cacheline bdy already? */
+
+	/* Reduce total len by what it takes to get to the next cache line */
+	subf    r5,r7,r5
+	srdi    r7,r7,4         /* How many qws to get to the line bdy? */
+
+	/* How many full cache lines to copy after getting to a line bdy? */
+	srdi    r10,r5,7
+
+	cmpldi  r10,0           /* If no full cache lines to copy ... */
+	li      r11,0           /* number of cache lines to copy with prefetch  */
+	beq     L(nocacheprefetch_128)
+
+
+	/* We are here because we have at least one full cache line to copy,
+	   and therefore some pre-touching to do. */
+
+	cmpldi  r10,PREFETCH_AHEAD
+	li      r12,128+8       /* prefetch distance  */
+	ble     L(lessthanmaxprefetch_128)
+
+	/* We can only do so much pre-fetching.  R11 will have the count of
+	   lines left to prefetch after the initial batch of prefetches
+	   are executed. */
+
+	subi    r11,r10,PREFETCH_AHEAD
+	li      r10,PREFETCH_AHEAD
+
+L(lessthanmaxprefetch_128):
+	mtctr   r10
+
+	/* At this point r10/ctr hold the number of lines to prefetch in this
+	   initial batch, and r11 holds any remainder. */
+
+L(prefetchSRC_128):
+	dcbt    r12,r4
+	addi    r12,r12,128
+	bdnz    L(prefetchSRC_128)
+
+
+	/* Prefetching is done, or was not needed.
+
+	   cr6 - are we on a cacheline boundary already?
+	   r7  - number of quadwords to the next cacheline boundary
+	*/
+
+L(nocacheprefetch_128):
+	mtctr   r7
+
+	cmpldi  cr1,r5,128  /* Less than a cache line to copy? */
+
+	/* How many bytes are left after we copy whatever full
+	   cache lines we can get? */
+	clrldi  r5,r5,64-7
+
+	beq     cr6,L(cachelinealigned_128)
+
+
+	/* Copy quadwords up to the next cacheline boundary */
+
+L(aligntocacheline_128):
+	ld      r9,0x08(r4)
+	ld      r7,0x10(r4)
+	addi    r4,r4,0x10
+	std     r9,0x08(r6)
+	stdu    r7,0x10(r6)
+	bdnz    L(aligntocacheline_128)
+
+
+L(cachelinealigned_128):        /* copy whole cache lines  */
+
+	blt-    cr1,L(lessthancacheline) /* size <128  */
+
+L(outerloop_128):
+	cmpdi   r11,0
+	mtctr   r11
+	beq-    L(endloop_128)
+
+	li      r11,128*ZERO_AHEAD +8    /* DCBZ dist  */
+
+	.align  4
+	/* Copy whole cachelines, optimized by prefetching SRC cacheline  */
+L(loop_128):                    /* Copy aligned body  */
+	dcbt    r12,r4          /* PREFETCH SOURCE some cache lines ahead  */
+	ld      r9, 0x08(r4)
+	dcbz    r11,r6
+	ld      r7, 0x10(r4)
+	ld      r8, 0x18(r4)
+	ld      r0, 0x20(r4)
+	std     r9, 0x08(r6)
+	std     r7, 0x10(r6)
+	std     r8, 0x18(r6)
+	std     r0, 0x20(r6)
+	ld      r9, 0x28(r4)
+	ld      r7, 0x30(r4)
+	ld      r8, 0x38(r4)
+	ld      r0, 0x40(r4)
+	std     r9, 0x28(r6)
+	std     r7, 0x30(r6)
+	std     r8, 0x38(r6)
+	std     r0, 0x40(r6)
+	ld      r9, 0x48(r4)
+	ld      r7, 0x50(r4)
+	ld      r8, 0x58(r4)
+	ld      r0, 0x60(r4)
+	std     r9, 0x48(r6)
+	std     r7, 0x50(r6)
+	std     r8, 0x58(r6)
+	std     r0, 0x60(r6)
+	ld      r9, 0x68(r4)
+	ld      r7, 0x70(r4)
+	ld      r8, 0x78(r4)
+	ld      r0, 0x80(r4)
+	addi    r4, r4,0x80
+	std     r9, 0x68(r6)
+	std     r7, 0x70(r6)
+	std     r8, 0x78(r6)
+	stdu    r0, 0x80(r6)
+
+	bdnz    L(loop_128)
+
+
+L(endloop_128):
+	cmpdi   r10,0
+	beq-    L(endloop2_128)
+	mtctr   r10
+
+L(loop2_128):                       /* Copy aligned body  */
+	ld      r9, 0x08(r4)
+	ld      r7, 0x10(r4)
+	ld      r8, 0x18(r4)
+	ld      r0, 0x20(r4)
+	std     r9, 0x08(r6)
+	std     r7, 0x10(r6)
+	std     r8, 0x18(r6)
+	std     r0, 0x20(r6)
+	ld      r9, 0x28(r4)
+	ld      r7, 0x30(r4)
+	ld      r8, 0x38(r4)
+	ld      r0, 0x40(r4)
+	std     r9, 0x28(r6)
+	std     r7, 0x30(r6)
+	std     r8, 0x38(r6)
+	std     r0, 0x40(r6)
+	ld      r9, 0x48(r4)
+	ld      r7, 0x50(r4)
+	ld      r8, 0x58(r4)
+	ld      r0, 0x60(r4)
+	std     r9, 0x48(r6)
+	std     r7, 0x50(r6)
+	std     r8, 0x58(r6)
+	std     r0, 0x60(r6)
+	ld      r9, 0x68(r4)
+	ld      r7, 0x70(r4)
+	ld      r8, 0x78(r4)
+	ld      r0, 0x80(r4)
+	addi    r4, r4,0x80
+	std     r9, 0x68(r6)
+	std     r7, 0x70(r6)
+	std     r8, 0x78(r6)
+	stdu    r0, 0x80(r6)
+
+	bdnz    L(loop2_128)
+L(endloop2_128):
+
+	b       L(lessthancacheline)
+
+
+END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)

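The 64-bit variant reaches __cache_line_size through a TOC slot (the .LC0 entry above), where the 32-bit shared build had to construct a GOT pointer with the bcl 20,31 sequence. Either way the asm amounts to an ordinary global read; an illustrative C equivalent, assuming the variable is an int as the lwz in both versions suggests:

  /* Set from the AT_DCACHEBSIZE aux-vector entry during startup.  */
  extern int __cache_line_size;

  static inline int
  effective_line_size (void)
  {
    int line = __cache_line_size;
    /* The 32-bit routine falls back to 64 when the kernel reported
       nothing; the 64-bit path above uses the loaded value directly.  */
    return line != 0 ? line : 64;
  }
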
Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/getdents.c
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/getdents.c (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/getdents.c Wed Sep 22 00:03:13 2010
@@ -1,4 +1,4 @@
-/* Copyright (C) 1993, 1995-2003, 2004, 2006, 2007
+/* Copyright (C) 1993, 1995-2004, 2006, 2007, 2010
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -285,7 +285,11 @@
 	DIRENT_SET_DP_INO(dp, kdp->d_ino);
 	dp->d_off = kdp->d_off;
 	dp->d_reclen = new_reclen;
+#ifdef __ASSUME_GETDENTS32_D_TYPE
+	dp->d_type = *((char *) kdp + kdp->d_reclen - 1);
+#else
 	dp->d_type = DT_UNKNOWN;
+#endif
 	memcpy (dp->d_name, kdp->d_name,
 		kdp->d_reclen - offsetof (struct kernel_dirent, d_name));
 

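The new __ASSUME_GETDENTS32_D_TYPE branch leans on the kernel convention that, when the old getdents syscall supplies file types, d_type sits in the final byte of each record, after the NUL-terminated name. A sketch of the extraction with a simplified record layout (the real struct kernel_dirent is variable length):

  #include <dirent.h>           /* DT_UNKNOWN */

  /* Simplified stand-in for the old kernel dirent: no d_type field of
     its own; d_reclen covers the whole record, type byte included.  */
  struct kernel_dirent
  {
    unsigned long d_ino;
    unsigned long d_off;
    unsigned short d_reclen;
    char d_name[256];
  };

  /* Extract d_type the way the patched __GETDENTS does.  */
  static unsigned char
  dirent_type (const struct kernel_dirent *kdp)
  {
  #ifdef __ASSUME_GETDENTS32_D_TYPE
    /* The kernel is known to store the type in the record's last byte.  */
    return *((const unsigned char *) kdp + kdp->d_reclen - 1);
  #else
    return DT_UNKNOWN;          /* can't tell without another syscall */
  #endif
  }
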
Added: fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/ppca2/fpu/Implies
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/ppca2/fpu/Implies (added)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/ppca2/fpu/Implies Wed Sep 22 00:03:13 2010
@@ -1,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/ppca2/fpu/

Added: fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/ppca2/fpu/Implies
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/ppca2/fpu/Implies (added)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/ppca2/fpu/Implies Wed Sep 22 00:03:13 2010
@@ -1,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc64/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc64/fpu/Implies.
+powerpc/powerpc64/ppca2/fpu