
[commits] r9685 - in /fsf/trunk/libc: ./ elf/ sysdeps/mach/hurd/bits/ sysdeps/powerpc/powerpc32/cell/ sysdeps/powerpc/powerpc64/cell/ ...



Author: eglibc
Date: Tue Jan 19 00:03:13 2010
New Revision: 9685

Log:
Import glibc-mainline for 2010-01-19

Added:
    fsf/trunk/libc/sysdeps/powerpc/powerpc32/cell/
    fsf/trunk/libc/sysdeps/powerpc/powerpc32/cell/memcpy.S
    fsf/trunk/libc/sysdeps/powerpc/powerpc64/cell/
    fsf/trunk/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/cell/
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/cell/fpu/
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/cell/fpu/Implies
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/cell/
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/cell/fpu/
    fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/cell/fpu/Implies
Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/elf/dynamic-link.h
    fsf/trunk/libc/sysdeps/mach/hurd/bits/libc-lock.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Tue Jan 19 00:03:13 2010
@@ -1,3 +1,27 @@
+2010-01-17  Samuel Thibault  <samuel.thibault@xxxxxxxxxxxx>
+
+	* sysdeps/mach/hurd/bits/libc-lock.h
+	(__rtld_lock_recursive_t): New type.
+	(__rtld_lock_initialize): New macro.
+
+2010-01-14  Ryan S. Arnold  <rsa@xxxxxxxxxx>
+
+	* sysdeps/powerpc/powerpc32/cell/memcpy.S: New file.
+	* sysdeps/powerpc/powerpc64/cell/memcpy.S: New file.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/cell/fpu/Implies: New file.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/cell/fpu/Implies: New file.
+
+2010-01-18  Andreas Schwab  <schwab@xxxxxxxxxx>
+
+	* sysdeps/unix/sysv/linux/sparc/bits/fcntl.h: Remove duplicate
+	definitions of O_DSYNC and O_RSYNC.
+
+	* elf/dynamic-link.h (elf_get_dynamic_info): Use correct type when
+	casting d_tag.
+
+	* elf/dynamic-link.h (elf_get_dynamic_info): Ignore negative
+	dynamic entry types.
+
 2010-01-16  Jakub Jelinek  <jakub@xxxxxxxxxx>
 
 	* sysdeps/ieee754/ldbl-128/s_ceill.c (huge): Add L suffix to the

Modified: fsf/trunk/libc/elf/dynamic-link.h
==============================================================================
--- fsf/trunk/libc/elf/dynamic-link.h (original)
+++ fsf/trunk/libc/elf/dynamic-link.h Tue Jan 19 00:03:13 2010
@@ -103,6 +103,11 @@
 {
   ElfW(Dyn) *dyn = l->l_ld;
   ElfW(Dyn) **info;
+#if __ELF_NATIVE_CLASS == 32
+  typedef Elf32_Word d_tag_utype;
+#elif __ELF_NATIVE_CLASS == 64
+  typedef Elf64_Xword d_tag_utype;
+#endif
 
 #ifndef RTLD_BOOTSTRAP
   if (dyn == NULL)
@@ -113,20 +118,20 @@
 
   while (dyn->d_tag != DT_NULL)
     {
-      if (dyn->d_tag < DT_NUM)
+      if ((d_tag_utype) dyn->d_tag < DT_NUM)
 	info[dyn->d_tag] = dyn;
       else if (dyn->d_tag >= DT_LOPROC &&
 	       dyn->d_tag < DT_LOPROC + DT_THISPROCNUM)
 	info[dyn->d_tag - DT_LOPROC + DT_NUM] = dyn;
-      else if ((Elf32_Word) DT_VERSIONTAGIDX (dyn->d_tag) < DT_VERSIONTAGNUM)
+      else if ((d_tag_utype) DT_VERSIONTAGIDX (dyn->d_tag) < DT_VERSIONTAGNUM)
 	info[VERSYMIDX (dyn->d_tag)] = dyn;
-      else if ((Elf32_Word) DT_EXTRATAGIDX (dyn->d_tag) < DT_EXTRANUM)
+      else if ((d_tag_utype) DT_EXTRATAGIDX (dyn->d_tag) < DT_EXTRANUM)
 	info[DT_EXTRATAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM
 	     + DT_VERSIONTAGNUM] = dyn;
-      else if ((Elf32_Word) DT_VALTAGIDX (dyn->d_tag) < DT_VALNUM)
+      else if ((d_tag_utype) DT_VALTAGIDX (dyn->d_tag) < DT_VALNUM)
 	info[DT_VALTAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM
 	     + DT_VERSIONTAGNUM + DT_EXTRANUM] = dyn;
-      else if ((Elf32_Word) DT_ADDRTAGIDX (dyn->d_tag) < DT_ADDRNUM)
+      else if ((d_tag_utype) DT_ADDRTAGIDX (dyn->d_tag) < DT_ADDRNUM)
 	info[DT_ADDRTAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM
 	     + DT_VERSIONTAGNUM + DT_EXTRANUM + DT_VALNUM] = dyn;
       ++dyn;

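The point of the new d_tag_utype cast is easiest to see in a small standalone C sketch (the DT_NUM value is illustrative; d_tag_utype stands for Elf32_Word or Elf64_Xword as selected above): a negative d_tag converts to a very large unsigned value, so it fails every range check and the bogus entry is skipped instead of indexing info[] with a negative subscript.

    #include <stdio.h>
    #include <stdint.h>

    #define DT_NUM 34                 /* illustrative; see <elf.h> */

    int main (void)
    {
      int64_t d_tag = -5;             /* bogus negative dynamic tag */

      /* Old behavior: signed compare, -5 < 34 holds, so the loop
         would have written through info[-5], before the array.  */
      printf ("signed:   %d\n", d_tag < DT_NUM);

      /* New behavior: after the unsigned cast, (uint64_t) -5 is huge,
         every range check fails and the entry is ignored.  */
      printf ("unsigned: %d\n", (uint64_t) d_tag < DT_NUM);
      return 0;
    }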
Modified: fsf/trunk/libc/sysdeps/mach/hurd/bits/libc-lock.h
==============================================================================
--- fsf/trunk/libc/sysdeps/mach/hurd/bits/libc-lock.h (original)
+++ fsf/trunk/libc/sysdeps/mach/hurd/bits/libc-lock.h Tue Jan 19 00:03:13 2010
@@ -31,6 +31,7 @@
   void *owner;
   int count;
 } __libc_lock_recursive_t;
+typedef __libc_lock_recursive_t __rtld_lock_recursive_t;
 
 #define __libc_lock_owner_self() ((void *) __hurd_threadvar_location (0))
 
@@ -121,6 +122,8 @@
 
 #define __rtld_lock_init_recursive(NAME) \
   __libc_lock_init_recursive (NAME)
+#define __rtld_lock_initialize(NAME) \
+  (void) ((NAME) = (__rtld_lock_recursive_t) _RTLD_LOCK_RECURSIVE_INITIALIZER)
 #define __rtld_lock_trylock_recursive(NAME) \
   __libc_lock_trylock_recursive (NAME)
 #define __rtld_lock_lock_recursive(NAME) \

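A minimal usage sketch of the new macro (the lock name is hypothetical; _RTLD_LOCK_RECURSIVE_INITIALIZER is the static initializer defined elsewhere in this header): it lets the dynamic linker reset a recursive lock to its pristine state at run time by plain assignment, mirroring what static initialization does.

    #include <bits/libc-lock.h>       /* the Hurd variant shown above */

    static __rtld_lock_recursive_t some_rtld_lock;   /* hypothetical */

    static void
    reinit_lock (void)
    {
      /* Equivalent to declaring the lock with the static initializer,
         but usable at run time, e.g. when ld.so rebuilds its state.  */
      __rtld_lock_initialize (some_rtld_lock);
    }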
Added: fsf/trunk/libc/sysdeps/powerpc/powerpc32/cell/memcpy.S
==============================================================================
--- fsf/trunk/libc/sysdeps/powerpc/powerpc32/cell/memcpy.S (added)
+++ fsf/trunk/libc/sysdeps/powerpc/powerpc32/cell/memcpy.S Tue Jan 19 00:03:13 2010
@@ -1,0 +1,245 @@
+/* Optimized memcpy implementation for CELL BE PowerPC.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+#define PREFETCH_AHEAD 6	/* number of cache lines to prefetch SRC ahead  */
+#define ZERO_AHEAD 4		/* number of cache lines to zero DST ahead  */
+
+/* memcpy routine optimized for CELL-BE-PPC	v2.0
+ *
+ * The CELL PPC core has 1 integer unit and 1 load/store unit.
+ * CELL:
+ * 1st level data cache = 32K
+ * 2nd level data cache = 512K
+ * 3rd level data cache = 0K
+ * At a 3.2 GHz clock rate the latency to the 2nd level cache is >36 clocks;
+ * latency to memory is >400 clocks.
+ * To improve copy performance we need to prefetch source data
+ * far ahead to hide this latency.
+ * For best performance, instruction forms ending in "." like "andi."
+ * should be avoided as they are implemented in microcode on CELL.
+ * The code below is loop-unrolled for the CELL cache line of 128 bytes.
+ */
+
+.align  7
+
+EALIGN (BP_SYM (memcpy), 5, 0)
+	CALL_MCOUNT
+
+	dcbt	0,r4		/* Prefetch ONE SRC cacheline  */
+	cmplwi	cr1,r5,16	/* is size < 16 ?  */
+	mr	r6,r3
+	blt+	cr1,.Lshortcopy
+
+.Lbigcopy:
+	neg	r8,r3		/* LS 4 bits = # bytes to 16-byte dest bdry  */
+	clrlwi  r8,r8,32-4	/* align to 16-byte boundary  */
+	sub     r7,r4,r3
+	cmplwi	cr0,r8,0
+	beq+	.Ldst_aligned
+
+.Ldst_unaligned:
+	mtcrf	0x01,r8		/* put #bytes to boundary into cr7  */
+	subf	r5,r8,r5
+
+	bf	cr7*4+3,1f
+	lbzx	r0,r7,r6	/* copy 1 byte  */
+	stb	r0,0(r6)
+	addi	r6,r6,1
+1:	bf	cr7*4+2,2f
+	lhzx	r0,r7,r6	/* copy 2 bytes  */
+	sth	r0,0(r6)
+	addi	r6,r6,2
+2:	bf	cr7*4+1,4f
+	lwzx	r0,r7,r6	/* copy 4 bytes  */
+	stw	r0,0(r6)
+	addi	r6,r6,4
+4:	bf	cr7*4+0,8f
+	lfdx	fp9,r7,r6	/* copy 8 bytes  */
+	stfd	fp9,0(r6)
+	addi	r6,r6,8
+8:
+	add	r4,r7,r6
+
+.Ldst_aligned:
+
+	cmpwi	cr5,r5,128-1
+
+	neg	r7,r6
+	addi	r6,r6,-8	/* prepare for stfdu  */
+	addi	r4,r4,-8	/* prepare for lfdu  */
+
+	clrlwi  r7,r7,32-7	/* align to cacheline boundary  */
+	ble+	cr5,.Llessthancacheline
+
+	cmplwi	cr6,r7,0
+	subf	r5,r7,r5
+	srwi	r7,r7,4		/* divide size by 16  */
+	srwi	r10,r5,7	/* number of cache lines to copy  */
+
+	cmplwi	r10,0
+	li	r11,0		/* number of cache lines to copy with prefetch  */
+	beq	.Lnocacheprefetch
+
+	cmplwi	r10,PREFETCH_AHEAD
+	li	r12,128+8	/* prefetch distance  */
+	ble	.Llessthanmaxprefetch
+
+	subi	r11,r10,PREFETCH_AHEAD
+	li	r10,PREFETCH_AHEAD
+
+.Llessthanmaxprefetch:
+	mtctr	r10
+
+.LprefetchSRC:
+	dcbt    r12,r4
+	addi    r12,r12,128
+	bdnz    .LprefetchSRC
+
+.Lnocacheprefetch:
+	mtctr	r7
+	cmplwi	cr1,r5,128
+	clrlwi  r5,r5,32-7
+	beq	cr6,.Lcachelinealigned
+
+.Laligntocacheline:
+	lfd	fp9,0x08(r4)
+	lfdu	fp10,0x10(r4)
+	stfd	fp9,0x08(r6)
+	stfdu	fp10,0x10(r6)
+	bdnz	.Laligntocacheline
+
+
+.Lcachelinealigned:		/* copy whole cache lines  */
+
+	blt-	cr1,.Llessthancacheline	/* size <128  */
+
+.Louterloop:
+	cmpwi   r11,0
+	mtctr	r11
+	beq-	.Lendloop
+
+	li	r11,128*ZERO_AHEAD +8	/* DCBZ dist  */
+
+.align	4
+	/* Copy whole cachelines, optimized by prefetching SRC cacheline  */
+.Lloop:				/* Copy aligned body  */
+	dcbt	r12,r4		/* PREFETCH SOURCE some cache lines ahead  */
+	lfd	fp9, 0x08(r4)
+	dcbz	r11,r6
+	lfd	fp10, 0x10(r4)	/* 4-register stride copy is optimal  */
+	lfd	fp11, 0x18(r4)	/* to hide 1st level cache latency.  */
+	lfd	fp12, 0x20(r4)
+	stfd	fp9, 0x08(r6)
+	stfd	fp10, 0x10(r6)
+	stfd	fp11, 0x18(r6)
+	stfd	fp12, 0x20(r6)
+	lfd	fp9, 0x28(r4)
+	lfd	fp10, 0x30(r4)
+	lfd	fp11, 0x38(r4)
+	lfd	fp12, 0x40(r4)
+	stfd	fp9, 0x28(r6)
+	stfd	fp10, 0x30(r6)
+	stfd	fp11, 0x38(r6)
+	stfd	fp12, 0x40(r6)
+	lfd	fp9, 0x48(r4)
+	lfd	fp10, 0x50(r4)
+	lfd	fp11, 0x58(r4)
+	lfd	fp12, 0x60(r4)
+	stfd	fp9, 0x48(r6)
+	stfd	fp10, 0x50(r6)
+	stfd	fp11, 0x58(r6)
+	stfd	fp12, 0x60(r6)
+	lfd	fp9, 0x68(r4)
+	lfd	fp10, 0x70(r4)
+	lfd	fp11, 0x78(r4)
+	lfdu	fp12, 0x80(r4)
+	stfd	fp9, 0x68(r6)
+	stfd	fp10, 0x70(r6)
+	stfd	fp11, 0x78(r6)
+	stfdu	fp12, 0x80(r6)
+
+	bdnz	.Lloop
+
+.Lendloop:
+	cmpwi	r10,0
+	slwi	r10,r10,2	/* adjust from 128 to 32 byte stride  */
+	beq-	.Lendloop2
+	mtctr	r10
+
+.Lloop2:			/* Copy aligned body  */
+	lfd	fp9, 0x08(r4)
+	lfd	fp10, 0x10(r4)
+	lfd	fp11, 0x18(r4)
+	lfdu	fp12, 0x20(r4)
+	stfd	fp9, 0x08(r6)
+	stfd	fp10, 0x10(r6)
+	stfd	fp11, 0x18(r6)
+	stfdu	fp12, 0x20(r6)
+
+	bdnz	.Lloop2
+.Lendloop2:
+
+.Llessthancacheline:		/* less than a cache line to copy?  */
+	cmplwi	cr0,r5,16
+	srwi	r7,r5,4		/* divide size by 16  */
+	blt-	.Ldo_lt16
+	mtctr	r7
+
+.Lcopy_remaining:
+	lfd	fp9,0x08(r4)
+	lfdu	fp10,0x10(r4)
+	stfd	fp9,0x08(r6)
+	stfdu	fp10,0x10(r6)
+	bdnz	.Lcopy_remaining
+
+.Ldo_lt16:			/* less than 16 bytes?  */
+	cmplwi	cr0,r5,0	/* copy remaining bytes (0-15)  */
+	beqlr+			/* no rest to copy  */
+	addi	r4,r4,8
+	addi	r6,r6,8
+
+.Lshortcopy:			/* SIMPLE COPY to handle size <= 15 bytes  */
+	mtcrf	0x01,r5
+	sub	r7,r4,r6
+	bf-	cr7*4+0,8f
+	lfdx	fp9,r7,r6	/* copy 8 bytes  */
+	stfd	fp9,0(r6)
+	addi	r6,r6,8
+8:
+	bf	cr7*4+1,4f
+	lwzx	r0,r7,r6	/* copy 4 bytes  */
+	stw	r0,0(r6)
+	addi	r6,r6,4
+4:
+	bf	cr7*4+2,2f
+	lhzx	r0,r7,r6	/* copy 2 bytes  */
+	sth	r0,0(r6)
+	addi	r6,r6,2
+2:
+	bf	cr7*4+3,1f
+	lbzx	r0,r7,r6	/* copy 1 byte  */
+	stb	r0,0(r6)
+1:	blr
+
+END (BP_SYM (memcpy))
+libc_hidden_builtin_def (memcpy)

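In rough C terms, the loop structure above does the following (a sketch only: the dcbz establishment of the destination line and the precise instruction scheduling are what the hand-written assembly adds, and __builtin_prefetch stands in for dcbt):

    #include <stddef.h>
    #include <stdint.h>

    #define LINE 128                  /* CELL cache-line size */
    #define PREFETCH_AHEAD 6          /* SRC lines fetched ahead, as above */

    /* Copy whole 128-byte cache lines, touching the source several
       lines ahead so that by the time the loads execute the data is
       already on its way into the cache.  Pointers are assumed to be
       8-byte aligned, as the assembly guarantees at this point.  */
    static void
    copy_lines (uint64_t *dst, const uint64_t *src, size_t nlines)
    {
      for (size_t i = 0; i < nlines; i++)
        {
          __builtin_prefetch (src + PREFETCH_AHEAD * (LINE / 8));
          for (int j = 0; j < LINE / 8; j += 4)
            {
              /* 4-register stride, matching the .Lloop body.  */
              dst[j]     = src[j];
              dst[j + 1] = src[j + 1];
              dst[j + 2] = src[j + 2];
              dst[j + 3] = src[j + 3];
            }
          dst += LINE / 8;
          src += LINE / 8;
        }
    }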
Added: fsf/trunk/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S
==============================================================================
--- fsf/trunk/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S (added)
+++ fsf/trunk/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S Tue Jan 19 00:03:13 2010
@@ -1,0 +1,245 @@
+/* Optimized memcpy implementation for CELL BE PowerPC.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+#define PREFETCH_AHEAD 6	/* number of cache lines to prefetch SRC ahead  */
+#define ZERO_AHEAD 4		/* number of cache lines to zero DST ahead  */
+
+/* memcpy routine optimized for CELL-BE-PPC	v2.0
+ *
+ * The CELL PPC core has 1 integer unit and 1 load/store unit.
+ * CELL:
+ * 1st level data cache = 32K
+ * 2nd level data cache = 512K
+ * 3rd level data cache = 0K
+ * At a 3.2 GHz clock rate the latency to the 2nd level cache is >36 clocks;
+ * latency to memory is >400 clocks.
+ * To improve copy performance we need to prefetch source data
+ * far ahead to hide this latency.
+ * For best performance, instruction forms ending in "." like "andi."
+ * should be avoided as they are implemented in microcode on CELL.
+ * The code below is loop-unrolled for the CELL cache line of 128 bytes.
+ */
+
+.align  7
+
+EALIGN (BP_SYM (memcpy), 5, 0)
+	CALL_MCOUNT 3
+
+	dcbt	0,r4		/* Prefetch ONE SRC cacheline  */
+	cmpldi	cr1,r5,16	/* is size < 16 ?  */
+	mr	r6,r3
+	blt+	cr1,.Lshortcopy
+
+.Lbigcopy:
+	neg	r8,r3		/* LS 4 bits = # bytes to 16-byte dest bdry  */
+	clrldi  r8,r8,64-4	/* align to 16-byte boundary  */
+	sub     r7,r4,r3
+	cmpldi	cr0,r8,0
+	beq+	.Ldst_aligned
+
+.Ldst_unaligned:
+	mtcrf	0x01,r8		/* put #bytes to boundary into cr7  */
+	subf	r5,r8,r5
+
+	bf	cr7*4+3,1f
+	lbzx	r0,r7,r6	/* copy 1 byte  */
+	stb	r0,0(r6)
+	addi	r6,r6,1
+1:	bf	cr7*4+2,2f
+	lhzx	r0,r7,r6	/* copy 2 bytes  */
+	sth	r0,0(r6)
+	addi	r6,r6,2
+2:	bf	cr7*4+1,4f
+	lwzx	r0,r7,r6	/* copy 4 bytes  */
+	stw	r0,0(r6)
+	addi	r6,r6,4
+4:	bf	cr7*4+0,8f
+	ldx	r0,r7,r6	/* copy 8 bytes  */
+	std	r0,0(r6)
+	addi	r6,r6,8
+8:
+	add	r4,r7,r6
+
+.Ldst_aligned:
+
+	cmpdi	cr5,r5,128-1
+
+	neg	r7,r6
+	addi	r6,r6,-8	/* prepare for stdu  */
+	addi	r4,r4,-8	/* prepare for ldu  */
+
+	clrldi  r7,r7,64-7	/* align to cacheline boundary  */
+	ble+	cr5,.Llessthancacheline
+
+	cmpldi	cr6,r7,0
+	subf	r5,r7,r5
+	srdi	r7,r7,4		/* divide size by 16  */
+	srdi	r10,r5,7	/* number of cache lines to copy  */
+
+	cmpldi	r10,0
+	li	r11,0		/* number of cache lines to copy with prefetch  */
+	beq	.Lnocacheprefetch
+
+	cmpldi	r10,PREFETCH_AHEAD
+	li	r12,128+8	/* prefetch distance  */
+	ble	.Llessthanmaxprefetch
+
+	subi	r11,r10,PREFETCH_AHEAD
+	li	r10,PREFETCH_AHEAD
+
+.Llessthanmaxprefetch:
+	mtctr	r10
+
+.LprefetchSRC:
+	dcbt    r12,r4
+	addi    r12,r12,128
+	bdnz    .LprefetchSRC
+
+.Lnocacheprefetch:
+	mtctr	r7
+	cmpldi	cr1,r5,128
+	clrldi  r5,r5,64-7
+	beq	cr6,.Lcachelinealigned
+
+.Laligntocacheline:
+	ld	r9,0x08(r4)
+	ldu	r7,0x10(r4)
+	std	r9,0x08(r6)
+	stdu	r7,0x10(r6)
+	bdnz	.Laligntocacheline
+
+
+.Lcachelinealigned:		/* copy whole cache lines  */
+
+	blt-	cr1,.Llessthancacheline	/* size <128  */
+
+.Louterloop:
+	cmpdi   r11,0
+	mtctr	r11
+	beq-	.Lendloop
+
+	li	r11,128*ZERO_AHEAD +8	/* DCBZ dist  */
+
+.align	4
+	/* Copy whole cachelines, optimized by prefetching SRC cacheline  */
+.Lloop:				/* Copy aligned body  */
+	dcbt	r12,r4		/* PREFETCH SOURCE some cache lines ahead  */
+	ld	r9, 0x08(r4)
+	dcbz	r11,r6
+	ld	r7, 0x10(r4)	/* 4-register stride copy is optimal  */
+	ld	r8, 0x18(r4)	/* to hide 1st level cache latency.  */
+	ld	r0, 0x20(r4)
+	std	r9, 0x08(r6)
+	std	r7, 0x10(r6)
+	std	r8, 0x18(r6)
+	std	r0, 0x20(r6)
+	ld	r9, 0x28(r4)
+	ld	r7, 0x30(r4)
+	ld	r8, 0x38(r4)
+	ld	r0, 0x40(r4)
+	std	r9, 0x28(r6)
+	std	r7, 0x30(r6)
+	std	r8, 0x38(r6)
+	std	r0, 0x40(r6)
+	ld	r9, 0x48(r4)
+	ld	r7, 0x50(r4)
+	ld	r8, 0x58(r4)
+	ld	r0, 0x60(r4)
+	std	r9, 0x48(r6)
+	std	r7, 0x50(r6)
+	std	r8, 0x58(r6)
+	std	r0, 0x60(r6)
+	ld	r9, 0x68(r4)
+	ld	r7, 0x70(r4)
+	ld	r8, 0x78(r4)
+	ldu	r0, 0x80(r4)
+	std	r9, 0x68(r6)
+	std	r7, 0x70(r6)
+	std	r8, 0x78(r6)
+	stdu	r0, 0x80(r6)
+
+	bdnz	.Lloop
+
+.Lendloop:
+	cmpdi	r10,0
+	sldi	r10,r10,2	/* adjust from 128 to 32 byte stride  */
+	beq-	.Lendloop2
+	mtctr	r10
+
+.Lloop2:			/* Copy aligned body  */
+	ld	r9, 0x08(r4)
+	ld	r7, 0x10(r4)
+	ld	r8, 0x18(r4)
+	ldu	r0, 0x20(r4)
+	std	r9, 0x08(r6)
+	std	r7, 0x10(r6)
+	std	r8, 0x18(r6)
+	stdu	r0, 0x20(r6)
+
+	bdnz	.Lloop2
+.Lendloop2:
+
+.Llessthancacheline:		/* less than a cache line to copy?  */
+	cmpldi	cr0,r5,16
+	srdi	r7,r5,4		/* divide size by 16  */
+	blt-	.Ldo_lt16
+	mtctr	r7
+
+.Lcopy_remaining:
+	ld	r8,0x08(r4)
+	ldu	r7,0x10(r4)
+	std	r8,0x08(r6)
+	stdu	r7,0x10(r6)
+	bdnz	.Lcopy_remaining
+
+.Ldo_lt16:			/* less than 16 bytes?  */
+	cmpldi	cr0,r5,0	/* copy remaining bytes (0-15)  */
+	beqlr+			/* no rest to copy  */
+	addi	r4,r4,8
+	addi	r6,r6,8
+
+.Lshortcopy:			/* SIMPLE COPY to handle size <= 15 bytes  */
+	mtcrf	0x01,r5
+	sub	r7,r4,r6
+	bf-	cr7*4+0,8f
+	ldx	r0,r7,r6	/* copy 8 bytes  */
+	std	r0,0(r6)
+	addi	r6,r6,8
+8:
+	bf	cr7*4+1,4f
+	lwzx	r0,r7,r6	/* copy 4 bytes  */
+	stw	r0,0(r6)
+	addi	r6,r6,4
+4:
+	bf	cr7*4+2,2f
+	lhzx	r0,r7,r6	/* copy 2 bytes  */
+	sth	r0,0(r6)
+	addi	r6,r6,2
+2:
+	bf	cr7*4+3,1f
+	lbzx	r0,r7,r6	/* copy 1 byte  */
+	stb	r0,0(r6)
+1:	blr
+
+END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)

Added: fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/cell/fpu/Implies
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/cell/fpu/Implies (added)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/cell/fpu/Implies Tue Jan 19 00:03:13 2010
@@ -1,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/cell/fpu

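To make the ordering concrete (a hedged sketch; the exact neighbors depend on the configuration), building with --with-cpu=cell should produce a sysdeps search list roughly like:

    sysdeps/unix/sysv/linux/powerpc/powerpc32/cell/fpu
    sysdeps/powerpc/powerpc32/cell/fpu        <- added by this Implies
    sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu
    sysdeps/powerpc/powerpc32/fpu

so any Cell-tuned fpu routine shadows its generic powerpc32/fpu counterpart.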
Added: fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/cell/fpu/Implies
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/cell/fpu/Implies (added)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/cell/fpu/Implies Tue Jan 19 00:03:13 2010
@@ -1,0 +1,1 @@
+powerpc/powerpc64/cell/fpu

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h Tue Jan 19 00:03:13 2010
@@ -68,14 +68,6 @@
 #if defined __USE_POSIX199309 || defined __USE_UNIX98
 # define O_DSYNC	0x2000	/* Synchronize data.  */
 # define O_RSYNC	O_SYNC	/* Synchronize read operations.  */
-#endif
-
-/* For now Linux has synchronisity options for data and read operations.
-   We define the symbols here but let them do the same as O_SYNC since
-   this is a superset.  */
-#if defined __USE_POSIX199309 || defined __USE_UNIX98
-# define O_DSYNC        O_SYNC  /* Synchronize data.  */
-# define O_RSYNC        O_SYNC  /* Synchronize read operations.  */
 #endif
 
 /* Values for the second argument to `fcntl'.  */
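The removed block was not a harmless duplicate: redefining a macro with a different expansion makes the compiler warn and honor the later definition, so O_DSYNC effectively expanded to O_SYNC and the intended 0x2000 value was never in effect. A minimal illustration (the O_SYNC value is illustrative):

    #define O_SYNC  0x802000   /* illustrative */
    #define O_DSYNC 0x2000     /* first definition, the intended one */
    #define O_DSYNC O_SYNC     /* warning: "O_DSYNC" redefined; this
                                  later definition is the one used  */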