[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patches] powerpc 8xx dcbz problem



The 8xx PowerPC series processors have a problem with the dcbz instruction, which is used in memset calls. They do not correctly indicate the fault address, should a page fault be signaled. The Linux kernel tries to guess, but can guess wrong. In particular we found it consistently guessing wrong for a particular ld.so.1 on the first call to memset.

This patch amends the startup code to check for an 8xx processor and, if one is detected, to clear a newly created variable, __memset_cache_line_size, so that memset never uses dcbz on those parts.

ok?

nathan

--
Nathan Sidwell    ::   http://www.codesourcery.com   ::         CodeSourcery
nathan@xxxxxxxxxxxxxxxx    ::     http://www.planetfall.pwp.blueyonder.co.uk

2007-06-04  Mark Shinwell  <shinwell@xxxxxxxxxxxxxxxx>
	
	sysdeps/powerpc/powerpc32/
	* dl-machine.c: Adjust comment about __cache_line_size.
	* memset.S: Change __cache_line_size to __memset_cache_line_size.
	sysdeps/powerpc/powerpc64/
	* memset.S: Change __cache_line_size to __memset_cache_line_size.
	sysdeps/unix/sysv/linux/powerpc/
	* libc-start.c (__cache_line_size): Make non-weak.
	(__memset_cache_line_size): Declare weakly.
	(__libc_start_main): Detect 8xx parts and clear
	__memset_cache_line_size if detected.
	sysdeps/unix/sysv/linux/
	* kernel-features.h (__ASSUME_EMULATED_MFSPR_PVR): Define on
	2.2.18 and above.

Index: sysdeps/powerpc/powerpc32/dl-machine.c
===================================================================
--- sysdeps/powerpc/powerpc32/dl-machine.c	(revision 172620)
+++ sysdeps/powerpc/powerpc32/dl-machine.c	(working copy)
@@ -26,7 +26,7 @@
 #include <dl-machine.h>
 #include <stdio-common/_itoa.h>
 
-/* The value __cache_line_size is defined in memset.S and is initialised
+/* The value __cache_line_size is defined in libc-start.c and is initialised
    by _dl_sysdep_start via DL_PLATFORM_INIT.  */
 extern int __cache_line_size;
 weak_extern (__cache_line_size)
Index: sysdeps/powerpc/powerpc32/memset.S
===================================================================
--- sysdeps/powerpc/powerpc32/memset.S	(revision 172620)
+++ sysdeps/powerpc/powerpc32/memset.S	(working copy)
@@ -26,14 +26,14 @@
    to obtain the value set by the kernel and store it into this
    variable.  */
 
-	.globl __cache_line_size
-	.lcomm __cache_line_size,4,4
+	.globl __memset_cache_line_size
+	.lcomm __memset_cache_line_size,4,4
 
 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
    Returns 's'.
 
    The memset is done in four sizes: byte (8 bits), word (32 bits),
-   32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
+   32-byte blocks (256 bits) and __memset_cache_line_size (128, 256, 1024 bits).
    There is a special case for setting whole cache lines to 0, which
    takes advantage of the dcbz instruction.  */
 
@@ -122,7 +122,7 @@ L(caligned):
 
 /* Check if we can use the special case for clearing memory using dcbz.
    This requires that we know the correct cache line size for this
-   processor.  Getting the __cache_line_size may require establishing GOT
+   processor.  Getting the __memset_cache_line_size may require establishing GOT
    addressability, so branch out of line to set this up.  */
 	beq	cr1, L(checklinesize)
 
@@ -262,28 +262,28 @@ L(checklinesize):
 /* If the remaining length is less the 32 bytes then don't bother getting
    the cache line size.  */
 	beq	L(medium)
-/* Establishes GOT addressability so we can load __cache_line_size
+/* Establishes GOT addressability so we can load __memset_cache_line_size
    from static. This value was set from the aux vector during startup.  */
 # ifdef HAVE_ASM_PPC_REL16
 	bcl	20,31,1f
 1:	mflr	rGOT
-	addis	rGOT,rGOT,__cache_line_size-1b@ha
-	lwz	rCLS,__cache_line_size-1b@l(rGOT)
+	addis	rGOT,rGOT,__memset_cache_line_size-1b@ha
+	lwz	rCLS,__memset_cache_line_size-1b@l(rGOT)
 # else
 	bl	_GLOBAL_OFFSET_TABLE_@local-4
 	mflr	rGOT
-	lwz	rGOT,__cache_line_size@got(rGOT)
+	lwz	rGOT,__memset_cache_line_size@got(rGOT)
 	lwz	rCLS,0(rGOT)
 # endif
 	mtlr	rTMP
 #else
-/* Load __cache_line_size from static. This value was set from the
+/* Load __memset_cache_line_size from static. This value was set from the
    aux vector during startup.  */
-	lis	rCLS,__cache_line_size@ha
+	lis	rCLS,__memset_cache_line_size@ha
 /* If the remaining length is less the 32 bytes then don't bother getting
    the cache line size.  */
 	beq	L(medium)
-	lwz	rCLS,__cache_line_size@l(rCLS)
+	lwz	rCLS,__memset_cache_line_size@l(rCLS)
 #endif
 
 /* If the cache line size was not set then goto to L(nondcbz), which is
Index: sysdeps/powerpc/powerpc64/memset.S
===================================================================
--- sysdeps/powerpc/powerpc64/memset.S	(revision 172620)
+++ sysdeps/powerpc/powerpc64/memset.S	(working copy)
@@ -25,11 +25,11 @@
    assumption is that startup code will access the "aux vector" to
    to obtain the value set by the kernel and store it into this
    variable.  */
-	.globl __cache_line_size
-	.lcomm __cache_line_size,4,4
+	.globl __memset_cache_line_size
+	.lcomm __memset_cache_line_size,4,4
 	.section	".toc","aw"
 .LC0:
-	.tc __cache_line_size[TC],__cache_line_size
+	.tc __memset_cache_line_size[TC],__memset_cache_line_size
 	.section	".text"
 	.align 2
 
Index: sysdeps/unix/sysv/linux/powerpc/libc-start.c
===================================================================
--- sysdeps/unix/sysv/linux/powerpc/libc-start.c	(revision 172620)
+++ sysdeps/unix/sysv/linux/powerpc/libc-start.c	(working copy)
@@ -22,8 +22,10 @@
 #include <bp-start.h>
 #include <bp-sym.h>
 
-extern int __cache_line_size;
-weak_extern (__cache_line_size)
+int __cache_line_size;
+extern int __memset_cache_line_size;
+weak_extern (__memset_cache_line_size)
+
 /* The main work is done in the generic function.  */
 #define LIBC_START_MAIN generic_start_main
 #define LIBC_START_DISABLE_INLINE
@@ -85,6 +87,8 @@ int
 #else
 # define argv ubp_av
 #endif
+  unsigned int *memset_cls = &__memset_cache_line_size;
+  unsigned int pvr;
 
   /* the PPC SVR4 ABI says that the top thing on the stack will
      be a NULL pointer, so if not we assume that we're being called
@@ -113,13 +117,29 @@ int
     switch (av->a_type)
       {
       case AT_DCACHEBSIZE:
-	{
-	  int *cls = &__cache_line_size;
-	  if (cls != NULL)
-	    *cls = av->a_un.a_val;
-	}
+	__cache_line_size = av->a_un.a_val;
 	break;
       }
+
+  /* Determine if we are running on an 8xx series processor; if we are,
+     signal to the memset routine (by setting __memset_cache_line_size
+     to zero) so it avoids the dcbz instruction which is broken on such
+     processors.  */
+  if (memset_cls)
+    {
+      /* Read the PVR register.  This is normally accessible only in
+	 supervisor mode but kernels from 2.2.18 onwards emulate the
+	 instruction.  */
+#ifdef __ASSUME_EMULATED_MFSPR_PVR
+      asm volatile ("mfspr %0, 287" : "=r" (pvr) :);
+      if ((pvr & 0xffff0000) == 0x00500000)
+	/* We are running on an 8xx processor.  Avoid dcbz.  */
+	*memset_cls = 0;
+      else
+#endif
+	*memset_cls = __cache_line_size;
+    }
+
 #ifdef SHARED
   /* Resolve and initialize function pointers for VDSO functions.  */
   _libc_vdso_platform_setup ();
Index: sysdeps/unix/sysv/linux/kernel-features.h
===================================================================
--- sysdeps/unix/sysv/linux/kernel-features.h	(revision 172620)
+++ sysdeps/unix/sysv/linux/kernel-features.h	(working copy)
@@ -381,6 +381,12 @@
 # define __ASSUME_SWAPCONTEXT_SYSCALL	1
 #endif
 
+/* Starting with 2.2.18 (2 * 65536 + 2 * 256 + 18 == 131602) the kernel
+   emulates PowerPC mfspr reads from the PVR register.  */
+#if __LINUX_KERNEL_VERSION >= 131602 && defined __powerpc__
+# define __ASSUME_EMULATED_MFSPR_PVR	1
+#endif
+
 /* The CLONE_DETACHED flag is not necessary in 2.6.2 kernels, it is
    implied.  */
 #if __LINUX_KERNEL_VERSION >= 132610