[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[patches] powerpc 8xx dcbz problem
- To: patches@xxxxxxxxxx
- Subject: [patches] powerpc 8xx dcbz problem
- From: Nathan Sidwell <nathan@xxxxxxxxxxxxxxxx>
- Date: Mon, 04 Jun 2007 12:54:16 +0100
The 8xx PowerPC series processors have a problem with the dcbz instruction,
which is used in memset calls. They do not correctly indicate the fault
address, should a page fault be signaled. The Linux kernel tries to guess, but
can guess wrong. In particular we found it consistently guessing wrong for a
particular ld.so.1 on the first call to memset.
This patch amends the startup code to check for an 8xx processor, and if so,
clear a newly created __memset_cache_line_size to prevent dcbz's use.
ok?
nathan
--
Nathan Sidwell :: http://www.codesourcery.com :: CodeSourcery
nathan@xxxxxxxxxxxxxxxx :: http://www.planetfall.pwp.blueyonder.co.uk
2007-06-04 Mark Shinwell <shinwell@xxxxxxxxxxxxxxxx>
sysdeps/powerpc/powerpc32/
* dl-machine.c: Adjust comment about __cache_line_size.
* memset.S: Change __cache_line_size to __memset_cache_line_size.
sysdeps/unix/sysv/linux/powerpc/
* libc-start.c (__cache_line_size): Make non-weak.
(__memset_cache_line_size): Declare weakly.
(__libc_start_main): Detect 8xx parts and clear
__memset_cache_line_size if detected.
sysdeps/unix/sysv/linux/
* kernel-features.h (__ASSUME_EMULATED_MFSPR_PVR): Define on
2.2.18 and above.
Index: sysdeps/powerpc/powerpc32/dl-machine.c
===================================================================
--- sysdeps/powerpc/powerpc32/dl-machine.c (revision 172620)
+++ sysdeps/powerpc/powerpc32/dl-machine.c (working copy)
@@ -26,7 +26,7 @@
#include <dl-machine.h>
#include <stdio-common/_itoa.h>
-/* The value __cache_line_size is defined in memset.S and is initialised
+/* The value __cache_line_size is defined in libc-start.c and is initialised
by _dl_sysdep_start via DL_PLATFORM_INIT. */
extern int __cache_line_size;
weak_extern (__cache_line_size)
Index: sysdeps/powerpc/powerpc32/memset.S
===================================================================
--- sysdeps/powerpc/powerpc32/memset.S (revision 172620)
+++ sysdeps/powerpc/powerpc32/memset.S (working copy)
@@ -26,14 +26,14 @@
to obtain the value set by the kernel and store it into this
variable. */
- .globl __cache_line_size
- .lcomm __cache_line_size,4,4
+ .globl __memset_cache_line_size
+ .lcomm __memset_cache_line_size,4,4
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
Returns 's'.
The memset is done in four sizes: byte (8 bits), word (32 bits),
- 32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
+ 32-byte blocks (256 bits) and __memset_cache_line_size (128, 256, 1024 bits).
There is a special case for setting whole cache lines to 0, which
takes advantage of the dcbz instruction. */
@@ -122,7 +122,7 @@ L(caligned):
/* Check if we can use the special case for clearing memory using dcbz.
This requires that we know the correct cache line size for this
- processor. Getting the __cache_line_size may require establishing GOT
+ processor. Getting the __memset_cache_line_size may require establishing GOT
addressability, so branch out of line to set this up. */
beq cr1, L(checklinesize)
@@ -262,28 +262,28 @@ L(checklinesize):
/* If the remaining length is less the 32 bytes then don't bother getting
the cache line size. */
beq L(medium)
-/* Establishes GOT addressability so we can load __cache_line_size
+/* Establishes GOT addressability so we can load __memset_cache_line_size
from static. This value was set from the aux vector during startup. */
# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr rGOT
- addis rGOT,rGOT,__cache_line_size-1b@ha
- lwz rCLS,__cache_line_size-1b@l(rGOT)
+ addis rGOT,rGOT,__memset_cache_line_size-1b@ha
+ lwz rCLS,__memset_cache_line_size-1b@l(rGOT)
# else
bl _GLOBAL_OFFSET_TABLE_@local-4
mflr rGOT
- lwz rGOT,__cache_line_size@got(rGOT)
+ lwz rGOT,__memset_cache_line_size@got(rGOT)
lwz rCLS,0(rGOT)
# endif
mtlr rTMP
#else
-/* Load __cache_line_size from static. This value was set from the
+/* Load __memset_cache_line_size from static. This value was set from the
aux vector during startup. */
- lis rCLS,__cache_line_size@ha
+ lis rCLS,__memset_cache_line_size@ha
/* If the remaining length is less the 32 bytes then don't bother getting
the cache line size. */
beq L(medium)
- lwz rCLS,__cache_line_size@l(rCLS)
+ lwz rCLS,__memset_cache_line_size@l(rCLS)
#endif
/* If the cache line size was not set then goto to L(nondcbz), which is
Index: sysdeps/powerpc/powerpc64/memset.S
===================================================================
--- sysdeps/powerpc/powerpc64/memset.S (revision 172620)
+++ sysdeps/powerpc/powerpc64/memset.S (working copy)
@@ -25,11 +25,11 @@
assumption is that startup code will access the "aux vector" to
to obtain the value set by the kernel and store it into this
variable. */
- .globl __cache_line_size
- .lcomm __cache_line_size,4,4
+ .globl __memset_cache_line_size
+ .lcomm __memset_cache_line_size,4,4
.section ".toc","aw"
.LC0:
- .tc __cache_line_size[TC],__cache_line_size
+ .tc __memset_cache_line_size[TC],__memset_cache_line_size
.section ".text"
.align 2
Index: sysdeps/unix/sysv/linux/powerpc/libc-start.c
===================================================================
--- sysdeps/unix/sysv/linux/powerpc/libc-start.c (revision 172620)
+++ sysdeps/unix/sysv/linux/powerpc/libc-start.c (working copy)
@@ -22,8 +22,10 @@
#include <bp-start.h>
#include <bp-sym.h>
-extern int __cache_line_size;
-weak_extern (__cache_line_size)
+int __cache_line_size;
+extern int __memset_cache_line_size;
+weak_extern (__memset_cache_line_size)
+
/* The main work is done in the generic function. */
#define LIBC_START_MAIN generic_start_main
#define LIBC_START_DISABLE_INLINE
@@ -85,6 +87,8 @@ int
#else
# define argv ubp_av
#endif
+ unsigned int *memset_cls = &__memset_cache_line_size;
+ unsigned int pvr;
/* the PPC SVR4 ABI says that the top thing on the stack will
be a NULL pointer, so if not we assume that we're being called
@@ -113,13 +117,29 @@ int
switch (av->a_type)
{
case AT_DCACHEBSIZE:
- {
- int *cls = &__cache_line_size;
- if (cls != NULL)
- *cls = av->a_un.a_val;
- }
+ __cache_line_size = av->a_un.a_val;
break;
}
+
+ /* Determine if we are running on an 8xx series processor; if we are,
+ signal to the memset routine (by setting __memset_cache_line_size
+ to zero) so it avoids the dcbz instruction which is broken on such
+ processors. */
+ if (memset_cls)
+ {
+ /* Read the PVR register. This is normally accessible only in
+ supervisor mode but kernels from 2.2.18 onwards emulate the
+ instruction. */
+#ifdef __ASSUME_EMULATED_MFSPR_PVR
+ asm volatile ("mfspr %0, 287" : "=r" (pvr) :);
+ if ((pvr & 0xffff0000) == 0x00500000)
+ /* We are running on an 8xx processor. Avoid dcbz. */
+ *memset_cls = 0;
+ else
+#endif
+ *memset_cls = __cache_line_size;
+ }
+
#ifdef SHARED
/* Resolve and initialize function pointers for VDSO functions. */
_libc_vdso_platform_setup ();
Index: sysdeps/unix/sysv/linux/kernel-features.h
===================================================================
--- sysdeps/unix/sysv/linux/kernel-features.h (revision 172620)
+++ sysdeps/unix/sysv/linux/kernel-features.h (working copy)
@@ -381,6 +381,12 @@
# define __ASSUME_SWAPCONTEXT_SYSCALL 1
#endif
+/* Starting with 2.2.18 the kernel emulates PowerPC mfspr reads from the
+ PVR register. */
+#if __LINUX_KERNEL_VERSION >= 0x020218 && defined __powerpc__
+# define __ASSUME_EMULATED_MFSPR_PVR 1
+#endif
+
/* The CLONE_DETACHED flag is not necessary in 2.6.2 kernels, it is
implied. */
#if __LINUX_KERNEL_VERSION >= 132610