- To: commits@xxxxxxxxxx
- Subject: [commits] r13056 - in /fsf/trunk/libc: ChangeLog sysdeps/x86_64/cacheinfo.c sysdeps/x86_64/memset.S sysdeps/x86_64/multiarch/init-arch.c
- From: eglibc@xxxxxxxxxx
- Date: Sun, 06 Mar 2011 08:03:04 -0000
Author: eglibc
Date: Sun Mar 6 00:03:02 2011
New Revision: 13056
Log:
Import glibc-mainline for 2011-03-06
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c
fsf/trunk/libc/sysdeps/x86_64/memset.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/init-arch.c
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Sun Mar 6 00:03:02 2011
@@ -1,3 +1,17 @@
+2011-03-02 Harsha Jagasia <harsha.jagasia@xxxxxxx>
+ Ulrich Drepper <drepper@xxxxxxxxx>
+
+ * sysdeps/x86_64/memset.S: After aligning the destination, code
+ branches to different locations depending on the value of the
+ misalignment when multiarch is enabled. Fix this.
+
+2011-03-02 Harsha Jagasia <harsha.jagasia@xxxxxxx>
+
+ * sysdeps/x86_64/cacheinfo.c (init_cacheinfo):
+ Set __x86_64_preferred_memory_instruction for AMD processors.
+ * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
+ Set bit_Prefer_SSE_for_memop for AMD processors.
+
2011-03-04 Ulrich Drepper <drepper@xxxxxxxxx>
* libio/fmemopen.c (fmemopen): Optimize a bit.
@@ -12,7 +26,7 @@
2011-02-28 Aurelien Jarno <aurelien@xxxxxxxxxxx>
- * sysdeps/sparc/sparc64/multiarch/memset.S(__bzero): call
+ * sysdeps/sparc/sparc64/multiarch/memset.S(__bzero): Call
__bzero_ultra1 instead of __memset_ultra1.
2011-02-23 Andreas Schwab <schwab@xxxxxxxxxx>
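For context on the memset.S entry above: with multiarch enabled, the
post-alignment label ended up in the wrong place, so control landed at
different code depending on the original misalignment. A minimal C
sketch of the intended invariant (illustrative names only, not the
glibc assembly):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative only: all prologue paths must converge on one
       continuation once the destination is aligned.  The bug broke
       this invariant in the multiarch build.  */
    static void aligned_body (void) { puts ("common continuation"); }

    static void memset_prologue (uintptr_t dst)
    {
      unsigned int misalign = dst & 15;
      if (misalign != 0)
        {
          /* Store the first 16 - misalign bytes here, then fall
             through.  */
        }
      aligned_body ();   /* every misalignment must land here */
    }

    int main (void)
    {
      for (uintptr_t d = 0; d < 16; d++)
        memset_prologue (d);
      return 0;
    }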
Modified: fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c Sun Mar 6 00:03:02 2011
@@ -1,5 +1,5 @@
/* x86_64 cache info.
- Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2003,2004,2006,2007,2009,2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -352,11 +352,11 @@
case _SC_LEVEL2_CACHE_ASSOC:
switch ((ecx >> 12) & 0xf)
- {
- case 0:
- case 1:
- case 2:
- case 4:
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
return (ecx >> 12) & 0xf;
case 6:
return 8;
@@ -376,7 +376,7 @@
return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
default:
return 0;
- }
+ }
/* NOTREACHED */
case _SC_LEVEL2_CACHE_LINESIZE:
@@ -521,10 +521,10 @@
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
if (shared <= 0)
- {
+ {
/* Try L2 otherwise. */
- level = 2;
- shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
+ level = 2;
+ shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
}
unsigned int ebx_1;
@@ -540,7 +540,7 @@
#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
/* Intel prefers SSSE3 instructions for memory/string routines
- if they are avaiable. */
+ if they are available. */
if ((ecx & 0x200))
__x86_64_preferred_memory_instruction = 3;
else
@@ -550,7 +550,7 @@
/* Figure out the number of logical threads that share the
highest cache level. */
if (max_cpuid >= 4)
- {
+ {
int i = 0;
/* Query until desired cache level is enumerated. */
@@ -565,7 +565,7 @@
if ((eax & 0x1f) == 0)
goto intel_bug_no_cache_info;
}
- while (((eax >> 5) & 0x7) != level);
+ while (((eax >> 5) & 0x7) != level);
threads = (eax >> 14) & 0x3ff;
@@ -602,7 +602,7 @@
threads += 1;
}
else
- {
+ {
intel_bug_no_cache_info:
/* Assume that all logical threads share the highest cache level. */
@@ -612,7 +612,7 @@
/* Cap usage of highest cache level to the number of supported
threads. */
if (shared > 0 && threads > 0)
- shared /= threads;
+ shared /= threads;
}
/* This spells out "AuthenticAMD". */
else if (is_amd)
@@ -620,6 +620,25 @@
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+
+#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
+# ifdef USE_MULTIARCH
+ eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
+ ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
+ ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+ edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
+# else
+ __cpuid (1, eax, ebx, ecx, edx);
+# endif
+
+ /* AMD prefers SSSE3 instructions for memory/string routines
+ if they are available, otherwise it prefers integer
+ instructions. */
+ if ((ecx & 0x200))
+ __x86_64_preferred_memory_instruction = 3;
+ else
+ __x86_64_preferred_memory_instruction = 0;
+#endif
/* Get maximum extended function. */
__cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
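The block just added to init_cacheinfo tests bit 9 (mask 0x200) of
CPUID leaf 1's ECX, which is the SSSE3 feature flag. A standalone
sketch of the same test, using GCC's <cpuid.h> helper in place of
glibc's internal __cpuid macro:

    #include <cpuid.h>   /* GCC helper; glibc uses its own __cpuid macro */
    #include <stdio.h>

    int main (void)
    {
      unsigned int eax, ebx, ecx, edx;

      /* Leaf 1: processor info and feature bits.  */
      if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
        return 1;

      /* ECX bit 9 (0x200) is SSSE3, the condition under which the
         hunk above sets __x86_64_preferred_memory_instruction to 3.  */
      printf ("SSSE3: %s\n", (ecx & 0x200) ? "yes" : "no");
      return 0;
    }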
Modified: fsf/trunk/libc/sysdeps/x86_64/memset.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/memset.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/memset.S Sun Mar 6 00:03:02 2011
@@ -1,6 +1,6 @@
/* memset/bzero -- set memory area to CH/0
Optimized version for x86-64.
- Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 2002-2005, 2007, 2008, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,7 @@
#define __STOS_LOWER_BOUNDARY $8192
#define __STOS_UPPER_BOUNDARY $65536
- .text
+ .text
#if !defined NOT_IN_libc && !defined USE_MULTIARCH
ENTRY(__bzero)
mov %rsi,%rdx /* Adjust parameter. */
@@ -417,7 +417,7 @@
retq
.balign 16
-#if defined(USE_EXTRA_TABLE)
+#ifdef USE_EXTRA_TABLE
L(P5QI): mov %rdx,-0x95(%rdi)
#endif
L(P5QH): mov %rdx,-0x8d(%rdi)
@@ -596,6 +596,8 @@
jmp L(aligned_now)
L(SSE_pre):
+#else
+L(aligned_now):
#endif
#if !defined USE_MULTIARCH || defined USE_SSE2
# fill RegXMM0 with the pattern
@@ -606,16 +608,16 @@
jge L(byte32sse2_pre)
add %r8,%rdi
-#ifndef PIC
+# ifndef PIC
lea L(SSExDx)(%rip),%r9
jmpq *(%r9,%r8,8)
-#else
+# else
lea L(SSE0Q0)(%rip),%r9
lea L(SSExDx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
lea (%rcx,%r9,1),%r9
jmpq *%r9
-#endif
+# endif
L(SSE0QB): movdqa %xmm0,-0xb0(%rdi)
L(SSE0QA): movdqa %xmm0,-0xa0(%rdi)
@@ -881,16 +883,16 @@
lea 0x80(%rdi),%rdi
jge L(byte32sse2)
add %r8,%rdi
-#ifndef PIC
+# ifndef PIC
lea L(SSExDx)(%rip),%r11
jmpq *(%r11,%r8,8)
-#else
+# else
lea L(SSE0Q0)(%rip),%r11
lea L(SSExDx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
lea (%rcx,%r11,1),%r11
jmpq *%r11
-#endif
+# endif
.balign 16
L(sse2_nt_move_pre):
@@ -916,20 +918,20 @@
jge L(sse2_nt_move)
sfence
add %r8,%rdi
-#ifndef PIC
+# ifndef PIC
lea L(SSExDx)(%rip),%r11
jmpq *(%r11,%r8,8)
-#else
+# else
lea L(SSE0Q0)(%rip),%r11
lea L(SSExDx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
lea (%rcx,%r11,1),%r11
jmpq *%r11
-#endif
+# endif
.pushsection .rodata
.balign 16
-#ifndef PIC
+# ifndef PIC
L(SSExDx):
.quad L(SSE0Q0), L(SSE1Q0), L(SSE2Q0), L(SSE3Q0)
.quad L(SSE4Q0), L(SSE5Q0), L(SSE6Q0), L(SSE7Q0)
@@ -979,7 +981,7 @@
.quad L(SSE4QB), L(SSE5QB), L(SSE6QB), L(SSE7QB)
.quad L(SSE8QB), L(SSE9QB), L(SSE10QB), L(SSE11QB)
.quad L(SSE12QB), L(SSE13QB), L(SSE14QB), L(SSE15QB)
-#else
+# else
L(SSExDx):
.short L(SSE0Q0) -L(SSE0Q0)
.short L(SSE1Q0) -L(SSE0Q0)
@@ -1196,14 +1198,14 @@
.short L(SSE13QB)-L(SSE0Q0)
.short L(SSE14QB)-L(SSE0Q0)
.short L(SSE15QB)-L(SSE0Q0)
-#endif
+# endif
.popsection
#endif /* !defined USE_MULTIARCH || defined USE_SSE2 */
.balign 16
+#ifndef USE_MULTIARCH
L(aligned_now):
-#ifndef USE_MULTIARCH
cmpl $0x1,__x86_64_preferred_memory_instruction(%rip)
jg L(SSE_pre)
#endif /* USE_MULTIARCH */
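Most of the # ifndef PIC changes above only re-indent existing
directives, but the two branches they guard differ in kind: the
non-PIC build jumps through a table of absolute 8-byte pointers,
while the PIC build stores 16-bit offsets relative to a base label
(L(SSE0Q0)) and adds them back at run time, so the table needs no
load-time relocations. A rough C analogue using GCC's computed-goto
extension (a sketch of the idea, not the real dispatch):

    #include <stdio.h>

    int main (void)
    {
      /* Mirror of the movswq/lea/jmpq sequence: small offsets
         relative to a base label, added back before the indirect
         jump.  Requires GCC's computed-goto extension.  */
      void *base = &&h0;
      short offsets[3];
      offsets[0] = (short) (&&h0 - &&h0);
      offsets[1] = (short) (&&h1 - &&h0);
      offsets[2] = (short) (&&h2 - &&h0);

      int i = 1;                /* the residue selects the entry */
      goto *(base + offsets[i]);

     h0: puts ("handler 0"); return 0;
     h1: puts ("handler 1"); return 0;
     h2: puts ("handler 2"); return 0;
    }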
@@ -1246,17 +1248,17 @@
L(8byte_move_skip):
andl $127,%r8d
- lea (%rdi,%r8,1),%rdi
+ lea (%rdi,%r8,1),%rdi
#ifndef PIC
- lea L(setPxQx)(%rip),%r11
- jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
+ lea L(setPxQx)(%rip),%r11
+ jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
#else
- lea L(Got0)(%rip),%r11
+ lea L(Got0)(%rip),%r11
lea L(setPxQx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
- lea (%rcx,%r11,1),%r11
- jmpq *%r11
+ lea (%rcx,%r11,1),%r11
+ jmpq *%r11
#endif
.balign 16
@@ -1290,16 +1292,16 @@
ja L(8byte_nt_move)
andl $7,%r8d
- lea (%rdi,%r8,1),%rdi
+ lea (%rdi,%r8,1),%rdi
#ifndef PIC
- lea L(setPxQx)(%rip),%r11
- jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
+ lea L(setPxQx)(%rip),%r11
+ jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
#else
- lea L(Got0)(%rip),%r11
+ lea L(Got0)(%rip),%r11
lea L(setPxQx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
lea (%rcx,%r11,1),%r11
- jmpq *%r11
+ jmpq *%r11
#endif
.balign 16
@@ -1338,16 +1340,16 @@
L(8byte_nt_move_skip):
andl $127,%r8d
- lea (%rdi,%r8,1),%rdi
+ lea (%rdi,%r8,1),%rdi
#ifndef PIC
- lea L(setPxQx)(%rip),%r11
- jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
+ lea L(setPxQx)(%rip),%r11
+ jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
#else
- lea L(Got0)(%rip),%r11
+ lea L(Got0)(%rip),%r11
lea L(setPxQx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
- lea (%rcx,%r11,1),%r11
- jmpq *%r11
+ lea (%rcx,%r11,1),%r11
+ jmpq *%r11
#endif
END (memset)
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/init-arch.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/init-arch.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/init-arch.c Sun Mar 6 00:03:02 2011
@@ -1,6 +1,6 @@
/* Initialize CPU feature data.
This file is part of the GNU C Library.
- Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@xxxxxxxxxx>.
The GNU C Library is free software; you can redistribute it and/or
@@ -60,7 +60,7 @@
get_common_indeces (&family, &model);
/* Intel processors prefer SSE instruction for memory/string
- routines if they are avaiable. */
+ routines if they are available. */
__cpu_features.feature[index_Prefer_SSE_for_memop]
|= bit_Prefer_SSE_for_memop;
@@ -107,6 +107,14 @@
kind = arch_kind_amd;
get_common_indeces (&family, &model);
+
+ unsigned int ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+
+ /* AMD processors prefer SSE instructions for memory/string routines
+ if they are available, otherwise they prefer integer instructions. */
+ if ((ecx & 0x200))
+ __cpu_features.feature[index_Prefer_SSE_for_memop]
+ |= bit_Prefer_SSE_for_memop;
}
else
kind = arch_kind_other;
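The init-arch.c hunk is the multiarch counterpart of the cacheinfo.c
change: when CPUID reports SSSE3, AMD processors now get
bit_Prefer_SSE_for_memop as well. A simplified model of the
feature-word bookkeeping (the index and bit values below are
placeholders, not glibc's real definitions from init-arch.h):

    #include <stdio.h>

    /* Placeholder values; the real definitions live in glibc's
       sysdeps/x86_64/multiarch/init-arch.h.  */
    #define index_Prefer_SSE_for_memop 0
    #define bit_Prefer_SSE_for_memop   (1 << 3)

    static unsigned int feature[1];

    int main (void)
    {
      unsigned int ecx = 0x200;  /* pretend CPUID.1:ECX reported SSSE3 */

      /* Same shape as the hunk above: OR the bit into the indexed
         feature word only when SSSE3 is present.  */
      if (ecx & 0x200)
        feature[index_Prefer_SSE_for_memop] |= bit_Prefer_SSE_for_memop;

      printf ("Prefer_SSE_for_memop: %d\n",
              (feature[index_Prefer_SSE_for_memop]
               & bit_Prefer_SSE_for_memop) != 0);
      return 0;
    }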