[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[commits] r13056 - in /fsf/trunk/libc: ChangeLog sysdeps/x86_64/cacheinfo.c sysdeps/x86_64/memset.S sysdeps/x86_64/multiarch/init-arch.c



Author: eglibc
Date: Sun Mar  6 00:03:02 2011
New Revision: 13056

Log:
Import glibc-mainline for 2011-03-06

Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c
    fsf/trunk/libc/sysdeps/x86_64/memset.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/init-arch.c

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Sun Mar  6 00:03:02 2011
@@ -1,3 +1,17 @@
+2011-03-02  Harsha Jagasia  <harsha.jagasia@xxxxxxx>
+	    Ulrich Drepper  <drepper@xxxxxxxxx>
+
+	* sysdeps/x86_64/memset.S: After aligning destination, code
+	branches to different locations depending on the value of
+	misalignment, when multiarch is enabled. Fix this.
+
+2011-03-02  Harsha Jagasia  <harsha.jagasia@xxxxxxx>
+
+	* sysdeps/x86_64/cacheinfo.c (init_cacheinfo):
+	Set __x86_64_preferred_memory_instruction for AMD processors.
+	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
+	Set bit_Prefer_SSE_for_memop for AMD processors.
+
 2011-03-04  Ulrich Drepper  <drepper@xxxxxxxxx>
 
 	* libio/fmemopen.c (fmemopen): Optimize a bit.
@@ -12,7 +26,7 @@
 
 2011-02-28  Aurelien Jarno  <aurelien@xxxxxxxxxxx>
 
-	* sysdeps/sparc/sparc64/multiarch/memset.S(__bzero): call
+	* sysdeps/sparc/sparc64/multiarch/memset.S(__bzero): Call
 	__bzero_ultra1 instead of __memset_ultra1.
 
 2011-02-23  Andreas Schwab  <schwab@xxxxxxxxxx>

Modified: fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/cacheinfo.c Sun Mar  6 00:03:02 2011
@@ -1,5 +1,5 @@
 /* x86_64 cache info.
-   Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
+   Copyright (C) 2003,2004,2006,2007,2009,2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -352,11 +352,11 @@
 
     case _SC_LEVEL2_CACHE_ASSOC:
       switch ((ecx >> 12) & 0xf)
-        {
-        case 0:
-        case 1:
-        case 2:
-        case 4:
+	{
+	case 0:
+	case 1:
+	case 2:
+	case 4:
 	  return (ecx >> 12) & 0xf;
 	case 6:
 	  return 8;
@@ -376,7 +376,7 @@
 	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
 	default:
 	  return 0;
-        }
+	}
       /* NOTREACHED */
 
     case _SC_LEVEL2_CACHE_LINESIZE:
@@ -521,10 +521,10 @@
       shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
 
       if (shared <= 0)
-        {
+	{
 	  /* Try L2 otherwise.  */
-          level  = 2;
-          shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
+	  level  = 2;
+	  shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
 	}
 
       unsigned int ebx_1;
@@ -540,7 +540,7 @@
 
 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
       /* Intel prefers SSSE3 instructions for memory/string routines
-	 if they are avaiable.  */
+	 if they are available.  */
       if ((ecx & 0x200))
 	__x86_64_preferred_memory_instruction = 3;
       else
@@ -550,7 +550,7 @@
       /* Figure out the number of logical threads that share the
 	 highest cache level.  */
       if (max_cpuid >= 4)
-        {
+	{
 	  int i = 0;
 
 	  /* Query until desired cache level is enumerated.  */
@@ -565,7 +565,7 @@
 	      if ((eax & 0x1f) == 0)
 		goto intel_bug_no_cache_info;
 	    }
-          while (((eax >> 5) & 0x7) != level);
+	  while (((eax >> 5) & 0x7) != level);
 
 	  threads = (eax >> 14) & 0x3ff;
 
@@ -602,7 +602,7 @@
 	  threads += 1;
 	}
       else
-        {
+	{
 	intel_bug_no_cache_info:
 	  /* Assume that all logical threads share the highest cache level.  */
 
@@ -612,7 +612,7 @@
       /* Cap usage of highest cache level to the number of supported
 	 threads.  */
       if (shared > 0 && threads > 0)
-        shared /= threads;
+	shared /= threads;
     }
   /* This spells out "AuthenticAMD".  */
   else if (is_amd)
@@ -620,6 +620,25 @@
       data   = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
       long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
       shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+
+#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
+# ifdef USE_MULTIARCH
+      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
+      ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
+      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
+# else
+      __cpuid (1, eax, ebx, ecx, edx);
+# endif
+
+      /* AMD prefers SSSE3 instructions for memory/string routines
+	 if they are available, otherwise it prefers integer
+	 instructions.  */
+      if ((ecx & 0x200))
+	__x86_64_preferred_memory_instruction = 3;
+      else
+	__x86_64_preferred_memory_instruction = 0;
+#endif
 
       /* Get maximum extended function. */
       __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);

Modified: fsf/trunk/libc/sysdeps/x86_64/memset.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/memset.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/memset.S Sun Mar  6 00:03:02 2011
@@ -1,6 +1,6 @@
 /* memset/bzero -- set memory area to CH/0
    Optimized version for x86-64.
-   Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc.
+   Copyright (C) 2002-2005, 2007, 2008, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,7 @@
 #define __STOS_LOWER_BOUNDARY	$8192
 #define __STOS_UPPER_BOUNDARY	$65536
 
-        .text
+	.text
 #if !defined NOT_IN_libc && !defined USE_MULTIARCH
 ENTRY(__bzero)
 	mov	%rsi,%rdx	/* Adjust parameter.  */
@@ -417,7 +417,7 @@
 		retq
 
 	.balign     16
-#if defined(USE_EXTRA_TABLE)
+#ifdef USE_EXTRA_TABLE
 L(P5QI): mov    %rdx,-0x95(%rdi)
 #endif
 L(P5QH): mov    %rdx,-0x8d(%rdi)
@@ -596,6 +596,8 @@
 	jmp    L(aligned_now)
 
 L(SSE_pre):
+#else
+L(aligned_now):
 #endif
 #if !defined USE_MULTIARCH || defined USE_SSE2
 	 # fill RegXMM0 with the pattern
@@ -606,16 +608,16 @@
 	 jge    L(byte32sse2_pre)
 
 	 add    %r8,%rdi
-#ifndef PIC
+# ifndef PIC
 	 lea    L(SSExDx)(%rip),%r9
 	 jmpq   *(%r9,%r8,8)
-#else
+# else
 	 lea    L(SSE0Q0)(%rip),%r9
 	 lea    L(SSExDx)(%rip),%rcx
 	 movswq (%rcx,%r8,2),%rcx
 	 lea    (%rcx,%r9,1),%r9
 	 jmpq   *%r9
-#endif
+# endif
 
 L(SSE0QB):  movdqa %xmm0,-0xb0(%rdi)
 L(SSE0QA):  movdqa %xmm0,-0xa0(%rdi)
@@ -881,16 +883,16 @@
 	lea    0x80(%rdi),%rdi
 	jge    L(byte32sse2)
 	add    %r8,%rdi
-#ifndef PIC
+# ifndef PIC
 	lea    L(SSExDx)(%rip),%r11
 	jmpq   *(%r11,%r8,8)
-#else
+# else
 	lea    L(SSE0Q0)(%rip),%r11
 	lea    L(SSExDx)(%rip),%rcx
 	movswq (%rcx,%r8,2),%rcx
 	lea    (%rcx,%r11,1),%r11
 	jmpq   *%r11
-#endif
+# endif
 
 	.balign     16
 L(sse2_nt_move_pre):
@@ -916,20 +918,20 @@
 	jge    L(sse2_nt_move)
 	sfence
 	add    %r8,%rdi
-#ifndef PIC
+# ifndef PIC
 	lea    L(SSExDx)(%rip),%r11
 	jmpq   *(%r11,%r8,8)
-#else
+# else
 	lea    L(SSE0Q0)(%rip),%r11
 	lea    L(SSExDx)(%rip),%rcx
 	movswq (%rcx,%r8,2),%rcx
 	lea   (%rcx,%r11,1),%r11
 	jmpq   *%r11
-#endif
+# endif
 
 	.pushsection .rodata
 	.balign     16
-#ifndef PIC
+# ifndef PIC
 L(SSExDx):
 	.quad       L(SSE0Q0), L(SSE1Q0), L(SSE2Q0), L(SSE3Q0)
 	.quad       L(SSE4Q0), L(SSE5Q0), L(SSE6Q0), L(SSE7Q0)
@@ -979,7 +981,7 @@
 	.quad       L(SSE4QB), L(SSE5QB), L(SSE6QB), L(SSE7QB)
 	.quad       L(SSE8QB), L(SSE9QB), L(SSE10QB), L(SSE11QB)
 	.quad       L(SSE12QB), L(SSE13QB), L(SSE14QB), L(SSE15QB)
-#else
+# else
 L(SSExDx):
 	.short     L(SSE0Q0) -L(SSE0Q0)
 	.short     L(SSE1Q0) -L(SSE0Q0)
@@ -1196,14 +1198,14 @@
 	.short     L(SSE13QB)-L(SSE0Q0)
 	.short     L(SSE14QB)-L(SSE0Q0)
 	.short     L(SSE15QB)-L(SSE0Q0)
-#endif
+# endif
 	.popsection
 #endif /* !defined USE_MULTIARCH || defined USE_SSE2  */
 
 	.balign     16
+#ifndef USE_MULTIARCH
 L(aligned_now):
 
-#ifndef USE_MULTIARCH
 	 cmpl   $0x1,__x86_64_preferred_memory_instruction(%rip)
 	 jg     L(SSE_pre)
 #endif /* USE_MULTIARCH */
@@ -1246,17 +1248,17 @@
 
 L(8byte_move_skip):
 	andl	$127,%r8d
-	lea    	(%rdi,%r8,1),%rdi
+	lea	(%rdi,%r8,1),%rdi
 
 #ifndef PIC
-	lea    	L(setPxQx)(%rip),%r11
-	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
+	lea	L(setPxQx)(%rip),%r11
+	jmpq	*(%r11,%r8,8) # old scheme remained for nonPIC
 #else
-	lea    	L(Got0)(%rip),%r11
+	lea	L(Got0)(%rip),%r11
 	lea	L(setPxQx)(%rip),%rcx
 	movswq	(%rcx,%r8,2),%rcx
-	lea    	(%rcx,%r11,1),%r11
-	jmpq   	*%r11
+	lea	(%rcx,%r11,1),%r11
+	jmpq	*%r11
 #endif
 
 	.balign     16
@@ -1290,16 +1292,16 @@
 	ja	L(8byte_nt_move)
 
 	andl	$7,%r8d
-	lea    	(%rdi,%r8,1),%rdi
+	lea	(%rdi,%r8,1),%rdi
 #ifndef PIC
-	lea    	L(setPxQx)(%rip),%r11
-	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
+	lea	L(setPxQx)(%rip),%r11
+	jmpq	*(%r11,%r8,8) # old scheme remained for nonPIC
 #else
-	lea    	L(Got0)(%rip),%r11
+	lea	L(Got0)(%rip),%r11
 	lea     L(setPxQx)(%rip),%rcx
 	movswq	(%rcx,%r8,2),%rcx
 	lea	(%rcx,%r11,1),%r11
-	jmpq   	*%r11
+	jmpq	*%r11
 #endif
 
 	.balign     16
@@ -1338,16 +1340,16 @@
 L(8byte_nt_move_skip):
 	andl	$127,%r8d
 
-	lea    	(%rdi,%r8,1),%rdi
+	lea	(%rdi,%r8,1),%rdi
 #ifndef PIC
-	lea    	L(setPxQx)(%rip),%r11
-	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
+	lea	L(setPxQx)(%rip),%r11
+	jmpq	*(%r11,%r8,8) # old scheme remained for nonPIC
 #else
-	lea    	L(Got0)(%rip),%r11
+	lea	L(Got0)(%rip),%r11
 	lea     L(setPxQx)(%rip),%rcx
 	movswq	(%rcx,%r8,2),%rcx
-	lea    	(%rcx,%r11,1),%r11
-	jmpq   	*%r11
+	lea	(%rcx,%r11,1),%r11
+	jmpq	*%r11
 #endif
 
 END (memset)

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/init-arch.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/init-arch.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/init-arch.c Sun Mar  6 00:03:02 2011
@@ -1,6 +1,6 @@
 /* Initialize CPU feature data.
    This file is part of the GNU C Library.
-   Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@xxxxxxxxxx>.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -60,7 +60,7 @@
       get_common_indeces (&family, &model);
 
       /* Intel processors prefer SSE instruction for memory/string
-	 routines if they are avaiable.  */
+	 routines if they are available.  */
       __cpu_features.feature[index_Prefer_SSE_for_memop]
 	|= bit_Prefer_SSE_for_memop;
 
@@ -107,6 +107,14 @@
       kind = arch_kind_amd;
 
       get_common_indeces (&family, &model);
+
+      unsigned int ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+
+      /* AMD processors prefer SSE instructions for memory/string routines
+	 if they are available, otherwise they prefer integer instructions.  */
+      if ((ecx & 0x200))
+	__cpu_features.feature[index_Prefer_SSE_for_memop]
+	  |= bit_Prefer_SSE_for_memop;
     }
   else
     kind = arch_kind_other;