[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[commits] r8732 - in /trunk/libc: ./ csu/ elf/ include/ math/ nptl/ nptl/sysdeps/x86_64/ stdio-common/ sysdeps/x86_64/ sysdeps/x86_64/...
- To: commits@xxxxxxxxxx
- Subject: [commits] r8732 - in /trunk/libc: ./ csu/ elf/ include/ math/ nptl/ nptl/sysdeps/x86_64/ stdio-common/ sysdeps/x86_64/ sysdeps/x86_64/...
- From: joseph@xxxxxxxxxx
- Date: Thu, 30 Jul 2009 22:27:49 -0000
Author: joseph
Date: Thu Jul 30 15:27:48 2009
New Revision: 8732
Log:
Merge changes between r8721 and r8731 from /fsf/trunk.
Added:
trunk/libc/sysdeps/x86_64/multiarch/Versions
- copied unchanged from r8731, fsf/trunk/libc/sysdeps/x86_64/multiarch/Versions
trunk/libc/sysdeps/x86_64/multiarch/s_fma.c
- copied unchanged from r8731, fsf/trunk/libc/sysdeps/x86_64/multiarch/s_fma.c
trunk/libc/sysdeps/x86_64/multiarch/s_fmaf.c
- copied unchanged from r8731, fsf/trunk/libc/sysdeps/x86_64/multiarch/s_fmaf.c
Modified:
trunk/libc/ChangeLog
trunk/libc/csu/libc-tls.c
trunk/libc/elf/dl-lookup.c
trunk/libc/elf/dl-reloc.c
trunk/libc/elf/dl-runtime.c
trunk/libc/include/libc-symbols.h
trunk/libc/math/s_fma.c
trunk/libc/math/s_fmaf.c
trunk/libc/nptl/ChangeLog
trunk/libc/nptl/sysdeps/x86_64/tcb-offsets.sym
trunk/libc/nptl/sysdeps/x86_64/tls.h
trunk/libc/stdio-common/scanf15.c
trunk/libc/stdio-common/scanf17.c
trunk/libc/sysdeps/x86_64/dl-trampoline.S
trunk/libc/sysdeps/x86_64/multiarch/init-arch.c
trunk/libc/sysdeps/x86_64/multiarch/init-arch.h
trunk/libc/sysdeps/x86_64/tst-xmmymm.sh
Modified: trunk/libc/ChangeLog
==============================================================================
--- trunk/libc/ChangeLog (original)
+++ trunk/libc/ChangeLog Thu Jul 30 15:27:48 2009
@@ -1,3 +1,49 @@
+2009-07-29 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * math/s_fma.c: Don't define alias if __fma is a macro.
+ * math/s_fmaf.c: Likewise.
+ * sysdeps/x86_64/multiarch/s_fma.c: New file.
+ * sysdeps/x86_64/multiarch/s_fmaf.c: New file.
+ Partially based on a patch by H.J. Lu <hongjiu.lu@xxxxxxxxx>.
+
+ * sysdeps/x86_64/multiarch/init-arch.h (__get_cpu_features): Declare.
+ (HAS_POPCOUNT, HAS_SSE4_2): Add variants which work outside libc.
+ New macro HAS_FMA.
+ * sysdeps/x86_64/multiarch/init-arch.c (__get_cpu_features): New
+ function.
+ * include/libc-symbols.h (libm_ifunc): Define.
+ * sysdeps/x86_64/multiarch/Versions: New file.
+
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Improve CFI.
+
+2009-07-28 H.J. Lu <hongjiu.lu@xxxxxxxxx>
+
+ * sysdeps/x86_64/dl-trampoline.S: Properly restore AVX registers.
+
+2009-07-29 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * elf/dl-runtime.c (_dl_fixup): Indicate before _dl_lookup_symbol_x
+ call that registers used in calling conventions need to be preserved.
+ * elf/dl-lookup.c (do_lookup_x): Use RTLD_*_FOREIGN_CALL macros
+ to preserve register content if necessary.
+ * sysdeps/x86_64/dl-trampoline.S (_dl_x86_64_save_sse): New function.
+ (_dl_x86_64_restore_sse): New function.
+ * sysdeps/x86_64/tst-xmmymm.sh: There is now one more function that
+ is allowed to modify xmm/ymm registers.
+
+ * stdio-common/scanf15.c: Undefine _LIBC. We want to test from an
+ application's perspective.
+ * stdio-common/scanf17.c: Likewise.
+
+2009-07-28 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * csu/libc-tls.c (__libc_setup_tls) [TLS_TCB_AT_TP]: Don't add TCB
+ size to memsz.
+ (init_static_tls) [TLS_TCB_AT_TP]: Add it to GL(dl_tls_static_size)
+ here.
+ * elf/dl-reloc.c (_dl_try_allocate_static_tls): Compute freebytes in
+ two steps to catch bugs.
+
2009-07-27 Ulrich Drepper <drepper@xxxxxxxxxx>
* sysdeps/x86_64/tst-xmmymm.sh: Refine testing. The script now
Modified: trunk/libc/csu/libc-tls.c
==============================================================================
--- trunk/libc/csu/libc-tls.c (original)
+++ trunk/libc/csu/libc-tls.c Thu Jul 30 15:27:48 2009
@@ -1,5 +1,5 @@
/* Initialization code for TLS in statically linked application.
- Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2002-2006, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -99,6 +99,9 @@
surplus that permits dynamic loading of modules with IE-model TLS. */
GL(dl_tls_static_size) = roundup (memsz + GL(dl_tls_static_size),
TLS_TCB_ALIGN);
+#if TLS_TCB_AT_TP
+ GL(dl_tls_static_size) += TLS_TCB_SIZE;
+#endif
GL(dl_tls_static_used) = memsz;
/* The alignment requirement for the static TLS block. */
GL(dl_tls_static_align) = align;
@@ -211,9 +214,7 @@
memsz = roundup (memsz, align ?: 1);
-#if TLS_TCB_AT_TP
- memsz += tcbsize;
-#elif TLS_DTV_AT_TP
+#if TLS_DTV_AT_TP
memsz += tcb_offset;
#endif
Modified: trunk/libc/elf/dl-lookup.c
==============================================================================
--- trunk/libc/elf/dl-lookup.c (original)
+++ trunk/libc/elf/dl-lookup.c Thu Jul 30 15:27:48 2009
@@ -380,6 +380,10 @@
if (size * 3 <= tab->n_elements * 4)
{
/* Expand the table. */
+#ifdef RTLD_CHECK_FOREIGN_CALL
+ /* This must not happen during runtime relocations. */
+ assert (!RTLD_CHECK_FOREIGN_CALL);
+#endif
size_t newsize = _dl_higher_prime_number (size + 1);
struct unique_sym *newentries
= calloc (sizeof (struct unique_sym), newsize);
@@ -405,6 +409,11 @@
}
else
{
+#ifdef RTLD_CHECK_FOREIGN_CALL
+ /* This must not happen during runtime relocations. */
+ assert (!RTLD_CHECK_FOREIGN_CALL);
+#endif
+
#define INITIAL_NUNIQUE_SYM_TABLE 31
size = INITIAL_NUNIQUE_SYM_TABLE;
entries = calloc (sizeof (struct unique_sym), size);
@@ -599,6 +608,10 @@
struct link_map_reldeps *newp;
unsigned int max
= undef_map->l_reldepsmax ? undef_map->l_reldepsmax * 2 : 10;
+
+#ifdef RTLD_PREPARE_FOREIGN_CALL
+ RTLD_PREPARE_FOREIGN_CALL;
+#endif
newp = malloc (sizeof (*newp) + max * sizeof (struct link_map *));
if (newp == NULL)
Modified: trunk/libc/elf/dl-reloc.c
==============================================================================
--- trunk/libc/elf/dl-reloc.c (original)
+++ trunk/libc/elf/dl-reloc.c Thu Jul 30 15:27:48 2009
@@ -61,7 +61,10 @@
size_t n;
size_t blsize;
- freebytes = GL(dl_tls_static_size) - GL(dl_tls_static_used) - TLS_TCB_SIZE;
+ freebytes = GL(dl_tls_static_size) - GL(dl_tls_static_used);
+ if (freebytes < TLS_TCB_SIZE)
+ goto fail;
+ freebytes -= TLS_TCB_SIZE;
blsize = map->l_tls_blocksize + map->l_tls_firstbyte_offset;
if (freebytes < blsize)
Modified: trunk/libc/elf/dl-runtime.c
==============================================================================
--- trunk/libc/elf/dl-runtime.c (original)
+++ trunk/libc/elf/dl-runtime.c Thu Jul 30 15:27:48 2009
@@ -111,12 +111,20 @@
flags |= DL_LOOKUP_GSCOPE_LOCK;
}
+#ifdef RTLD_ENABLE_FOREIGN_CALL
+ RTLD_ENABLE_FOREIGN_CALL;
+#endif
+
result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, l->l_scope,
version, ELF_RTYPE_CLASS_PLT, flags, NULL);
/* We are done with the global scope. */
if (!RTLD_SINGLE_THREAD_P)
THREAD_GSCOPE_RESET_FLAG ();
+
+#ifdef RTLD_FINALIZE_FOREIGN_CALL
+ RTLD_FINALIZE_FOREIGN_CALL;
+#endif
/* Currently result contains the base load address (or link map)
of the object that defines sym. Now add in the symbol
Modified: trunk/libc/include/libc-symbols.h
==============================================================================
--- trunk/libc/include/libc-symbols.h (original)
+++ trunk/libc/include/libc-symbols.h Thu Jul 30 15:27:48 2009
@@ -1,6 +1,6 @@
/* Support macros for making weak and strong aliases for symbols,
and for using symbol sets and linker warnings with GNU ld.
- Copyright (C) 1995-1998, 2000-2006, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1995-1998,2000-2006,2008,2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -866,6 +866,17 @@
} \
__asm__ (".type " #name ", %gnu_indirect_function");
+/* The body of the function is supposed to use __get_cpu_features
+ which will, if necessary, initialize the data first. */
+#define libm_ifunc(name, expr) \
+ extern void *name##_ifunc (void) __asm__ (#name); \
+ void *name##_ifunc (void) \
+ { \
+ __typeof (name) *res = expr; \
+ return res; \
+ } \
+ __asm__ (".type " #name ", %gnu_indirect_function");
+
#ifdef HAVE_ASM_SET_DIRECTIVE
# define libc_ifunc_hidden_def1(local, name) \
__asm__ (declare_symbol_alias_1_stringify (ASM_GLOBAL_DIRECTIVE) \
Modified: trunk/libc/math/s_fma.c
==============================================================================
--- trunk/libc/math/s_fma.c (original)
+++ trunk/libc/math/s_fma.c Thu Jul 30 15:27:48 2009
@@ -1,5 +1,5 @@
/* Compute x * y + z as ternary operation.
- Copyright (C) 1997, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1997, 2001, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@xxxxxxxxxx>, 1997.
@@ -25,7 +25,9 @@
{
return (x * y) + z;
}
+#ifndef __fma
weak_alias (__fma, fma)
+#endif
#ifdef NO_LONG_DOUBLE
strong_alias (__fma, __fmal)
Modified: trunk/libc/math/s_fmaf.c
==============================================================================
--- trunk/libc/math/s_fmaf.c (original)
+++ trunk/libc/math/s_fmaf.c Thu Jul 30 15:27:48 2009
@@ -1,5 +1,5 @@
/* Compute x * y + z as ternary operation.
- Copyright (C) 1997 Free Software Foundation, Inc.
+ Copyright (C) 1997, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@xxxxxxxxxx>, 1997.
@@ -25,4 +25,6 @@
{
return (x * y) + z;
}
+#ifndef __fmaf
weak_alias (__fmaf, fmaf)
+#endif
Modified: trunk/libc/nptl/ChangeLog
==============================================================================
--- trunk/libc/nptl/ChangeLog (original)
+++ trunk/libc/nptl/ChangeLog Thu Jul 30 15:27:48 2009
@@ -1,3 +1,15 @@
+2009-07-29 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * sysdeps/x86_64/tls.h (TLS_TCB_ALIGN): Define explicitly to 32.
+
+ * sysdeps/x86_64/tls.h (tcbhead_t): Add room for SSE registers the
+ dynamic linker might have to save.
+ Define RTLD_CHECK_FOREIGN_CALL, RTLD_ENABLE_FOREIGN_CALL,
+ RTLD_PREPARE_FOREIGN_CALL, and RTLD_FINALIZE_FOREIGN_CALL. Pretty
+ printing.
+
+ * sysdeps/x86_64/tcb-offsets.sym: Add RTLD_SAVESPACE_SSE.
+
2009-07-28 Ulrich Drepper <drepper@xxxxxxxxxx>
* pthread_mutex_lock.c [NO_INCR] (__pthread_mutex_cond_lock_adjust):
Modified: trunk/libc/nptl/sysdeps/x86_64/tcb-offsets.sym
==============================================================================
--- trunk/libc/nptl/sysdeps/x86_64/tcb-offsets.sym (original)
+++ trunk/libc/nptl/sysdeps/x86_64/tcb-offsets.sym Thu Jul 30 15:27:48 2009
@@ -15,3 +15,4 @@
#ifndef __ASSUME_PRIVATE_FUTEX
PRIVATE_FUTEX offsetof (tcbhead_t, private_futex)
#endif
+RTLD_SAVESPACE_SSE offsetof (tcbhead_t, rtld_savespace_sse)
Modified: trunk/libc/nptl/sysdeps/x86_64/tls.h
==============================================================================
--- trunk/libc/nptl/sysdeps/x86_64/tls.h (original)
+++ trunk/libc/nptl/sysdeps/x86_64/tls.h Thu Jul 30 15:27:48 2009
@@ -29,6 +29,7 @@
# include <sysdep.h>
# include <kernel-features.h>
# include <bits/wordsize.h>
+# include <xmmintrin.h>
/* Type for the dtv. */
@@ -55,16 +56,23 @@
uintptr_t stack_guard;
uintptr_t pointer_guard;
unsigned long int vgetcpu_cache[2];
-#ifndef __ASSUME_PRIVATE_FUTEX
+# ifndef __ASSUME_PRIVATE_FUTEX
int private_futex;
-#else
+# else
int __unused1;
-#endif
-#if __WORDSIZE == 64
- int __pad1;
-#endif
+# endif
+# if __WORDSIZE == 64
+ int rtld_must_xmm_save;
+# endif
/* Reservation of some values for the TM ABI. */
void *__private_tm[5];
+# if __WORDSIZE == 64
+ long int __unused2;
+ /* Have space for the post-AVX register size. */
+ __m128 rtld_savespace_sse[8][4];
+
+ void *__padding[8];
+# endif
} tcbhead_t;
#else /* __ASSEMBLER__ */
@@ -109,7 +117,12 @@
# define TLS_TCB_SIZE sizeof (struct pthread)
/* Alignment requirements for the TCB. */
-# define TLS_TCB_ALIGN __alignof__ (struct pthread)
+//# define TLS_TCB_ALIGN __alignof__ (struct pthread)
+// Normally the above would be correct. But we have to store post-AVX
+// vector registers in the TCB and we want the storage to be aligned.
+// Unfortunately there isn't yet a type for these values and hence no
+// 32-byte alignment requirement. Make this explicit, for now.
+# define TLS_TCB_ALIGN 32
/* The TCB can have any size and the memory following the address the
thread pointer points to is unspecified. Allocate the TCB there. */
@@ -298,7 +311,7 @@
/* Atomic compare and exchange on TLS, returning old value. */
-#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
+# define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
({ __typeof (descr->member) __ret; \
__typeof (oldval) __old = (oldval); \
if (sizeof (descr->member) == 4) \
@@ -313,7 +326,7 @@
/* Atomic logical and. */
-#define THREAD_ATOMIC_AND(descr, member, val) \
+# define THREAD_ATOMIC_AND(descr, member, val) \
(void) ({ if (sizeof ((descr)->member) == 4) \
asm volatile (LOCK_PREFIX "andl %1, %%fs:%P0" \
:: "i" (offsetof (struct pthread, member)), \
@@ -324,7 +337,7 @@
/* Atomic set bit. */
-#define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
+# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
(void) ({ if (sizeof ((descr)->member) == 4) \
asm volatile (LOCK_PREFIX "orl %1, %%fs:%P0" \
:: "i" (offsetof (struct pthread, member)), \
@@ -334,7 +347,7 @@
abort (); })
-#define CALL_THREAD_FCT(descr) \
+# define CALL_THREAD_FCT(descr) \
({ void *__res; \
asm volatile ("movq %%fs:%P2, %%rdi\n\t" \
"callq *%%fs:%P1" \
@@ -355,18 +368,18 @@
/* Set the pointer guard field in the TCB head. */
-#define THREAD_SET_POINTER_GUARD(value) \
+# define THREAD_SET_POINTER_GUARD(value) \
THREAD_SETMEM (THREAD_SELF, header.pointer_guard, value)
-#define THREAD_COPY_POINTER_GUARD(descr) \
+# define THREAD_COPY_POINTER_GUARD(descr) \
((descr)->header.pointer_guard \
= THREAD_GETMEM (THREAD_SELF, header.pointer_guard))
/* Get and set the global scope generation counter in the TCB head. */
-#define THREAD_GSCOPE_FLAG_UNUSED 0
-#define THREAD_GSCOPE_FLAG_USED 1
-#define THREAD_GSCOPE_FLAG_WAIT 2
-#define THREAD_GSCOPE_RESET_FLAG() \
+# define THREAD_GSCOPE_FLAG_UNUSED 0
+# define THREAD_GSCOPE_FLAG_USED 1
+# define THREAD_GSCOPE_FLAG_WAIT 2
+# define THREAD_GSCOPE_RESET_FLAG() \
do \
{ int __res; \
asm volatile ("xchgl %0, %%fs:%P1" \
@@ -377,11 +390,40 @@
lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \
} \
while (0)
-#define THREAD_GSCOPE_SET_FLAG() \
+# define THREAD_GSCOPE_SET_FLAG() \
THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
-#define THREAD_GSCOPE_WAIT() \
+# define THREAD_GSCOPE_WAIT() \
GL(dl_wait_lookup_done) ()
+
+# ifdef SHARED
+/* Defined in dl-trampoline.S. */
+extern void _dl_x86_64_save_sse (void);
+extern void _dl_x86_64_restore_sse (void);
+
+# define RTLD_CHECK_FOREIGN_CALL \
+ (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0)
+
+# define RTLD_ENABLE_FOREIGN_CALL \
+ THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1)
+
+# define RTLD_PREPARE_FOREIGN_CALL \
+ do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save)) \
+ { \
+ _dl_x86_64_save_sse (); \
+ THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \
+ } \
+ while (0)
+
+# define RTLD_FINALIZE_FOREIGN_CALL \
+ do { \
+ if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0) \
+ _dl_x86_64_restore_sse (); \
+ THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \
+ } while (0)
+# endif
+
+
#endif /* __ASSEMBLER__ */
#endif /* tls.h */
Modified: trunk/libc/stdio-common/scanf15.c
==============================================================================
--- trunk/libc/stdio-common/scanf15.c (original)
+++ trunk/libc/stdio-common/scanf15.c Thu Jul 30 15:27:48 2009
@@ -1,5 +1,6 @@
#undef _GNU_SOURCE
#define _XOPEN_SOURCE 600
+#undef _LIBC
/* The following macro definitions are a hack. They word around disabling
the GNU extension while still using a few internal headers. */
#define u_char unsigned char
Modified: trunk/libc/stdio-common/scanf17.c
==============================================================================
--- trunk/libc/stdio-common/scanf17.c (original)
+++ trunk/libc/stdio-common/scanf17.c Thu Jul 30 15:27:48 2009
@@ -1,5 +1,6 @@
#undef _GNU_SOURCE
#define _XOPEN_SOURCE 600
+#undef _LIBC
/* The following macro definitions are a hack. They word around disabling
the GNU extension while still using a few internal headers. */
#define u_char unsigned char
Modified: trunk/libc/sysdeps/x86_64/dl-trampoline.S
==============================================================================
--- trunk/libc/sysdeps/x86_64/dl-trampoline.S (original)
+++ trunk/libc/sysdeps/x86_64/dl-trampoline.S Thu Jul 30 15:27:48 2009
@@ -61,6 +61,7 @@
cfi_startproc
_dl_runtime_profile:
+ cfi_adjust_cfa_offset(16) # Incorporate PLT
/* The La_x86_64_regs data structure pointed to by the
fourth paramater must be 16-byte aligned. This must
be explicitly enforced. We have the set up a dynamically
@@ -68,7 +69,7 @@
has a fixed size and preserves the original stack pointer. */
subq $32, %rsp # Allocate the local storage.
- cfi_adjust_cfa_offset(48) # Incorporate PLT
+ cfi_adjust_cfa_offset(32)
movq %rbx, (%rsp)
cfi_rel_offset(%rbx, 0)
@@ -203,49 +204,49 @@
vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
L(no_avx2):
@@ -361,13 +362,13 @@
vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
L(no_avx4):
@@ -390,3 +391,85 @@
cfi_endproc
.size _dl_runtime_profile, .-_dl_runtime_profile
#endif
+
+
+#ifdef SHARED
+ .globl _dl_x86_64_save_sse
+ .type _dl_x86_64_save_sse, @function
+ .align 16
+ cfi_startproc
+_dl_x86_64_save_sse:
+# ifdef HAVE_AVX_SUPPORT
+ cmpl $0, L(have_avx)(%rip)
+ jne 1f
+ movq %rbx, %r11 # Save rbx
+ movl $1, %eax
+ cpuid
+ movq %r11,%rbx # Restore rbx
+ movl $1, %eax
+ testl $(1 << 28), %ecx
+ jne 2f
+ negl %eax
+2: movl %eax, L(have_avx)(%rip)
+ cmpl $0, %eax
+
+1: js L(no_avx5)
+
+# define YMM_SIZE 32
+ vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
+ vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
+ vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
+ vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
+ vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
+ vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
+ vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
+ vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
+ ret
+L(no_avx5):
+# endif
+# define YMM_SIZE 16
+ movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
+ movdqa %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
+ movdqa %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
+ movdqa %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
+ movdqa %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
+ movdqa %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
+ movdqa %xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
+ movdqa %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
+ ret
+ cfi_endproc
+ .size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
+
+
+ .globl _dl_x86_64_restore_sse
+ .type _dl_x86_64_restore_sse, @function
+ .align 16
+ cfi_startproc
+_dl_x86_64_restore_sse:
+# ifdef HAVE_AVX_SUPPORT
+ cmpl $0, L(have_avx)(%rip)
+ js L(no_avx6)
+
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
+ ret
+L(no_avx6):
+# endif
+ movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
+ movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
+ movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
+ movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
+ movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
+ movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
+ movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
+ movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
+ ret
+ cfi_endproc
+ .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
+#endif
Modified: trunk/libc/sysdeps/x86_64/multiarch/init-arch.c
==============================================================================
--- trunk/libc/sysdeps/x86_64/multiarch/init-arch.c (original)
+++ trunk/libc/sysdeps/x86_64/multiarch/init-arch.c Thu Jul 30 15:27:48 2009
@@ -86,3 +86,13 @@
else
__cpu_features.kind = arch_kind_other;
}
+
+
+const struct cpu_features *
+__get_cpu_features (void)
+{
+ if (__cpu_features.kind == arch_kind_unknown)
+ __init_cpu_features ();
+
+ return &__cpu_features;
+}
Modified: trunk/libc/sysdeps/x86_64/multiarch/init-arch.h
==============================================================================
--- trunk/libc/sysdeps/x86_64/multiarch/init-arch.h (original)
+++ trunk/libc/sysdeps/x86_64/multiarch/init-arch.h Thu Jul 30 15:27:48 2009
@@ -54,10 +54,28 @@
__init_cpu_features (); \
while (0)
+/* Used from outside libc.so to get access to the CPU features structure. */
+extern const struct cpu_features *__get_cpu_features (void)
+ __attribute__ ((const));
+
/* Following are the feature tests used throughout libc. */
-#define HAS_POPCOUNT \
+#ifndef NOT_IN_libc
+# define HAS_POPCOUNT \
((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
-#define HAS_SSE4_2 \
+# define HAS_SSE4_2 \
((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 20)) != 0)
+
+# define HAS_FMA \
+ ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 12)) != 0)
+#else
+# define HAS_POPCOUNT \
+ ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
+
+# define HAS_SSE4_2 \
+ ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 20)) != 0)
+
+# define HAS_FMA \
+ ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 12)) != 0)
+#endif
Modified: trunk/libc/sysdeps/x86_64/tst-xmmymm.sh
==============================================================================
--- trunk/libc/sysdeps/x86_64/tst-xmmymm.sh (original)
+++ trunk/libc/sysdeps/x86_64/tst-xmmymm.sh Thu Jul 30 15:27:48 2009
@@ -59,10 +59,11 @@
objdump -d "$objpfx"../*/"$f" |
awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
while read fct; do
- if test "$fct" != "_dl_runtime_profile"; then
- echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
- result=1
+ if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
+ continue;
fi
+ echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
+ result=1
done
done