[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[commits] r8786 - in /fsf/trunk/libc: ./ elf/ nptl/ nptl/sysdeps/unix/sysv/linux/x86_64/ sysdeps/i386/ sysdeps/i386/i686/multiarch/ sy...
- To: commits@xxxxxxxxxx
- Subject: [commits] r8786 - in /fsf/trunk/libc: ./ elf/ nptl/ nptl/sysdeps/unix/sysv/linux/x86_64/ sysdeps/i386/ sysdeps/i386/i686/multiarch/ sy...
- From: eglibc@xxxxxxxxxx
- Date: Sun, 09 Aug 2009 07:05:22 -0000
Author: eglibc
Date: Sun Aug 9 00:05:21 2009
New Revision: 8786
Log:
Import glibc-mainline for 2009-08-09
Added:
fsf/trunk/libc/elf/tst-audit6.c
fsf/trunk/libc/elf/tst-audit7.c
fsf/trunk/libc/elf/tst-auditmod6a.c
fsf/trunk/libc/elf/tst-auditmod6b.c
fsf/trunk/libc/elf/tst-auditmod6c.c
fsf/trunk/libc/elf/tst-auditmod7a.c
fsf/trunk/libc/elf/tst-auditmod7b.c
fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.h
fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp-ssse3.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/strncmp-ssse3.S
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/NEWS
fsf/trunk/libc/elf/Makefile
fsf/trunk/libc/nptl/ChangeLog
fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
fsf/trunk/libc/sysdeps/i386/configure
fsf/trunk/libc/sysdeps/i386/configure.in
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcspn.S
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strspn.S
fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile
fsf/trunk/libc/sysdeps/x86_64/multiarch/rawmemchr.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S
fsf/trunk/libc/sysdeps/x86_64/strcmp.S
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Sun Aug 9 00:05:21 2009
@@ -1,4 +1,57 @@
+2009-08-01 H.J. Lu <hongjiu.lu@xxxxxxxxx>
+
+ * elf/Makefile (distribute): Add tst-audit6.c tst-auditmod6a.c
+ tst-auditmod6b.c tst-auditmod6c.c tst-audit7.c tst-auditmod7a.c
+ tst-auditmod7b.c.
+ (tests): Add tst-audit6 tst-audit7.
+ (modules-names): Add st-auditmod6a tst-auditmod6b tst-auditmod6c
+ tst-auditmod7a tst-auditmod7b.
+ ($(objpfx)tst-audit6): New.
+ ($(objpfx)tst-audit6.out): Likewise.
+ ($(objpfx)tst-audit7): Likewise.
+ ($(objpfx)tst-audit7.out): Likewise.
+ (tst-audit6-ENV): Likewise.
+ (tst-audit7-ENV): Likewise.
+ (CFLAGS-tst-auditmod6b.c): Likewise.
+ (CFLAGS-tst-auditmod6c.c): Likewise.
+ (CFLAGS-tst-auditmod7b.c): Likewise.
+ * elf/tst-audit6.c: New file.
+ * elf/tst-audit7.c: New file.
+ * elf/tst-auditmod6a.c: New file.
+ * elf/tst-auditmod6b.c: New file.
+ * elf/tst-auditmod6c.c: New file.
+ * elf/tst-auditmod7a.c: New file.
+ * elf/tst-auditmod7b.c: New file.
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Move
+ saving and restoring SSE/AVX registers to ...
+ * sysdeps/x86_64/dl-trampoline.h: This. New file.
+
+2009-08-07 H.J. Lu <hongjiu.lu@xxxxxxxxx>
+
+ * sysdeps/i386/i686/multiarch/strcspn.S (STRCSPN): Use PIC
+ only if SHARED is defined.
+ * sysdeps/i386/i686/multiarch/strspn.S (strspn): Likewise.
+
+2009-08-03 Jim Meyering <meyering@xxxxxxxxxx>
+
+ * sysdeps/i386/configure.in: Use AC_HEADER_CHECK.
+
+2009-08-08 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * sysdeps/x86_64/multiarch/strlen.S: Move SSE4.2 version into the same
+ section as the other functions for this architecture.
+ * sysdeps/x86_64/multiarch/rawmemchr.S: Likewise.
+
2009-08-07 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * sysdeps/x86_64/strcmp.S: Add support to compile with
+ USE_SSSE3. In this case palignr is used.
+ * sysdeps/x86_64/multiarch/strcmp.S (strcmp): If SSE4.3 is not
+ available but SSSE3 is, pick __str{,n}cmp_ssse3.
+ * sysdeps/x86_64/multiarch/Makefile [subdir=string] (sysdep_routines):
+ Add strcmp-ssse3 and strncmp-ssse3.
+ * sysdeps/x86_64/multiarch/strcmp-ssse3.S: New file.
+ * sysdeps/x86_64/multiarch/strncmp-ssse3.S: New file.
* sysdeps/x86_64/multiarch/strcspn-c.c (STRCSPN_SSE42): Avoid
warning through fake initialization.
Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Sun Aug 9 00:05:21 2009
@@ -1,4 +1,4 @@
-GNU C Library NEWS -- history of user-visible changes. 2009-7-21
+GNU C Library NEWS -- history of user-visible changes. 2009-8-8
Copyright (C) 1992-2008, 2009 Free Software Foundation, Inc.
See the end for copying conditions.
@@ -17,11 +17,19 @@
Implemented by H.J. Lu.
* New optimized string functions for x86-64: strstr, strcasestr, memcmp,
- strcspn, strpbrk, strspn, strcpy, stpcpy, strncpy, strcmp, strncmp.
+ strcspn, strpbrk, strspn, strcpy, stpcpy, strncpy, strcmp (SSE2, SSE4.2),
+ strncmp (SSE2, SSE4.2).
Contributed by H.J. Lu.
- strlen, rawmemchr.
- Implemented by Ulrich Drepper.
+ strlen, rawmemchr, strcmp (SSSE3), strncmp (SSSE3).
+ Implemented by Ulrich Drepper.
+
+* New optimized string functions for x86: strlen, strcspn, strspn, strpbrk,
+ strstr, strcasestr.
+ Contributed by H.J. Lu.
+
+* Support for fma instruction in AVX on x86-64.
+ Implemented by H.J. Lu and Ulrich Drepper.
* AVX support in x86-64 auditing support in ld.so.
Implemented by H.J. Lu.
@@ -35,6 +43,16 @@
the second request. The 'single-request-reopen' option in /etc/resolv.conf
can be used to select this mode right away, instead of rediscovering the
necessity is every process again.
+ Implemented by Ulrich Drepper.
+
+* New resolver flag RES_USE_DNSSEC to enable use of verified lookup.
+ Implemented by Adam Tkac.
+
+* Optimized iconv conversions for S390x.
+ Implemented by Andreas Krebbel.
+
+* Using condvars with PI mutexes is now more efficient due to kernel
+ support for requeueing to PI futexes. NPTL support added for x86-64.
Implemented by Ulrich Drepper.
Modified: fsf/trunk/libc/elf/Makefile
==============================================================================
--- fsf/trunk/libc/elf/Makefile (original)
+++ fsf/trunk/libc/elf/Makefile Sun Aug 9 00:05:21 2009
@@ -93,6 +93,9 @@
tst-auditmod1.c tst-auditmod3a.c tst-auditmod3b.c \
tst-auditmod4a.c tst-auditmod4b.c \
tst-audit5.c tst-auditmod5a.c tst-auditmod5b.c \
+ tst-audit6.c tst-auditmod6a.c tst-auditmod6b.c \
+ tst-auditmod6c.c \
+ tst-audit7.c tst-auditmod7a.c tst-auditmod7b.c \
order2mod1.c order2mod2.c order2mod3.c order2mod4.c \
tst-stackguard1.c tst-stackguard1-static.c \
tst-array5.c tst-array5-static.c tst-array5dep.c \
@@ -200,7 +203,7 @@
test-srcs = tst-pathopt
tests-execstack-yes = tst-execstack tst-execstack-needed tst-execstack-prog
ifeq (x86_64,$(config-machine))
-tests += tst-audit3 tst-audit4 tst-audit5
+tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7
endif
endif
ifeq (yesyes,$(have-fpie)$(build-shared))
@@ -255,7 +258,9 @@
ifeq (x86_64,$(config-machine))
modules-names += tst-auditmod3a tst-auditmod3b \
tst-auditmod4a tst-auditmod4b \
- tst-auditmod5a tst-auditmod5b
+ tst-auditmod5a tst-auditmod5b \
+ tst-auditmod6a tst-auditmod6b tst-auditmod6c \
+ tst-auditmod7a tst-auditmod7b
endif
modules-execstack-yes = tst-execstack-mod
extra-test-objs += $(addsuffix .os,$(strip $(modules-names)))
@@ -987,6 +992,15 @@
$(objpfx)tst-audit5.out: $(objpfx)tst-auditmod5b.so
tst-audit5-ENV = LD_AUDIT=$(objpfx)tst-auditmod5b.so
+$(objpfx)tst-audit6: $(objpfx)tst-auditmod6a.so
+$(objpfx)tst-audit6.out: $(objpfx)tst-auditmod6b.so \
+ $(objpfx)tst-auditmod6c.so
+tst-audit6-ENV = LD_AUDIT=$(objpfx)tst-auditmod6b.so:$(objpfx)tst-auditmod6c.so
+
+$(objpfx)tst-audit7: $(objpfx)tst-auditmod7a.so
+$(objpfx)tst-audit7.out: $(objpfx)tst-auditmod7b.so
+tst-audit7-ENV = LD_AUDIT=$(objpfx)tst-auditmod7b.so
+
$(objpfx)tst-global1: $(libdl)
$(objpfx)tst-global1.out: $(objpfx)testobj6.so $(objpfx)testobj2.so
@@ -1134,4 +1148,7 @@
CFLAGS-tst-audit4.c += -mavx
CFLAGS-tst-auditmod4a.c += -mavx
CFLAGS-tst-auditmod4b.c += -mavx
-endif
+CFLAGS-tst-auditmod6b.c += -mavx
+CFLAGS-tst-auditmod6c.c += -mavx
+CFLAGS-tst-auditmod7b.c += -mavx
+endif
Added: fsf/trunk/libc/elf/tst-audit6.c
==============================================================================
--- fsf/trunk/libc/elf/tst-audit6.c (added)
+++ fsf/trunk/libc/elf/tst-audit6.c Sun Aug 9 00:05:21 2009
@@ -1,0 +1,28 @@
+/* Test case for x86-64 preserved registers in dynamic linker. */
+
+#include <stdlib.h>
+#include <string.h>
+#include <cpuid.h>
+#include <emmintrin.h>
+
+extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i,
+ __m128i, __m128i, __m128i, __m128i);
+
+int
+main (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ /* Run AVX test only if AVX is supported. */
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ && (ecx & bit_AVX))
+ {
+ __m128i xmm = _mm_setzero_si128 ();
+ __m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm);
+
+ xmm = _mm_set1_epi32 (0x98abcdef);
+ if (memcmp (&xmm, &ret, sizeof (ret)))
+ abort ();
+ }
+ return 0;
+}
Added: fsf/trunk/libc/elf/tst-audit7.c
==============================================================================
--- fsf/trunk/libc/elf/tst-audit7.c (added)
+++ fsf/trunk/libc/elf/tst-audit7.c Sun Aug 9 00:05:21 2009
@@ -1,0 +1,1 @@
+#include "tst-audit6.c"
Added: fsf/trunk/libc/elf/tst-auditmod6a.c
==============================================================================
--- fsf/trunk/libc/elf/tst-auditmod6a.c (added)
+++ fsf/trunk/libc/elf/tst-auditmod6a.c Sun Aug 9 00:05:21 2009
@@ -1,0 +1,46 @@
+/* Test case for x86-64 preserved registers in dynamic linker. */
+
+#include <stdlib.h>
+#include <string.h>
+#include <emmintrin.h>
+
+__m128i
+audit_test (__m128i x0, __m128i x1, __m128i x2, __m128i x3,
+ __m128i x4, __m128i x5, __m128i x6, __m128i x7)
+{
+ __m128i xmm;
+
+ xmm = _mm_set1_epi32 (0x100);
+ if (memcmp (&xmm, &x0, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x101);
+ if (memcmp (&xmm, &x1, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x102);
+ if (memcmp (&xmm, &x2, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x103);
+ if (memcmp (&xmm, &x3, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x104);
+ if (memcmp (&xmm, &x4, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x105);
+ if (memcmp (&xmm, &x5, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x106);
+ if (memcmp (&xmm, &x6, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x107);
+ if (memcmp (&xmm, &x7, sizeof (xmm)))
+ abort ();
+
+ return _mm_setzero_si128 ();
+}
Added: fsf/trunk/libc/elf/tst-auditmod6b.c
==============================================================================
--- fsf/trunk/libc/elf/tst-auditmod6b.c (added)
+++ fsf/trunk/libc/elf/tst-auditmod6b.c Sun Aug 9 00:05:21 2009
@@ -1,0 +1,220 @@
+/* Verify that changing AVX registers in audit library won't affect
+ function parameter passing/return. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+ setlinebuf (stdout);
+
+ printf ("version: %u\n", v);
+
+ char buf[20];
+ sprintf (buf, "%u", v);
+
+ return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ if (flag == LA_ACT_CONSISTENT)
+ printf ("activity: consistent\n");
+ else if (flag == LA_ACT_ADD)
+ printf ("activity: add\n");
+ else if (flag == LA_ACT_DELETE)
+ printf ("activity: delete\n");
+ else
+ printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ char buf[100];
+ const char *flagstr;
+ if (flag == LA_SER_ORIG)
+ flagstr = "LA_SET_ORIG";
+ else if (flag == LA_SER_LIBPATH)
+ flagstr = "LA_SER_LIBPATH";
+ else if (flag == LA_SER_RUNPATH)
+ flagstr = "LA_SER_RUNPATH";
+ else if (flag == LA_SER_CONFIG)
+ flagstr = "LA_SER_CONFIG";
+ else if (flag == LA_SER_DEFAULT)
+ flagstr = "LA_SER_DEFAULT";
+ else if (flag == LA_SER_SECURE)
+ flagstr = "LA_SER_SECURE";
+ else
+ {
+ sprintf (buf, "unknown flag %d", flag);
+ flagstr = buf;
+ }
+ printf ("objsearch: %s, %s\n", name, flagstr);
+
+ return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+ return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+ printf ("preinit\n");
+}
+
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf ("objclose\n");
+ return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ return sym->st_value;
+}
+
+#define pltenter la_x86_64_gnu_pltenter
+#define pltexit la_x86_64_gnu_pltexit
+#define La_regs La_x86_64_regs
+#define La_retval La_x86_64_retval
+#define int_retval lrv_rax
+
+#include <tst-audit.h>
+
+#ifdef __AVX__
+#include <immintrin.h>
+#include <cpuid.h>
+
+static int avx = -1;
+
+static int
+__attribute ((always_inline))
+check_avx (void)
+{
+ if (avx == -1)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ && (ecx & bit_AVX))
+ avx = 1;
+ else
+ avx = 0;
+ }
+ return avx;
+}
+#else
+#include <emmintrin.h>
+#endif
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m128i xmm = _mm_setzero_si128 ();
+ for (i = 0; i < 8; i++)
+ if (memcmp (®s->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (®s->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ for (i = 0; i < 8; i += 2)
+ {
+ regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 1);
+ regs->lr_vector[i].xmm[0] = regs->lr_xmm[i];
+ regs->lr_vector[i + 1].ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (i + 2);
+ regs->lr_xmm[i + 1] = regs->lr_vector[i + 1].xmm[0];
+ }
+
+ __m256i ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" );
+ asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" );
+ asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" );
+ asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" );
+ asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" );
+ asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" );
+
+ *framesizep = 1024;
+ }
+#endif
+
+ return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+ const char *symname)
+{
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+ symname, (long int) sym->st_value, ndx, outregs->int_retval);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m128i xmm = _mm_setzero_si128 ();
+ if (memcmp (&outregs->lrv_xmm0, &xmm, sizeof (xmm))
+ || memcmp (&outregs->lrv_vector0, &xmm, sizeof (xmm)))
+ abort ();
+
+ __m256i ymm;
+
+ for (i = 0; i < 8; i += 2)
+ {
+ xmm = _mm_set1_epi32 (i + 0x100);
+ if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (i + 0x101);
+ if (memcmp (&inregs->lr_xmm[i + 1],
+ &inregs->lr_vector[i + 1].xmm[0], sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i + 1], &ymm, sizeof (ymm)))
+ abort ();
+ }
+
+ outregs->lrv_vector0.ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (0x12349876);
+
+ ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ }
+#endif
+
+ return 0;
+}
Added: fsf/trunk/libc/elf/tst-auditmod6c.c
==============================================================================
--- fsf/trunk/libc/elf/tst-auditmod6c.c (added)
+++ fsf/trunk/libc/elf/tst-auditmod6c.c Sun Aug 9 00:05:21 2009
@@ -1,0 +1,225 @@
+/* Verify that changing AVX registers in audit library won't affect
+ function parameter passing/return. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+ setlinebuf (stdout);
+
+ printf ("version: %u\n", v);
+
+ char buf[20];
+ sprintf (buf, "%u", v);
+
+ return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ if (flag == LA_ACT_CONSISTENT)
+ printf ("activity: consistent\n");
+ else if (flag == LA_ACT_ADD)
+ printf ("activity: add\n");
+ else if (flag == LA_ACT_DELETE)
+ printf ("activity: delete\n");
+ else
+ printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ char buf[100];
+ const char *flagstr;
+ if (flag == LA_SER_ORIG)
+ flagstr = "LA_SET_ORIG";
+ else if (flag == LA_SER_LIBPATH)
+ flagstr = "LA_SER_LIBPATH";
+ else if (flag == LA_SER_RUNPATH)
+ flagstr = "LA_SER_RUNPATH";
+ else if (flag == LA_SER_CONFIG)
+ flagstr = "LA_SER_CONFIG";
+ else if (flag == LA_SER_DEFAULT)
+ flagstr = "LA_SER_DEFAULT";
+ else if (flag == LA_SER_SECURE)
+ flagstr = "LA_SER_SECURE";
+ else
+ {
+ sprintf (buf, "unknown flag %d", flag);
+ flagstr = buf;
+ }
+ printf ("objsearch: %s, %s\n", name, flagstr);
+
+ return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+ return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+ printf ("preinit\n");
+}
+
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf ("objclose\n");
+ return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ return sym->st_value;
+}
+
+#define pltenter la_x86_64_gnu_pltenter
+#define pltexit la_x86_64_gnu_pltexit
+#define La_regs La_x86_64_regs
+#define La_retval La_x86_64_retval
+#define int_retval lrv_rax
+
+#include <tst-audit.h>
+
+#ifdef __AVX__
+#include <immintrin.h>
+#include <cpuid.h>
+
+static int avx = -1;
+
+static int
+__attribute ((always_inline))
+check_avx (void)
+{
+ if (avx == -1)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ && (ecx & bit_AVX))
+ avx = 1;
+ else
+ avx = 0;
+ }
+ return avx;
+}
+#else
+#include <emmintrin.h>
+#endif
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+ __m128i xmm;
+ __m256i ymm;
+
+ for (i = 0; i < 8; i += 2)
+ {
+ xmm = _mm_set1_epi32 (i + 1);
+ if (memcmp (®s->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (®s->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+ regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 0x100);
+ regs->lr_vector[i].xmm[0] = regs->lr_xmm[i];
+
+ ymm = _mm256_set1_epi32 (i + 2);
+ if (memcmp (®s->lr_xmm[i + 1],
+ ®s->lr_vector[i + 1].xmm[0], sizeof (xmm))
+ || memcmp (®s->lr_vector[i + 1], &ymm, sizeof (ymm)))
+ abort ();
+ regs->lr_vector[i + 1].ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (i + 0x101);
+ regs->lr_xmm[i + 1] = regs->lr_vector[i + 1].xmm[0];
+ }
+
+ ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" );
+ asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" );
+ asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" );
+ asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" );
+ asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" );
+ asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" );
+
+ *framesizep = 1024;
+ }
+#endif
+
+ return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+ const char *symname)
+{
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+ symname, (long int) sym->st_value, ndx, outregs->int_retval);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m256i ymm = _mm256_set1_epi32 (0x12349876);;
+ if (memcmp (&outregs->lrv_vector0, &ymm, sizeof (ymm)))
+ abort ();
+
+ __m128i xmm;
+
+ for (i = 0; i < 8; i += 2)
+ {
+ xmm = _mm_set1_epi32 (i + 0x100);
+ if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (i + 0x101);
+ if (memcmp (&inregs->lr_xmm[i + 1],
+ &inregs->lr_vector[i + 1].xmm[0], sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i + 1], &ymm, sizeof (ymm)))
+ abort ();
+ }
+
+ outregs->lrv_vector0.ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (0x98abcdef);
+
+ ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ }
+#endif
+
+ return 0;
+}
Added: fsf/trunk/libc/elf/tst-auditmod7a.c
==============================================================================
--- fsf/trunk/libc/elf/tst-auditmod7a.c (added)
+++ fsf/trunk/libc/elf/tst-auditmod7a.c Sun Aug 9 00:05:21 2009
@@ -1,0 +1,1 @@
+#include "tst-auditmod6a.c"
Added: fsf/trunk/libc/elf/tst-auditmod7b.c
==============================================================================
--- fsf/trunk/libc/elf/tst-auditmod7b.c (added)
+++ fsf/trunk/libc/elf/tst-auditmod7b.c Sun Aug 9 00:05:21 2009
@@ -1,0 +1,218 @@
+/* Verify that changing AVX registers in audit library won't affect
+ function parameter passing/return. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+ setlinebuf (stdout);
+
+ printf ("version: %u\n", v);
+
+ char buf[20];
+ sprintf (buf, "%u", v);
+
+ return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ if (flag == LA_ACT_CONSISTENT)
+ printf ("activity: consistent\n");
+ else if (flag == LA_ACT_ADD)
+ printf ("activity: add\n");
+ else if (flag == LA_ACT_DELETE)
+ printf ("activity: delete\n");
+ else
+ printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ char buf[100];
+ const char *flagstr;
+ if (flag == LA_SER_ORIG)
+ flagstr = "LA_SET_ORIG";
+ else if (flag == LA_SER_LIBPATH)
+ flagstr = "LA_SER_LIBPATH";
+ else if (flag == LA_SER_RUNPATH)
+ flagstr = "LA_SER_RUNPATH";
+ else if (flag == LA_SER_CONFIG)
+ flagstr = "LA_SER_CONFIG";
+ else if (flag == LA_SER_DEFAULT)
+ flagstr = "LA_SER_DEFAULT";
+ else if (flag == LA_SER_SECURE)
+ flagstr = "LA_SER_SECURE";
+ else
+ {
+ sprintf (buf, "unknown flag %d", flag);
+ flagstr = buf;
+ }
+ printf ("objsearch: %s, %s\n", name, flagstr);
+
+ return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+ return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+ printf ("preinit\n");
+}
+
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf ("objclose\n");
+ return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ return sym->st_value;
+}
+
+#define pltenter la_x86_64_gnu_pltenter
+#define pltexit la_x86_64_gnu_pltexit
+#define La_regs La_x86_64_regs
+#define La_retval La_x86_64_retval
+#define int_retval lrv_rax
+
+#include <tst-audit.h>
+
+#ifdef __AVX__
+#include <immintrin.h>
+#include <cpuid.h>
+
+static int avx = -1;
+
+static int
+__attribute ((always_inline))
+check_avx (void)
+{
+ if (avx == -1)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ && (ecx & bit_AVX))
+ avx = 1;
+ else
+ avx = 0;
+ }
+ return avx;
+}
+#else
+#include <emmintrin.h>
+#endif
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m128i xmm = _mm_setzero_si128 ();
+ for (i = 0; i < 8; i++)
+ if (memcmp (®s->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (®s->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ for (i = 0; i < 8; i += 2)
+ {
+ regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 0x100);
+ regs->lr_vector[i + 1].ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (i + 0x101);
+ }
+
+ __m256i ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" );
+ asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" );
+ asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" );
+ asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" );
+ asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" );
+ asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" );
+
+ *framesizep = 1024;
+ }
+#endif
+
+ return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+ const char *symname)
+{
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+ symname, (long int) sym->st_value, ndx, outregs->int_retval);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m128i xmm = _mm_setzero_si128 ();
+ if (memcmp (&outregs->lrv_xmm0, &xmm, sizeof (xmm))
+ || memcmp (&outregs->lrv_vector0, &xmm, sizeof (xmm)))
+ abort ();
+
+ __m256i ymm;
+
+ for (i = 0; i < 8; i += 2)
+ {
+ xmm = _mm_set1_epi32 (i + 0x100);
+ if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (i + 0x101);
+ if (memcmp (&inregs->lr_xmm[i + 1],
+ &inregs->lr_vector[i + 1].xmm[0], sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i + 1], &ymm, sizeof (ymm)))
+ abort ();
+ }
+
+ outregs->lrv_vector0.ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (0x98abcdef);
+
+ ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ }
+#endif
+
+ return 0;
+}
Modified: fsf/trunk/libc/nptl/ChangeLog
==============================================================================
--- fsf/trunk/libc/nptl/ChangeLog (original)
+++ fsf/trunk/libc/nptl/ChangeLog Sun Aug 9 00:05:21 2009
@@ -1,3 +1,12 @@
+2009-08-08 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (sem_timedwait):
+ Optimize code path used when FUTEX_CLOCK_REALTIME is supported.
+
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+ (__pthread_cond_wait): Optimize by avoiding use of callee-safe
+ register.
+
2009-08-07 Ulrich Drepper <drepper@xxxxxxxxxx>
* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Little optimizations
Modified: fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
==============================================================================
--- fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S (original)
+++ fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S Sun Aug 9 00:05:21 2009
@@ -45,9 +45,6 @@
cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
#endif
- pushq %r13
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r13, 0)
#define FRAME_SIZE 32
leaq -FRAME_SIZE(%rsp), %rsp
cfi_adjust_cfa_offset(FRAME_SIZE)
@@ -140,7 +137,7 @@
movl $SYS_futex, %eax
syscall
- movl $1, %r13d
+ movl $1, %r8d
#ifdef __ASSUME_REQUEUE_PI
jmp 62f
#else
@@ -158,7 +155,7 @@
#else
orl %fs:PRIVATE_FUTEX, %esi
#endif
-60: xorl %r13d, %r13d
+60: xorl %r8d, %r8d
movl $SYS_futex, %eax
syscall
@@ -233,20 +230,18 @@
/* If requeue_pi is used the kernel performs the locking of the
mutex. */
11: movq 16(%rsp), %rdi
- testl %r13d, %r13d
+ testl %r8d, %r8d
jnz 18f
callq __pthread_mutex_cond_lock
-14: movq FRAME_SIZE(%rsp), %r13
- leaq FRAME_SIZE+8(%rsp), %rsp
- cfi_adjust_cfa_offset(-(FRAME_SIZE + 8))
+14: leaq FRAME_SIZE(%rsp), %rsp
+ cfi_adjust_cfa_offset(-FRAME_SIZE)
/* We return the result of the mutex_lock operation. */
retq
- cfi_adjust_cfa_offset(8 + FRAME_SIZE)
- cfi_rel_offset(%r13, FRAME_SIZE)
+ cfi_adjust_cfa_offset(FRAME_SIZE)
18: callq __pthread_mutex_cond_lock_adjust
xorl %eax, %eax
@@ -341,9 +336,7 @@
__condvar_cleanup1:
/* Stack frame:
- rsp + 40
- +--------------------------+
- rsp + 32 | %r13 |
+ rsp + 32
+--------------------------+
rsp + 24 | unused |
+--------------------------+
@@ -465,7 +458,6 @@
callq __pthread_mutex_cond_lock
movq 24(%rsp), %rdi
- movq 32(%rsp), %r13
.LcallUR:
call _Unwind_Resume@PLT
hlt
Modified: fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
==============================================================================
--- fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (original)
+++ fsf/trunk/libc/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S Sun Aug 9 00:05:21 2009
@@ -65,33 +65,8 @@
retq
/* Check whether the timeout value is valid. */
-1: pushq %r12
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r12, 0)
- pushq %r13
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r13, 0)
- pushq %r14
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r14, 0)
-#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
-# define STACKFRAME 8
-#else
-# define STACKFRAME 24
-#endif
- subq $STACKFRAME, %rsp
- cfi_adjust_cfa_offset(STACKFRAME)
-
- movq %rdi, %r12
- movq %rsi, %r13
-
- /* Check for invalid nanosecond field. */
- cmpq $1000000000, 8(%r13)
- movl $EINVAL, %r14d
+1: cmpq $1000000000, 8(%rsi)
jae 6f
-
- LOCK
- addq $1, NWAITERS(%r12)
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
# ifdef PIC
@@ -102,15 +77,22 @@
je .Lreltmo
#endif
+ /* This push is only needed to store the sem_t pointer for the
+ exception handler. */
+ pushq %rdi
+ cfi_adjust_cfa_offset(8)
+
+ movq %rsi, %r10
+
+ LOCK
+ addq $1, NWAITERS(%rdi)
+
.LcleanupSTART:
13: call __pthread_enable_asynccancel
- movl %eax, (%rsp)
-
- movq %r13, %r10
-#if VALUE == 0
- movq %r12, %rdi
-#else
- leaq VALUE(%r12), %rdi
+ movl %eax, %r8d
+
+#if VALUE != 0
+ leaq VALUE(%rdi), %rdi
#endif
movl $0xffffffff, %r9d
movl $FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi
@@ -118,22 +100,26 @@
movl $SYS_futex, %eax
xorl %edx, %edx
syscall
- movq %rax, %r14
-
- movl (%rsp), %edi
+ movq %rax, %r9
+#if VALUE != 0
+ leaq -VALUE(%rdi), %rdi
+#endif
+
+ xchgq %r8, %rdi
call __pthread_disable_asynccancel
.LcleanupEND:
-
- testq %r14, %r14
+ movq %r8, %rdi
+
+ testq %r9, %r9
je 11f
- cmpq $-EWOULDBLOCK, %r14
+ cmpq $-EWOULDBLOCK, %r9
jne 3f
11:
#if VALUE == 0
- movl (%r12), %eax
-#else
- movl VALUE(%r12), %eax
+ movl (%rdi), %eax
+#else
+ movl VALUE(%rdi), %eax
#endif
14: testl %eax, %eax
je 13b
@@ -141,49 +127,74 @@
leaq -1(%rax), %rcx
LOCK
#if VALUE == 0
- cmpxchgl %ecx, (%r12)
-#else
- cmpxchgl %ecx, VALUE(%r12)
+ cmpxchgl %ecx, (%rdi)
+#else
+ cmpxchgl %ecx, VALUE(%rdi)
#endif
jne 14b
-10: xorl %eax, %eax
+ xorl %eax, %eax
15: LOCK
- subq $1, NWAITERS(%r12)
-
- addq $STACKFRAME, %rsp
- cfi_adjust_cfa_offset(-STACKFRAME)
- popq %r14
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r14)
- popq %r13
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r13)
- popq %r12
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r12)
+ subq $1, NWAITERS(%rdi)
+
+ leaq 8(%rsp), %rsp
+ cfi_adjust_cfa_offset(-8)
retq
- cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
- cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
- cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
- cfi_rel_offset(%r14, STACKFRAME)
-3: negq %r14
+ cfi_adjust_cfa_offset(8)
+3: negq %r9
+#if USE___THREAD
+ movq errno@gottpoff(%rip), %rdx
+ movl %r9d, %fs:(%rdx)
+#else
+ callq __errno_location@plt
+ movl %r9d, (%rax)
+#endif
+
+ orl $-1, %eax
+ jmp 15b
+
+ cfi_adjust_cfa_offset(-8)
6:
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
- movl %r14d, %fs:(%rdx)
+ movl $EINVAL, %fs:(%rdx)
#else
callq __errno_location@plt
- movl %r14d, (%rax)
+ movl $EINVAL, (%rax)
#endif
orl $-1, %eax
- jmp 15b
+
+ retq
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.Lreltmo:
+ pushq %r12
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r12, 0)
+ pushq %r13
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r13, 0)
+ pushq %r14
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r14, 0)
+
+#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
+# define STACKFRAME 8
+#else
+# define STACKFRAME 24
+#endif
+ subq $STACKFRAME, %rsp
+ cfi_adjust_cfa_offset(STACKFRAME)
+
+ movq %rdi, %r12
+ movq %rsi, %r13
+
+ LOCK
+ addq $1, NWAITERS(%r12)
+
7: xorl %esi, %esi
movq %rsp, %rdi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
@@ -202,7 +213,7 @@
decq %rdi
5: testq %rdi, %rdi
movl $ETIMEDOUT, %r14d
- js 6b /* Time is already up. */
+ js 36f /* Time is already up. */
movq %rdi, (%rsp) /* Store relative timeout. */
movq %rsi, 8(%rsp)
@@ -235,7 +246,7 @@
testq %r14, %r14
je 9f
cmpq $-EWOULDBLOCK, %r14
- jne 3b
+ jne 33f
9:
# if VALUE == 0
@@ -254,15 +265,54 @@
cmpxchgl %ecx, VALUE(%r12)
# endif
jne 8b
- jmp 10b
-#endif
+
+ xorl %eax, %eax
+
+45: LOCK
+ subq $1, NWAITERS(%r12)
+
+ addq $STACKFRAME, %rsp
+ cfi_adjust_cfa_offset(-STACKFRAME)
+ popq %r14
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r14)
+ popq %r13
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r13)
+ popq %r12
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r12)
+ retq
+
+ cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+ cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+ cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+ cfi_rel_offset(%r14, STACKFRAME)
+33: negq %r14
+36:
+#if USE___THREAD
+ movq errno@gottpoff(%rip), %rdx
+ movl %r14d, %fs:(%rdx)
+#else
+ callq __errno_location@plt
+ movl %r14d, (%rax)
+#endif
+
+ orl $-1, %eax
+ jmp 45b
+#endif
+ cfi_endproc
.size sem_timedwait,.-sem_timedwait
.type sem_timedwait_cleanup,@function
sem_timedwait_cleanup:
- LOCK
- subq $1, NWAITERS(%r12)
+ cfi_startproc
+ cfi_adjust_cfa_offset(8)
+
+ movq (%rsp), %rdi
+ LOCK
+ subq $1, NWAITERS(%rdi)
movq %rax, %rdi
.LcallUR:
call _Unwind_Resume@PLT
@@ -270,6 +320,30 @@
.LENDCODE:
cfi_endproc
.size sem_timedwait_cleanup,.-sem_timedwait_cleanup
+
+
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+ .type sem_timedwait_cleanup2,@function
+sem_timedwait_cleanup2:
+ cfi_startproc
+ cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+ cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+ cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+ cfi_rel_offset(%r14, STACKFRAME)
+
+ LOCK
+ subq $1, NWAITERS(%r12)
+ movq %rax, %rdi
+ movq STACKFRAME(%rsp), %r14
+ movq STACKFRAME+8(%rsp), %r13
+ movq STACKFRAME+16(%rsp), %r12
+.LcallUR2:
+ call _Unwind_Resume@PLT
+ hlt
+.LENDCODE2:
+ cfi_endproc
+ .size sem_timedwait_cleanup2,.-sem_timedwait_cleanup2
+#endif
.section .gcc_except_table,"a",@progbits
@@ -286,13 +360,19 @@
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.uleb128 .LcleanupSTART2-.LSTARTCODE
.uleb128 .LcleanupEND2-.LcleanupSTART2
- .uleb128 sem_timedwait_cleanup-.LSTARTCODE
+ .uleb128 sem_timedwait_cleanup2-.LSTARTCODE
.uleb128 0
#endif
.uleb128 .LcallUR-.LSTARTCODE
.uleb128 .LENDCODE-.LcallUR
.uleb128 0
.uleb128 0
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+ .uleb128 .LcallUR2-.LSTARTCODE
+ .uleb128 .LENDCODE2-.LcallUR2
+ .uleb128 0
+ .uleb128 0
+#endif
.Lcstend:
Modified: fsf/trunk/libc/sysdeps/i386/configure
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/configure (original)
+++ fsf/trunk/libc/sysdeps/i386/configure Sun Aug 9 00:05:21 2009
@@ -1,14 +1,229 @@
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
# This file is generated from configure.in by Autoconf. DO NOT EDIT!
# Local configure fragment for sysdeps/i386.
-{ echo "$as_me:$LINENO: checking if gcc provides <cpuid.h>" >&5
-echo $ECHO_N "checking if gcc provides <cpuid.h>... $ECHO_C" >&6; }
-if test "${libc_cv_gcc_cpuid+set}" = set; then
+
+{ echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5
+echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; }
+if test "${ac_cv_path_GREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # Extract the first word of "grep ggrep" to use in msg output
+if test -z "$GREP"; then
+set dummy grep ggrep; ac_prog_name=$2
+if test "${ac_cv_path_GREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_path_GREP_found=false
+# Loop through the user's path and test for each of PROGNAME-LIST
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+ # Check for GNU ac_path_GREP and select it if it is found.
+ # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ echo $ECHO_N "0123456789$ECHO_C" >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ ac_count=`expr $ac_count + 1`
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+
+ $ac_path_GREP_found && break 3
+ done
+done
+
+done
+IFS=$as_save_IFS
+
+
+fi
+
+GREP="$ac_cv_path_GREP"
+if test -z "$GREP"; then
+ { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5
+echo "${ECHO_T}$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ echo "$as_me:$LINENO: checking for egrep" >&5
+echo $ECHO_N "checking for egrep... $ECHO_C" >&6; }
+if test "${ac_cv_path_EGREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+ then ac_cv_path_EGREP="$GREP -E"
+ else
+ # Extract the first word of "egrep" to use in msg output
+if test -z "$EGREP"; then
+set dummy egrep; ac_prog_name=$2
+if test "${ac_cv_path_EGREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_path_EGREP_found=false
+# Loop through the user's path and test for each of PROGNAME-LIST
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in egrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+ # Check for GNU ac_path_EGREP and select it if it is found.
+ # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+ ac_count=0
+ echo $ECHO_N "0123456789$ECHO_C" >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ echo 'EGREP' >> "conftest.nl"
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ ac_count=`expr $ac_count + 1`
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_EGREP="$ac_path_EGREP"
+ ac_path_EGREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+
+ $ac_path_EGREP_found && break 3
+ done
+done
+
+done
+IFS=$as_save_IFS
+
+
+fi
+
+EGREP="$ac_cv_path_EGREP"
+if test -z "$EGREP"; then
+ { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+else
+ ac_cv_path_EGREP=$EGREP
+fi
+
+
+ fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5
+echo "${ECHO_T}$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; }
+if test "${ac_cv_header_stdc+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
cat >conftest.$ac_ext <<_ACEOF
-#include <cpuid.h>
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
@@ -27,23 +242,342 @@
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
- libc_cv_gcc_cpuid=yes
+ ac_cv_header_stdc=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- libc_cv_gcc_cpuid=no
+ ac_cv_header_stdc=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-{ echo "$as_me:$LINENO: result: $libc_cv_gcc_cpuid" >&5
-echo "${ECHO_T}$libc_cv_gcc_cpuid" >&6; }
-if test $libc_cv_gcc_cpuid != yes; then
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ return 2;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+
+
+
+
+
+
+
+
+
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ eval "$as_ac_Header=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_Header=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+if test "${ac_cv_header_cpuid_h+set}" = set; then
+ { echo "$as_me:$LINENO: checking for cpuid.h" >&5
+echo $ECHO_N "checking for cpuid.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_cpuid_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_cpuid_h" >&5
+echo "${ECHO_T}$ac_cv_header_cpuid_h" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking cpuid.h usability" >&5
+echo $ECHO_N "checking cpuid.h usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <cpuid.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking cpuid.h presence" >&5
+echo $ECHO_N "checking cpuid.h presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <cpuid.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: cpuid.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: cpuid.h: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: present but cannot be compiled" >&5
+echo "$as_me: WARNING: cpuid.h: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: cpuid.h: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: cpuid.h: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: cpuid.h: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: cpuid.h: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: cpuid.h: in the future, the compiler will take precedence" >&2;}
+
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for cpuid.h" >&5
+echo $ECHO_N "checking for cpuid.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_cpuid_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_header_cpuid_h=$ac_header_preproc
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_cpuid_h" >&5
+echo "${ECHO_T}$ac_cv_header_cpuid_h" >&6; }
+
+fi
+if test $ac_cv_header_cpuid_h = yes; then
+ :
+else
{ { echo "$as_me:$LINENO: error: gcc must provide the <cpuid.h> header" >&5
echo "$as_me: error: gcc must provide the <cpuid.h> header" >&2;}
{ (exit 1); exit 1; }; }
fi
+
+
{ echo "$as_me:$LINENO: checking if -g produces usable source locations for assembler-with-cpp" >&5
echo $ECHO_N "checking if -g produces usable source locations for assembler-with-cpp... $ECHO_C" >&6; }
Modified: fsf/trunk/libc/sysdeps/i386/configure.in
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/configure.in (original)
+++ fsf/trunk/libc/sysdeps/i386/configure.in Sun Aug 9 00:05:21 2009
@@ -1,12 +1,8 @@
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
# Local configure fragment for sysdeps/i386.
-AC_CACHE_CHECK([if gcc provides <cpuid.h>], libc_cv_gcc_cpuid, [dnl
-AC_COMPILE_IFELSE([#include <cpuid.h>], libc_cv_gcc_cpuid=yes,
- libc_cv_gcc_cpuid=no)])
-if test $libc_cv_gcc_cpuid != yes; then
- AC_MSG_ERROR([gcc must provide the <cpuid.h> header])
-fi
+AC_HEADER_CHECK([cpuid.h], ,
+ [AC_MSG_ERROR([gcc must provide the <cpuid.h> header])])
AC_CACHE_CHECK(if -g produces usable source locations for assembler-with-cpp,
libc_cv_cpp_asm_debuginfo, [dnl
Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcspn.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcspn.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcspn.S Sun Aug 9 00:05:21 2009
@@ -42,6 +42,7 @@
define multiple versions for strpbrk in static library since we
need strpbrk before the initialization happened. */
#if (defined SHARED || !defined USE_AS_STRPBRK) && !defined NOT_IN_libc
+# ifdef SHARED
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
.globl __i686.get_pc_thunk.bx
.hidden __i686.get_pc_thunk.bx
@@ -71,6 +72,20 @@
cfi_restore (ebx)
ret
END(STRCSPN)
+# else
+ .text
+ENTRY(STRCSPN)
+ .type STRCSPN, @gnu_indirect_function
+ cmpl $0, KIND_OFFSET+__cpu_features
+ jne 1f
+ call __init_cpu_features
+1: leal STRCSPN_IA32, %eax
+ testl $(1<<20), CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET+__cpu_features
+ jz 2f
+ leal STRCSPN_SSE42, %eax
+2: ret
+END(STRCSPN)
+# endif
# undef ENTRY
# define ENTRY(name) \
Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strspn.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strspn.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strspn.S Sun Aug 9 00:05:21 2009
@@ -27,6 +27,7 @@
/* Define multiple versions only for the definition in libc. */
#ifndef NOT_IN_libc
+# ifdef SHARED
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
.globl __i686.get_pc_thunk.bx
.hidden __i686.get_pc_thunk.bx
@@ -56,6 +57,20 @@
cfi_restore (ebx)
ret
END(strspn)
+# else
+ .text
+ENTRY(strspn)
+ .type strspn, @gnu_indirect_function
+ cmpl $0, KIND_OFFSET+__cpu_features
+ jne 1f
+ call __init_cpu_features
+1: leal __strspn_ia32, %eax
+ testl $(1<<20), CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET+__cpu_features
+ jz 2f
+ leal __strspn_sse42, %eax
+2: ret
+END(strspn)
+# endif
# undef ENTRY
# define ENTRY(name) \
Modified: fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.S Sun Aug 9 00:05:21 2009
@@ -146,247 +146,17 @@
2: movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
-1: js L(no_avx1)
-
- /* This is to support AVX audit modules. */
- vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
- vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
- vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
- vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
- vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
- vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
- vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
- vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
-
- /* Save xmm0-xmm7 registers to detect if any of them are
- changed by audit module. */
- vmovdqa %xmm0, (LR_SIZE)(%rsp)
- vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
- vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
- vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
- vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
- vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
- vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
- vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
-
-L(no_avx1):
-# endif
-
- movq %rsp, %rcx # La_x86_64_regs pointer to %rcx.
- movq 48(%rbx), %rdx # Load return address if needed.
- movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
- movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
- leaq 16(%rbx), %r8
- call _dl_profile_fixup # Call resolver.
-
- movq %rax, %r11 # Save return value.
-
- movq 8(%rbx), %rax # Get back register content.
- movq LR_RDX_OFFSET(%rsp), %rdx
- movq LR_R8_OFFSET(%rsp), %r8
- movq LR_R9_OFFSET(%rsp), %r9
-
- movaps (LR_XMM_OFFSET)(%rsp), %xmm0
- movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
- movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
- movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
- movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
- movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
- movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
- movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
-
-# ifdef HAVE_AVX_SUPPORT
- cmpl $0, L(have_avx)(%rip)
- js L(no_avx2)
-
- /* Check if any xmm0-xmm7 registers are changed by audit
- module. */
- vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
-
-L(no_avx2):
-1:
-# endif
- movq 16(%rbx), %r10 # Anything in framesize?
- testq %r10, %r10
- jns 3f
-
- /* There's nothing in the frame size, so there
- will be no call to the _dl_call_pltexit. */
-
- /* Get back registers content. */
- movq LR_RCX_OFFSET(%rsp), %rcx
- movq LR_RSI_OFFSET(%rsp), %rsi
- movq LR_RDI_OFFSET(%rsp), %rdi
-
- movq %rbx, %rsp
- movq (%rsp), %rbx
- cfi_restore(rbx)
- cfi_def_cfa_register(%rsp)
-
- addq $48, %rsp # Adjust the stack to the return value
- # (eats the reloc index and link_map)
- cfi_adjust_cfa_offset(-48)
- jmp *%r11 # Jump to function address.
-
-3:
- cfi_adjust_cfa_offset(48)
- cfi_rel_offset(%rbx, 0)
- cfi_def_cfa_register(%rbx)
-
- /* At this point we need to prepare new stack for the function
- which has to be called. We copy the original stack to a
- temporary buffer of the size specified by the 'framesize'
- returned from _dl_profile_fixup */
-
- leaq LR_RSP_OFFSET(%rbx), %rsi # stack
- addq $8, %r10
- andq $0xfffffffffffffff0, %r10
- movq %r10, %rcx
- subq %r10, %rsp
- movq %rsp, %rdi
- shrq $3, %rcx
- rep
- movsq
-
- movq 24(%rdi), %rcx # Get back register content.
- movq 32(%rdi), %rsi
- movq 40(%rdi), %rdi
-
- call *%r11
-
- mov 24(%rbx), %rsp # Drop the copied stack content
-
- /* Now we have to prepare the La_x86_64_retval structure for the
- _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
- so we just need to allocate the sizeof(La_x86_64_retval) space on
- the stack, since the alignment has already been taken care of. */
-# ifdef HAVE_AVX_SUPPORT
- /* sizeof(La_x86_64_retval). Need extra space for 2 SSE
- registers to detect if xmm0/xmm1 registers are changed
- by audit module. */
- subq $(LRV_SIZE + XMM_SIZE*2), %rsp
-# else
- subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval)
-# endif
- movq %rsp, %rcx # La_x86_64_retval argument to %rcx.
-
- /* Fill in the La_x86_64_retval structure. */
- movq %rax, LRV_RAX_OFFSET(%rcx)
- movq %rdx, LRV_RDX_OFFSET(%rcx)
-
- movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
- movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
-
-# ifdef HAVE_AVX_SUPPORT
- cmpl $0, L(have_avx)(%rip)
- js L(no_avx3)
-
- /* This is to support AVX audit modules. */
- vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
- vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
-
- /* Save xmm0/xmm1 registers to detect if they are changed
- by audit module. */
- vmovdqa %xmm0, (LRV_SIZE)(%rcx)
- vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
-
-L(no_avx3):
-# endif
-
- fstpt LRV_ST0_OFFSET(%rcx)
- fstpt LRV_ST1_OFFSET(%rcx)
-
- movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx.
- movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
- movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
- call _dl_call_pltexit
-
- /* Restore return registers. */
- movq LRV_RAX_OFFSET(%rsp), %rax
- movq LRV_RDX_OFFSET(%rsp), %rdx
-
- movaps LRV_XMM0_OFFSET(%rsp), %xmm0
- movaps LRV_XMM1_OFFSET(%rsp), %xmm1
-
-# ifdef HAVE_AVX_SUPPORT
- cmpl $0, L(have_avx)(%rip)
- js L(no_avx4)
-
- /* Check if xmm0/xmm1 registers are changed by audit module. */
- vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
- vpmovmskb %xmm2, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
-
-1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
- vpmovmskb %xmm2, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
-
-L(no_avx4):
-1:
-# endif
-
- fldt LRV_ST1_OFFSET(%rsp)
- fldt LRV_ST0_OFFSET(%rsp)
-
- movq %rbx, %rsp
- movq (%rsp), %rbx
- cfi_restore(rbx)
- cfi_def_cfa_register(%rsp)
-
- addq $48, %rsp # Adjust the stack to the return value
- # (eats the reloc index and link_map)
- cfi_adjust_cfa_offset(-48)
- retq
+1: js L(no_avx)
+
+# define RESTORE_AVX
+# include "dl-trampoline.h"
+
+ .align 16
+L(no_avx):
+# endif
+
+# undef RESTORE_AVX
+# include "dl-trampoline.h"
cfi_endproc
.size _dl_runtime_profile, .-_dl_runtime_profile
Added: fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.h
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.h (added)
+++ fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.h Sun Aug 9 00:05:21 2009
@@ -1,0 +1,269 @@
+/* Partial PLT profile trampoline to save and restore x86-64 vector
+ registers.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef RESTORE_AVX
+ /* This is to support AVX audit modules. */
+ vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
+ vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
+ vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
+ vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
+ vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
+ vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
+ vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
+ vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+
+ /* Save xmm0-xmm7 registers to detect if any of them are
+ changed by audit module. */
+ vmovdqa %xmm0, (LR_SIZE)(%rsp)
+ vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
+ vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
+ vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
+ vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
+ vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
+ vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
+ vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
+#endif
+
+ movq %rsp, %rcx # La_x86_64_regs pointer to %rcx.
+ movq 48(%rbx), %rdx # Load return address if needed.
+ movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
+ movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
+ leaq 16(%rbx), %r8
+ call _dl_profile_fixup # Call resolver.
+
+ movq %rax, %r11 # Save return value.
+
+ movq 8(%rbx), %rax # Get back register content.
+ movq LR_RDX_OFFSET(%rsp), %rdx
+ movq LR_R8_OFFSET(%rsp), %r8
+ movq LR_R9_OFFSET(%rsp), %r9
+
+ movaps (LR_XMM_OFFSET)(%rsp), %xmm0
+ movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
+ movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
+ movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
+ movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
+ movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
+ movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
+ movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
+
+#ifdef RESTORE_AVX
+ /* Check if any xmm0-xmm7 registers are changed by audit
+ module. */
+ vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
+ vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
+ vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
+ vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
+ vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
+ vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
+ vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
+ vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
+ vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
+
+1:
+#endif
+ movq 16(%rbx), %r10 # Anything in framesize?
+ testq %r10, %r10
+ jns 3f
+
+ /* There's nothing in the frame size, so there
+ will be no call to the _dl_call_pltexit. */
+
+ /* Get back registers content. */
+ movq LR_RCX_OFFSET(%rsp), %rcx
+ movq LR_RSI_OFFSET(%rsp), %rsi
+ movq LR_RDI_OFFSET(%rsp), %rdi
+
+ movq %rbx, %rsp
+ movq (%rsp), %rbx
+ cfi_restore(rbx)
+ cfi_def_cfa_register(%rsp)
+
+ addq $48, %rsp # Adjust the stack to the return value
+ # (eats the reloc index and link_map)
+ cfi_adjust_cfa_offset(-48)
+ jmp *%r11 # Jump to function address.
+
+3:
+ cfi_adjust_cfa_offset(48)
+ cfi_rel_offset(%rbx, 0)
+ cfi_def_cfa_register(%rbx)
+
+ /* At this point we need to prepare new stack for the function
+ which has to be called. We copy the original stack to a
+ temporary buffer of the size specified by the 'framesize'
+ returned from _dl_profile_fixup */
+
+ leaq LR_RSP_OFFSET(%rbx), %rsi # stack
+ addq $8, %r10
+ andq $0xfffffffffffffff0, %r10
+ movq %r10, %rcx
+ subq %r10, %rsp
+ movq %rsp, %rdi
+ shrq $3, %rcx
+ rep
+ movsq
+
+ movq 24(%rdi), %rcx # Get back register content.
+ movq 32(%rdi), %rsi
+ movq 40(%rdi), %rdi
+
+ call *%r11
+
+ mov 24(%rbx), %rsp # Drop the copied stack content
+
+ /* Now we have to prepare the La_x86_64_retval structure for the
+ _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
+ so we just need to allocate the sizeof(La_x86_64_retval) space on
+ the stack, since the alignment has already been taken care of. */
+# ifdef RESTORE_AVX
+ /* sizeof(La_x86_64_retval). Need extra space for 2 SSE
+ registers to detect if xmm0/xmm1 registers are changed
+ by audit module. */
+ subq $(LRV_SIZE + XMM_SIZE*2), %rsp
+# else
+ subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval)
+# endif
+ movq %rsp, %rcx # La_x86_64_retval argument to %rcx.
+
+ /* Fill in the La_x86_64_retval structure. */
+ movq %rax, LRV_RAX_OFFSET(%rcx)
+ movq %rdx, LRV_RDX_OFFSET(%rcx)
+
+ movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
+ movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
+
+# ifdef RESTORE_AVX
+ /* This is to support AVX audit modules. */
+ vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
+ vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
+
+ /* Save xmm0/xmm1 registers to detect if they are changed
+ by audit module. */
+ vmovdqa %xmm0, (LRV_SIZE)(%rcx)
+ vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
+# endif
+
+ fstpt LRV_ST0_OFFSET(%rcx)
+ fstpt LRV_ST1_OFFSET(%rcx)
+
+ movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx.
+ movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
+ movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
+ call _dl_call_pltexit
+
+ /* Restore return registers. */
+ movq LRV_RAX_OFFSET(%rsp), %rax
+ movq LRV_RDX_OFFSET(%rsp), %rdx
+
+ movaps LRV_XMM0_OFFSET(%rsp), %xmm0
+ movaps LRV_XMM1_OFFSET(%rsp), %xmm1
+
+# ifdef RESTORE_AVX
+ /* Check if xmm0/xmm1 registers are changed by audit module. */
+ vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
+ vpmovmskb %xmm2, %esi
+ cmpl $0xffff, %esi
+ jne 1f
+ vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
+
+1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
+ vpmovmskb %xmm2, %esi
+ cmpl $0xffff, %esi
+ jne 1f
+ vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
+
+1:
+# endif
+
+ fldt LRV_ST1_OFFSET(%rsp)
+ fldt LRV_ST0_OFFSET(%rsp)
+
+ movq %rbx, %rsp
+ movq (%rsp), %rbx
+ cfi_restore(rbx)
+ cfi_def_cfa_register(%rsp)
+
+ addq $48, %rsp # Adjust the stack to the return value
+ # (eats the reloc index and link_map)
+ cfi_adjust_cfa_offset(-48)
+ retq
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/Makefile Sun Aug 9 00:05:21 2009
@@ -4,7 +4,7 @@
endif
ifeq ($(subdir),string)
-sysdep_routines += stpncpy-c strncpy-c
+sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-strcspn-c.c += -msse4
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/rawmemchr.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/rawmemchr.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/rawmemchr.S Sun Aug 9 00:05:21 2009
@@ -38,6 +38,7 @@
strong_alias (rawmemchr, __rawmemchr)
+ .section .text.sse4.2,"ax",@progbits
.align 16
.type __rawmemchr_sse42, @function
__rawmemchr_sse42:
Added: fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp-ssse3.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp-ssse3.S Sun Aug 9 00:05:21 2009
@@ -1,0 +1,3 @@
+#define USE_SSSE3 1
+#define STRCMP __strcmp_ssse3
+#include "../strcmp.S"
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp.S Sun Aug 9 00:05:21 2009
@@ -34,6 +34,7 @@
mov %r9, %r11
#define STRCMP_SSE42 __strncmp_sse42
+#define STRCMP_SSSE3 __strncmp_ssse3
#define STRCMP_SSE2 __strncmp_sse2
#define __GI_STRCMP __GI_strncmp
#else
@@ -41,6 +42,7 @@
#ifndef STRCMP
#define STRCMP strcmp
#define STRCMP_SSE42 __strcmp_sse42
+#define STRCMP_SSSE3 __strcmp_ssse3
#define STRCMP_SSE2 __strcmp_sse2
#define __GI_STRCMP __GI_strcmp
#endif
@@ -60,10 +62,14 @@
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
-1: leaq STRCMP_SSE2(%rip), %rax
+1:
+ leaq STRCMP_SSE42(%rip), %rax
testl $(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
- jz 2f
- leaq STRCMP_SSE42(%rip), %rax
+ jnz 2f
+ leaq STRCMP_SSSE3(%rip), %rax
+ testl $(1<<9), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
+ jnz 2f
+ leaq STRCMP_SSE2(%rip), %rax
2: ret
END(STRCMP)
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S Sun Aug 9 00:05:21 2009
@@ -40,6 +40,7 @@
END(strlen)
+ .section .text.sse4.2,"ax",@progbits
.align 16
.type __strlen_sse42, @function
__strlen_sse42:
Added: fsf/trunk/libc/sysdeps/x86_64/multiarch/strncmp-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strncmp-ssse3.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strncmp-ssse3.S Sun Aug 9 00:05:21 2009
@@ -1,0 +1,4 @@
+#define USE_SSSE3 1
+#define STRCMP __strncmp_ssse3
+#define USE_AS_STRNCMP
+#include "../strcmp.S"
Modified: fsf/trunk/libc/sysdeps/x86_64/strcmp.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strcmp.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/strcmp.S Sun Aug 9 00:05:21 2009
@@ -51,7 +51,12 @@
# endif
#endif
+#ifndef USE_SSSE3
.text
+#else
+ .section .text.ssse3,"ax",@progbits
+#endif
+
ENTRY (BP_SYM (STRCMP))
#ifdef NOT_IN_libc
/* Simple version since we can't use SSE registers in ld.so. */
@@ -244,9 +249,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4 /* store for next cycle */
+#ifndef USE_SSSE3
psrldq $1, %xmm3
pslldq $15, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -269,9 +278,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4 /* store for next cycle */
+#ifndef USE_SSSE3
psrldq $1, %xmm3
- pslldq $15, %xmm2
- por %xmm3, %xmm2 /* merge into one 16byte value */
+ pslldq $15, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -363,9 +376,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $2, %xmm3
pslldq $14, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -389,9 +406,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $2, %xmm3
- pslldq $14, %xmm2
- por %xmm3, %xmm2
+ pslldq $14, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -477,9 +498,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $3, %xmm3
pslldq $13, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -503,9 +528,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $3, %xmm3
- pslldq $13, %xmm2
- por %xmm3, %xmm2
+ pslldq $13, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -591,9 +620,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $4, %xmm3
pslldq $12, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -617,9 +650,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $4, %xmm3
- pslldq $12, %xmm2
- por %xmm3, %xmm2
+ pslldq $12, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -705,9 +742,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $5, %xmm3
pslldq $11, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -731,9 +772,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $5, %xmm3
- pslldq $11, %xmm2
- por %xmm3, %xmm2
+ pslldq $11, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -819,9 +864,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $6, %xmm3
pslldq $10, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -845,9 +894,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $6, %xmm3
- pslldq $10, %xmm2
- por %xmm3, %xmm2
+ pslldq $10, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -933,9 +986,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $7, %xmm3
pslldq $9, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -959,9 +1016,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $7, %xmm3
- pslldq $9, %xmm2
- por %xmm3, %xmm2
+ pslldq $9, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1047,9 +1108,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $8, %xmm3
pslldq $8, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1073,9 +1138,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $8, %xmm3
- pslldq $8, %xmm2
- por %xmm3, %xmm2
+ pslldq $8, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1161,9 +1230,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $9, %xmm3
pslldq $7, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1187,9 +1260,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $9, %xmm3
- pslldq $7, %xmm2
- por %xmm3, %xmm2
+ pslldq $7, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1275,9 +1352,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $10, %xmm3
pslldq $6, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1301,9 +1382,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $10, %xmm3
- pslldq $6, %xmm2
- por %xmm3, %xmm2
+ pslldq $6, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1389,9 +1474,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $11, %xmm3
pslldq $5, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1415,9 +1504,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $11, %xmm3
- pslldq $5, %xmm2
- por %xmm3, %xmm2
+ pslldq $5, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1503,9 +1596,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $12, %xmm3
pslldq $4, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1529,9 +1626,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $12, %xmm3
- pslldq $4, %xmm2
- por %xmm3, %xmm2
+ pslldq $4, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1617,9 +1718,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $13, %xmm3
pslldq $3, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1643,9 +1748,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $13, %xmm3
- pslldq $3, %xmm2
- por %xmm3, %xmm2
+ pslldq $3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1731,9 +1840,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $14, %xmm3
pslldq $2, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1757,9 +1870,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $14, %xmm3
- pslldq $2, %xmm2
- por %xmm3, %xmm2
+ pslldq $2, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1847,9 +1964,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $15, %xmm3
pslldq $1, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1873,9 +1994,13 @@
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $15, %xmm3
- pslldq $1, %xmm2
- por %xmm3, %xmm2
+ pslldq $1, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1