[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commits] r23988 - in /fsf/trunk/libc: ./ benchtests/ malloc/ ports/ ports/sysdeps/unix/sysv/linux/alpha/bits/ ports/sysdeps/unix/sysv...
- To: commits@xxxxxxxxxx
- Subject: [Commits] r23988 - in /fsf/trunk/libc: ./ benchtests/ malloc/ ports/ ports/sysdeps/unix/sysv/linux/alpha/bits/ ports/sysdeps/unix/sysv...
- From: eglibc@xxxxxxxxxx
- Date: Thu, 12 Sep 2013 00:02:13 -0000
Author: eglibc
Date: Thu Sep 12 00:02:11 2013
New Revision: 23988
Log:
Import glibc-mainline for 2013-09-12
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/NEWS
fsf/trunk/libc/benchtests/bench-skeleton.c
fsf/trunk/libc/benchtests/bench-timing.h
fsf/trunk/libc/malloc/malloc.c
fsf/trunk/libc/ports/ChangeLog.alpha
fsf/trunk/libc/ports/ChangeLog.hppa
fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h
fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h
fsf/trunk/libc/sunrpc/rpc/types.h
fsf/trunk/libc/sysdeps/powerpc/fpu/libm-test-ulps
fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
fsf/trunk/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S
fsf/trunk/libc/sysdeps/x86_64/strchr.S
fsf/trunk/libc/sysdeps/x86_64/strchrnul.S
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Thu Sep 12 00:02:11 2013
@@ -1,4 +1,56 @@
-2013-09-10 Ondřej Bílka <neleai@xxxxxxxxx>
+2013-09-11 Jia Liu <proljc@xxxxxxxxx>
+
+ * sunrpc/rpc/types.h [__APPLE_CC__]: Define __u_char_defined and
+ __daddr_t_defined.
+ [__FreeBSD__]: Likewise.
+
+2013-09-11 Ondřej Bílka <neleai@xxxxxxxxx>
+
+ * sysdeps/x86_64/multiarch/ifunc-impl-list.c
+ (__libc_ifunc_impl_list): Remove: __strchr_sse42.
+ * sysdeps/x86_64/multiarch/strchr.S (__strchr_sse42): Remove.
+ (strchr): Remove __strchr_sse42 ifunc selection.
+ * sysdeps/x86_64/strchr.S (strchr): Use optimized implementation.
+ * sysdeps/x86_64/strchrnul.S: Include sysdeps/x86_64/strchr.S.
+
+2013-09-11 Will Newton <will.newton@xxxxxxxxxx>
+
+ * benchtests/bench-timing.h (TIMING_INIT): Rename ITERS
+ parameter to RES. Remove hardcoded 1000 value.
+ * benchtests/bench-skeleton.c (main): Pass RES parameter
+ to TIMING_INIT and multiply result by 1000.
+
+2013-09-10 Adhemerval Zanella <azanella@xxxxxxxxxxxxxxxxxx>
+
+ * sysdeps/powerpc/fpu/libm-test-ulps: Update.
+
+2013-09-11 Andreas Schwab <schwab@xxxxxxx>
+
+ * sysdeps/unix/sysv/linux/bits/fcntl-linux.h (__O_TMPFILE): Define
+ if not defined.
+ (O_TMPFILE) [__USE_GNU]: Define.
+ * sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (__O_TMPFILE):
+ Define.
+
+2013-09-11 Will Newton <will.newton@xxxxxxxxxx>
+
+ [BZ #15857]
+ * malloc/malloc.c (__libc_memalign): Check the value of bytes
+ does not overflow.
+
+2013-09-11 Will Newton <will.newton@xxxxxxxxxx>
+
+ [BZ #15856]
+ * malloc/malloc.c (__libc_valloc): Check the value of bytes
+ does not overflow.
+
+2013-09-11 Will Newton <will.newton@xxxxxxxxxx>
+
+ [BZ #15855]
+ * malloc/malloc.c (__libc_pvalloc): Check the value of bytes
+ does not overflow.
+
+2013-09-10 Ondřej Bílka <neleai@xxxxxxxxx>
* sysdeps/ieee754/dbl-64/e_j0.c: Remove DO_NOT_USE_THIS conditionals.
* sysdeps/ieee754/dbl-64/e_j1.c: Likewise.
Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Thu Sep 12 00:02:11 2013
@@ -10,8 +10,8 @@
* The following bugs are resolved with this release:
14155, 14699, 15427, 15522, 15531, 15532, 15736, 15748, 15749, 15797,
- 15844, 15849, 15867, 15886, 15887, 15890, 15892, 15893, 15895, 15897,
- 15905, 15909, 15921, 15939.
+ 15844, 15849, 15855, 15856, 15857, 15867, 15886, 15887, 15890, 15892,
+ 15893, 15895, 15897, 15905, 15909, 15921, 15939.
* CVE-2013-4237 The readdir_r function could write more than NAME_MAX bytes
to the d_name member of struct dirent, or omit the terminating NUL
Modified: fsf/trunk/libc/benchtests/bench-skeleton.c
==============================================================================
--- fsf/trunk/libc/benchtests/bench-skeleton.c (original)
+++ fsf/trunk/libc/benchtests/bench-skeleton.c Thu Sep 12 00:02:11 2013
@@ -53,9 +53,11 @@
memset (&runtime, 0, sizeof (runtime));
- unsigned long iters;
+ unsigned long iters, res;
- TIMING_INIT (iters);
+ TIMING_INIT (res);
+
+ iters = 1000 * res;
for (int v = 0; v < NUM_VARIANTS; v++)
{
Modified: fsf/trunk/libc/benchtests/bench-timing.h
==============================================================================
--- fsf/trunk/libc/benchtests/bench-timing.h (original)
+++ fsf/trunk/libc/benchtests/bench-timing.h Thu Sep 12 00:02:11 2013
@@ -25,10 +25,10 @@
hp_timing_t _dl_hp_timing_overhead;
typedef hp_timing_t timing_t;
-# define TIMING_INIT(iters) \
+# define TIMING_INIT(res) \
({ \
HP_TIMING_DIFF_INIT(); \
- (iters) = 1000; \
+ (res) = 1; \
})
# define TIMING_NOW(var) HP_TIMING_NOW (var)
@@ -43,15 +43,13 @@
#else
typedef uint64_t timing_t;
-/* Measure 1000 times the resolution of the clock. So for a 1ns
- resolution clock, we measure 1000 iterations of the function call at a
- time. Measurements close to the minimum clock resolution won't make
- much sense, but it's better than having nothing at all. */
-# define TIMING_INIT(iters) \
+/* Measure the resolution of the clock so we can scale the number of
+ benchmark iterations by this value. */
+# define TIMING_INIT(res) \
({ \
struct timespec start; \
clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start); \
- (iters) = 1000 * start.tv_nsec; \
+ (res) = start.tv_nsec; \
})
# define TIMING_NOW(var) \
Modified: fsf/trunk/libc/malloc/malloc.c
==============================================================================
--- fsf/trunk/libc/malloc/malloc.c (original)
+++ fsf/trunk/libc/malloc/malloc.c Thu Sep 12 00:02:11 2013
@@ -3015,6 +3015,13 @@
/* Otherwise, ensure that it is at least a minimum chunk size */
if (alignment < MINSIZE) alignment = MINSIZE;
+ /* Check for overflow. */
+ if (bytes > SIZE_MAX - alignment - MINSIZE)
+ {
+ __set_errno (ENOMEM);
+ return 0;
+ }
+
arena_get(ar_ptr, bytes + alignment + MINSIZE);
if(!ar_ptr)
return 0;
@@ -3046,6 +3053,13 @@
size_t pagesz = GLRO(dl_pagesize);
+ /* Check for overflow. */
+ if (bytes > SIZE_MAX - pagesz - MINSIZE)
+ {
+ __set_errno (ENOMEM);
+ return 0;
+ }
+
void *(*hook) (size_t, size_t, const void *) =
force_reg (__memalign_hook);
if (__builtin_expect (hook != NULL, 0))
@@ -3081,6 +3095,13 @@
size_t pagesz = GLRO(dl_pagesize);
size_t page_mask = GLRO(dl_pagesize) - 1;
size_t rounded_bytes = (bytes + page_mask) & ~(page_mask);
+
+ /* Check for overflow. */
+ if (bytes > SIZE_MAX - 2*pagesz - MINSIZE)
+ {
+ __set_errno (ENOMEM);
+ return 0;
+ }
void *(*hook) (size_t, size_t, const void *) =
force_reg (__memalign_hook);
Modified: fsf/trunk/libc/ports/ChangeLog.alpha
==============================================================================
--- fsf/trunk/libc/ports/ChangeLog.alpha (original)
+++ fsf/trunk/libc/ports/ChangeLog.alpha Thu Sep 12 00:02:11 2013
@@ -1,3 +1,8 @@
+2013-09-11 Andreas Schwab <schwab@xxxxxxx>
+
+ * sysdeps/unix/sysv/linux/alpha/bits/fcntl.h (__O_TMPFILE):
+ Define.
+
2013-08-30 Ondřej Bílka <neleai@xxxxxxxxx>
* sysdeps/alpha/alphaev67/stpncpy.S: Fix then/than typos.
Modified: fsf/trunk/libc/ports/ChangeLog.hppa
==============================================================================
--- fsf/trunk/libc/ports/ChangeLog.hppa (original)
+++ fsf/trunk/libc/ports/ChangeLog.hppa Thu Sep 12 00:02:11 2013
@@ -1,3 +1,7 @@
+2013-09-11 Andreas Schwab <schwab@xxxxxxx>
+
+ * sysdeps/unix/sysv/linux/hppa/bits/fcntl.h (__O_TMPFILE): Define.
+
2013-08-30 Ondřej Bílka <neleai@xxxxxxxxx>
* sysdeps/unix/sysv/linux/hppa/clone.S: Fix typos.
Modified: fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h (original)
+++ fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h Thu Sep 12 00:02:11 2013
@@ -36,6 +36,7 @@
#define __O_DIRECT 02000000 /* Direct disk access. */
#define __O_NOATIME 04000000 /* Do not set atime. */
#define __O_PATH 040000000 /* Resolve pathname but do not open file. */
+#define __O_TMPFILE 0100100000 /* Atomically create nameless file. */
/* Not necessary, files are always with 64bit off_t. */
#define __O_LARGEFILE 0
Modified: fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h (original)
+++ fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h Thu Sep 12 00:02:11 2013
@@ -37,6 +37,7 @@
#define __O_CLOEXEC 010000000 /* Set close_on_exec. */
#define __O_NOATIME 004000000 /* Do not set atime. */
#define __O_PATH 020000000
+#define __O_TMPFILE 040010000 /* Atomically create nameless file. */
#define __O_LARGEFILE 00004000
Modified: fsf/trunk/libc/sunrpc/rpc/types.h
==============================================================================
--- fsf/trunk/libc/sunrpc/rpc/types.h (original)
+++ fsf/trunk/libc/sunrpc/rpc/types.h Thu Sep 12 00:02:11 2013
@@ -69,6 +69,11 @@
#include <sys/types.h>
#endif
+#if defined __APPLE_CC__ || defined __FreeBSD__
+# define __u_char_defined
+# define __daddr_t_defined
+#endif
+
#ifndef __u_char_defined
typedef __u_char u_char;
typedef __u_short u_short;
Modified: fsf/trunk/libc/sysdeps/powerpc/fpu/libm-test-ulps
==============================================================================
--- fsf/trunk/libc/sysdeps/powerpc/fpu/libm-test-ulps (original)
+++ fsf/trunk/libc/sysdeps/powerpc/fpu/libm-test-ulps Thu Sep 12 00:02:11 2013
@@ -5927,11 +5927,31 @@
idouble: 1
# gamma
+Test "gamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-5)":
+double: 1
+idouble: 1
Test "gamma (0.7)":
double: 1
float: 1
idouble: 1
ifloat: 1
+Test "gamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "gamma (0x1p-30)":
+double: 1
+idouble: 1
Test "gamma (1.2)":
double: 1
float: 2
@@ -6131,9 +6151,9 @@
ldouble: 1
Test "jn (10, 10.0)":
double: 2
-float: 1
+float: 2
idouble: 2
-ifloat: 1
+ifloat: 2
ildouble: 4
ldouble: 4
Test "jn (10, 2.0)":
@@ -6146,6 +6166,14 @@
float: 2
idouble: 2
ifloat: 2
+Test "jn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "jn (2, 0x1p127)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
Test "jn (2, 2.4048255576957729)":
double: 2
float: 1
@@ -6226,11 +6254,31 @@
ldouble: 7
# lgamma
+Test "lgamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-5)":
+double: 1
+idouble: 1
Test "lgamma (0.7)":
double: 1
float: 1
idouble: 1
ifloat: 1
+Test "lgamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "lgamma (0x1p-30)":
+double: 1
+idouble: 1
Test "lgamma (1.2)":
double: 1
float: 2
@@ -7334,6 +7382,19 @@
ifloat: 1
ildouble: 1
ldouble: 1
+Test "yn (2, 0x1.ffff62p+99)":
+double: 1
+idouble: 1
+Test "yn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "yn (2, 0x1p127)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
Test "yn (3, 0.125)":
double: 1
idouble: 1
Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h Thu Sep 12 00:02:11 2013
@@ -95,6 +95,9 @@
#endif
#ifndef __O_DSYNC
# define __O_DSYNC 010000
+#endif
+#ifndef __O_TMPFILE
+# define __O_TMPFILE 020200000
#endif
#ifndef F_GETLK
@@ -128,6 +131,7 @@
# define O_DIRECT __O_DIRECT /* Direct disk access. */
# define O_NOATIME __O_NOATIME /* Do not set atime. */
# define O_PATH __O_PATH /* Resolve pathname but do not open file. */
+# define O_TMPFILE __O_TMPFILE /* Atomically create nameless file. */
#endif
/* For now, Linux has no separate synchronicitiy options for read
Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h Thu Sep 12 00:02:11 2013
@@ -39,6 +39,7 @@
#define __O_DIRECT 0x100000 /* direct disk access hint */
#define __O_NOATIME 0x200000 /* Do not set atime. */
#define __O_PATH 0x1000000 /* Resolve pathname but do not open file. */
+#define __O_TMPFILE 0x2010000 /* Atomically create nameless file. */
#if __WORDSIZE == 64
# define __O_LARGEFILE 0
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c Thu Sep 12 00:02:11 2013
@@ -110,7 +110,6 @@
/* Support sysdeps/x86_64/multiarch/strchr.S. */
IFUNC_IMPL (i, name, strchr,
- IFUNC_IMPL_ADD (array, i, strchr, HAS_SSE4_2, __strchr_sse42)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S Thu Sep 12 00:02:11 2013
@@ -29,139 +29,12 @@
jne 1f
call __init_cpu_features
1: leaq __strchr_sse2(%rip), %rax
- testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
- jnz 2f
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jz 2f
- leaq __strchr_sse42(%rip), %rax
- ret
2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
jz 3f
leaq __strchr_sse2_no_bsf(%rip), %rax
3: ret
END(strchr)
-
-/*
- This implementation uses SSE4 instructions to compare up to 16 bytes
- at a time looking for the first occurrence of the character c in the
- string s:
-
- char *strchr (const char *s, int c);
-
- We use 0xa:
- _SIDD_SBYTE_OPS
- | _SIDD_CMP_EQUAL_EACH
- | _SIDD_LEAST_SIGNIFICANT
- on pcmpistri to compare xmm/mem128
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- X X X X X X X X X X X X X X X X
-
- against xmm
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- C C C C C C C C C C C C C C C C
-
- to find out if the first 16byte data element has a byte C and the
- offset of the first byte. There are 3 cases:
-
- 1. The first 16byte data element has the byte C at the offset X.
- 2. The first 16byte data element has EOS and doesn't have the byte C.
- 3. The first 16byte data element is valid and doesn't have the byte C.
-
- Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
-
- case ECX CFlag ZFlag SFlag
- 1 X 1 0/1 0
- 2 16 0 1 0
- 3 16 0 0 0
-
- We exit from the loop for cases 1 and 2 with jbe which branches
- when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
- X for case 1. */
-
- .section .text.sse4.2,"ax",@progbits
- .align 16
- .type __strchr_sse42, @function
- .globl __strchr_sse42
- .hidden __strchr_sse42
-__strchr_sse42:
- cfi_startproc
- CALL_MCOUNT
- testb %sil, %sil
- je __strend_sse4
- pxor %xmm2, %xmm2
- movd %esi, %xmm1
- movl %edi, %ecx
- pshufb %xmm2, %xmm1
- andl $15, %ecx
- movq %rdi, %r8
- je L(aligned_start)
-
-/* Handle unaligned string. */
- andq $-16, %r8
- movdqa (%r8), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm2, %edx
- /* Check if there is a match. */
- pmovmskb %xmm0, %esi
- /* Remove the leading bytes. */
- sarl %cl, %edx
- sarl %cl, %esi
- testl %esi, %esi
- je L(unaligned_no_match)
- /* Check which byte is a match. */
- bsfl %esi, %eax
- /* Is there a NULL? */
- testl %edx, %edx
- je L(unaligned_match)
- bsfl %edx, %esi
- cmpl %esi, %eax
- /* Return NULL if NULL comes first. */
- ja L(return_null)
-L(unaligned_match):
- addq %rdi, %rax
- ret
-
- .p2align 4
-L(unaligned_no_match):
- testl %edx, %edx
- jne L(return_null)
-
-/* Loop start on aligned string. */
-L(loop):
- addq $16, %r8
-L(aligned_start):
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- addq $16, %r8
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- addq $16, %r8
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- addq $16, %r8
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- jmp L(loop)
-L(wrap):
- jc L(loop_exit)
-
-/* Return NULL. */
-L(return_null):
- xorl %eax, %eax
- ret
-
-/* Loop exit. */
- .p2align 4
-L(loop_exit):
- leaq (%r8,%rcx), %rax
- ret
- cfi_endproc
- .size __strchr_sse42, .-__strchr_sse42
# undef ENTRY
Modified: fsf/trunk/libc/sysdeps/x86_64/strchr.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strchr.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/strchr.S Thu Sep 12 00:02:11 2013
@@ -19,51 +19,174 @@
#include <sysdep.h>
+# ifndef ALIGN
+# define ALIGN(n) .p2align n
+# endif
+
.text
ENTRY (strchr)
movd %esi, %xmm1
- movq %rdi, %rcx
+ movl %edi, %eax
+ andl $4095, %eax
punpcklbw %xmm1, %xmm1
- andq $~15, %rdi
+ cmpl $4032, %eax
+ punpcklwd %xmm1, %xmm1
+ pshufd $0, %xmm1, %xmm1
+ jg L(cross_page)
+ movdqu (%rdi), %xmm0
+ pxor %xmm3, %xmm3
+ movdqa %xmm0, %xmm4
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm3, %xmm4
+ por %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ je L(next_48_bytes)
+ bsf %eax, %eax
+#ifdef AS_STRCHRNUL
+ leaq (%rdi,%rax), %rax
+#else
+ movl $0, %edx
+ leaq (%rdi,%rax), %rax
+ cmpb %sil, (%rax)
+ cmovne %rdx, %rax
+#endif
+ ret
+
+ ALIGN(3)
+ L(next_48_bytes):
+ movdqu 16(%rdi), %xmm0
+ movdqa %xmm0, %xmm4
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm3, %xmm4
+ por %xmm4, %xmm0
+ pmovmskb %xmm0, %ecx
+ movdqu 32(%rdi), %xmm0
+ movdqa %xmm0, %xmm4
+ pcmpeqb %xmm1, %xmm0
+ salq $16, %rcx
+ pcmpeqb %xmm3, %xmm4
+ por %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ movdqu 48(%rdi), %xmm0
+ pcmpeqb %xmm0, %xmm3
+ salq $32, %rax
+ pcmpeqb %xmm1, %xmm0
+ orq %rcx, %rax
+ por %xmm3, %xmm0
+ pmovmskb %xmm0, %ecx
+ salq $48, %rcx
+ orq %rcx, %rax
+ testq %rax, %rax
+ jne L(return)
+L(loop_start):
+ /* We use this alignment to force loop be aligned to 8 but not
+ 16 bytes. This gives better sheduling on AMD processors. */
+ ALIGN(4)
+ pxor %xmm6, %xmm6
+ andq $-64, %rdi
+ ALIGN(3)
+L(loop64):
+ addq $64, %rdi
+ movdqa (%rdi), %xmm5
+ movdqa 16(%rdi), %xmm2
+ movdqa 32(%rdi), %xmm3
+ pxor %xmm1, %xmm5
+ movdqa 48(%rdi), %xmm4
+ pxor %xmm1, %xmm2
+ pxor %xmm1, %xmm3
+ pminub (%rdi), %xmm5
+ pxor %xmm1, %xmm4
+ pminub 16(%rdi), %xmm2
+ pminub 32(%rdi), %xmm3
+ pminub %xmm2, %xmm5
+ pminub 48(%rdi), %xmm4
+ pminub %xmm3, %xmm5
+ pminub %xmm4, %xmm5
+ pcmpeqb %xmm6, %xmm5
+ pmovmskb %xmm5, %eax
+
+ testl %eax, %eax
+ je L(loop64)
+
+ movdqa (%rdi), %xmm5
+ movdqa %xmm5, %xmm0
+ pcmpeqb %xmm1, %xmm5
+ pcmpeqb %xmm6, %xmm0
+ por %xmm0, %xmm5
+ pcmpeqb %xmm6, %xmm2
+ pcmpeqb %xmm6, %xmm3
+ pcmpeqb %xmm6, %xmm4
+
+ pmovmskb %xmm5, %ecx
+ pmovmskb %xmm2, %eax
+ salq $16, %rax
+ pmovmskb %xmm3, %r8d
+ pmovmskb %xmm4, %edx
+ salq $32, %r8
+ orq %r8, %rax
+ orq %rcx, %rax
+ salq $48, %rdx
+ orq %rdx, %rax
+ ALIGN(3)
+L(return):
+ bsfq %rax, %rax
+#ifdef AS_STRCHRNUL
+ leaq (%rdi,%rax), %rax
+#else
+ movl $0, %edx
+ leaq (%rdi,%rax), %rax
+ cmpb %sil, (%rax)
+ cmovne %rdx, %rax
+#endif
+ ret
+ ALIGN(4)
+
+L(cross_page):
+ movq %rdi, %rdx
pxor %xmm2, %xmm2
- punpcklbw %xmm1, %xmm1
- orl $0xffffffff, %esi
- movdqa (%rdi), %xmm0
- pshufd $0, %xmm1, %xmm1
- subq %rdi, %rcx
- movdqa %xmm0, %xmm3
- leaq 16(%rdi), %rdi
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- shl %cl, %esi
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- andl %esi, %edx
- andl %esi, %ecx
- orl %edx, %ecx
- jnz 1f
+ andq $-64, %rdx
+ movdqa %xmm1, %xmm0
+ movdqa (%rdx), %xmm3
+ movdqa %xmm3, %xmm4
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm2, %xmm4
+ por %xmm4, %xmm3
+ pmovmskb %xmm3, %r8d
+ movdqa 16(%rdx), %xmm3
+ movdqa %xmm3, %xmm4
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm2, %xmm4
+ por %xmm4, %xmm3
+ pmovmskb %xmm3, %eax
+ movdqa 32(%rdx), %xmm3
+ movdqa %xmm3, %xmm4
+ pcmpeqb %xmm1, %xmm3
+ salq $16, %rax
+ pcmpeqb %xmm2, %xmm4
+ por %xmm4, %xmm3
+ pmovmskb %xmm3, %r9d
+ movdqa 48(%rdx), %xmm3
+ pcmpeqb %xmm3, %xmm2
+ salq $32, %r9
+ pcmpeqb %xmm3, %xmm0
+ orq %r9, %rax
+ orq %r8, %rax
+ por %xmm2, %xmm0
+ pmovmskb %xmm0, %ecx
+ salq $48, %rcx
+ orq %rcx, %rax
+ movl %edi, %ecx
+ subb %dl, %cl
+ shrq %cl, %rax
+ testq %rax, %rax
+ jne L(return)
+ jmp L(loop_start)
-2: movdqa (%rdi), %xmm0
- leaq 16(%rdi), %rdi
- movdqa %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- orl %edx, %ecx
- jz 2b
-
-1: bsfl %edx, %edx
- jz 4f
- bsfl %ecx, %ecx
- leaq -16(%rdi,%rdx), %rax
- cmpl %edx, %ecx
- je 5f
-4: xorl %eax, %eax
-5: ret
END (strchr)
+#ifndef AS_STRCHRNUL
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)
-
+#endif
Modified: fsf/trunk/libc/sysdeps/x86_64/strchrnul.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strchrnul.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/strchrnul.S Thu Sep 12 00:02:11 2013
@@ -20,43 +20,8 @@
#include <sysdep.h>
-
- .text
-ENTRY (__strchrnul)
- movd %esi, %xmm1
- movq %rdi, %rcx
- punpcklbw %xmm1, %xmm1
- andq $~15, %rdi
- pxor %xmm2, %xmm2
- punpcklbw %xmm1, %xmm1
- orl $0xffffffff, %esi
- movdqa (%rdi), %xmm0
- pshufd $0, %xmm1, %xmm1
- subq %rdi, %rcx
- movdqa %xmm0, %xmm3
- leaq 16(%rdi), %rdi
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- shl %cl, %esi
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- orl %edx, %ecx
- andl %esi, %ecx
- jnz 1f
-
-2: movdqa (%rdi), %xmm0
- leaq 16(%rdi), %rdi
- movdqa %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- orl %edx, %ecx
- jz 2b
-
-1: bsfl %ecx, %edx
- leaq -16(%rdi,%rdx), %rax
- ret
-END (__strchrnul)
+#define strchr __strchrnul
+#define AS_STRCHRNUL
+#include "strchr.S"
weak_alias (__strchrnul, strchrnul)
_______________________________________________
Commits mailing list
Commits@xxxxxxxxxx
http://eglibc.org/cgi-bin/mailman/listinfo/commits