[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commits] r23988 - in /fsf/trunk/libc: ./ benchtests/ malloc/ ports/ ports/sysdeps/unix/sysv/linux/alpha/bits/ ports/sysdeps/unix/sysv...



Author: eglibc
Date: Thu Sep 12 00:02:11 2013
New Revision: 23988

Log:
Import glibc-mainline for 2013-09-12

Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/NEWS
    fsf/trunk/libc/benchtests/bench-skeleton.c
    fsf/trunk/libc/benchtests/bench-timing.h
    fsf/trunk/libc/malloc/malloc.c
    fsf/trunk/libc/ports/ChangeLog.alpha
    fsf/trunk/libc/ports/ChangeLog.hppa
    fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h
    fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h
    fsf/trunk/libc/sunrpc/rpc/types.h
    fsf/trunk/libc/sysdeps/powerpc/fpu/libm-test-ulps
    fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
    fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
    fsf/trunk/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
    fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S
    fsf/trunk/libc/sysdeps/x86_64/strchr.S
    fsf/trunk/libc/sysdeps/x86_64/strchrnul.S

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Thu Sep 12 00:02:11 2013
@@ -1,4 +1,56 @@
-2013-09-10   Ondřej Bílka  <neleai@xxxxxxxxx>
+2013-09-11  Jia Liu  <proljc@xxxxxxxxx>
+
+	* sunrpc/rpc/types.h [__APPLE_CC__]: Define __u_char_defined and
+	__daddr_t_defined.
+	[__FreeBSD__]: Likewise.
+
+2013-09-11  Ondřej Bílka  <neleai@xxxxxxxxx>
+
+	* sysdeps/x86_64/multiarch/ifunc-impl-list.c
+	(__libc_ifunc_impl_list): Remove: __strchr_sse42.
+	* sysdeps/x86_64/multiarch/strchr.S (__strchr_sse42): Remove.
+	(strchr): Remove __strchr_sse42 ifunc selection.
+	* sysdeps/x86_64/strchr.S (strchr): Use optimized implementation.
+	* sysdeps/x86_64/strchrnul.S: Include sysdeps/x86_64/strchr.S.
+
+2013-09-11  Will Newton  <will.newton@xxxxxxxxxx>
+
+	* benchtests/bench-timing.h (TIMING_INIT): Rename ITERS
+	parameter to RES. Remove hardcoded 1000 value.
+	* benchtests/bench-skeleton.c (main): Pass RES parameter
+	to TIMING_INIT and multiply result by 1000.
+
+2013-09-10  Adhemerval Zanella  <azanella@xxxxxxxxxxxxxxxxxx>
+
+	* sysdeps/powerpc/fpu/libm-test-ulps: Update.
+
+2013-09-11  Andreas Schwab  <schwab@xxxxxxx>
+
+	* sysdeps/unix/sysv/linux/bits/fcntl-linux.h (__O_TMPFILE): Define
+	if not defined.
+	(O_TMPFILE) [__USE_GNU]: Define.
+	* sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (__O_TMPFILE):
+	Define.
+
+2013-09-11  Will Newton  <will.newton@xxxxxxxxxx>
+
+	[BZ #15857]
+	* malloc/malloc.c (__libc_memalign): Check the value of bytes
+	does not overflow.
+
+2013-09-11  Will Newton  <will.newton@xxxxxxxxxx>
+
+	[BZ #15856]
+	* malloc/malloc.c (__libc_valloc): Check the value of bytes
+	does not overflow.
+
+2013-09-11  Will Newton  <will.newton@xxxxxxxxxx>
+
+	[BZ #15855]
+	* malloc/malloc.c (__libc_pvalloc): Check the value of bytes
+	does not overflow.
+
+2013-09-10  Ondřej Bílka  <neleai@xxxxxxxxx>
 
 	* sysdeps/ieee754/dbl-64/e_j0.c: Remove DO_NOT_USE_THIS conditionals.
 	* sysdeps/ieee754/dbl-64/e_j1.c: Likewise.

Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Thu Sep 12 00:02:11 2013
@@ -10,8 +10,8 @@
 * The following bugs are resolved with this release:
 
   14155, 14699, 15427, 15522, 15531, 15532, 15736, 15748, 15749, 15797,
-  15844, 15849, 15867, 15886, 15887, 15890, 15892, 15893, 15895, 15897,
-  15905, 15909, 15921, 15939.
+  15844, 15849, 15855, 15856, 15857, 15867, 15886, 15887, 15890, 15892,
+  15893, 15895, 15897, 15905, 15909, 15921, 15939.
 
 * CVE-2013-4237 The readdir_r function could write more than NAME_MAX bytes
   to the d_name member of struct dirent, or omit the terminating NUL

Modified: fsf/trunk/libc/benchtests/bench-skeleton.c
==============================================================================
--- fsf/trunk/libc/benchtests/bench-skeleton.c (original)
+++ fsf/trunk/libc/benchtests/bench-skeleton.c Thu Sep 12 00:02:11 2013
@@ -53,9 +53,11 @@
 
   memset (&runtime, 0, sizeof (runtime));
 
-  unsigned long iters;
+  unsigned long iters, res;
 
-  TIMING_INIT (iters);
+  TIMING_INIT (res);
+
+  iters = 1000 * res;
 
   for (int v = 0; v < NUM_VARIANTS; v++)
     {

Modified: fsf/trunk/libc/benchtests/bench-timing.h
==============================================================================
--- fsf/trunk/libc/benchtests/bench-timing.h (original)
+++ fsf/trunk/libc/benchtests/bench-timing.h Thu Sep 12 00:02:11 2013
@@ -25,10 +25,10 @@
 hp_timing_t _dl_hp_timing_overhead;
 typedef hp_timing_t timing_t;
 
-# define TIMING_INIT(iters) \
+# define TIMING_INIT(res) \
 ({									      \
   HP_TIMING_DIFF_INIT();						      \
-  (iters) = 1000;							      \
+  (res) = 1;							      \
 })
 
 # define TIMING_NOW(var) HP_TIMING_NOW (var)
@@ -43,15 +43,13 @@
 #else
 typedef uint64_t timing_t;
 
-/* Measure 1000 times the resolution of the clock.  So for a 1ns
-   resolution  clock, we measure 1000 iterations of the function call at a
-   time.  Measurements close to the minimum clock resolution won't make
-   much sense, but it's better than having nothing at all.  */
-# define TIMING_INIT(iters) \
+/* Measure the resolution of the clock so we can scale the number of
+   benchmark iterations by this value.  */
+# define TIMING_INIT(res) \
 ({									      \
   struct timespec start;						      \
   clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);			      \
-  (iters) = 1000 * start.tv_nsec;					      \
+  (res) = start.tv_nsec;					      \
 })
 
 # define TIMING_NOW(var) \

Modified: fsf/trunk/libc/malloc/malloc.c
==============================================================================
--- fsf/trunk/libc/malloc/malloc.c (original)
+++ fsf/trunk/libc/malloc/malloc.c Thu Sep 12 00:02:11 2013
@@ -3015,6 +3015,13 @@
   /* Otherwise, ensure that it is at least a minimum chunk size */
   if (alignment <  MINSIZE) alignment = MINSIZE;
 
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
   arena_get(ar_ptr, bytes + alignment + MINSIZE);
   if(!ar_ptr)
     return 0;
@@ -3046,6 +3053,13 @@
 
   size_t pagesz = GLRO(dl_pagesize);
 
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - pagesz - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
   void *(*hook) (size_t, size_t, const void *) =
     force_reg (__memalign_hook);
   if (__builtin_expect (hook != NULL, 0))
@@ -3081,6 +3095,13 @@
   size_t pagesz = GLRO(dl_pagesize);
   size_t page_mask = GLRO(dl_pagesize) - 1;
   size_t rounded_bytes = (bytes + page_mask) & ~(page_mask);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2*pagesz - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
 
   void *(*hook) (size_t, size_t, const void *) =
     force_reg (__memalign_hook);

Modified: fsf/trunk/libc/ports/ChangeLog.alpha
==============================================================================
--- fsf/trunk/libc/ports/ChangeLog.alpha (original)
+++ fsf/trunk/libc/ports/ChangeLog.alpha Thu Sep 12 00:02:11 2013
@@ -1,3 +1,8 @@
+2013-09-11  Andreas Schwab  <schwab@xxxxxxx>
+
+	* sysdeps/unix/sysv/linux/alpha/bits/fcntl.h (__O_TMPFILE):
+	Define.
+
 2013-08-30   Ondřej Bílka  <neleai@xxxxxxxxx>
 
 	* sysdeps/alpha/alphaev67/stpncpy.S: Fix then/than typos.

Modified: fsf/trunk/libc/ports/ChangeLog.hppa
==============================================================================
--- fsf/trunk/libc/ports/ChangeLog.hppa (original)
+++ fsf/trunk/libc/ports/ChangeLog.hppa Thu Sep 12 00:02:11 2013
@@ -1,3 +1,7 @@
+2013-09-11  Andreas Schwab  <schwab@xxxxxxx>
+
+	* sysdeps/unix/sysv/linux/hppa/bits/fcntl.h (__O_TMPFILE): Define.
+
 2013-08-30   Ondřej Bílka  <neleai@xxxxxxxxx>
 
 	* sysdeps/unix/sysv/linux/hppa/clone.S: Fix typos.

Modified: fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h (original)
+++ fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h Thu Sep 12 00:02:11 2013
@@ -36,6 +36,7 @@
 #define __O_DIRECT	02000000 /* Direct disk access.  */
 #define __O_NOATIME	04000000 /* Do not set atime.  */
 #define __O_PATH	040000000 /* Resolve pathname but do not open file.  */
+#define __O_TMPFILE	0100100000 /* Atomically create nameless file.  */
 
 /* Not necessary, files are always with 64bit off_t.  */
 #define __O_LARGEFILE	0

Modified: fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h (original)
+++ fsf/trunk/libc/ports/sysdeps/unix/sysv/linux/hppa/bits/fcntl.h Thu Sep 12 00:02:11 2013
@@ -37,6 +37,7 @@
 #define __O_CLOEXEC	010000000 /* Set close_on_exec.  */
 #define __O_NOATIME	004000000 /* Do not set atime.  */
 #define __O_PATH        020000000
+#define __O_TMPFILE     040010000 /* Atomically create nameless file. */
 
 #define __O_LARGEFILE	00004000
 

Modified: fsf/trunk/libc/sunrpc/rpc/types.h
==============================================================================
--- fsf/trunk/libc/sunrpc/rpc/types.h (original)
+++ fsf/trunk/libc/sunrpc/rpc/types.h Thu Sep 12 00:02:11 2013
@@ -69,6 +69,11 @@
 #include <sys/types.h>
 #endif
 
+#if defined __APPLE_CC__ || defined __FreeBSD__
+# define __u_char_defined
+# define __daddr_t_defined
+#endif
+
 #ifndef __u_char_defined
 typedef __u_char u_char;
 typedef __u_short u_short;

Modified: fsf/trunk/libc/sysdeps/powerpc/fpu/libm-test-ulps
==============================================================================
--- fsf/trunk/libc/sysdeps/powerpc/fpu/libm-test-ulps (original)
+++ fsf/trunk/libc/sysdeps/powerpc/fpu/libm-test-ulps Thu Sep 12 00:02:11 2013
@@ -5927,11 +5927,31 @@
 idouble: 1
 
 # gamma
+Test "gamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-5)":
+double: 1
+idouble: 1
 Test "gamma (0.7)":
 double: 1
 float: 1
 idouble: 1
 ifloat: 1
+Test "gamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "gamma (0x1p-30)":
+double: 1
+idouble: 1
 Test "gamma (1.2)":
 double: 1
 float: 2
@@ -6131,9 +6151,9 @@
 ldouble: 1
 Test "jn (10, 10.0)":
 double: 2
-float: 1
+float: 2
 idouble: 2
-ifloat: 1
+ifloat: 2
 ildouble: 4
 ldouble: 4
 Test "jn (10, 2.0)":
@@ -6146,6 +6166,14 @@
 float: 2
 idouble: 2
 ifloat: 2
+Test "jn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "jn (2, 0x1p127)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
 Test "jn (2, 2.4048255576957729)":
 double: 2
 float: 1
@@ -6226,11 +6254,31 @@
 ldouble: 7
 
 # lgamma
+Test "lgamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-5)":
+double: 1
+idouble: 1
 Test "lgamma (0.7)":
 double: 1
 float: 1
 idouble: 1
 ifloat: 1
+Test "lgamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "lgamma (0x1p-30)":
+double: 1
+idouble: 1
 Test "lgamma (1.2)":
 double: 1
 float: 2
@@ -7334,6 +7382,19 @@
 ifloat: 1
 ildouble: 1
 ldouble: 1
+Test "yn (2, 0x1.ffff62p+99)":
+double: 1
+idouble: 1
+Test "yn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "yn (2, 0x1p127)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
 Test "yn (3, 0.125)":
 double: 1
 idouble: 1

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h Thu Sep 12 00:02:11 2013
@@ -95,6 +95,9 @@
 #endif
 #ifndef __O_DSYNC
 # define __O_DSYNC	 010000
+#endif
+#ifndef __O_TMPFILE
+# define __O_TMPFILE   020200000
 #endif
 
 #ifndef F_GETLK
@@ -128,6 +131,7 @@
 # define O_DIRECT	__O_DIRECT	/* Direct disk access.	*/
 # define O_NOATIME	__O_NOATIME	/* Do not set atime.  */
 # define O_PATH		__O_PATH	/* Resolve pathname but do not open file.  */
+# define O_TMPFILE	__O_TMPFILE	/* Atomically create nameless file.  */
 #endif
 
 /* For now, Linux has no separate synchronicitiy options for read

Modified: fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
==============================================================================
--- fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (original)
+++ fsf/trunk/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h Thu Sep 12 00:02:11 2013
@@ -39,6 +39,7 @@
 #define __O_DIRECT	0x100000 /* direct disk access hint */
 #define __O_NOATIME	0x200000 /* Do not set atime.  */
 #define __O_PATH	0x1000000 /* Resolve pathname but do not open file.  */
+#define __O_TMPFILE	0x2010000 /* Atomically create nameless file.  */
 
 #if __WORDSIZE == 64
 # define __O_LARGEFILE	0

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c Thu Sep 12 00:02:11 2013
@@ -110,7 +110,6 @@
 
   /* Support sysdeps/x86_64/multiarch/strchr.S.  */
   IFUNC_IMPL (i, name, strchr,
-	      IFUNC_IMPL_ADD (array, i, strchr, HAS_SSE4_2, __strchr_sse42)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
 

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strchr.S Thu Sep 12 00:02:11 2013
@@ -29,139 +29,12 @@
 	jne	1f
 	call	__init_cpu_features
 1:	leaq	__strchr_sse2(%rip), %rax
-	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
-	jnz	2f
-	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jz	2f
-	leaq	__strchr_sse42(%rip), %rax
-	ret
 2:	testl	$bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
 	jz	3f
 	leaq    __strchr_sse2_no_bsf(%rip), %rax
 3:	ret
 END(strchr)
 
-
-/*
-   This implementation uses SSE4 instructions to compare up to 16 bytes
-   at a time looking for the first occurrence of the character c in the
-   string s:
-
-   char *strchr (const char *s, int c);
-
-   We use 0xa:
-	_SIDD_SBYTE_OPS
-	| _SIDD_CMP_EQUAL_EACH
-	| _SIDD_LEAST_SIGNIFICANT
-   on pcmpistri to compare xmm/mem128
-
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   X X X X X X X X X X X X X X X X
-
-   against xmm
-
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   C C C C C C C C C C C C C C C C
-
-   to find out if the first 16byte data element has a byte C and the
-   offset of the first byte.  There are 3 cases:
-
-   1. The first 16byte data element has the byte C at the offset X.
-   2. The first 16byte data element has EOS and doesn't have the byte C.
-   3. The first 16byte data element is valid and doesn't have the byte C.
-
-   Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
-
-   case		ECX	CFlag	ZFlag	SFlag
-    1		 X	  1	 0/1	  0
-    2		16	  0	  1	  0
-    3		16	  0	  0	  0
-
-   We exit from the loop for cases 1 and 2 with jbe which branches
-   when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
-   X for case 1.  */
-
-	.section .text.sse4.2,"ax",@progbits
-	.align	16
-	.type	__strchr_sse42, @function
-	.globl	__strchr_sse42
-	.hidden	__strchr_sse42
-__strchr_sse42:
-	cfi_startproc
-	CALL_MCOUNT
-	testb	%sil, %sil
-	je	__strend_sse4
-	pxor	%xmm2, %xmm2
-	movd	%esi, %xmm1
-	movl	%edi, %ecx
-	pshufb  %xmm2, %xmm1
-	andl	$15, %ecx
-	movq	%rdi, %r8
-	je	L(aligned_start)
-
-/* Handle unaligned string.  */
-	andq	$-16, %r8
-	movdqa	(%r8), %xmm0
-	pcmpeqb	 %xmm0, %xmm2
-	pcmpeqb	 %xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm2, %edx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %esi
-	/* Remove the leading  bytes.  */
-	sarl	%cl, %edx
-	sarl	%cl, %esi
-	testl	%esi, %esi
-	je	L(unaligned_no_match)
-	/* Check which byte is a match.  */
-	bsfl	%esi, %eax
-	/* Is there a NULL? */
-	testl	%edx, %edx
-	je      L(unaligned_match)
-	bsfl	%edx, %esi
-	cmpl	%esi, %eax
-	/* Return NULL if NULL comes first.  */
-	ja	L(return_null)
-L(unaligned_match):
-	addq	%rdi, %rax
-	ret
-
-	.p2align 4
-L(unaligned_no_match):
-	testl	%edx, %edx
-	jne	L(return_null)
-
-/* Loop start on aligned string.  */
-L(loop):
-	addq	$16, %r8
-L(aligned_start):
-	pcmpistri	$0x2, (%r8), %xmm1
-	jbe	L(wrap)
-	addq	$16, %r8
-	pcmpistri	$0x2, (%r8), %xmm1
-	jbe	L(wrap)
-	addq	$16, %r8
-	pcmpistri       $0x2, (%r8), %xmm1
-	jbe     L(wrap)
-	addq	$16, %r8
-	pcmpistri	$0x2, (%r8), %xmm1
-	jbe	L(wrap)
-	jmp	L(loop)
-L(wrap):
-	jc	L(loop_exit)
-
-/* Return NULL.  */
-L(return_null):
-	xorl	%eax, %eax
-	ret
-
-/* Loop exit.  */
-	.p2align 4
-L(loop_exit):
-	leaq	(%r8,%rcx), %rax
-	ret
-	cfi_endproc
-	.size	__strchr_sse42, .-__strchr_sse42
 
 
 # undef ENTRY

Modified: fsf/trunk/libc/sysdeps/x86_64/strchr.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strchr.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/strchr.S Thu Sep 12 00:02:11 2013
@@ -19,51 +19,174 @@
 
 #include <sysdep.h>
 
+# ifndef ALIGN
+#  define ALIGN(n)	.p2align n
+# endif
+
 
 	.text
 ENTRY (strchr)
 	movd	%esi, %xmm1
-	movq	%rdi, %rcx
+	movl	%edi, %eax
+	andl	$4095, %eax
 	punpcklbw %xmm1, %xmm1
-	andq	$~15, %rdi
+	cmpl	$4032, %eax
+	punpcklwd %xmm1, %xmm1
+	pshufd	$0, %xmm1, %xmm1
+	jg	L(cross_page)
+	movdqu	(%rdi), %xmm0
+	pxor	%xmm3, %xmm3
+	movdqa	%xmm0, %xmm4
+	pcmpeqb	%xmm1, %xmm0
+	pcmpeqb	%xmm3, %xmm4
+	por	%xmm4, %xmm0
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	je	L(next_48_bytes)
+	bsf	%eax, %eax
+#ifdef AS_STRCHRNUL
+	leaq	(%rdi,%rax), %rax
+#else
+	movl	$0, %edx
+	leaq	(%rdi,%rax), %rax
+	cmpb	%sil, (%rax)
+	cmovne	%rdx, %rax
+#endif
+	ret
+
+	ALIGN(3)
+	L(next_48_bytes):
+	movdqu	16(%rdi), %xmm0
+	movdqa	%xmm0, %xmm4
+	pcmpeqb	%xmm1, %xmm0
+	pcmpeqb	%xmm3, %xmm4
+	por	%xmm4, %xmm0
+	pmovmskb %xmm0, %ecx
+	movdqu	32(%rdi), %xmm0
+	movdqa	%xmm0, %xmm4
+	pcmpeqb	%xmm1, %xmm0
+	salq	$16, %rcx
+	pcmpeqb	%xmm3, %xmm4
+	por	%xmm4, %xmm0
+	pmovmskb %xmm0, %eax
+	movdqu	48(%rdi), %xmm0
+	pcmpeqb	%xmm0, %xmm3
+	salq	$32, %rax
+	pcmpeqb	%xmm1, %xmm0
+	orq	%rcx, %rax
+	por	%xmm3, %xmm0
+	pmovmskb %xmm0, %ecx
+	salq	$48, %rcx
+	orq	%rcx, %rax
+	testq	%rax, %rax
+	jne	L(return)
+L(loop_start):
+	/* We use this alignment to force the loop to be aligned to 8 but
+	   not 16 bytes.  This gives better scheduling on AMD processors.  */
+	ALIGN(4)
+	pxor	%xmm6, %xmm6
+	andq	$-64, %rdi
+	ALIGN(3)
+L(loop64):
+	addq	$64, %rdi
+	movdqa	(%rdi), %xmm5
+	movdqa	16(%rdi), %xmm2
+	movdqa	32(%rdi), %xmm3
+	pxor	%xmm1, %xmm5
+	movdqa	48(%rdi), %xmm4
+	pxor	%xmm1, %xmm2
+	pxor	%xmm1, %xmm3
+	pminub	(%rdi), %xmm5
+	pxor	%xmm1, %xmm4
+	pminub	16(%rdi), %xmm2
+	pminub	32(%rdi), %xmm3
+	pminub	%xmm2, %xmm5
+	pminub	48(%rdi), %xmm4
+	pminub	%xmm3, %xmm5
+	pminub	%xmm4, %xmm5
+	pcmpeqb %xmm6, %xmm5
+	pmovmskb %xmm5, %eax
+
+	testl	%eax, %eax
+	je	L(loop64)
+
+	movdqa	(%rdi), %xmm5
+	movdqa	%xmm5, %xmm0
+	pcmpeqb	%xmm1, %xmm5
+	pcmpeqb	%xmm6, %xmm0
+	por	%xmm0, %xmm5
+	pcmpeqb %xmm6, %xmm2
+	pcmpeqb %xmm6, %xmm3
+	pcmpeqb %xmm6, %xmm4
+
+	pmovmskb %xmm5, %ecx
+	pmovmskb %xmm2, %eax
+	salq	$16, %rax
+	pmovmskb %xmm3, %r8d
+	pmovmskb %xmm4, %edx
+	salq	$32, %r8
+	orq	%r8, %rax
+	orq	%rcx, %rax
+	salq	$48, %rdx
+	orq	%rdx, %rax
+	ALIGN(3)
+L(return):
+	bsfq	%rax, %rax
+#ifdef AS_STRCHRNUL
+	leaq	(%rdi,%rax), %rax
+#else
+	movl	$0, %edx
+	leaq	(%rdi,%rax), %rax
+	cmpb	%sil, (%rax)
+	cmovne	%rdx, %rax
+#endif
+	ret
+	ALIGN(4)
+
+L(cross_page):
+	movq	%rdi, %rdx
 	pxor	%xmm2, %xmm2
-	punpcklbw %xmm1, %xmm1
-	orl	$0xffffffff, %esi
-	movdqa	(%rdi), %xmm0
-	pshufd	$0, %xmm1, %xmm1
-	subq	%rdi, %rcx
-	movdqa	%xmm0, %xmm3
-	leaq	16(%rdi), %rdi
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm3
-	shl	%cl, %esi
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm3, %ecx
-	andl	%esi, %edx
-	andl	%esi, %ecx
-	orl	%edx, %ecx
-	jnz	1f
+	andq	$-64, %rdx
+	movdqa	%xmm1, %xmm0
+	movdqa	(%rdx), %xmm3
+	movdqa	%xmm3, %xmm4
+	pcmpeqb	%xmm1, %xmm3
+	pcmpeqb	%xmm2, %xmm4
+	por	%xmm4, %xmm3
+	pmovmskb %xmm3, %r8d
+	movdqa	16(%rdx), %xmm3
+	movdqa	%xmm3, %xmm4
+	pcmpeqb	%xmm1, %xmm3
+	pcmpeqb	%xmm2, %xmm4
+	por	%xmm4, %xmm3
+	pmovmskb %xmm3, %eax
+	movdqa	32(%rdx), %xmm3
+	movdqa	%xmm3, %xmm4
+	pcmpeqb	%xmm1, %xmm3
+	salq	$16, %rax
+	pcmpeqb	%xmm2, %xmm4
+	por	%xmm4, %xmm3
+	pmovmskb %xmm3, %r9d
+	movdqa	48(%rdx), %xmm3
+	pcmpeqb	%xmm3, %xmm2
+	salq	$32, %r9
+	pcmpeqb	%xmm3, %xmm0
+	orq	%r9, %rax
+	orq	%r8, %rax
+	por	%xmm2, %xmm0
+	pmovmskb %xmm0, %ecx
+	salq	$48, %rcx
+	orq	%rcx, %rax
+	movl	%edi, %ecx
+	subb	%dl, %cl
+	shrq	%cl, %rax
+	testq	%rax, %rax
+	jne	L(return)
+	jmp	L(loop_start)
 
-2:	movdqa	(%rdi), %xmm0
-	leaq	16(%rdi), %rdi
-	movdqa	%xmm0, %xmm3
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm3
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm3, %ecx
-	orl	%edx, %ecx
-	jz	2b
-
-1:	bsfl	%edx, %edx
-	jz	4f
-	bsfl	%ecx, %ecx
-	leaq	-16(%rdi,%rdx), %rax
-	cmpl	%edx, %ecx
-	je	5f
-4:	xorl	%eax, %eax
-5:	ret
 END (strchr)
 
+#ifndef AS_STRCHRNUL
 weak_alias (strchr, index)
 libc_hidden_builtin_def (strchr)
-
+#endif

Modified: fsf/trunk/libc/sysdeps/x86_64/strchrnul.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/strchrnul.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/strchrnul.S Thu Sep 12 00:02:11 2013
@@ -20,43 +20,8 @@
 
 #include <sysdep.h>
 
-
-	.text
-ENTRY (__strchrnul)
-	movd	%esi, %xmm1
-	movq	%rdi, %rcx
-	punpcklbw %xmm1, %xmm1
-	andq	$~15, %rdi
-	pxor	%xmm2, %xmm2
-	punpcklbw %xmm1, %xmm1
-	orl	$0xffffffff, %esi
-	movdqa	(%rdi), %xmm0
-	pshufd	$0, %xmm1, %xmm1
-	subq	%rdi, %rcx
-	movdqa	%xmm0, %xmm3
-	leaq	16(%rdi), %rdi
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm3
-	shl	%cl, %esi
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm3, %ecx
-	orl	%edx, %ecx
-	andl	%esi, %ecx
-	jnz	1f
-
-2:	movdqa	(%rdi), %xmm0
-	leaq	16(%rdi), %rdi
-	movdqa	%xmm0, %xmm3
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm3
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm3, %ecx
-	orl	%edx, %ecx
-	jz	2b
-
-1:	bsfl	%ecx, %edx
-	leaq	-16(%rdi,%rdx), %rax
-	ret
-END (__strchrnul)
+#define strchr __strchrnul
+#define AS_STRCHRNUL
+#include "strchr.S"
 
 weak_alias (__strchrnul, strchrnul)

_______________________________________________
Commits mailing list
Commits@xxxxxxxxxx
http://eglibc.org/cgi-bin/mailman/listinfo/commits