[commits] r8676 - in /fsf/trunk/libc: ./ elf/ include/ malloc/ nscd/ sysdeps/generic/ sysdeps/x86_64/
- To: commits@xxxxxxxxxx
- Subject: [commits] r8676 - in /fsf/trunk/libc: ./ elf/ include/ malloc/ nscd/ sysdeps/generic/ sysdeps/x86_64/
- From: eglibc@xxxxxxxxxx
- Date: Fri, 17 Jul 2009 07:07:41 -0000
Author: eglibc
Date: Fri Jul 17 00:07:41 2009
New Revision: 8676
Log:
Import glibc-mainline for 2009-07-17
Added:
fsf/trunk/libc/sysdeps/x86_64/memcmp.S
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/elf/dl-misc.c
fsf/trunk/libc/include/atomic.h
fsf/trunk/libc/malloc/malloc.c
fsf/trunk/libc/nscd/aicache.c
fsf/trunk/libc/nscd/cache.c
fsf/trunk/libc/nscd/grpcache.c
fsf/trunk/libc/nscd/hstcache.c
fsf/trunk/libc/nscd/initgrcache.c
fsf/trunk/libc/nscd/mem.c
fsf/trunk/libc/nscd/pwdcache.c
fsf/trunk/libc/nscd/servicescache.c
fsf/trunk/libc/sysdeps/generic/ldsodefs.h
fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.S
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Fri Jul 17 00:07:41 2009
@@ -1,3 +1,52 @@
+2009-07-16 Petr Baudis <pasky@xxxxxxx>
+
+ * nscd/mem.c (mempool_alloc): Fix unlock missing in the else branch.
+ * nscd/aicache.c: Remove bogus db->lock unlock.
+ * nscd/grpcache.c: Likewise.
+ * nscd/hstcache.c: Likewise.
+ * nscd/initgrcache.c: Likewise.
+ * nscd/pwdcache.c: Likewise.
+ * nscd/servicescache.c: Likewise.
+
+2009-07-16 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * nscd/cache.c (cache_add): Use atomic_compare_and_exchange_bool_rel
+ instead of atomic_compare_and_exchange_bool_acq to ensure pointer
+ is written before the list head update.
+ Patch by Andreas Schwab <aschwab@xxxxxxxxxx>.
+
+2009-07-16 Ulrich Drepper <drepper@xxxxxxxxxx>
+ Jakub Jelinek <jakub@xxxxxxxxxx>
+
+ * malloc/malloc.c [ATOMIC_FASTBINS] (_int_free): Make check for
+ corruption thread-safe.
+
+2009-07-13 Jakub Jelinek <jakub@xxxxxxxxxx>
+
+ * include/atomic.h (catomic_compare_and_exchange_val_rel): If arch
+ overrides atomic_compare_and_exchange_val_rel, define to
+ atomic_compare_and_exchange_val_rel by default, otherwise default
+ to catomic_compare_and_exchange_val_acq.
+ (catomic_compare_and_exchange_bool_rel): If arch overrides
+ atomic_compare_and_exchange_bool_rel, define to
+ atomic_compare_and_exchange_bool_rel by default.
+ * malloc/malloc.c (_int_free): Revert 2009-07-02 change.
+ Use catomic_compare_and_exchange_val_rel instead of
+ catomic_compare_and_exchange_val_acq.
+
+2009-07-16 Ulrich Drepper <drepper@xxxxxxxxxx>
+
+ * sysdeps/generic/ldsodefs.h: Add prototype for
+ _dl_higher_prime_number.
+ * elf/dl-misc.c (_dl_higher_prime_number): Mark with internal_function.
+
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Optimize
+ restoring of ymm registers a bit.
+
+2009-07-15 H.J. Lu <hongjiu.lu@xxxxxxxxx>
+
+ * sysdeps/x86_64/memcmp.S: New file.
+
2009-07-15 Ulrich Drepper <drepper@xxxxxxxxxx>
* sysdeps/x86-64/dl-trampoline.h: Remove after integrating code into...
Modified: fsf/trunk/libc/elf/dl-misc.c
==============================================================================
--- fsf/trunk/libc/elf/dl-misc.c (original)
+++ fsf/trunk/libc/elf/dl-misc.c Fri Jul 17 00:07:41 2009
@@ -315,6 +315,7 @@
unsigned long int
+internal_function
_dl_higher_prime_number (unsigned long int n)
{
/* These are primes that are near, but slightly smaller than, a
Modified: fsf/trunk/libc/include/atomic.h
==============================================================================
--- fsf/trunk/libc/include/atomic.h (original)
+++ fsf/trunk/libc/include/atomic.h Fri Jul 17 00:07:41 2009
@@ -107,14 +107,19 @@
#endif
+#ifndef catomic_compare_and_exchange_val_rel
+# ifndef atomic_compare_and_exchange_val_rel
+# define catomic_compare_and_exchange_val_rel(mem, newval, oldval) \
+ catomic_compare_and_exchange_val_acq (mem, newval, oldval)
+# else
+# define catomic_compare_and_exchange_val_rel(mem, newval, oldval) \
+ atomic_compare_and_exchange_val_rel (mem, newval, oldval)
+# endif
+#endif
+
+
#ifndef atomic_compare_and_exchange_val_rel
# define atomic_compare_and_exchange_val_rel(mem, newval, oldval) \
- atomic_compare_and_exchange_val_acq (mem, newval, oldval)
-#endif
-
-
-#ifndef catomic_compare_and_exchange_val_rel
-# define catomic_compare_and_exchange_val_rel(mem, newval, oldval) \
atomic_compare_and_exchange_val_acq (mem, newval, oldval)
#endif
@@ -155,15 +160,20 @@
#endif
+#ifndef catomic_compare_and_exchange_bool_rel
+# ifndef atomic_compare_and_exchange_bool_rel
+# define catomic_compare_and_exchange_bool_rel(mem, newval, oldval) \
+ catomic_compare_and_exchange_bool_acq (mem, newval, oldval)
+# else
+# define catomic_compare_and_exchange_bool_rel(mem, newval, oldval) \
+ atomic_compare_and_exchange_bool_rel (mem, newval, oldval)
+# endif
+#endif
+
+
#ifndef atomic_compare_and_exchange_bool_rel
# define atomic_compare_and_exchange_bool_rel(mem, newval, oldval) \
atomic_compare_and_exchange_bool_acq (mem, newval, oldval)
-#endif
-
-
-#ifndef catomic_compare_and_exchange_bool_rel
-# define catomic_compare_and_exchange_bool_rel(mem, newval, oldval) \
- catomic_compare_and_exchange_bool_acq (mem, newval, oldval)
#endif
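The reordering above matters because the catomic_* release wrappers must be defined before the generic atomic_*_rel fallbacks rewrite the names: if an architecture supplies a real atomic_compare_and_exchange_val_rel, the catomic release variant now forwards to it, otherwise it falls back to the acquire catomic variant as before. Below is a standalone sketch of that selection logic, assuming a hypothetical ARCH_HAS_REL switch and using the GCC __sync builtin (a full barrier) as a stand-in for glibc's real primitives; it is not glibc code.

  /* Sketch of the fallback chain in include/atomic.h above.  */
  #include <stdio.h>

  /* #define ARCH_HAS_REL 1      -- uncomment to model an arch override */

  #ifdef ARCH_HAS_REL
  # define atomic_compare_and_exchange_val_rel(mem, newval, oldval) \
    __sync_val_compare_and_swap (mem, oldval, newval)
  #endif

  #define catomic_compare_and_exchange_val_acq(mem, newval, oldval) \
    __sync_val_compare_and_swap (mem, oldval, newval)

  /* Mirrors the new ordering: prefer the arch's release CAS when it
     exists, otherwise degrade to the acquire catomic variant.  */
  #ifndef catomic_compare_and_exchange_val_rel
  # ifndef atomic_compare_and_exchange_val_rel
  #  define catomic_compare_and_exchange_val_rel(mem, newval, oldval) \
     catomic_compare_and_exchange_val_acq (mem, newval, oldval)
  # else
  #  define catomic_compare_and_exchange_val_rel(mem, newval, oldval) \
     atomic_compare_and_exchange_val_rel (mem, newval, oldval)
  # endif
  #endif

  int
  main (void)
  {
    long v = 1;
    long old = catomic_compare_and_exchange_val_rel (&v, 2L, 1L);
    printf ("old=%ld new=%ld\n", old, v);   /* old=1 new=2 */
    return 0;
  }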
Modified: fsf/trunk/libc/malloc/malloc.c
==============================================================================
--- fsf/trunk/libc/malloc/malloc.c (original)
+++ fsf/trunk/libc/malloc/malloc.c Fri Jul 17 00:07:41 2009
@@ -4799,8 +4799,29 @@
|| __builtin_expect (chunksize (chunk_at_offset (p, size))
>= av->system_mem, 0))
{
- errstr = "free(): invalid next size (fast)";
- goto errout;
+#ifdef ATOMIC_FASTBINS
+ /* We might not have a lock at this point and concurrent modifications
+ of system_mem might have led to a false positive. Redo the test
+ after getting the lock. */
+ if (have_lock
+ || ({ assert (locked == 0);
+ mutex_lock(&av->mutex);
+ locked = 1;
+ chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
+ || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
+ }))
+#endif
+ {
+ errstr = "free(): invalid next size (fast)";
+ goto errout;
+ }
+#ifdef ATOMIC_FASTBINS
+ if (! have_lock)
+ {
+ (void)mutex_unlock(&av->mutex);
+ locked = 0;
+ }
+#endif
}
if (__builtin_expect (perturb_byte, 0))
@@ -4822,9 +4843,8 @@
goto errout;
}
p->fd = fd = old;
- atomic_full_barrier ();
}
- while ((old = catomic_compare_and_exchange_val_acq (fb, p, fd)) != fd);
+ while ((old = catomic_compare_and_exchange_val_rel (fb, p, fd)) != fd);
#else
/* Another simple check: make sure the top of the bin is not the
record we are going to add (i.e., double free). */
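Two things happen in this malloc.c hunk: the fastbin "invalid next size" check is repeated under the arena lock when free() runs without it, so a concurrent change to av->system_mem can no longer trigger a false positive, and the explicit full barrier plus acquire CAS is replaced by a single release CAS so the p->fd store is ordered before the fastbin head update. The following is a simplified sketch of the recheck-under-lock pattern only; the names and structure are illustrative, not the malloc internals, and unlike the real code it always drops the lock again instead of keeping it for the error path.

  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>

  struct arena
  {
    pthread_mutex_t mutex;
    size_t system_mem;   /* may grow concurrently while mutex is not held */
  };

  /* Return true if NEXT_SIZE is out of range.  Without the arena lock a
     concurrent grow of system_mem can make the unlocked test fire
     spuriously, so the test is repeated once the lock is held.  */
  static bool
  next_size_invalid (struct arena *av, size_t next_size, bool have_lock)
  {
    if (next_size <= 2 * sizeof (size_t) || next_size >= av->system_mem)
      {
        if (have_lock)
          return true;                     /* stable view, trust the result */
        pthread_mutex_lock (&av->mutex);   /* re-test with a stable system_mem */
        bool bad = next_size <= 2 * sizeof (size_t)
                   || next_size >= av->system_mem;
        pthread_mutex_unlock (&av->mutex);
        return bad;
      }
    return false;
  }

  int
  main (void)
  {
    struct arena av = { PTHREAD_MUTEX_INITIALIZER, 1 << 20 };
    printf ("%d %d\n",
            next_size_invalid (&av, 64, false),        /* 0: plausible size */
            next_size_invalid (&av, 1 << 21, false));  /* 1: beyond system_mem */
    return 0;
  }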
Modified: fsf/trunk/libc/nscd/aicache.c
==============================================================================
--- fsf/trunk/libc/nscd/aicache.c (original)
+++ fsf/trunk/libc/nscd/aicache.c Fri Jul 17 00:07:41 2009
@@ -543,8 +543,6 @@
(void) cache_add (req->type, key_copy, req->key_len, &dataset->head,
true, db, uid, he == NULL);
- pthread_rwlock_unlock (&db->lock);
-
/* Mark the old entry as obsolete. */
if (dh != NULL)
dh->usable = false;
Modified: fsf/trunk/libc/nscd/cache.c
==============================================================================
--- fsf/trunk/libc/nscd/cache.c (original)
+++ fsf/trunk/libc/nscd/cache.c Fri Jul 17 00:07:41 2009
@@ -179,7 +179,7 @@
/* Put the new entry in the first position. */
do
newp->next = table->head->array[hash];
- while (atomic_compare_and_exchange_bool_acq (&table->head->array[hash],
+ while (atomic_compare_and_exchange_bool_rel (&table->head->array[hash],
(ref_t) ((char *) newp
- table->data),
(ref_t) newp->next));
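The cache_add change is purely about ordering: the new entry's next pointer must be globally visible before the hash bucket head is swung to point at it, which is what release semantics on the CAS guarantee and an acquire CAS does not. A minimal C11 sketch of the same push pattern follows, using a hypothetical node type and a plain pointer head rather than nscd's ref_t offsets.

  #include <stdatomic.h>
  #include <stddef.h>

  struct node
  {
    struct node *next;
    int payload;
  };

  static _Atomic (struct node *) head;

  static void
  push (struct node *newp)
  {
    struct node *old = atomic_load_explicit (&head, memory_order_relaxed);
    do
      newp->next = old;   /* must be visible before head points at newp */
    while (!atomic_compare_exchange_weak_explicit (&head, &old, newp,
                                                   memory_order_release,
                                                   memory_order_relaxed));
  }

  int
  main (void)
  {
    static struct node a = { .payload = 1 }, b = { .payload = 2 };
    push (&a);
    push (&b);
    return 0;
  }

With release ordering, any thread that later reads the new head (with an acquire load) is guaranteed to see the initialized next field; with only acquire ordering on the CAS, the next store could in principle be reordered past the head update.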
Modified: fsf/trunk/libc/nscd/grpcache.c
==============================================================================
--- fsf/trunk/libc/nscd/grpcache.c (original)
+++ fsf/trunk/libc/nscd/grpcache.c Fri Jul 17 00:07:41 2009
@@ -146,8 +146,6 @@
(void) cache_add (req->type, &dataset->strdata, req->key_len,
&dataset->head, true, db, owner, he == NULL);
- pthread_rwlock_unlock (&db->lock);
-
/* Mark the old entry as obsolete. */
if (dh != NULL)
dh->usable = false;
@@ -367,12 +365,10 @@
(void) cache_add (GETGRBYGID, cp, key_offset, &dataset->head,
false, db, owner, false);
}
-
- out:
- pthread_rwlock_unlock (&db->lock);
}
}
+out:
if (__builtin_expect (written != total, 0) && debug_level > 0)
{
char buf[256];
Modified: fsf/trunk/libc/nscd/hstcache.c
==============================================================================
--- fsf/trunk/libc/nscd/hstcache.c (original)
+++ fsf/trunk/libc/nscd/hstcache.c Fri Jul 17 00:07:41 2009
@@ -152,8 +152,6 @@
(void) cache_add (req->type, &dataset->strdata, req->key_len,
&dataset->head, true, db, owner, he == NULL);
-
- pthread_rwlock_unlock (&db->lock);
/* Mark the old entry as obsolete. */
if (dh != NULL)
@@ -404,8 +402,6 @@
(void) cache_add (req->type, key_copy, req->key_len,
&dataset->head, true, db, owner, he == NULL);
-
- pthread_rwlock_unlock (&db->lock);
}
}
Modified: fsf/trunk/libc/nscd/initgrcache.c
==============================================================================
--- fsf/trunk/libc/nscd/initgrcache.c (original)
+++ fsf/trunk/libc/nscd/initgrcache.c Fri Jul 17 00:07:41 2009
@@ -229,8 +229,6 @@
(void) cache_add (req->type, key_copy, req->key_len,
&dataset->head, true, db, uid, he == NULL);
-
- pthread_rwlock_unlock (&db->lock);
/* Mark the old entry as obsolete. */
if (dh != NULL)
@@ -388,8 +386,6 @@
(void) cache_add (INITGROUPS, cp, req->key_len, &dataset->head, true,
db, uid, he == NULL);
-
- pthread_rwlock_unlock (&db->lock);
}
}
Modified: fsf/trunk/libc/nscd/mem.c
==============================================================================
--- fsf/trunk/libc/nscd/mem.c (original)
+++ fsf/trunk/libc/nscd/mem.c Fri Jul 17 00:07:41 2009
@@ -566,9 +566,6 @@
}
}
- if (data_alloc)
- pthread_rwlock_unlock (&db->lock);
-
if (! db->last_alloc_failed)
{
dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
@@ -591,5 +588,8 @@
pthread_mutex_unlock (&db->memlock);
+ if (data_alloc)
+ pthread_rwlock_unlock (&db->lock);
+
return res;
}
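Together with the unlock removals in the nscd cache files, this restores a simple ownership rule: the function that takes db->lock is the one that releases it, exactly once, on every exit path, and only after the memory-pool mutex has been dropped. The sketch below illustrates that rule with made-up names; it is not the real mempool_alloc.

  #include <pthread.h>
  #include <stddef.h>
  #include <stdio.h>

  static pthread_rwlock_t db_lock = PTHREAD_RWLOCK_INITIALIZER;
  static pthread_mutex_t memlock = PTHREAD_MUTEX_INITIALIZER;

  static void *
  pool_alloc (size_t len, int data_alloc)
  {
    void *res = NULL;

    pthread_mutex_lock (&memlock);
    if (data_alloc)
      pthread_rwlock_rdlock (&db_lock);

    /* ... try to carve LEN bytes out of the pool; RES stays NULL on
       failure ... */
    (void) len;

    pthread_mutex_unlock (&memlock);

    /* One unlock, on every path, after the memlock is dropped, so the
       caller never has to second-guess whether db_lock is still held.  */
    if (data_alloc)
      pthread_rwlock_unlock (&db_lock);

    return res;
  }

  int
  main (void)
  {
    printf ("%p\n", pool_alloc (32, 1));
    return 0;
  }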
Modified: fsf/trunk/libc/nscd/pwdcache.c
==============================================================================
--- fsf/trunk/libc/nscd/pwdcache.c (original)
+++ fsf/trunk/libc/nscd/pwdcache.c Fri Jul 17 00:07:41 2009
@@ -153,8 +153,6 @@
(void) cache_add (req->type, key_copy, req->key_len,
&dataset->head, true, db, owner, he == NULL);
- pthread_rwlock_unlock (&db->lock);
-
/* Mark the old entry as obsolete. */
if (dh != NULL)
dh->usable = false;
@@ -362,12 +360,10 @@
(void) cache_add (GETPWBYUID, cp, key_offset, &dataset->head,
false, db, owner, false);
}
-
- out:
- pthread_rwlock_unlock (&db->lock);
}
}
+out:
if (__builtin_expect (written != total, 0) && debug_level > 0)
{
char buf[256];
Modified: fsf/trunk/libc/nscd/servicescache.c
==============================================================================
--- fsf/trunk/libc/nscd/servicescache.c (original)
+++ fsf/trunk/libc/nscd/servicescache.c Fri Jul 17 00:07:41 2009
@@ -135,8 +135,6 @@
(void) cache_add (req->type, &dataset->strdata, req->key_len,
&dataset->head, true, db, owner, he == NULL);
-
- pthread_rwlock_unlock (&db->lock);
/* Mark the old entry as obsolete. */
if (dh != NULL)
@@ -317,8 +315,6 @@
(void) cache_add (req->type, key_copy, req->key_len,
&dataset->head, true, db, owner, he == NULL);
-
- pthread_rwlock_unlock (&db->lock);
}
}
Modified: fsf/trunk/libc/sysdeps/generic/ldsodefs.h
==============================================================================
--- fsf/trunk/libc/sysdeps/generic/ldsodefs.h (original)
+++ fsf/trunk/libc/sysdeps/generic/ldsodefs.h Fri Jul 17 00:07:41 2009
@@ -335,6 +335,10 @@
extern int _dl_name_match_p (const char *__name, const struct link_map *__map)
internal_function;
+/* Compute next higher prime number. */
+extern unsigned long int _dl_higher_prime_number (unsigned long int n)
+ internal_function;
+
/* Function used as argument for `_dl_receive_error' function. The
arguments are the error code, error string, and the objname the
error occurred in. */
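The prototype matches the definition in elf/dl-misc.c, which is marked internal_function in the same commit. As a rough idea of what a helper with this shape does (the real table and lookup in dl-misc.c differ), it returns a prime from a fixed table, suitable for sizing hash tables, that is at least as large as the request; the table and search here are illustrative only.

  #include <stdio.h>

  static unsigned long int
  higher_prime_number (unsigned long int n)
  {
    /* Primes near powers of two; illustrative subset only.  */
    static const unsigned long int primes[] =
      { 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381 };
    const size_t nprimes = sizeof primes / sizeof primes[0];

    for (size_t i = 0; i < nprimes; ++i)
      if (primes[i] >= n)
        return primes[i];
    return primes[nprimes - 1];   /* request exceeds what the table covers */
  }

  int
  main (void)
  {
    printf ("%lu\n", higher_prime_number (1000));   /* 1021 */
    return 0;
  }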
Modified: fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/dl-trampoline.S Fri Jul 17 00:07:41 2009
@@ -185,71 +185,6 @@
movq LR_R8_OFFSET(%rsp), %r8
movq LR_R9_OFFSET(%rsp), %r9
-# ifdef HAVE_AVX_SUPPORT
- cmpl $0, L(have_avx)(%rip)
- js L(no_avx2)
-
- /* Check if any xmm0-xmm7 registers are changed by audit
- module. */
- vmovdqa (LR_XMM_OFFSET)(%rsp), %xmm0
- vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm1
- vpmovmskb %xmm1, %esi
- cmpl $0xffff, %esi
- je 1f
- vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
-
-1: vmovdqa (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
- vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
- vpmovmskb %xmm2, %esi
- cmpl $0xffff, %esi
- je 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
-
-1: vmovdqa (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
- vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm3
- vpmovmskb %xmm3, %esi
- cmpl $0xffff, %esi
- je 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
-
-1: vmovdqa (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
- vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm4
- vpmovmskb %xmm4, %esi
- cmpl $0xffff, %esi
- je 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
-
-1: vmovdqa (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
- vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm5
- vpmovmskb %xmm5, %esi
- cmpl $0xffff, %esi
- je 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
-
-1: vmovdqa (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
- vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm6
- vpmovmskb %xmm6, %esi
- cmpl $0xffff, %esi
- je 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
-
-1: vmovdqa (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
- vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm7
- vpmovmskb %xmm7, %esi
- cmpl $0xffff, %esi
- je 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
-
-1: vmovdqa (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
- vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- je 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
- jmp 1f
-
-L(no_avx2):
-# endif
movaps (LR_XMM_OFFSET)(%rsp), %xmm0
movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
@@ -259,7 +194,64 @@
movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
-1: movq 16(%rbx), %r10 # Anything in framesize?
+# ifdef HAVE_AVX_SUPPORT
+ cmpl $0, L(have_avx)(%rip)
+ js L(no_avx2)
+
+ /* Check if any xmm0-xmm7 registers are changed by audit
+ module. */
+ vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 1f
+ vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 1f
+ vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 1f
+ vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 1f
+ vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 1f
+ vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 1f
+ vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 1f
+ vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 1f
+ vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
+
+L(no_avx2):
+1:
+# endif
+ movq 16(%rbx), %r10 # Anything in framesize?
testq %r10, %r10
jns 3f
@@ -358,32 +350,31 @@
movq LRV_RAX_OFFSET(%rsp), %rax
movq LRV_RDX_OFFSET(%rsp), %rdx
+ movaps LRV_XMM0_OFFSET(%rsp), %xmm0
+ movaps LRV_XMM1_OFFSET(%rsp), %xmm1
+
# ifdef HAVE_AVX_SUPPORT
cmpl $0, L(have_avx)(%rip)
js L(no_avx4)
/* Check if xmm0/xmm1 registers are changed by audit module. */
- vmovdqa LRV_XMM0_OFFSET(%rsp), %xmm0
- vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm1
- vpmovmskb %xmm1, %esi
+ vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
+ vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
je 1f
vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
-1: vmovdqa LRV_XMM1_OFFSET(%rsp), %xmm1
- vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
+1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
je 1f
vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
- jmp 1f
L(no_avx4):
-# endif
- movaps LRV_XMM0_OFFSET(%rsp), %xmm0
- movaps LRV_XMM1_OFFSET(%rsp), %xmm1
-
-1: fldt LRV_ST1_OFFSET(%rsp)
+1:
+# endif
+
+ fldt LRV_ST1_OFFSET(%rsp)
fldt LRV_ST0_OFFSET(%rsp)
movq %rbx, %rsp
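The trampoline now restores xmm0-xmm7 with plain movaps first and only then, on AVX machines, compares each saved low half against what the audit module left in the register, reloading the full ymm register only when the low 128 bits changed; using xmm8 as the single scratch register for every compare is what removes the per-register vmovdqa loads. The per-register test is essentially the following, shown with SSE4.1/AVX intrinsics as stand-ins for vpcmpeqq/vpmovmskb (build with -msse4.1 or -mavx); this is a sketch, not the trampoline code.

  #include <immintrin.h>
  #include <stdio.h>

  static int
  xmm_unchanged (__m128i saved, __m128i current)
  {
    __m128i eq = _mm_cmpeq_epi64 (saved, current);   /* all-ones lanes where equal */
    return _mm_movemask_epi8 (eq) == 0xffff;         /* 0xffff: every byte matched */
  }

  int
  main (void)
  {
    __m128i a = _mm_set_epi64x (1, 2);
    __m128i b = _mm_set_epi64x (1, 2);
    __m128i c = _mm_set_epi64x (1, 3);
    printf ("%d %d\n", xmm_unchanged (a, b), xmm_unchanged (a, c));   /* 1 0 */
    return 0;
  }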
Added: fsf/trunk/libc/sysdeps/x86_64/memcmp.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/memcmp.S (added)
+++ fsf/trunk/libc/sysdeps/x86_64/memcmp.S Fri Jul 17 00:07:41 2009
@@ -1,0 +1,359 @@
+/* memcmp with SSE2
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY (memcmp)
+ test %rdx, %rdx
+ jz L(finz)
+ cmpq $1, %rdx
+ jle L(finr1b)
+ subq %rdi, %rsi
+ movq %rdx, %r10
+ cmpq $32, %r10
+ jge L(gt32)
+ /* Handle small chunks and last block of less than 32 bytes. */
+L(small):
+ testq $1, %r10
+ jz L(s2b)
+ movzbl (%rdi), %eax
+ movzbl (%rdi, %rsi), %edx
+ subq $1, %r10
+ je L(finz1)
+ addq $1, %rdi
+ subl %edx, %eax
+ jnz L(exit)
+L(s2b):
+ testq $2, %r10
+ jz L(s4b)
+ movzwl (%rdi), %eax
+ movzwl (%rdi, %rsi), %edx
+ subq $2, %r10
+ je L(fin2_7)
+ addq $2, %rdi
+ cmpl %edx, %eax
+ jnz L(fin2_7)
+L(s4b):
+ testq $4, %r10
+ jz L(s8b)
+ movl (%rdi), %eax
+ movl (%rdi, %rsi), %edx
+ subq $4, %r10
+ je L(fin2_7)
+ addq $4, %rdi
+ cmpl %edx, %eax
+ jnz L(fin2_7)
+L(s8b):
+ testq $8, %r10
+ jz L(s16b)
+ movq (%rdi), %rax
+ movq (%rdi, %rsi), %rdx
+ subq $8, %r10
+ je L(fin2_7)
+ addq $8, %rdi
+ cmpq %rdx, %rax
+ jnz L(fin2_7)
+L(s16b):
+ movdqu (%rdi), %xmm1
+ movdqu (%rdi, %rsi), %xmm0
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ xorl %eax, %eax
+ subl $0xffff, %edx
+ jz L(finz)
+ bsfl %edx, %ecx
+ leaq (%rdi, %rcx), %rcx
+ movzbl (%rcx), %eax
+ movzbl (%rsi, %rcx), %edx
+ jmp L(finz1)
+
+ .p2align 4,, 4
+L(finr1b):
+ movzbl (%rdi), %eax
+ movzbl (%rsi), %edx
+L(finz1):
+ subl %edx, %eax
+L(exit):
+ ret
+
+ .p2align 4,, 4
+L(fin2_7):
+ cmpq %rdx, %rax
+ jz L(finz)
+ movq %rax, %r11
+ subq %rdx, %r11
+ bsfq %r11, %rcx
+ sarq $3, %rcx
+ salq $3, %rcx
+ sarq %cl, %rax
+ movzbl %al, %eax
+ sarq %cl, %rdx
+ movzbl %dl, %edx
+ subl %edx, %eax
+ ret
+
+ .p2align 4,, 4
+L(finz):
+ xorl %eax, %eax
+ ret
+
+ /* For blocks bigger than 32 bytes
+ 1. Advance one of the addr pointers to be 16B aligned.
+ 2. Treat the case of both addr pointers aligned to 16B
+ separately to avoid movdqu.
+ 3. Handle any blocks of greater than 64 consecutive bytes with
+ unrolling to reduce branches.
+ 4. At least one addr pointer is 16B aligned; use the memory version
+ of pcmpeqb.
+ */
+ .p2align 4,, 4
+L(gt32):
+ movq %rdx, %r11
+ addq %rdi, %r11
+ movq %rdi, %r8
+
+ andq $15, %r8
+ jz L(16am)
+ /* Both pointers may be misaligned. */
+ movdqu (%rdi), %xmm1
+ movdqu (%rdi, %rsi), %xmm0
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ neg %r8
+ leaq 16(%rdi, %r8), %rdi
+L(16am):
+ /* Handle two 16B aligned pointers separately. */
+ testq $15, %rsi
+ jz L(ATR)
+ testq $16, %rdi
+ jz L(A32)
+ movdqu (%rdi, %rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+L(A32):
+ movq %r11, %r10
+ andq $-32, %r10
+ cmpq %r10, %rdi
+ jge L(mt16)
+ /* Pre-unroll to be ready for unrolled 64B loop. */
+ testq $32, %rdi
+ jz L(A64)
+ movdqu (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqu (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+L(A64):
+ movq %r11, %r10
+ andq $-64, %r10
+ cmpq %r10, %rdi
+ jge L(mt32)
+
+L(A64main):
+ movdqu (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqu (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqu (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqu (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ cmpq %rdi, %r10
+ jne L(A64main)
+
+L(mt32):
+ movq %r11, %r10
+ andq $-32, %r10
+ cmpq %r10, %rdi
+ jge L(mt16)
+
+L(A32main):
+ movdqu (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqu (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ cmpq %rdi, %r10
+ jne L(A32main)
+L(mt16):
+ subq %rdi, %r11
+ je L(finz)
+ movq %r11, %r10
+ jmp L(small)
+
+ .p2align 4,, 4
+L(neq):
+ bsfl %edx, %ecx
+ movzbl (%rdi, %rcx), %eax
+ addq %rdi, %rsi
+ movzbl (%rsi,%rcx), %edx
+ jmp L(finz1)
+
+ .p2align 4,, 4
+L(ATR):
+ movq %r11, %r10
+ andq $-32, %r10
+ cmpq %r10, %rdi
+ jge L(mt16)
+ testq $16, %rdi
+ jz L(ATR32)
+
+ movdqa (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+ cmpq %rdi, %r10
+ je L(mt16)
+
+L(ATR32):
+ movq %r11, %r10
+ andq $-64, %r10
+ testq $32, %rdi
+ jz L(ATR64)
+
+ movdqa (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqa (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+L(ATR64):
+ cmpq %rdi, %r10
+ je L(mt32)
+
+L(ATR64main):
+ movdqa (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqa (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqa (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqa (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+ cmpq %rdi, %r10
+ jne L(ATR64main)
+
+ movq %r11, %r10
+ andq $-32, %r10
+ cmpq %r10, %rdi
+ jge L(mt16)
+
+L(ATR32res):
+ movdqa (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ movdqa (%rdi,%rsi), %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %edx
+ subl $0xffff, %edx
+ jnz L(neq)
+ addq $16, %rdi
+
+ cmpq %r10, %rdi
+ jne L(ATR32res)
+
+ subq %rdi, %r11
+ je L(finz)
+ movq %r11, %r10
+ jmp L(small)
+ /* Align to 16 bytes to improve instruction fetch. */
+ .p2align 4,, 4
+END(memcmp)
+
+#undef bcmp
+weak_alias (memcmp, bcmp)
+libc_hidden_builtin_def (memcmp)
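The new memcmp handles sub-32-byte tails a word at a time, aligns at least one pointer to 16 bytes, and then runs 32- and 64-byte unrolled loops built from one primitive: compare 16 bytes with pcmpeqb, collapse the result with pmovmskb, and on anything other than 0xffff locate the first differing byte and return its difference. Here is a C sketch of that primitive using SSE2 intrinsics, simplified to exactly 16 bytes; the mismatch-index arithmetic differs slightly from the subl/bsf sequence in the assembly.

  #include <emmintrin.h>
  #include <stdio.h>
  #include <string.h>

  static int
  cmp16 (const unsigned char *s1, const unsigned char *s2)
  {
    __m128i a = _mm_loadu_si128 ((const __m128i *) s1);
    __m128i b = _mm_loadu_si128 ((const __m128i *) s2);
    unsigned int eq = _mm_movemask_epi8 (_mm_cmpeq_epi8 (a, b));
    if (eq == 0xffff)
      return 0;                                      /* all 16 bytes equal */
    unsigned int idx = __builtin_ctz (~eq & 0xffff); /* first differing byte */
    return (int) s1[idx] - (int) s2[idx];
  }

  int
  main (void)
  {
    char x[16] = "abcdefgh01234567";
    char y[16] = "abcdefgh01234X67";
    printf ("%d %d\n",
            cmp16 ((unsigned char *) x, (unsigned char *) y),
            memcmp (x, y, 16) < 0 ? -1 : 1);   /* both negative */
    return 0;
  }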