[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[commits] r14824 - in /fsf/trunk/libc: ./ resolv/ string/ sysdeps/i386/i486/bits/ sysdeps/i386/i686/multiarch/ sysdeps/ieee754/dbl-64/...
- To: commits@xxxxxxxxxx
- Subject: [commits] r14824 - in /fsf/trunk/libc: ./ resolv/ string/ sysdeps/i386/i486/bits/ sysdeps/i386/i686/multiarch/ sysdeps/ieee754/dbl-64/...
- From: eglibc@xxxxxxxxxx
- Date: Fri, 05 Aug 2011 07:02:26 -0000
Author: eglibc
Date: Fri Aug 5 00:02:24 2011
New Revision: 14824
Log:
Import glibc-mainline for 2011-08-05
Added:
fsf/trunk/libc/string/test-wcscmp.c
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat.S
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-c.c
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat.S
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/resolv/res_init.c
fsf/trunk/libc/string/Makefile
fsf/trunk/libc/string/test-strcmp.c
fsf/trunk/libc/string/test-string.h
fsf/trunk/libc/sysdeps/i386/i486/bits/string.h
fsf/trunk/libc/sysdeps/i386/i686/multiarch/Makefile
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
fsf/trunk/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S
fsf/trunk/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
fsf/trunk/libc/sysdeps/posix/getaddrinfo.c
fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Fri Aug 5 00:02:24 2011
@@ -1,3 +1,67 @@
+2011-08-04 Ulrich Drepper <drepper@xxxxxxxxx>
+
+ * string/test-string.h (IMPL): Use __STRING to expand name and then
+ stringify it.
+
+ * string/test-strcmp.c: Unify most of the WIDE and !WIDE code. Lots
+ of cleanups.
+
+2011-07-22 Liubov Dmitrieva <liubov.dmitrieva@xxxxxxxxx>
+
+ * string/Makefile: Update.
+ (strop-tests): Append strncat.
+ * string/test-wcscmp.c: New file.
+ New comprehensive test for wcscmp.
+ * string/test-strcmp.c: Update.
+ (WIDE): New define.
+
+2011-07-22 Andreas Schwab <schwab@xxxxxxxxxx>
+
+ * resolv/res_init.c (__res_vinit): Properly tokenize nameserver
+ line.
+
+2011-07-26 Andreas Schwab <schwab@xxxxxxxxxx>
+
+ * sysdeps/posix/getaddrinfo.c (gaih_inet): Don't discard result of
+ encoding to ACE if AI_IDN.
+
+2011-08-01 Jakub Jelinek <jakub@xxxxxxxxxx>
+
+ * sysdeps/ieee754/dbl-64/k_rem_pio2.c (__kernel_rem_pio2): Fix up fq
+ to y conversion for prec 3 and __FLT_EVAL_METHOD__ != 0.
+
+2011-07-22 Liubov Dmitrieva <liubov.dmitrieva@xxxxxxxxx>
+
+ * sysdeps/i386/i686/multiarch/strcat-sse2.S: Update.
+ Fix overflow bug in strncat.
+ * sysdeps/i386/i686/multiarch/strcpy-ssse3.S: Likewise.
+
+ * string/test-strncat.c: Update.
+ Add new tests for checking overflow bugs.
+
+2011-07-15 Liubov Dmitrieva <liubov.dmitrieva@xxxxxxxxx>
+
+ * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
+ strcat-ssse3 strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c.
+ * sysdeps/i386/i686/multiarch/strcat.S: New file.
+ * sysdeps/i386/i686/multiarch/strncat-c.c: New file.
+ * sysdeps/i386/i686/multiarch/strcat-sse2.S: New file.
+ * sysdeps/i386/i686/multiarch/strcat-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/strncat.S: New file.
+ * sysdeps/i386/i686/multiarch/strncat-sse2.S: New file.
+ * sysdeps/i386/i686/multiarch/strncat-ssse3.S: New file.
+
+ * sysdeps/i386/i686/multiarch/strcpy-ssse3.S
+ (USE_AS_STRCAT): Define.
+ Add strcat and strncat support.
+ * sysdeps/i386/i686/multiarch/strlen-sse2.S: Likewise.
+
+2011-07-25 Andreas Schwab <schwab@xxxxxxxxxx>
+
+ * sysdeps/i386/i486/bits/string.h (__strncat_g): Correctly handle
+ __n bigger than INT_MAX+1.
+ (__strncmp_g): Likewise.
+
2011-07-23 Ulrich Drepper <drepper@xxxxxxxxx>
* posix/unistd.h: Define SEEK_DATA and SEEK_HOLE.
Modified: fsf/trunk/libc/resolv/res_init.c
==============================================================================
--- fsf/trunk/libc/resolv/res_init.c (original)
+++ fsf/trunk/libc/resolv/res_init.c Fri Aug 5 00:02:24 2011
@@ -318,7 +318,7 @@
struct in6_addr a6;
char *el;
- if ((el = strchr(cp, '\n')) != NULL)
+ if ((el = strpbrk(cp, " \t\n")) != NULL)
*el = '\0';
if ((el = strchr(cp, SCOPE_DELIMITER)) != NULL)
*el = '\0';
Modified: fsf/trunk/libc/string/Makefile
==============================================================================
--- fsf/trunk/libc/string/Makefile (original)
+++ fsf/trunk/libc/string/Makefile Fri Aug 5 00:02:24 2011
@@ -46,7 +46,7 @@
# for -fbounded-pointer compiles. Glibc uses memchr for explicit checks.
o-objects.ob := memcpy.o memset.o memchr.o
-strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
+strop-tests := wcscmp memchr memcmp memcpy memmove mempcpy memset memccpy \
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
strlen strncmp strncpy strpbrk strrchr strspn memmem \
strstr strcasestr strnlen strcasecmp strncasecmp \
Modified: fsf/trunk/libc/string/test-strcmp.c
==============================================================================
--- fsf/trunk/libc/string/test-strcmp.c (original)
+++ fsf/trunk/libc/string/test-strcmp.c Fri Aug 5 00:02:24 2011
@@ -1,7 +1,8 @@
-/* Test and measure strcmp functions.
- Copyright (C) 1999, 2002, 2003, 2005 Free Software Foundation, Inc.
+/* Test and measure STRCMP functions.
+ Copyright (C) 1999, 2002, 2003, 2005, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Jakub Jelinek <jakub@xxxxxxxxxx>, 1999.
+ Added wcscmp support by Liubov Dmitrieva <liubov.dmitrieva@xxxxxxxxx>, 2011.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -21,39 +22,73 @@
#define TEST_MAIN
#include "test-string.h"
-typedef int (*proto_t) (const char *, const char *);
-int simple_strcmp (const char *, const char *);
-int stupid_strcmp (const char *, const char *);
-
-IMPL (stupid_strcmp, 0)
-IMPL (simple_strcmp, 0)
-IMPL (strcmp, 1)
+#ifdef WIDE
+# include <inttypes.h>
+# include <wchar.h>
+
+# define L(str) L##str
+# define STRCMP wcscmp
+# define STRCPY wcscpy
+# define STRLEN wcslen
+# define MEMCPY wmemcpy
+# define SIMPLE_STRCMP simple_wcscmp
+# define STUPID_STRCMP stupid_wcscmp
+# define CHAR wchar_t
+# define UCHAR uint32_t
+# define CHARBYTES 4
+# define CHARBYTESLOG 2
+# define CHARALIGN __alignof__ (CHAR)
+# define MIDCHAR 0x7fffffff
+# define LARGECHAR 0xfffffffe
+#else
+# define L(str) str
+# define STRCMP strcmp
+# define STRCPY strcpy
+# define STRLEN strlen
+# define MEMCPY memcpy
+# define SIMPLE_STRCMP simple_strcmp
+# define STUPID_STRCMP stupid_strcmp
+# define CHAR char
+# define UCHAR unsigned char
+# define CHARBYTES 1
+# define CHARBYTESLOG 0
+# define CHARALIGN 1
+# define MIDCHAR 0x7f
+# define LARGECHAR 0xfe
+#endif
+typedef int (*proto_t) (const CHAR *, const CHAR *);
int
-simple_strcmp (const char *s1, const char *s2)
+SIMPLE_STRCMP (const CHAR *s1, const CHAR *s2)
{
int ret;
- while ((ret = *(unsigned char *) s1 - *(unsigned char *) s2++) == 0
- && *s1++);
+ while ((ret = *(UCHAR *) s1 - *(UCHAR *) s2++) == 0 && *s1++);
return ret;
}
int
-stupid_strcmp (const char *s1, const char *s2)
-{
- size_t ns1 = strlen (s1) + 1, ns2 = strlen (s2) + 1;
+STUPID_STRCMP (const CHAR *s1, const CHAR *s2)
+{
+ size_t ns1 = STRLEN (s1) + 1;
+ size_t ns2 = STRLEN (s2) + 1;
size_t n = ns1 < ns2 ? ns1 : ns2;
int ret = 0;
while (n--)
- if ((ret = *(unsigned char *) s1++ - *(unsigned char *) s2++) != 0)
+ if ((ret = *(UCHAR *) s1++ - *(UCHAR *) s2++) != 0)
break;
return ret;
}
-static void
-do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
+IMPL (STUPID_STRCMP, 1)
+IMPL (SIMPLE_STRCMP, 1)
+IMPL (STRCMP, 1)
+
+static int
+check_result (impl_t *impl,
+ const CHAR *s1, const CHAR *s2,
+ int exp_result)
{
int result = CALL (impl, s1, s2);
if ((exp_result == 0 && result != 0)
@@ -63,8 +98,19 @@
error (0, 0, "Wrong result in function %s %d %d", impl->name,
result, exp_result);
ret = 1;
- return;
- }
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+do_one_test (impl_t *impl,
+ const CHAR *s1, const CHAR *s2,
+ int exp_result)
+{
+ if (check_result (impl, s1, s2, exp_result) < 0)
+ return;
if (HP_TIMING_AVAIL)
{
@@ -90,24 +136,28 @@
int exp_result)
{
size_t i;
- char *s1, *s2;
+
+ CHAR *s1, *s2;
if (len == 0)
return;
- align1 &= 7;
- if (align1 + len + 1 >= page_size)
+ align1 &= 63;
+ if (align1 + (len + 1) * CHARBYTES >= page_size)
return;
- align2 &= 7;
- if (align2 + len + 1 >= page_size)
+ align2 &= 63;
+ if (align2 + (len + 1) * CHARBYTES >= page_size)
return;
- s1 = (char *) (buf1 + align1);
- s2 = (char *) (buf2 + align2);
+ /* Put them close to the end of page. */
+ i = align1 + CHARBYTES * (len + 2);
+ s1 = (CHAR *) (buf1 + ((page_size - i) / 16 * 16) + align1);
+ i = align2 + CHARBYTES * (len + 2);
+ s2 = (CHAR *) (buf2 + ((page_size - i) / 16 * 16) + align2);
for (i = 0; i < len; i++)
- s1[i] = s2[i] = 1 + 23 * i % max_char;
+ s1[i] = s2[i] = 1 + (23 << ((CHARBYTES - 1) * 8)) * i % max_char;
s1[len] = s2[len] = 0;
s1[len + 1] = 23;
@@ -127,92 +177,116 @@
static void
do_random_tests (void)
{
- size_t i, j, n, align1, align2, pos, len1, len2;
- int result;
- long r;
- unsigned char *p1 = buf1 + page_size - 512;
- unsigned char *p2 = buf2 + page_size - 512;
-
- for (n = 0; n < ITERATIONS; n++)
- {
- align1 = random () & 31;
- if (random () & 1)
- align2 = random () & 31;
- else
- align2 = align1 + (random () & 24);
- pos = random () & 511;
- j = align1 > align2 ? align1 : align2;
- if (pos + j >= 511)
- pos = 510 - j - (random () & 7);
- len1 = random () & 511;
- if (pos >= len1 && (random () & 1))
- len1 = pos + (random () & 7);
- if (len1 + j >= 512)
- len1 = 511 - j - (random () & 7);
- if (pos >= len1)
- len2 = len1;
- else
- len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
- j = (pos > len2 ? pos : len2) + align1 + 64;
- if (j > 512)
- j = 512;
- for (i = 0; i < j; ++i)
- {
- p1[i] = random () & 255;
- if (i < len1 + align1 && !p1[i])
- {
- p1[i] = random () & 255;
- if (!p1[i])
- p1[i] = 1 + (random () & 127);
- }
- }
- for (i = 0; i < j; ++i)
- {
- p2[i] = random () & 255;
- if (i < len2 + align2 && !p2[i])
- {
- p2[i] = random () & 255;
- if (!p2[i])
- p2[i] = 1 + (random () & 127);
- }
- }
-
- result = 0;
- memcpy (p2 + align2, p1 + align1, pos);
- if (pos < len1)
- {
- if (p2[align2 + pos] == p1[align1 + pos])
- {
- p2[align2 + pos] = random () & 255;
- if (p2[align2 + pos] == p1[align1 + pos])
- p2[align2 + pos] = p1[align1 + pos] + 3 + (random () & 127);
- }
-
- if (p1[align1 + pos] < p2[align2 + pos])
- result = -1;
- else
- result = 1;
- }
- p1[len1 + align1] = 0;
- p2[len2 + align2] = 0;
-
- FOR_EACH_IMPL (impl, 1)
- {
- r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2));
- /* Test whether on 64-bit architectures where ABI requires
- callee to promote has the promotion been done. */
- asm ("" : "=g" (r) : "0" (r));
- if ((r == 0 && result)
- || (r < 0 && result >= 0)
- || (r > 0 && result <= 0))
- {
- error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
- n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
- ret = 1;
- }
- }
- }
-}
+ for (size_t a = 0; a < CHARBYTES; a += CHARALIGN)
+ for (size_t b = 0; b < CHARBYTES; b += CHARALIGN)
+ {
+ UCHAR *p1 = (UCHAR *) (buf1 + page_size - 512 * CHARBYTES - a);
+ UCHAR *p2 = (UCHAR *) (buf2 + page_size - 512 * CHARBYTES - b);
+
+ for (size_t n = 0; n < ITERATIONS; n++)
+ {
+ size_t align1 = random () & 31;
+ size_t align2;
+ if (random () & 1)
+ align2 = random () & 31;
+ else
+ align2 = align1 + (random () & 24);
+ size_t pos = random () & 511;
+ size_t j = align1 > align2 ? align1 : align2;
+ if (pos + j >= 511)
+ pos = 510 - j - (random () & 7);
+ size_t len1 = random () & 511;
+ if (pos >= len1 && (random () & 1))
+ len1 = pos + (random () & 7);
+ if (len1 + j >= 512)
+ len1 = 511 - j - (random () & 7);
+ size_t len2;
+ if (pos >= len1)
+ len2 = len1;
+ else
+ len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
+ j = (pos > len2 ? pos : len2) + align1 + 64;
+ if (j > 512)
+ j = 512;
+ for (size_t i = 0; i < j; ++i)
+ {
+ p1[i] = random () & 255;
+ if (i < len1 + align1 && !p1[i])
+ {
+ p1[i] = random () & 255;
+ if (!p1[i])
+ p1[i] = 1 + (random () & 127);
+ }
+ }
+ for (size_t i = 0; i < j; ++i)
+ {
+ p2[i] = random () & 255;
+ if (i < len2 + align2 && !p2[i])
+ {
+ p2[i] = random () & 255;
+ if (!p2[i])
+ p2[i] = 1 + (random () & 127);
+ }
+ }
+
+ int result = 0;
+ MEMCPY ((CHAR *) (p2 + align2), (CHAR *) (p1 + align1), pos);
+ if (pos < len1)
+ {
+ if (p2[align2 + pos] == p1[align1 + pos])
+ {
+ p2[align2 + pos] = random () & 255;
+ if (p2[align2 + pos] == p1[align1 + pos])
+ p2[align2 + pos] = p1[align1 + pos] + 3 + (random () & 127);
+ }
+
+ if (p1[align1 + pos] < p2[align2 + pos])
+ result = -1;
+ else
+ result = 1;
+ }
+ p1[len1 + align1] = 0;
+ p2[len2 + align2] = 0;
+
+ FOR_EACH_IMPL (impl, 1)
+ {
+ int r = CALL (impl, (CHAR *) (p1 + align1), (CHAR *) (p2 + align2));
+ /* Test whether, on 64-bit architectures where the ABI requires
+ the callee to promote, the promotion has been done. */
+ asm ("" : "=g" (r) : "0" (r));
+ if ((r == 0 && result)
+ || (r < 0 && result >= 0)
+ || (r > 0 && result <= 0))
+ {
+ error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %d != %d, p1 %p p2 %p",
+ n, impl->name, (size_t) (p1 + align1) & 63, (size_t) (p1 + align2) & 63, len1, len2, pos, r, result, p1, p2);
+ ret = 1;
+ }
+ }
+ }
+ }
+}
+
+static void
+check (void)
+{
+ CHAR *s1 = (CHAR *) (buf1 + 0xb2c);
+ CHAR *s2 = (CHAR *) (buf1 + 0xfd8);
+
+ STRCPY(s1, L("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrs"));
+ STRCPY(s2, L("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkLMNOPQRSTUV"));
+
+ size_t l1 = STRLEN (s1);
+ size_t l2 = STRLEN (s2);
+ for (size_t i1 = 0; i1 < l1; i1++)
+ for (size_t i2 = 0; i2 < l2; i2++)
+ {
+ int exp_result = SIMPLE_STRCMP (s1 + i1, s2 + i2);
+ FOR_EACH_IMPL (impl, 0)
+ check_result (impl, s1 + i1, s2 + i2, exp_result);
+ }
+}
+
int
test_main (void)
@@ -220,37 +294,40 @@
size_t i;
test_init ();
+ check();
printf ("%23s", "");
FOR_EACH_IMPL (impl, 0)
printf ("\t%s", impl->name);
putchar ('\n');
- for (i = 1; i < 16; ++i)
- {
- do_test (i, i, i, 127, 0);
- do_test (i, i, i, 127, 1);
- do_test (i, i, i, 127, -1);
- }
-
- for (i = 1; i < 10; ++i)
- {
- do_test (0, 0, 2 << i, 127, 0);
- do_test (0, 0, 2 << i, 254, 0);
- do_test (0, 0, 2 << i, 127, 1);
- do_test (0, 0, 2 << i, 254, 1);
- do_test (0, 0, 2 << i, 127, -1);
- do_test (0, 0, 2 << i, 254, -1);
+ for (i = 1; i < 32; ++i)
+ {
+ do_test (CHARBYTES * i, CHARBYTES * i, i, MIDCHAR, 0);
+ do_test (CHARBYTES * i, CHARBYTES * i, i, MIDCHAR, 1);
+ do_test (CHARBYTES * i, CHARBYTES * i, i, MIDCHAR, -1);
+ }
+
+ for (i = 1; i < 10 + CHARBYTESLOG; ++i)
+ {
+ do_test (0, 0, 2 << i, MIDCHAR, 0);
+ do_test (0, 0, 2 << i, LARGECHAR, 0);
+ do_test (0, 0, 2 << i, MIDCHAR, 1);
+ do_test (0, 0, 2 << i, LARGECHAR, 1);
+ do_test (0, 0, 2 << i, MIDCHAR, -1);
+ do_test (0, 0, 2 << i, LARGECHAR, -1);
+ do_test (0, CHARBYTES * i, 2 << i, MIDCHAR, 1);
+ do_test (CHARBYTES * i, CHARBYTES * (i + 1), 2 << i, LARGECHAR, 1);
}
for (i = 1; i < 8; ++i)
{
- do_test (i, 2 * i, 8 << i, 127, 0);
- do_test (2 * i, i, 8 << i, 254, 0);
- do_test (i, 2 * i, 8 << i, 127, 1);
- do_test (2 * i, i, 8 << i, 254, 1);
- do_test (i, 2 * i, 8 << i, 127, -1);
- do_test (2 * i, i, 8 << i, 254, -1);
+ do_test (CHARBYTES * i, 2 * CHARBYTES * i, 8 << i, MIDCHAR, 0);
+ do_test (2 * CHARBYTES * i, CHARBYTES * i, 8 << i, LARGECHAR, 0);
+ do_test (CHARBYTES * i, 2 * CHARBYTES * i, 8 << i, MIDCHAR, 1);
+ do_test (2 * CHARBYTES * i, CHARBYTES * i, 8 << i, LARGECHAR, 1);
+ do_test (CHARBYTES * i, 2 * CHARBYTES * i, 8 << i, MIDCHAR, -1);
+ do_test (2 * CHARBYTES * i, CHARBYTES * i, 8 << i, LARGECHAR, -1);
}
do_random_tests ();
Modified: fsf/trunk/libc/string/test-string.h
==============================================================================
--- fsf/trunk/libc/string/test-string.h (original)
+++ fsf/trunk/libc/string/test-string.h Fri Aug 5 00:02:24 2011
@@ -1,5 +1,5 @@
/* Test and measure string and memory functions.
- Copyright (C) 1999, 2002, 2004, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2002, 2004, 2008, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Jakub Jelinek <jakub@xxxxxxxxxx>, 1999.
@@ -18,6 +18,8 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+#include <sys/cdefs.h>
+
typedef struct
{
const char *name;
@@ -29,7 +31,7 @@
#define IMPL(name, test) \
impl_t tst_ ## name \
__attribute__ ((section ("impls"), aligned (sizeof (void *)))) \
- = { #name, (void (*) (void))name, test };
+ = { __STRING (name), (void (*) (void))name, test };
#ifdef TEST_MAIN
Added: fsf/trunk/libc/string/test-wcscmp.c
==============================================================================
--- fsf/trunk/libc/string/test-wcscmp.c (added)
+++ fsf/trunk/libc/string/test-wcscmp.c Fri Aug 5 00:02:24 2011
@@ -1,0 +1,2 @@
+#define WIDE 1
+#include "test-strcmp.c"
Modified: fsf/trunk/libc/sysdeps/i386/i486/bits/string.h
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i486/bits/string.h (original)
+++ fsf/trunk/libc/sysdeps/i386/i486/bits/string.h Fri Aug 5 00:02:24 2011
@@ -1,6 +1,6 @@
/* Optimized, inlined string functions. i486 version.
- Copyright (C) 1997,1998,1999,2000,2001,2002,2003,2004,2007
- Free Software Foundation, Inc.
+ Copyright (C) 1997,1998,1999,2000,2001,2002,2003,2004,2007,2011
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -1058,8 +1058,8 @@
"movl %4, %3\n\t"
"decl %1\n\t"
"1:\n\t"
- "decl %3\n\t"
- "js 2f\n\t"
+ "subl $1,%3\n\t"
+ "jc 2f\n\t"
"movb (%2),%b0\n\t"
"movsb\n\t"
"testb %b0,%b0\n\t"
@@ -1078,8 +1078,8 @@
"leal 1(%1),%1\n\t"
"jne 1b\n"
"2:\n\t"
- "decl %3\n\t"
- "js 3f\n\t"
+ "subl $1,%3\n\t"
+ "jc 3f\n\t"
"movb (%2),%b0\n\t"
"leal 1(%2),%2\n\t"
"movb %b0,(%1)\n\t"
@@ -1219,8 +1219,8 @@
register int __res;
__asm__ __volatile__
("1:\n\t"
- "decl %3\n\t"
- "js 2f\n\t"
+ "subl $1,%3\n\t"
+ "jc 2f\n\t"
"movb (%1),%b0\n\t"
"incl %1\n\t"
"cmpb %b0,(%2)\n\t"
Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/Makefile
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/Makefile (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/Makefile Fri Aug 5 00:02:24 2011
@@ -12,7 +12,8 @@
memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
- strncpy-sse2 stpcpy-sse2 stpncpy-sse2
+ strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
+ strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-varshift.c += -msse4
Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S Fri Aug 5 00:02:24 2011
@@ -1,0 +1,1244 @@
+/* strcat with SSE2
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifdef SHARED
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into ECX and branch to it. TABLE is a
+ jump table with relative offsets. INDEX is a register that contains the
+ index into the jump table. SCALE is the scale of INDEX. */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into ECX. */ \
+ call __i686.get_pc_thunk.cx; \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ecx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ecx,INDEX,SCALE), %ecx; \
+ /* We loaded the jump table and adjusted ECX. Go. */ \
+ jmp *%ecx
+# else
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table. TABLE is a jump table with
+ absolute offsets. INDEX is a register that contains the index into the
+ jump table. SCALE is the scale of INDEX. */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+# endif
+
+# ifndef STRCAT
+# define STRCAT __strcat_sse2
+# endif
+
+# define PARMS 4
+# define STR1 PARMS+4
+# define STR2 STR1+4
+
+# ifdef USE_AS_STRNCAT
+# define LEN STR2+8
+# define STR3 STR1+4
+# else
+# define STR3 STR1
+# endif
+
+# define USE_AS_STRCAT
+# ifdef USE_AS_STRNCAT
+# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
+# else
+# define RETURN POP(%esi); ret; CFI_PUSH(%esi);
+# endif
+
+.text
+ENTRY (STRCAT)
+ PUSH (%esi)
+ mov STR1(%esp), %eax
+ mov STR2(%esp), %esi
+# ifdef USE_AS_STRNCAT
+ PUSH (%ebx)
+ movl LEN(%esp), %ebx
+ test %ebx, %ebx
+ jz L(ExitZero)
+# endif
+ cmpb $0, (%esi)
+ mov %esi, %ecx
+ mov %eax, %edx
+ jz L(ExitZero)
+
+ and $63, %ecx
+ and $63, %edx
+ cmp $32, %ecx
+ ja L(StrlenCore7_1)
+ cmp $48, %edx
+ ja L(alignment_prolog)
+
+ pxor %xmm0, %xmm0
+ pxor %xmm4, %xmm4
+ pxor %xmm7, %xmm7
+ movdqu (%eax), %xmm1
+ movdqu (%esi), %xmm5
+ pcmpeqb %xmm1, %xmm0
+ movdqu 16(%esi), %xmm6
+ pmovmskb %xmm0, %ecx
+ pcmpeqb %xmm5, %xmm4
+ pcmpeqb %xmm6, %xmm7
+ test %ecx, %ecx
+ jnz L(exit_less16_)
+ mov %eax, %ecx
+ and $-16, %eax
+ jmp L(loop_prolog)
+
+L(alignment_prolog):
+ pxor %xmm0, %xmm0
+ pxor %xmm4, %xmm4
+ mov %edx, %ecx
+ pxor %xmm7, %xmm7
+ and $15, %ecx
+ and $-16, %eax
+ pcmpeqb (%eax), %xmm0
+ movdqu (%esi), %xmm5
+ movdqu 16(%esi), %xmm6
+ pmovmskb %xmm0, %edx
+ pcmpeqb %xmm5, %xmm4
+ shr %cl, %edx
+ pcmpeqb %xmm6, %xmm7
+ test %edx, %edx
+ jnz L(exit_less16)
+ add %eax, %ecx
+
+ pxor %xmm0, %xmm0
+L(loop_prolog):
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+ .p2align 4
+L(align16_loop):
+ pcmpeqb 16(%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16)
+
+ pcmpeqb 32(%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32)
+
+ pcmpeqb 48(%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48)
+
+ pcmpeqb 64(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ lea 64(%eax), %eax
+ test %edx, %edx
+ jz L(align16_loop)
+ bsf %edx, %edx
+ add %edx, %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit16):
+ bsf %edx, %edx
+ lea 16(%eax, %edx), %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit32):
+ bsf %edx, %edx
+ lea 32(%eax, %edx), %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit48):
+ bsf %edx, %edx
+ lea 48(%eax, %edx), %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit_less16):
+ bsf %edx, %edx
+ add %ecx, %eax
+ add %edx, %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit_less16_):
+ bsf %ecx, %ecx
+ add %ecx, %eax
+
+ .p2align 4
+L(StartStrcpyPart):
+ pmovmskb %xmm4, %edx
+# ifdef USE_AS_STRNCAT
+ cmp $16, %ebx
+ jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail1)
+
+ movdqu %xmm5, (%eax)
+ pmovmskb %xmm7, %edx
+# ifdef USE_AS_STRNCAT
+ cmp $32, %ebx
+ jbe L(CopyFrom1To32Bytes1Case2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To32Bytes1)
+
+ mov %esi, %ecx
+ and $-16, %esi
+ and $15, %ecx
+ pxor %xmm0, %xmm0
+# ifdef USE_AS_STRNCAT
+ add %ecx, %ebx
+# endif
+ sub %ecx, %eax
+ jmp L(Unalign16Both)
+
+L(StrlenCore7_1):
+ mov %eax, %ecx
+ pxor %xmm0, %xmm0
+ and $15, %ecx
+ and $-16, %eax
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ shr %cl, %edx
+ test %edx, %edx
+ jnz L(exit_less16_1)
+ add %eax, %ecx
+
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+
+ .p2align 4
+L(align16_loop_1):
+ pcmpeqb 16(%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16_1)
+
+ pcmpeqb 32(%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32_1)
+
+ pcmpeqb 48(%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48_1)
+
+ pcmpeqb 64(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ lea 64(%eax), %eax
+ test %edx, %edx
+ jz L(align16_loop_1)
+ bsf %edx, %edx
+ add %edx, %eax
+ jmp L(StartStrcpyPart_1)
+
+ .p2align 4
+L(exit16_1):
+ bsf %edx, %edx
+ lea 16(%eax, %edx), %eax
+ jmp L(StartStrcpyPart_1)
+
+ .p2align 4
+L(exit32_1):
+ bsf %edx, %edx
+ lea 32(%eax, %edx), %eax
+ jmp L(StartStrcpyPart_1)
+
+ .p2align 4
+L(exit48_1):
+ bsf %edx, %edx
+ lea 48(%eax, %edx), %eax
+ jmp L(StartStrcpyPart_1)
+
+ .p2align 4
+L(exit_less16_1):
+ bsf %edx, %edx
+ add %ecx, %eax
+ add %edx, %eax
+
+ .p2align 4
+L(StartStrcpyPart_1):
+ mov %esi, %ecx
+ and $15, %ecx
+ and $-16, %esi
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+
+# ifdef USE_AS_STRNCAT
+ cmp $48, %ebx
+ ja L(BigN)
+# endif
+ pcmpeqb (%esi), %xmm1
+# ifdef USE_AS_STRNCAT
+ add %ecx, %ebx
+# endif
+ pmovmskb %xmm1, %edx
+ shr %cl, %edx
+# ifdef USE_AS_STRNCAT
+ cmp $16, %ebx
+ jbe L(CopyFrom1To16BytesTailCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail)
+
+ pcmpeqb 16(%esi), %xmm0
+ pmovmskb %xmm0, %edx
+# ifdef USE_AS_STRNCAT
+ cmp $32, %ebx
+ jbe L(CopyFrom1To32BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To32Bytes)
+
+ movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+ movdqu %xmm1, (%eax)
+ sub %ecx, %eax
+
+ .p2align 4
+L(Unalign16Both):
+ mov $16, %ecx
+ movdqa (%esi, %ecx), %xmm1
+ movaps 16(%esi, %ecx), %xmm2
+ movdqu %xmm1, (%eax, %ecx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $48, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+L(Unalign16BothBigN):
+ movaps 16(%esi, %ecx), %xmm3
+ movdqu %xmm2, (%eax, %ecx)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%esi, %ecx), %xmm4
+ movdqu %xmm3, (%eax, %ecx)
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%esi, %ecx), %xmm1
+ movdqu %xmm4, (%eax, %ecx)
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%esi, %ecx), %xmm2
+ movdqu %xmm1, (%eax, %ecx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%esi, %ecx), %xmm3
+ movdqu %xmm2, (%eax, %ecx)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movdqu %xmm3, (%eax, %ecx)
+ mov %esi, %edx
+ lea 16(%esi, %ecx), %esi
+ and $-0x40, %esi
+ sub %esi, %edx
+ sub %edx, %eax
+# ifdef USE_AS_STRNCAT
+ lea 128(%ebx, %edx), %ebx
+# endif
+ movaps (%esi), %xmm2
+ movaps %xmm2, %xmm4
+ movaps 16(%esi), %xmm5
+ movaps 32(%esi), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 48(%esi), %xmm7
+ pminub %xmm5, %xmm2
+ pminub %xmm7, %xmm3
+ pminub %xmm2, %xmm3
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %edx
+# ifdef USE_AS_STRNCAT
+ sub $64, %ebx
+ jbe L(UnalignedLeaveCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(Unaligned64Leave)
+
+ .p2align 4
+L(Unaligned64Loop_start):
+ add $64, %eax
+ add $64, %esi
+ movdqu %xmm4, -64(%eax)
+ movaps (%esi), %xmm2
+ movdqa %xmm2, %xmm4
+ movdqu %xmm5, -48(%eax)
+ movaps 16(%esi), %xmm5
+ pminub %xmm5, %xmm2
+ movaps 32(%esi), %xmm3
+ movdqu %xmm6, -32(%eax)
+ movaps %xmm3, %xmm6
+ movdqu %xmm7, -16(%eax)
+ movaps 48(%esi), %xmm7
+ pminub %xmm7, %xmm3
+ pminub %xmm2, %xmm3
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %edx
+# ifdef USE_AS_STRNCAT
+ sub $64, %ebx
+ jbe L(UnalignedLeaveCase2OrCase3)
+# endif
+ test %edx, %edx
+ jz L(Unaligned64Loop_start)
+
+L(Unaligned64Leave):
+ pxor %xmm1, %xmm1
+
+ pcmpeqb %xmm4, %xmm0
+ pcmpeqb %xmm5, %xmm1
+ pmovmskb %xmm0, %edx
+ pmovmskb %xmm1, %ecx
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnaligned_0)
+ test %ecx, %ecx
+ jnz L(CopyFrom1To16BytesUnaligned_16)
+
+ pcmpeqb %xmm6, %xmm0
+ pcmpeqb %xmm7, %xmm1
+ pmovmskb %xmm0, %edx
+ pmovmskb %xmm1, %ecx
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnaligned_32)
+
+ bsf %ecx, %edx
+ movdqu %xmm4, (%eax)
+ movdqu %xmm5, 16(%eax)
+ movdqu %xmm6, 32(%eax)
+ add $48, %esi
+ add $48, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+# ifdef USE_AS_STRNCAT
+ .p2align 4
+L(BigN):
+ pcmpeqb (%esi), %xmm1
+ pmovmskb %xmm1, %edx
+ shr %cl, %edx
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail)
+
+ pcmpeqb 16(%esi), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(CopyFrom1To32Bytes)
+
+ movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+ movdqu %xmm1, (%eax)
+ sub %ecx, %eax
+ sub $48, %ebx
+ add %ecx, %ebx
+
+ mov $16, %ecx
+ movdqa (%esi, %ecx), %xmm1
+ movaps 16(%esi, %ecx), %xmm2
+ movdqu %xmm1, (%eax, %ecx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+ jmp L(Unalign16BothBigN)
+# endif
+
+/*------------end of main part-------------------------------*/
+
+/* Case1 */
+ .p2align 4
+L(CopyFrom1To16Bytes):
+ add %ecx, %eax
+ add %ecx, %esi
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesTail):
+ add %ecx, %esi
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To32Bytes1):
+ add $16, %esi
+ add $16, %eax
+L(CopyFrom1To16BytesTail1):
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To32Bytes):
+ bsf %edx, %edx
+ add %ecx, %esi
+ add $16, %edx
+ sub %ecx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnaligned_0):
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnaligned_16):
+ bsf %ecx, %edx
+ movdqu %xmm4, (%eax)
+ add $16, %esi
+ add $16, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnaligned_32):
+ bsf %edx, %edx
+ movdqu %xmm4, (%eax)
+ movdqu %xmm5, 16(%eax)
+ add $32, %esi
+ add $32, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+# ifdef USE_AS_STRNCAT
+
+ .p2align 4
+L(CopyFrom1To16BytesExit): /* shared target of the "jb" branches below: dispatch on the index already in %edx */
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+/* Case2 */
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2): /* reached with a non-zero mask in %edx; the counter in %ebx may stop the copy first */
+ add $16, %ebx
+ add %ecx, %eax
+ add %ecx, %esi
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit) /* unsigned bit index < %ebx: take the L(ExitTable) route */
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) /* otherwise presumably L(StrncatExit<%ebx>): store %bh at offset %ebx, then copy %ebx bytes */
+
+ .p2align 4
+L(CopyFrom1To32BytesCase2): /* same decision for a mask that belongs to the second 16-byte block */
+ sub %ecx, %ebx
+ add %ecx, %esi
+ bsf %edx, %edx
+ add $16, %edx
+ sub %ecx, %edx /* index = bit position + 16 - %ecx */
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+L(CopyFrom1To16BytesTailCase2): /* same decision; only the source pointer moves by %ecx */
+ sub %ecx, %ebx
+ add %ecx, %esi
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+L(CopyFrom1To16BytesTail1Case2): /* same decision with pointers and counter left as they are */
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+/* Case2 or Case3, Case3 */
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesCase2) /* non-zero mask -> Case2 */
+L(CopyFrom1To16BytesCase3): /* zero mask: only the counter in %ebx selects the exit */
+ add $16, %ebx
+ add %ecx, %eax
+ add %ecx, %esi
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To32BytesCase2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To32BytesCase2)
+ sub %ecx, %ebx /* zero mask: adjust counter and source by %ecx, then exit on the counter alone */
+ add %ecx, %esi
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesTailCase2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTailCase2)
+ sub %ecx, %ebx
+ add %ecx, %esi
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To32Bytes1Case2OrCase3): /* step past one 16-byte block (pointers +16, counter -16), then decide as below */
+ add $16, %eax
+ add $16, %esi
+ sub $16, %ebx
+L(CopyFrom1To16BytesTail1Case2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail1Case2)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+# endif
+
+# ifdef USE_AS_STRNCAT
+ .p2align 4
+L(StrncatExit0): /* L(ExitStrncatTable) entry 0: nothing to copy */
+ movb %bh, (%eax) /* %bh is zero whenever %ebx is a valid table index (0..32) */
+ mov STR3(%esp), %eax /* return value; STR3 is defined outside this view (presumably the saved dest argument -- confirm) */
+ RETURN
+# endif
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit1): /* length-limited exit: store %bh at offset 1, then fall into the 1-byte copy */
+ movb %bh, 1(%eax)
+# endif
+L(Exit1): /* copy 1 byte from (%esi) to (%eax) */
+# ifdef USE_AS_STRNCAT
+ movb (%esi), %dh /* strncat build reloads the byte from the source */
+# endif
+ movb %dh, (%eax) /* plain strcat build stores %dh as left by the dispatcher (presumably 0, the high byte of the small bsf index -- confirm) */
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit2):
+ movb %bh, 2(%eax)
+# endif
+L(Exit2): /* copy 2 bytes */
+ movw (%esi), %dx
+ movw %dx, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit3):
+ movb %bh, 3(%eax)
+# endif
+L(Exit3): /* copy 3 bytes: one word plus the byte at offset 2 */
+ movw (%esi), %cx
+ movw %cx, (%eax)
+# ifdef USE_AS_STRNCAT
+ movb 2(%esi), %dh
+# endif
+ movb %dh, 2(%eax) /* third byte: reloaded from the source in the strncat build, otherwise %dh as left by the dispatcher */
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit4):
+ movb %bh, 4(%eax)
+# endif
+L(Exit4): /* copy 4 bytes */
+ movl (%esi), %edx
+ movl %edx, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit5):
+ movb %bh, 5(%eax)
+# endif
+L(Exit5): /* copy 5 bytes: one dword plus the byte at offset 4 */
+ movl (%esi), %ecx
+# ifdef USE_AS_STRNCAT
+ movb 4(%esi), %dh
+# endif
+ movb %dh, 4(%eax)
+ movl %ecx, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit6):
+ movb %bh, 6(%eax)
+# endif
+L(Exit6): /* copy 6 bytes: one dword plus one word */
+ movl (%esi), %ecx
+ movw 4(%esi), %dx
+ movl %ecx, (%eax)
+ movw %dx, 4(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit7):
+ movb %bh, 7(%eax)
+# endif
+L(Exit7): /* copy 7 bytes with two overlapping dword moves (offsets 0 and 3) */
+ movl (%esi), %ecx
+ movl 3(%esi), %edx
+ movl %ecx, (%eax)
+ movl %edx, 3(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit8):
+ movb %bh, 8(%eax)
+# endif
+L(Exit8): /* copy 8 bytes through the low half of %xmm0 */
+ movlpd (%esi), %xmm0
+ movlpd %xmm0, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit9): /* length-limited exit: store %bh at offset 9, then fall into the 9-byte copy */
+ movb %bh, 9(%eax)
+# endif
+L(Exit9): /* copy 9 bytes: 8 via %xmm0 plus the byte at offset 8 */
+ movlpd (%esi), %xmm0
+# ifdef USE_AS_STRNCAT
+ movb 8(%esi), %dh /* strncat build reloads the byte from the source */
+# endif
+ movb %dh, 8(%eax) /* plain strcat build stores %dh as left by the dispatcher (presumably 0 -- confirm) */
+ movlpd %xmm0, (%eax)
+ mov STR3(%esp), %eax /* return value; STR3 is defined outside this view */
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit10):
+ movb %bh, 10(%eax)
+# endif
+L(Exit10): /* copy 10 bytes: 8 + 2 */
+ movlpd (%esi), %xmm0
+ movw 8(%esi), %dx
+ movlpd %xmm0, (%eax)
+ movw %dx, 8(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit11):
+ movb %bh, 11(%eax)
+# endif
+L(Exit11): /* copy 11 bytes: 8 at offset 0 and an overlapping dword at offset 7 */
+ movlpd (%esi), %xmm0
+ movl 7(%esi), %edx
+ movlpd %xmm0, (%eax)
+ movl %edx, 7(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit12):
+ movb %bh, 12(%eax)
+# endif
+L(Exit12): /* copy 12 bytes: 8 + 4 */
+ movlpd (%esi), %xmm0
+ movl 8(%esi), %edx
+ movlpd %xmm0, (%eax)
+ movl %edx, 8(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit13):
+ movb %bh, 13(%eax)
+# endif
+L(Exit13): /* copy 13 bytes with two overlapping 8-byte moves (offsets 0 and 5) */
+ movlpd (%esi), %xmm0
+ movlpd 5(%esi), %xmm1
+ movlpd %xmm0, (%eax)
+ movlpd %xmm1, 5(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit14):
+ movb %bh, 14(%eax)
+# endif
+L(Exit14): /* copy 14 bytes with two overlapping 8-byte moves (offsets 0 and 6) */
+ movlpd (%esi), %xmm0
+ movlpd 6(%esi), %xmm1
+ movlpd %xmm0, (%eax)
+ movlpd %xmm1, 6(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit15):
+ movb %bh, 15(%eax)
+# endif
+L(Exit15): /* copy 15 bytes with two overlapping 8-byte moves (offsets 0 and 7) */
+ movlpd (%esi), %xmm0
+ movlpd 7(%esi), %xmm1
+ movlpd %xmm0, (%eax)
+ movlpd %xmm1, 7(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit16):
+ movb %bh, 16(%eax)
+# endif
+L(Exit16): /* copy 16 bytes with one unaligned 128-bit move */
+ movdqu (%esi), %xmm0
+ movdqu %xmm0, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit17): /* length-limited exit: store %bh at offset 17, then fall into the 17-byte copy */
+ movb %bh, 17(%eax)
+# endif
+L(Exit17): /* copy 17 bytes: 16 via %xmm0 plus the byte at offset 16 */
+ movdqu (%esi), %xmm0
+# ifdef USE_AS_STRNCAT
+ movb 16(%esi), %dh /* strncat build reloads the byte from the source */
+# endif
+ movdqu %xmm0, (%eax)
+ movb %dh, 16(%eax) /* plain strcat build stores %dh as left by the dispatcher (presumably 0 -- confirm) */
+ mov STR3(%esp), %eax /* return value; STR3 is defined outside this view */
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit18):
+ movb %bh, 18(%eax)
+# endif
+L(Exit18): /* copy 18 bytes: 16 + 2 */
+ movdqu (%esi), %xmm0
+ movw 16(%esi), %cx
+ movdqu %xmm0, (%eax)
+ movw %cx, 16(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit19):
+ movb %bh, 19(%eax)
+# endif
+L(Exit19): /* copy 19 bytes: 16 at offset 0 and an overlapping dword at offset 15 */
+ movdqu (%esi), %xmm0
+ movl 15(%esi), %ecx
+ movdqu %xmm0, (%eax)
+ movl %ecx, 15(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit20):
+ movb %bh, 20(%eax)
+# endif
+L(Exit20): /* copy 20 bytes: 16 + 4 */
+ movdqu (%esi), %xmm0
+ movl 16(%esi), %ecx
+ movdqu %xmm0, (%eax)
+ movl %ecx, 16(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit21):
+ movb %bh, 21(%eax)
+# endif
+L(Exit21): /* copy 21 bytes: 16 + 4 plus the byte at offset 20 */
+ movdqu (%esi), %xmm0
+ movl 16(%esi), %ecx
+# ifdef USE_AS_STRNCAT
+ movb 20(%esi), %dh
+# endif
+ movdqu %xmm0, (%eax)
+ movl %ecx, 16(%eax)
+ movb %dh, 20(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit22):
+ movb %bh, 22(%eax)
+# endif
+L(Exit22): /* copy 22 bytes: 16 at offset 0 and an overlapping 8 at offset 14 */
+ movdqu (%esi), %xmm0
+ movlpd 14(%esi), %xmm3
+ movdqu %xmm0, (%eax)
+ movlpd %xmm3, 14(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit23):
+ movb %bh, 23(%eax)
+# endif
+L(Exit23): /* copy 23 bytes: 16 at offset 0 and an overlapping 8 at offset 15 */
+ movdqu (%esi), %xmm0
+ movlpd 15(%esi), %xmm3
+ movdqu %xmm0, (%eax)
+ movlpd %xmm3, 15(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit24):
+ movb %bh, 24(%eax)
+# endif
+L(Exit24): /* copy 24 bytes: 16 + 8 */
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit25): /* length-limited exit: store %bh at offset 25, then fall into the 25-byte copy */
+ movb %bh, 25(%eax)
+# endif
+L(Exit25): /* copy 25 bytes: 16 + 8 plus the byte at offset 24 */
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+# ifdef USE_AS_STRNCAT
+ movb 24(%esi), %dh /* strncat build reloads the byte from the source */
+# endif
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ movb %dh, 24(%eax) /* plain strcat build stores %dh as left by the dispatcher (presumably 0 -- confirm) */
+ mov STR3(%esp), %eax /* return value; STR3 is defined outside this view */
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit26):
+ movb %bh, 26(%eax)
+# endif
+L(Exit26): /* copy 26 bytes: 16 + 8 + 2 */
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movw 24(%esi), %cx
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ movw %cx, 24(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit27):
+ movb %bh, 27(%eax)
+# endif
+L(Exit27): /* copy 27 bytes: 16 + 8 and an overlapping dword at offset 23 */
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movl 23(%esi), %ecx
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ movl %ecx, 23(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit28):
+ movb %bh, 28(%eax)
+# endif
+L(Exit28): /* copy 28 bytes: 16 + 8 + 4 */
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movl 24(%esi), %ecx
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ movl %ecx, 24(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit29):
+ movb %bh, 29(%eax)
+# endif
+L(Exit29): /* copy 29 bytes with two overlapping 16-byte moves (offsets 0 and 13) */
+ movdqu (%esi), %xmm0
+ movdqu 13(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movdqu %xmm2, 13(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit30):
+ movb %bh, 30(%eax)
+# endif
+L(Exit30): /* copy 30 bytes with two overlapping 16-byte moves (offsets 0 and 14) */
+ movdqu (%esi), %xmm0
+ movdqu 14(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movdqu %xmm2, 14(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit31):
+ movb %bh, 31(%eax)
+# endif
+L(Exit31): /* copy 31 bytes with two overlapping 16-byte moves (offsets 0 and 15) */
+ movdqu (%esi), %xmm0
+ movdqu 15(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movdqu %xmm2, 15(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit32):
+ movb %bh, 32(%eax)
+# endif
+L(Exit32): /* copy 32 bytes with two 16-byte moves */
+ movdqu (%esi), %xmm0
+ movdqu 16(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movdqu %xmm2, 16(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+# ifdef USE_AS_STRNCAT
+
+ .p2align 4
+L(UnalignedLeaveCase2OrCase3): /* leave path with four 16-byte chunks pending in %xmm4..%xmm7 (stored in that order below) */
+ test %edx, %edx
+ jnz L(Unaligned64LeaveCase2) /* non-zero mask in %edx */
+L(Unaligned64LeaveCase3): /* zero mask: flush chunks until the counter in %ebx runs out */
+ lea 64(%ebx), %ecx
+ and $-16, %ecx /* %ecx = (%ebx + 64) rounded down to a multiple of 16; L(CopyFrom1To16BytesCase3) adds it to both pointers */
+ add $48, %ebx
+ jl L(CopyFrom1To16BytesCase3)
+ movdqu %xmm4, (%eax)
+ sub $16, %ebx
+ jb L(CopyFrom1To16BytesCase3) /* borrow: fewer than 16 further bytes allowed */
+ movdqu %xmm5, 16(%eax)
+ sub $16, %ebx
+ jb L(CopyFrom1To16BytesCase3)
+ movdqu %xmm6, 32(%eax)
+ sub $16, %ebx
+ jb L(CopyFrom1To16BytesCase3)
+ movdqu %xmm7, 48(%eax)
+ xor %bh, %bh
+ movb %bh, 64(%eax) /* all 64 bytes stored: write a zero byte right after them */
+ mov STR3(%esp), %eax /* return value; STR3 is defined outside this view */
+ RETURN
+
+ .p2align 4
+L(Unaligned64LeaveCase2): /* re-scan the pending chunks one at a time, storing each chunk once it is passed */
+ xor %ecx, %ecx /* %ecx tracks the byte offset of the chunk being examined */
+ pcmpeqb %xmm4, %xmm0 /* presumably %xmm0 is zero on entry, so this finds zero bytes in chunk 0 -- confirm */
+ pmovmskb %xmm0, %edx
+ add $48, %ebx
+ jle L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm5, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqu %xmm4, (%eax)
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm6, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqu %xmm5, 16(%eax)
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm7, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqu %xmm6, 32(%eax)
+ lea 16(%eax, %ecx), %eax /* %ecx is 32 here, so both pointers move to the last chunk (offset 48) */
+ lea 16(%esi, %ecx), %esi
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit) /* unsigned bit index < %ebx: L(ExitTable) route */
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+# endif
+ .p2align 4
+L(ExitZero): /* bare RETURN; the branch that targets it is outside this view */
+ RETURN
+
+END (STRCAT)
+
+ .p2align 4
+ .section .rodata
+L(ExitTable): /* 32 entries; entry k is L(Exit<k+1>), so it is indexed by a zero-based byte position */
+ .int JMPTBL(L(Exit1), L(ExitTable)) /* JMPTBL is defined outside this view; presumably encodes the target relative to the table -- confirm */
+ .int JMPTBL(L(Exit2), L(ExitTable))
+ .int JMPTBL(L(Exit3), L(ExitTable))
+ .int JMPTBL(L(Exit4), L(ExitTable))
+ .int JMPTBL(L(Exit5), L(ExitTable))
+ .int JMPTBL(L(Exit6), L(ExitTable))
+ .int JMPTBL(L(Exit7), L(ExitTable))
+ .int JMPTBL(L(Exit8), L(ExitTable))
+ .int JMPTBL(L(Exit9), L(ExitTable))
+ .int JMPTBL(L(Exit10), L(ExitTable))
+ .int JMPTBL(L(Exit11), L(ExitTable))
+ .int JMPTBL(L(Exit12), L(ExitTable))
+ .int JMPTBL(L(Exit13), L(ExitTable))
+ .int JMPTBL(L(Exit14), L(ExitTable))
+ .int JMPTBL(L(Exit15), L(ExitTable))
+ .int JMPTBL(L(Exit16), L(ExitTable))
+ .int JMPTBL(L(Exit17), L(ExitTable))
+ .int JMPTBL(L(Exit18), L(ExitTable))
+ .int JMPTBL(L(Exit19), L(ExitTable))
+ .int JMPTBL(L(Exit20), L(ExitTable))
+ .int JMPTBL(L(Exit21), L(ExitTable))
+ .int JMPTBL(L(Exit22), L(ExitTable))
+ .int JMPTBL(L(Exit23), L(ExitTable))
+ .int JMPTBL(L(Exit24), L(ExitTable))
+ .int JMPTBL(L(Exit25), L(ExitTable))
+ .int JMPTBL(L(Exit26), L(ExitTable))
+ .int JMPTBL(L(Exit27), L(ExitTable))
+ .int JMPTBL(L(Exit28), L(ExitTable))
+ .int JMPTBL(L(Exit29), L(ExitTable))
+ .int JMPTBL(L(Exit30), L(ExitTable))
+ .int JMPTBL(L(Exit31), L(ExitTable))
+ .int JMPTBL(L(Exit32), L(ExitTable))
+# ifdef USE_AS_STRNCAT
+L(ExitStrncatTable): /* 33 entries; entry k is L(StrncatExit<k>), so it is indexed by a byte count 0..32 */
+ .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
+# endif
+#endif
Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S Fri Aug 5 00:02:24 2011
@@ -1,0 +1,573 @@
+/* strcat with SSSE3
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG) /* push plus matching unwind annotation */
+# define POP(REG) popl REG; CFI_POP (REG) /* pop plus matching unwind annotation */
+
+# ifndef STRCAT
+# define STRCAT __strcat_ssse3 /* default symbol name; a wrapper may predefine STRCAT */
+# endif
+
+# define PARMS 4 /* offset of the first argument at entry (past the return address) */
+# define STR1 PARMS+4 /* +4 because %edi is pushed before the arguments are read */
+# define STR2 STR1+4
+
+# ifdef USE_AS_STRNCAT
+# define LEN STR2+8 /* next argument (+4) and %ebx is also on the stack when it is read (+4) */
+# endif
+
+# define USE_AS_STRCAT /* strcpy-ssse3.S tests this macro to drop the parts this file supplies itself */
+
+
+.text
+ENTRY (STRCAT)
+ PUSH (%edi)
+ mov STR1(%esp), %edi /* %edi = destination argument; kept as the return value */
+ mov %edi, %edx /* copy handed to the included strlen code (its register use is outside this view) */
+
+# define RETURN jmp L(StartStrcpyPart)
+# include "strlen-sse2.S" /* inlined with RETURN redefined above, so it presumably continues at L(StartStrcpyPart) with the length in %eax -- confirm */
+
+L(StartStrcpyPart):
+ mov STR2(%esp), %ecx /* %ecx = source argument */
+ lea (%edi, %eax), %edx /* %edx = destination + %eax: where appending starts */
+# ifdef USE_AS_STRNCAT
+ PUSH (%ebx)
+ mov LEN(%esp), %ebx /* %ebx = length limit argument */
+ test %ebx, %ebx
+ jz L(StrncatExit0) /* limit 0: nothing to append */
+ cmp $8, %ebx
+ jbe L(StrncatExit8Bytes) /* limits 1..8 get their own byte-by-byte path */
+# endif
+ cmpb $0, (%ecx) /* byte-by-byte scan of the first source bytes: a NUL at index k leaves through L(Exit<k+1>) */
+ jz L(Exit1)
+ cmpb $0, 1(%ecx)
+ jz L(Exit2)
+ cmpb $0, 2(%ecx)
+ jz L(Exit3)
+ cmpb $0, 3(%ecx)
+ jz L(Exit4)
+ cmpb $0, 4(%ecx)
+ jz L(Exit5)
+ cmpb $0, 5(%ecx)
+ jz L(Exit6)
+ cmpb $0, 6(%ecx)
+ jz L(Exit7)
+ cmpb $0, 7(%ecx)
+ jz L(Exit8)
+ cmpb $0, 8(%ecx)
+ jz L(Exit9)
+# ifdef USE_AS_STRNCAT
+ cmp $16, %ebx
+ jb L(StrncatExit15Bytes) /* limits 9..15 continue on a path that also checks the limit */
+# endif
+ cmpb $0, 9(%ecx)
+ jz L(Exit10)
+ cmpb $0, 10(%ecx)
+ jz L(Exit11)
+ cmpb $0, 11(%ecx)
+ jz L(Exit12)
+ cmpb $0, 12(%ecx)
+ jz L(Exit13)
+ cmpb $0, 13(%ecx)
+ jz L(Exit14)
+ cmpb $0, 14(%ecx)
+ jz L(Exit15)
+ cmpb $0, 15(%ecx)
+ jz L(Exit16)
+# ifdef USE_AS_STRNCAT
+ cmp $16, %ebx
+ je L(StrncatExit16) /* limit is exactly 16 and the first 16 bytes hold no NUL */
+
+# define RETURN1 \
+ POP (%ebx); \
+ POP (%edi); \
+ ret; \
+ CFI_PUSH (%ebx); \
+ CFI_PUSH (%edi)
+# define USE_AS_STRNCPY
+# else
+# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
+# endif
+# include "strcpy-ssse3.S" /* bulk copy; with USE_AS_STRCAT defined it omits its own entry code and L(CopyFrom1To16Bytes) tail, which this file provides */
+ .p2align 4
+L(CopyFrom1To16Bytes): /* tail for the included strcpy code: 16-bit mask in %eax, block offset in %esi */
+ add %esi, %edx /* advance dest and source by the offset in %esi */
+ add %esi, %ecx
+
+ POP (%esi) /* restore the %esi pushed by the included code */
+ test %al, %al
+ jz L(ExitHigh) /* no bit set for bytes 0..7: look at bytes 8..15 */
+ test $0x01, %al /* mask bit k set -> L(Exit<k+1>) */
+ jnz L(Exit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ test $0x04, %al
+ jnz L(Exit3)
+ test $0x08, %al
+ jnz L(Exit4)
+ test $0x10, %al
+ jnz L(Exit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ test $0x40, %al
+ jnz L(Exit7)
+ movlpd (%ecx), %xmm0 /* only bit 7 remains: copy 8 bytes */
+ movlpd %xmm0, (%edx)
+ movl %edi, %eax /* return the original destination */
+ RETURN1
+
+ .p2align 4
+L(ExitHigh): /* same dispatch on %ah for bytes 8..15 */
+ test $0x01, %ah
+ jnz L(Exit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+ test $0x08, %ah
+ jnz L(Exit12)
+ test $0x10, %ah
+ jnz L(Exit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+ movlpd (%ecx), %xmm0 /* none of bits 8..14: copy all 16 bytes */
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit1): /* L(StrncatExit<N>): store %bh at offset N, then fall into the N-byte copy; every jump to these labels in this file has %ebx in 1..16, so %bh is 0 */
+ movb %bh, 1(%edx)
+L(Exit1): /* copy 1 byte from (%ecx) to (%edx) */
+ movb (%ecx), %al
+ movb %al, (%edx)
+ movl %edi, %eax /* return the original destination */
+ RETURN1
+
+ .p2align 4
+L(StrncatExit2):
+ movb %bh, 2(%edx)
+L(Exit2): /* copy 2 bytes */
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit3):
+ movb %bh, 3(%edx)
+L(Exit3): /* copy 3 bytes: 2 + 1 */
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ movb 2(%ecx), %al
+ movb %al, 2(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit4):
+ movb %bh, 4(%edx)
+L(Exit4): /* copy 4 bytes */
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit5):
+ movb %bh, 5(%edx)
+L(Exit5): /* copy 5 bytes: 4 + 1 */
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movb 4(%ecx), %al
+ movb %al, 4(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit6):
+ movb %bh, 6(%edx)
+L(Exit6): /* copy 6 bytes: 4 + 2 */
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movw 4(%ecx), %ax
+ movw %ax, 4(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit7):
+ movb %bh, 7(%edx)
+L(Exit7): /* copy 7 bytes with two overlapping dword moves (offsets 0 and 3) */
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 3(%ecx), %eax
+ movl %eax, 3(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit8):
+ movb %bh, 8(%edx)
+L(Exit8): /* copy 8 bytes */
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit9):
+ movb %bh, 9(%edx)
+L(Exit9): /* copy 9 bytes: 8 + 1 */
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movb 8(%ecx), %al
+ movb %al, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit10):
+ movb %bh, 10(%edx)
+L(Exit10): /* copy 10 bytes: 8 + 2 */
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movw 8(%ecx), %ax
+ movw %ax, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit11):
+ movb %bh, 11(%edx)
+L(Exit11): /* copy 11 bytes: 8 at offset 0 and an overlapping dword at offset 7 */
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl 7(%ecx), %eax
+ movl %eax, 7(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit12):
+ movb %bh, 12(%edx)
+L(Exit12): /* copy 12 bytes: 8 + 4 */
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl 8(%ecx), %eax
+ movl %eax, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit13):
+ movb %bh, 13(%edx)
+L(Exit13): /* copy 13 bytes with two overlapping 8-byte moves (offsets 0 and 5) */
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 5(%ecx), %xmm0
+ movlpd %xmm0, 5(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit14):
+ movb %bh, 14(%edx)
+L(Exit14): /* copy 14 bytes with two overlapping 8-byte moves (offsets 0 and 6) */
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 6(%ecx), %xmm0
+ movlpd %xmm0, 6(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit15):
+ movb %bh, 15(%edx)
+L(Exit15): /* copy 15 bytes with two overlapping 8-byte moves (offsets 0 and 7) */
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit16):
+ movb %bh, 16(%edx)
+L(Exit16): /* copy 16 bytes as two 8-byte halves */
+ movlpd (%ecx), %xmm0
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+# ifdef USE_AS_STRNCPY
+
+ CFI_PUSH(%esi) /* unwind info only: the code below is entered with %esi still on the stack */
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2): /* non-zero mask in %eax: stop at the first set bit or after %ebx bytes, whichever comes first */
+ add $16, %ebx
+ add %esi, %ecx
+ lea (%esi, %edx), %esi /* park the advanced destination in %esi while %edx is used as scratch */
+ lea -9(%ebx), %edx
+ and $1<<7, %dh /* keep bit 15 of (%ebx - 9): set when %ebx < 9 */
+ or %al, %dh /* ... or when the mask has a bit among bytes 0..7 */
+ test %dh, %dh
+ lea (%esi), %edx /* %edx = advanced destination; lea and pop leave the flags from test intact */
+ POP (%esi)
+ jz L(ExitHighCase2) /* %ebx >= 9 and no mask bit in bytes 0..7 */
+
+ test $0x01, %al /* alternate: mask bit k -> L(Exit<k+1>); limit == k+1 -> L(StrncatExit<k+1>) */
+ jnz L(Exit1)
+ cmp $1, %ebx
+ je L(StrncatExit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ cmp $2, %ebx
+ je L(StrncatExit2)
+ test $0x04, %al
+ jnz L(Exit3)
+ cmp $3, %ebx
+ je L(StrncatExit3)
+ test $0x08, %al
+ jnz L(Exit4)
+ cmp $4, %ebx
+ je L(StrncatExit4)
+ test $0x10, %al
+ jnz L(Exit5)
+ cmp $5, %ebx
+ je L(StrncatExit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ cmp $6, %ebx
+ je L(StrncatExit6)
+ test $0x40, %al
+ jnz L(Exit7)
+ cmp $7, %ebx
+ je L(StrncatExit7)
+ movlpd (%ecx), %xmm0 /* fall-through: copy 8 bytes */
+ movlpd %xmm0, (%edx)
+ lea 7(%edx), %eax
+ cmpb $1, (%eax) /* carry set iff the byte just stored at offset 7 is 0 */
+ sbb $-1, %eax /* %eax += 1 - carry: stays at offset 7 if that byte is 0, else moves to offset 8 */
+ xor %cl, %cl
+ movb %cl, (%eax) /* write the terminating zero there */
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(ExitHighCase2): /* same alternation for bytes 8..15, using %ah */
+ test $0x01, %ah
+ jnz L(Exit9)
+ cmp $9, %ebx
+ je L(StrncatExit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ cmp $10, %ebx
+ je L(StrncatExit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+ cmp $11, %ebx
+ je L(StrncatExit11)
+ test $0x8, %ah
+ jnz L(Exit12)
+ cmp $12, %ebx
+ je L(StrncatExit12)
+ test $0x10, %ah
+ jnz L(Exit13)
+ cmp $13, %ebx
+ je L(StrncatExit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ cmp $14, %ebx
+ je L(StrncatExit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+ cmp $15, %ebx
+ je L(StrncatExit15)
+ movlpd (%ecx), %xmm0 /* fall-through: copy 16 bytes; no separate terminator is written on this path */
+ movlpd %xmm0, (%edx)
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm1, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ CFI_PUSH(%esi) /* unwind info only: the code below is entered with %esi still on the stack */
+
+L(CopyFrom1To16BytesCase2OrCase3):
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2) /* non-zero mask -> Case2 */
+
+ .p2align 4
+L(CopyFrom1To16BytesCase3): /* zero mask: the count in %ebx alone selects the exit */
+ add $16, %ebx
+ add %esi, %edx /* advance dest and source by the offset in %esi */
+ add %esi, %ecx
+
+ POP (%esi)
+
+ cmp $8, %ebx
+ ja L(ExitHighCase3)
+ cmp $1, %ebx
+ je L(StrncatExit1)
+ cmp $2, %ebx
+ je L(StrncatExit2)
+ cmp $3, %ebx
+ je L(StrncatExit3)
+ cmp $4, %ebx
+ je L(StrncatExit4)
+ cmp $5, %ebx
+ je L(StrncatExit5)
+ cmp $6, %ebx
+ je L(StrncatExit6)
+ cmp $7, %ebx
+ je L(StrncatExit7)
+ movlpd (%ecx), %xmm0 /* none of 1..7 matched: copy 8 bytes and store %bh at offset 8 */
+ movlpd %xmm0, (%edx)
+ movb %bh, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(ExitHighCase3): /* %ebx > 8 */
+ cmp $9, %ebx
+ je L(StrncatExit9)
+ cmp $10, %ebx
+ je L(StrncatExit10)
+ cmp $11, %ebx
+ je L(StrncatExit11)
+ cmp $12, %ebx
+ je L(StrncatExit12)
+ cmp $13, %ebx
+ je L(StrncatExit13)
+ cmp $14, %ebx
+ je L(StrncatExit14)
+ cmp $15, %ebx
+ je L(StrncatExit15)
+ movlpd (%ecx), %xmm0 /* none of 9..15 matched: copy 16 bytes and store %bh at offset 16 */
+ movlpd %xmm0, (%edx)
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm1, 8(%edx)
+ movb %bh, 16(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit0): /* limit 0: return the destination unchanged */
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit15Bytes): /* entered with 9 <= %ebx <= 15 and source bytes 0..8 already known non-zero */
+ cmp $9, %ebx /* alternate: limit == k -> L(StrncatExit<k>); NUL at index k -> L(Exit<k+1>) */
+ je L(StrncatExit9)
+ cmpb $0, 9(%ecx)
+ jz L(Exit10)
+ cmp $10, %ebx
+ je L(StrncatExit10)
+ cmpb $0, 10(%ecx)
+ jz L(Exit11)
+ cmp $11, %ebx
+ je L(StrncatExit11)
+ cmpb $0, 11(%ecx)
+ jz L(Exit12)
+ cmp $12, %ebx
+ je L(StrncatExit12)
+ cmpb $0, 12(%ecx)
+ jz L(Exit13)
+ cmp $13, %ebx
+ je L(StrncatExit13)
+ cmpb $0, 13(%ecx)
+ jz L(Exit14)
+ cmp $14, %ebx
+ je L(StrncatExit14)
+ movlpd (%ecx), %xmm0 /* limit is 15: copy 15 bytes with two overlapping 8-byte moves */
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
+ lea 14(%edx), %eax
+ cmpb $1, (%eax) /* carry set iff the byte just stored at offset 14 is 0 */
+ sbb $-1, %eax /* %eax stays at offset 14 if that byte is 0, else moves to offset 15 */
+ movb %bh, (%eax) /* %bh is 0 because %ebx < 256: write the terminator */
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit8Bytes): /* entered with 1 <= %ebx <= 8; nothing about the source is known yet */
+ cmpb $0, (%ecx) /* alternate: NUL at index k -> L(Exit<k+1>); limit == k+1 -> L(StrncatExit<k+1>) */
+ jz L(Exit1)
+ cmp $1, %ebx
+ je L(StrncatExit1)
+ cmpb $0, 1(%ecx)
+ jz L(Exit2)
+ cmp $2, %ebx
+ je L(StrncatExit2)
+ cmpb $0, 2(%ecx)
+ jz L(Exit3)
+ cmp $3, %ebx
+ je L(StrncatExit3)
+ cmpb $0, 3(%ecx)
+ jz L(Exit4)
+ cmp $4, %ebx
+ je L(StrncatExit4)
+ cmpb $0, 4(%ecx)
+ jz L(Exit5)
+ cmp $5, %ebx
+ je L(StrncatExit5)
+ cmpb $0, 5(%ecx)
+ jz L(Exit6)
+ cmp $6, %ebx
+ je L(StrncatExit6)
+ cmpb $0, 6(%ecx)
+ jz L(Exit7)
+ cmp $7, %ebx
+ je L(StrncatExit7)
+ movlpd (%ecx), %xmm0 /* limit is 8 and bytes 0..6 are non-zero: copy 8 bytes */
+ movlpd %xmm0, (%edx)
+ lea 7(%edx), %eax
+ cmpb $1, (%eax) /* carry set iff the byte just stored at offset 7 is 0 */
+ sbb $-1, %eax /* %eax stays at offset 7 if that byte is 0, else moves to offset 8 */
+ movb %bh, (%eax) /* %bh is 0 because %ebx < 256: write the terminator */
+ movl %edi, %eax
+ RETURN1
+
+# endif
+END (STRCAT)
+#endif
Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcat.S Fri Aug 5 00:02:24 2011
@@ -1,0 +1,130 @@
+/* Multiple versions of strcat
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef USE_AS_STRNCAT
+# ifndef STRCAT
+# define STRCAT strcat /* default public symbol when this file is built as strcat */
+# endif
+#endif
+
+#ifdef USE_AS_STRNCAT /* the same source serves strncat; only the implementation symbol names differ */
+# define STRCAT_SSSE3 __strncat_ssse3
+# define STRCAT_SSE2 __strncat_sse2
+# define STRCAT_IA32 __strncat_ia32
+# define __GI_STRCAT __GI_strncat
+#else
+# define STRCAT_SSSE3 __strcat_ssse3
+# define STRCAT_SSE2 __strcat_sse2
+# define STRCAT_IA32 __strcat_ia32
+# define __GI_STRCAT __GI_strcat
+#endif
+
+
+/* Define multiple versions only for the definition in libc. Don't
+ define multiple versions for strncat in static library since we
+ need strncat before the initialization happened. */
+#ifndef NOT_IN_libc
+
+# ifdef SHARED
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx: /* returns its own return address in %ebx */
+ movl (%esp), %ebx
+ ret
+
+ .text
+ENTRY(STRCAT)
+ .type STRCAT, @gnu_indirect_function /* this body is a resolver: it returns the address of the implementation to use in %eax */
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx /* %ebx = GOT base for the @GOTOFF references below */
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jne 1f
+ call __init_cpu_features /* kind field still 0: initialise __cpu_features first */
+1: leal STRCAT_IA32@GOTOFF(%ebx), %eax /* default: generic IA-32 version */
+ testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal STRCAT_SSE2@GOTOFF(%ebx), %eax /* SSE2 present: SSE2 version */
+ testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+ jnz 2f /* fast unaligned loads: keep the SSE2 version even if SSSE3 exists */
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal STRCAT_SSSE3@GOTOFF(%ebx), %eax /* otherwise prefer the SSSE3 version */
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ ret
+END(STRCAT)
+# else
+
+ENTRY(STRCAT)
+ .type STRCAT, @gnu_indirect_function /* static build: same selection using absolute addresses */
+ cmpl $0, KIND_OFFSET+__cpu_features
+ jne 1f
+ call __init_cpu_features
+1: leal STRCAT_IA32, %eax
+ testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+ jz 2f
+ leal STRCAT_SSE2, %eax
+ testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
+ jnz 2f
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+ jz 2f
+ leal STRCAT_SSSE3, %eax
+2: ret
+END(STRCAT)
+
+# endif
+
+# undef ENTRY /* from here ENTRY/END ignore their argument and always emit STRCAT_IA32, so the generic file included at the bottom is assembled under that name */
+# define ENTRY(name) \
+ .type STRCAT_IA32, @function; \
+ .align 16; \
+ STRCAT_IA32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal strcat calls through a PLT.
+ The speedup we get from using SSSE3 instruction is likely eaten away
+ by the indirect call in the PLT. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32
+# undef libc_hidden_def /* likewise bind the __GI___STRCAT alias directly to the IA-32 version */
+# define libc_hidden_def(name) \
+ .globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32
+
+# endif
+#endif
+
+#ifndef USE_AS_STRNCAT /* only the strcat build pulls in the generic assembly here */
+# include "../../i486/strcat.S"
+#endif
Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S Fri Aug 5 00:02:24 2011
@@ -20,39 +20,39 @@
#ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
+# ifndef USE_AS_STRCAT
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCPY
-# define STRCPY __strcpy_ssse3
-# endif
-
-# ifdef USE_AS_STRNCPY
-# define PARMS 8
-# define ENTRANCE PUSH(%ebx)
-# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx);
-# define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi)
-# else
-# define PARMS 4
-# define ENTRANCE
-# define RETURN ret
-# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
-# endif
-
-# define STR1 PARMS
-# define STR2 STR1+4
-# define LEN STR2+4
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifndef STRCPY
+# define STRCPY __strcpy_ssse3
+# endif
+
+# ifdef USE_AS_STRNCPY
+# define PARMS 8
+# define ENTRANCE PUSH(%ebx)
+# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx);
+# define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi)
+# else
+# define PARMS 4
+# define ENTRANCE
+# define RETURN ret
+# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
+# endif
+
+# define STR1 PARMS
+# define STR2 STR1+4
+# define LEN STR2+4
/* In this code following instructions are used for copying:
movb - 1 byte
@@ -60,9 +60,9 @@
movl - 4 byte
movlpd - 8 byte
movaps - 16 byte - requires 16 byte alignment
- of sourse and destination adresses.
+ of sourse and destination adresses.
16 byte alignment: adress is 32bit value,
- right four bit of adress shall be 0.
+ right four bit of adress shall be 0.
*/
.text
@@ -70,13 +70,13 @@
ENTRANCE
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
movl LEN(%esp), %ebx
test %ebx, %ebx
jz L(ExitTail0)
cmp $8, %ebx
jbe L(StrncpyExit8Bytes)
-# endif
+# endif
cmpb $0, (%ecx)
jz L(ExitTail1)
cmpb $0, 1(%ecx)
@@ -93,10 +93,10 @@
jz L(ExitTail7)
cmpb $0, 7(%ecx)
jz L(ExitTail8)
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
cmp $16, %ebx
jb L(StrncpyExit15Bytes)
-# endif
+# endif
cmpb $0, 8(%ecx)
jz L(ExitTail9)
cmpb $0, 9(%ecx)
@@ -111,18 +111,20 @@
jz L(ExitTail14)
cmpb $0, 14(%ecx)
jz L(ExitTail15)
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
cmp $16, %ebx
je L(ExitTail16)
-# endif
+# endif
cmpb $0, 15(%ecx)
jz L(ExitTail16)
PUSH (%edi)
mov %edx, %edi
+# endif
PUSH (%esi)
# ifdef USE_AS_STRNCPY
mov %ecx, %esi
+ sub $16, %ebx
and $0xf, %esi
/* add 16 bytes ecx_shift to ebx */
@@ -159,7 +161,7 @@
/* eax = 0: there isn't end of string from position esi to esi+15 */
# ifdef USE_AS_STRNCPY
- sub $32, %ebx
+ sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %eax, %eax
@@ -2217,12 +2219,17 @@
mov $1, %esi
palignr $15, %xmm1, %xmm6
movaps %xmm6, (%edx)
+# ifdef USE_AS_STRCAT
+ jmp L(CopyFrom1To16Bytes)
+# endif
+
+# ifndef USE_AS_STRCAT
.p2align 4
L(CopyFrom1To16Bytes):
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
add $16, %ebx
-# endif
+# endif
add %esi, %edx
add %esi, %ecx
@@ -2248,20 +2255,20 @@
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 7(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $8, %ebx
lea 8(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2287,23 +2294,23 @@
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 15(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
CFI_PUSH(%esi)
@@ -2425,46 +2432,46 @@
jl L(Exit9)
je L(Exit10)
jg L(Exit11)
-# endif
+# endif
.p2align 4
L(Exit1):
movb (%ecx), %al
movb %al, (%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea (%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
L(Exit2):
movw (%ecx), %ax
movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 1(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2473,40 +2480,40 @@
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 2(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
L(Exit4):
movl (%ecx), %eax
movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 3(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $4, %ebx
lea 4(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2515,20 +2522,20 @@
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 4(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2537,20 +2544,20 @@
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 5(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2559,20 +2566,20 @@
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 6(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2581,20 +2588,20 @@
movlpd %xmm0, (%edx)
movb 8(%ecx), %al
movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 8(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2603,20 +2610,20 @@
movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 9(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2625,20 +2632,20 @@
movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 10(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2647,20 +2654,20 @@
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 11(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $12, %ebx
lea 12(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2669,20 +2676,20 @@
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 12(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2691,20 +2698,20 @@
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 13(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
.p2align 4
@@ -2713,25 +2720,25 @@
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 14(%edx), %eax
-# else
+# else
movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $15, %ebx
lea 15(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN1
CFI_POP (%edi)
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
.p2align 4
L(Fill0):
RETURN
@@ -2865,11 +2872,11 @@
je L(Fill10)
jmp L(Fill11)
- CFI_PUSH(%edi)
+ CFI_PUSH (%edi)
.p2align 4
L(StrncpyFillTailWithZero1):
- POP (%edi)
+ POP (%edi)
L(StrncpyFillTailWithZero):
pxor %xmm0, %xmm0
xor %edx, %edx
@@ -2916,46 +2923,46 @@
movdqa %xmm0, (%ecx)
lea 16(%ecx), %ecx
jmp L(FillFrom1To16Bytes)
-# endif
+# endif
.p2align 4
L(ExitTail1):
movb (%ecx), %al
movb %al, (%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea (%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
L(ExitTail2):
movw (%ecx), %ax
movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 1(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
@@ -2964,40 +2971,40 @@
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 2(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 3(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $4, %ebx
lea 4(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
@@ -3006,20 +3013,20 @@
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 4(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
@@ -3028,20 +3035,20 @@
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 5(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
@@ -3050,20 +3057,40 @@
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 6(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail8):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+# ifdef USE_AS_STPCPY
+ lea 7(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+# ifdef USE_AS_STRNCPY
+ sub $8, %ebx
+ lea 8(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
RETURN
.p2align 4
@@ -3072,20 +3099,20 @@
movlpd %xmm0, (%edx)
movb 8(%ecx), %al
movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 8(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
@@ -3094,20 +3121,20 @@
movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 9(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
@@ -3116,20 +3143,20 @@
movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 10(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
@@ -3138,20 +3165,20 @@
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 11(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $12, %ebx
lea 12(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
@@ -3160,20 +3187,20 @@
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 12(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
.p2align 4
@@ -3182,20 +3209,42 @@
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 13(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail15):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
+# ifdef USE_AS_STPCPY
+ lea 14(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+# ifdef USE_AS_STRNCPY
+ sub $15, %ebx
+ lea 15(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
RETURN
.p2align 4
@@ -3204,24 +3253,28 @@
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 15(%edx), %eax
-# else
+# else
movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# endif
+# endif
RETURN
-# ifdef USE_AS_STRNCPY
- CFI_PUSH (%esi)
- CFI_PUSH (%edi)
+#endif
+
+# ifdef USE_AS_STRNCPY
+# ifndef USE_AS_STRCAT
+ CFI_PUSH (%esi)
+ CFI_PUSH (%edi)
+# endif
L(StrncpyLeaveCase2OrCase3):
test %eax, %eax
jnz L(Aligned64LeaveCase2)
@@ -3979,9 +4032,13 @@
movaps %xmm6, (%edx, %esi)
lea 1(%esi), %esi
jmp L(CopyFrom1To16BytesCase3)
-
- CFI_POP (%esi)
- CFI_POP (%edi)
+# endif
+
+# ifndef USE_AS_STRCAT
+# ifdef USE_AS_STRNCPY
+ CFI_POP (%esi)
+ CFI_POP (%edi)
+
.p2align 4
L(ExitTail0):
movl %edx, %eax
@@ -4013,31 +4070,19 @@
je L(ExitTail14)
cmpb $0, 13(%ecx)
jz L(ExitTail14)
-# endif
-
- .p2align 4
-L(ExitTail15):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 14(%edx), %eax
-# else
- movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $15, %ebx
- lea 15(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# else
+ movl %edx, %eax
+# endif
RETURN
-# ifdef USE_AS_STRNCPY
.p2align 4
L(StrncpyExit8Bytes):
cmp $1, %ebx
@@ -4068,27 +4113,19 @@
je L(ExitTail7)
cmpb $0, 6(%ecx)
jz L(ExitTail7)
-# endif
- .p2align 4
-L(ExitTail8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 7(%edx), %eax
-# else
- movl %edx, %eax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $8, %ebx
- lea 8(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
-# endif
-# endif
+# else
+ movl %edx, %eax
+# endif
RETURN
+# endif
+
END (STRCPY)
-
+# endif
#endif
Modified: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S (original)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S Fri Aug 5 00:02:24 2011
@@ -1,5 +1,5 @@
/* strlen with SSE2
- Copyright (C) 2010 Free Software Foundation, Inc.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -18,30 +18,32 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#if defined SHARED && !defined NOT_IN_libc
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-#define PARMS 4
-#define STR PARMS
-#define ENTRANCE
-#define RETURN ret
+#if (defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc
+# ifndef USE_AS_STRCAT
+
+# include <sysdep.h>
+# include "asm-syntax.h"
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+# define PARMS 4
+# define STR PARMS
+# define ENTRANCE
+# define RETURN ret
.text
ENTRY (__strlen_sse2)
ENTRANCE
mov STR(%esp), %edx
+# endif
xor %eax, %eax
cmpb $0, (%edx)
jz L(exit_tail0)
@@ -77,9 +79,8 @@
jz L(exit_tail15)
pxor %xmm0, %xmm0
mov %edx, %eax
- mov %edx, %ecx
+ lea 16(%edx), %ecx
and $-16, %eax
- add $16, %ecx
add $16, %eax
pcmpeqb (%eax), %xmm0
@@ -183,51 +184,41 @@
jnz L(exit)
and $-0x40, %eax
- PUSH (%esi)
- PUSH (%edi)
- PUSH (%ebx)
- PUSH (%ebp)
- xor %ebp, %ebp
L(aligned_64):
- pcmpeqb (%eax), %xmm0
- pcmpeqb 16(%eax), %xmm1
- pcmpeqb 32(%eax), %xmm2
- pcmpeqb 48(%eax), %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %esi
- pmovmskb %xmm2, %edi
- pmovmskb %xmm3, %ebx
- or %edx, %ebp
- or %esi, %ebp
- or %edi, %ebp
- or %ebx, %ebp
+ movaps (%eax), %xmm0
+ movaps 16(%eax), %xmm1
+ movaps 32(%eax), %xmm2
+ movaps 48(%eax), %xmm6
+ pminub %xmm1, %xmm0
+ pminub %xmm6, %xmm2
+ pminub %xmm0, %xmm2
+ pcmpeqb %xmm3, %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
lea 64(%eax), %eax
jz L(aligned_64)
-L(48leave):
- test %edx, %edx
- jnz L(aligned_64_exit_16)
- test %esi, %esi
- jnz L(aligned_64_exit_32)
- test %edi, %edi
- jnz L(aligned_64_exit_48)
- mov %ebx, %edx
- lea (%eax), %eax
- jmp L(aligned_64_exit)
-L(aligned_64_exit_48):
- lea -16(%eax), %eax
- mov %edi, %edx
- jmp L(aligned_64_exit)
-L(aligned_64_exit_32):
- lea -32(%eax), %eax
- mov %esi, %edx
- jmp L(aligned_64_exit)
-L(aligned_64_exit_16):
- lea -48(%eax), %eax
-L(aligned_64_exit):
- POP (%ebp)
- POP (%ebx)
- POP (%edi)
- POP (%esi)
+
+ pcmpeqb -64(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 48(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqb -32(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqb %xmm6, %xmm3
+ pmovmskb %xmm3, %edx
+ lea -16(%ecx), %ecx
L(exit):
sub %ecx, %eax
test %dl, %dl
@@ -340,8 +331,8 @@
L(exit_tail15):
add $15, %eax
+# ifndef USE_AS_STRCAT
ret
-
END (__strlen_sse2)
-
+# endif
#endif
Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-c.c
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-c.c (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-c.c Fri Aug 5 00:02:24 2011
@@ -1,0 +1,8 @@
+#define STRNCAT __strncat_ia32
+#ifdef SHARED
+#undef libc_hidden_def
+#define libc_hidden_def(name) \
+ __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32);
+#endif
+
+#include "string/strncat.c"
Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S Fri Aug 5 00:02:24 2011
@@ -1,0 +1,4 @@
+#define STRCAT __strncat_sse2
+#define USE_AS_STRNCAT
+
+#include "strcat-sse2.S"
Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S Fri Aug 5 00:02:24 2011
@@ -1,0 +1,4 @@
+#define STRCAT __strncat_ssse3
+#define USE_AS_STRNCAT
+
+#include "strcat-ssse3.S"
Added: fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat.S
==============================================================================
--- fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat.S (added)
+++ fsf/trunk/libc/sysdeps/i386/i686/multiarch/strncat.S Fri Aug 5 00:02:24 2011
@@ -1,0 +1,3 @@
+#define STRCAT strncat
+#define USE_AS_STRNCAT
+#include "strcat.S"
Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c Fri Aug 5 00:02:24 2011
@@ -300,14 +300,20 @@
break;
case 3: /* painful */
for (i=jz;i>0;i--) {
- fw = fq[i-1]+fq[i];
- fq[i] += fq[i-1]-fw;
- fq[i-1] = fw;
+#if __FLT_EVAL_METHOD__ != 0
+ volatile
+#endif
+ double fv = (double)(fq[i-1]+fq[i]);
+ fq[i] += fq[i-1]-fv;
+ fq[i-1] = fv;
}
for (i=jz;i>1;i--) {
- fw = fq[i-1]+fq[i];
- fq[i] += fq[i-1]-fw;
- fq[i-1] = fw;
+#if __FLT_EVAL_METHOD__ != 0
+ volatile
+#endif
+ double fv = (double)(fq[i-1]+fq[i]);
+ fq[i] += fq[i-1]-fv;
+ fq[i-1] = fv;
}
for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
if(ih==0) {
Modified: fsf/trunk/libc/sysdeps/posix/getaddrinfo.c
==============================================================================
--- fsf/trunk/libc/sysdeps/posix/getaddrinfo.c (original)
+++ fsf/trunk/libc/sysdeps/posix/getaddrinfo.c Fri Aug 5 00:02:24 2011
@@ -432,7 +432,10 @@
/* In case the output string is the same as the input string
no new string has been allocated. */
if (p != name)
- malloc_name = true;
+ {
+ name = p;
+ malloc_name = true;
+ }
}
#endif
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/strlen.S Fri Aug 5 00:02:24 2011
@@ -1,5 +1,5 @@
/* strlen(str) -- determine the length of the string STR.
- Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@xxxxxxxxxx>.
This file is part of the GNU C Library.