[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commits] r24205 - in /fsf/trunk/libc: ./ locale/ locale/programs/ localedata/locales/ manual/ ports/ ports/sysdeps/m68k/ sysdeps/gene...
- To: commits@xxxxxxxxxx
- Subject: [Commits] r24205 - in /fsf/trunk/libc: ./ locale/ locale/programs/ localedata/locales/ manual/ ports/ ports/sysdeps/m68k/ sysdeps/gene...
- From: eglibc@xxxxxxxxxx
- Date: Wed, 09 Oct 2013 00:02:14 -0000
Author: eglibc
Date: Wed Oct 9 00:02:12 2013
New Revision: 24205
Log:
Import glibc-mainline for 2013-10-09
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/NEWS
fsf/trunk/libc/locale/loadlocale.c
fsf/trunk/libc/locale/programs/3level.h
fsf/trunk/libc/locale/programs/ld-collate.c
fsf/trunk/libc/locale/programs/ld-ctype.c
fsf/trunk/libc/locale/programs/locfile.c
fsf/trunk/libc/localedata/locales/pap_AN
fsf/trunk/libc/localedata/locales/pap_AW
fsf/trunk/libc/localedata/locales/pap_CW
fsf/trunk/libc/manual/socket.texi
fsf/trunk/libc/ports/ChangeLog.m68k
fsf/trunk/libc/ports/sysdeps/m68k/start.S
fsf/trunk/libc/sysdeps/generic/math_private.h
fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_exp.c
fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_pow.c
fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_sin.c
fsf/trunk/libc/sysdeps/ieee754/dbl-64/sincos32.c
fsf/trunk/libc/sysdeps/x86_64/memset.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
fsf/trunk/libc/sysdeps/x86_64/strchr.S
fsf/trunk/libc/sysdeps/x86_64/strrchr.S
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Wed Oct 9 00:02:12 2013
@@ -1,3 +1,66 @@
+2013-10-08 Yogesh Chaudhari <mr.yogesh@xxxxxxxxx>
+
+ [BZ #156]
+ * manual/socket.texi: Added statement about buffer
+ for gethostbyname2_r.
+
+2013-10-08 OndÃÂej BÃÂlka <neleai@xxxxxxxxx>
+
+ * sysdeps/x86_64/memset.S (ALIGN): Macro removed.
+ Use .p2align directive instead, throughout.
+ * sysdeps/x86_64/multiarch/memcmp-sse4.S: Likewise.
+ * sysdeps/x86_64/multiarch/memcmp-ssse3.S: Likewise.
+ * sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: Likewise.
+ * sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise.
+ * sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
+ * sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S: Likewise.
+ * sysdeps/x86_64/strchr.S: Likewise.
+ * sysdeps/x86_64/strrchr.S: Likewise.
+
+2013-10-08 Siddhesh Poyarekar <siddhesh@xxxxxxxxxx>
+
+ * sysdeps/ieee754/dbl-64/e_pow.c: Fix code formatting.
+
+ * sysdeps/ieee754/dbl-64/e_exp.c: Fix code formatting.
+
+ * sysdeps/generic/math_private.h (__mpsin1): Remove
+ declaration.
+ (__mpcos1): Likewise.
+ (__mpsin): New argument __range_reduce.
+ (__mpcos): Likewise.
+ * sysdeps/ieee754/dbl-64/s_sin.c: Likewise.
+ (slow): Use __mpsin and __mpcos.
+ (slow1): Likewise.
+ (slow2): Likewise.
+ (sloww): Likewise.
+ (sloww1): Likewise.
+ (sloww2): Likewise.
+ (bsloww): Likewise.
+ (bsloww1): Likewise.
+ (bsloww2): Likewise.
+ (cslow2): Likewise.
+ (csloww): Likewise.
+ (csloww1): Likewise.
+ (csloww2): Likewise.
+ * sysdeps/ieee754/dbl-64/sincos32.c (__mpsin): Add argument
+ range_reduce. Merge in __mpsin1.
+ (__mpcos): Likewise.
+ (__mpsin1): Remove.
+ (__mpcos1): Likewise.
+
+2013-10-07 Joseph Myers <joseph@xxxxxxxxxxxxxxxx>
+
+ * locale/loadlocale.c (_nl_intern_locale_data): Use
+ LOCFILE_ALIGNED_P.
+ * locale/programs/3level.h (CONCAT(add_locale_,TABLE)): Use
+ LOCFILE_ALIGN_UP and LOCFILE_ALIGN.
+ * locale/programs/ld-collate.c (obstack_int32_grow): Assert that
+ obstack data is appropriately aligned.
+ (obstack_int32_grow_fast): Likewise.
+ * locale/programs/ld-ctype.c (ctype_output): Use LOCFILE_ALIGN.
+ * locale/programs/locfile.c (add_locale_uint32): Likewise.
+ (add_locale_uint32_array): Likewise.
+
2013-10-07 Siddhesh Poyarekar <siddhesh@xxxxxxxxxx>
* benchtests/Makefile: Remove ARGLIST and RET variables.
Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Wed Oct 9 00:02:12 2013
@@ -9,11 +9,10 @@
* The following bugs are resolved with this release:
- 431, 13982, 13985, 14155, 14547, 14699, 15048, 15400, 15427, 15522,
+ 156, 431, 13982, 13985, 14155, 14547, 14699, 15048, 15400, 15427, 15522,
15531, 15532, 15608, 15609, 15610, 15632, 15640, 15680, 15681, 15723,
15734, 15735, 15736, 15748, 15749, 15754, 15760, 15797, 15844, 15849,
15855, 15856, 15857, 15859, 15867, 15886, 15887, 15890, 15892, 15893,
- 15895, 15897, 15905, 15909, 15919, 15921, 15887, 15890, 15892, 15893,
15895, 15897, 15905, 15909, 15919, 15921, 15923, 15939, 15963, 15966,
15988.
Modified: fsf/trunk/libc/locale/loadlocale.c
==============================================================================
--- fsf/trunk/libc/locale/loadlocale.c (original)
+++ fsf/trunk/libc/locale/loadlocale.c Wed Oct 9 00:02:12 2013
@@ -148,7 +148,7 @@
newdata->values[cnt].string = newdata->filedata + idx;
else
{
- if (idx % __alignof__ (u_int32_t) != 0)
+ if (!LOCFILE_ALIGNED_P (idx))
goto puntdata;
newdata->values[cnt].word =
*((const u_int32_t *) (newdata->filedata + idx));
Modified: fsf/trunk/libc/locale/programs/3level.h
==============================================================================
--- fsf/trunk/libc/locale/programs/3level.h (original)
+++ fsf/trunk/libc/locale/programs/3level.h Wed Oct 9 00:02:12 2013
@@ -270,7 +270,7 @@
+ t->level1_size * sizeof (uint32_t)
+ (t->level2_size << t->q) * sizeof (uint32_t)
+ (t->level3_size << t->p) * sizeof (ELEMENT);
- t->result_size = (last_offset + 3) & ~3ul;
+ t->result_size = LOCFILE_ALIGN_UP (last_offset);
level2_offset =
5 * sizeof (uint32_t)
@@ -308,7 +308,7 @@
t->level3_size << t->p);
else
abort ();
- align_locale_data (file, 4);
+ align_locale_data (file, LOCFILE_ALIGN);
end_locale_structure (file);
if (t->level1_alloc > 0)
Modified: fsf/trunk/libc/locale/programs/ld-collate.c
==============================================================================
--- fsf/trunk/libc/locale/programs/ld-collate.c (original)
+++ fsf/trunk/libc/locale/programs/ld-collate.c Wed Oct 9 00:02:12 2013
@@ -44,6 +44,7 @@
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
+ assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
data = maybe_swap_uint32 (data);
if (sizeof (int32_t) == sizeof (int))
obstack_int_grow (obstack, data);
@@ -55,6 +56,7 @@
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
+ assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
data = maybe_swap_uint32 (data);
if (sizeof (int32_t) == sizeof (int))
obstack_int_grow_fast (obstack, data);
Modified: fsf/trunk/libc/locale/programs/ld-ctype.c
==============================================================================
--- fsf/trunk/libc/locale/programs/ld-ctype.c (original)
+++ fsf/trunk/libc/locale/programs/ld-ctype.c Wed Oct 9 00:02:12 2013
@@ -1032,7 +1032,7 @@
for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
add_locale_string (&file, ctype->classnames[cnt]);
add_locale_char (&file, 0);
- align_locale_data (&file, 4);
+ align_locale_data (&file, LOCFILE_ALIGN);
end_locale_structure (&file);
break;
@@ -1042,7 +1042,7 @@
for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
add_locale_string (&file, ctype->mapnames[cnt]);
add_locale_char (&file, 0);
- align_locale_data (&file, 4);
+ align_locale_data (&file, LOCFILE_ALIGN);
end_locale_structure (&file);
break;
Modified: fsf/trunk/libc/locale/programs/locfile.c
==============================================================================
--- fsf/trunk/libc/locale/programs/locfile.c (original)
+++ fsf/trunk/libc/locale/programs/locfile.c Wed Oct 9 00:02:12 2013
@@ -627,7 +627,7 @@
void
add_locale_uint32 (struct locale_file *file, uint32_t value)
{
- align_locale_data (file, sizeof (uint32_t));
+ align_locale_data (file, LOCFILE_ALIGN);
record_offset (file);
value = maybe_swap_uint32 (value);
obstack_grow (&file->data, &value, sizeof (value));
@@ -639,7 +639,7 @@
add_locale_uint32_array (struct locale_file *file,
const uint32_t *data, size_t n_elems)
{
- align_locale_data (file, sizeof (uint32_t));
+ align_locale_data (file, LOCFILE_ALIGN);
record_offset (file);
obstack_grow (&file->data, data, n_elems * sizeof (uint32_t));
maybe_swap_uint32_obstack (&file->data, n_elems);
Modified: fsf/trunk/libc/localedata/locales/pap_AN
==============================================================================
--- fsf/trunk/libc/localedata/locales/pap_AN (original)
+++ fsf/trunk/libc/localedata/locales/pap_AN Wed Oct 9 00:02:12 2013
@@ -150,8 +150,10 @@
LC_TELEPHONE
tel_int_fmt "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025><U006C>"
% FIXME FIND tel_dom_fmt
-int_select "00"
-int_prefix "599"
+% 00
+int_select "<U0030><U0030>"
+% 599
+int_prefix "<U0035><U0039><U0039>"
END LC_TELEPHONE
LC_NAME
Modified: fsf/trunk/libc/localedata/locales/pap_AW
==============================================================================
--- fsf/trunk/libc/localedata/locales/pap_AW (original)
+++ fsf/trunk/libc/localedata/locales/pap_AW Wed Oct 9 00:02:12 2013
@@ -160,8 +160,10 @@
LC_TELEPHONE
tel_int_fmt "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025><U006C>"
% FIXME FIND tel_dom_fmt
-int_select "00"
-int_prefix "599"
+% 00
+int_select "<U0030><U0030>"
+% 599
+int_prefix "<U0035><U0039><U0039>"
END LC_TELEPHONE
LC_NAME
Modified: fsf/trunk/libc/localedata/locales/pap_CW
==============================================================================
--- fsf/trunk/libc/localedata/locales/pap_CW (original)
+++ fsf/trunk/libc/localedata/locales/pap_CW Wed Oct 9 00:02:12 2013
@@ -160,8 +160,10 @@
LC_TELEPHONE
tel_int_fmt "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025><U006C>"
% FIXME FIND tel_dom_fmt
-int_select "00"
-int_prefix "599"
+% 00
+int_select "<U0030><U0030>"
+% 599
+int_prefix "<U0035><U0039><U0039>"
END LC_TELEPHONE
LC_NAME
Modified: fsf/trunk/libc/manual/socket.texi
==============================================================================
--- fsf/trunk/libc/manual/socket.texi (original)
+++ fsf/trunk/libc/manual/socket.texi Wed Oct 9 00:02:12 2013
@@ -1290,14 +1290,17 @@
parameters.
A pointer to the buffer, in which the result is stored, is available in
-@code{*@var{result}} after the function call successfully returned. If
-an error occurs or if no entry is found, the pointer @code{*@var{result}}
-is a null pointer. Success is signalled by a zero return value. If the
-function failed the return value is an error number. In addition to the
-errors defined for @code{gethostbyname} it can also be @code{ERANGE}.
-In this case the call should be repeated with a larger buffer.
-Additional error information is not stored in the global variable
-@code{h_errno} but instead in the object pointed to by @var{h_errnop}.
+@code{*@var{result}} after the function call successfully returned. The
+buffer passed as the @var{buf} parameter can be freed only once the caller
+has finished with the result hostent struct, or has copied it including all
+the other memory that it points to. If an error occurs or if no entry is
+found, the pointer @code{*@var{result}} is a null pointer. Success is
+signalled by a zero return value. If the function failed the return value
+is an error number. In addition to the errors defined for
+@code{gethostbyname} it can also be @code{ERANGE}. In this case the call
+should be repeated with a larger buffer. Additional error information is
+not stored in the global variable @code{h_errno} but instead in the object
+pointed to by @var{h_errnop}.
Here's a small example:
@smallexample
Modified: fsf/trunk/libc/ports/ChangeLog.m68k
==============================================================================
--- fsf/trunk/libc/ports/ChangeLog.m68k (original)
+++ fsf/trunk/libc/ports/ChangeLog.m68k Wed Oct 9 00:02:12 2013
@@ -1,3 +1,7 @@
+2013-10-08 Andreas Schwab <schwab@xxxxxxx>
+
+ * sysdeps/m68k/start.S [SHARED]: Use PIC.
+
2013-09-20 Andreas Schwab <schwab@xxxxxxxxxxxxxx>
* sysdeps/m68k/ffs.c (__ffs): Define as hidden.
Modified: fsf/trunk/libc/ports/sysdeps/m68k/start.S
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/m68k/start.S (original)
+++ fsf/trunk/libc/ports/sysdeps/m68k/start.S Wed Oct 9 00:02:12 2013
@@ -52,6 +52,8 @@
NULL
*/
+#include <sysdep.h>
+
.text
.globl _start
.type _start,@function
@@ -74,6 +76,24 @@
pea (%a1) /* Push address of the shared library
termination function. */
+#ifdef SHARED
+ /* Load PIC register. */
+ LOAD_GOT (%a5)
+
+ /* Push the address of our own entry points to `.fini' and
+ `.init'. */
+ move.l __libc_csu_fini@GOT(%a5), -(%sp)
+ move.l __libc_csu_init@GOT(%a5), -(%sp)
+
+ pea (%a0) /* Push second argument: argv. */
+ move.l %d0, -(%sp) /* Push first argument: argc. */
+
+ move.l main@GOT(%a5), -(%sp)
+
+ /* Call the user's main function, and exit with its value. But
+ let the libc call main. */
+ jbsr __libc_start_main@PLTPC
+#else
/* Push the address of our own entry points to `.fini' and
`.init'. */
pea __libc_csu_fini
@@ -87,6 +107,7 @@
/* Call the user's main function, and exit with its value. But
let the libc call main. */
jbsr __libc_start_main
+#endif
illegal /* Crash if somehow `exit' does return. */
Modified: fsf/trunk/libc/sysdeps/generic/math_private.h
==============================================================================
--- fsf/trunk/libc/sysdeps/generic/math_private.h (original)
+++ fsf/trunk/libc/sysdeps/generic/math_private.h Wed Oct 9 00:02:12 2013
@@ -356,10 +356,8 @@
extern double __halfulp (double __x, double __y);
extern double __sin32 (double __x, double __res, double __res1);
extern double __cos32 (double __x, double __res, double __res1);
-extern double __mpsin (double __x, double __dx);
-extern double __mpcos (double __x, double __dx);
-extern double __mpsin1 (double __x);
-extern double __mpcos1 (double __x);
+extern double __mpsin (double __x, double __dx, bool __range_reduce);
+extern double __mpcos (double __x, double __dx, bool __range_reduce);
extern double __slowexp (double __x);
extern double __slowpow (double __x, double __y, double __z);
extern void __docos (double __x, double __dx, double __v[]);
Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_exp.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_exp.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_exp.c Wed Oct 9 00:02:12 2013
@@ -44,221 +44,299 @@
# define SECTION
#endif
-double __slowexp(double);
-
-/***************************************************************************/
-/* An ultimate exp routine. Given an IEEE double machine number x */
-/* it computes the correctly rounded (to nearest) value of e^x */
-/***************************************************************************/
+double __slowexp (double);
+
+/* An ultimate exp routine. Given an IEEE double machine number x it computes
+ the correctly rounded (to nearest) value of e^x. */
double
SECTION
-__ieee754_exp(double x) {
+__ieee754_exp (double x)
+{
double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
- mynumber junk1, junk2, binexp = {{0,0}};
- int4 i,j,m,n,ex;
+ mynumber junk1, junk2, binexp = {{0, 0}};
+ int4 i, j, m, n, ex;
double retval;
SET_RESTORE_ROUND (FE_TONEAREST);
junk1.x = x;
m = junk1.i[HIGH_HALF];
- n = m&hugeint;
-
- if (n > smallint && n < bigint) {
-
- y = x*log2e.x + three51.x;
- bexp = y - three51.x; /* multiply the result by 2**bexp */
-
- junk1.x = y;
-
- eps = bexp*ln_two2.x; /* x = bexp*ln(2) + t - eps */
- t = x - bexp*ln_two1.x;
-
- y = t + three33.x;
- base = y - three33.x; /* t rounded to a multiple of 2**-18 */
- junk2.x = y;
- del = (t - base) - eps; /* x = bexp*ln(2) + base + del */
- eps = del + del*del*(p3.x*del + p2.x);
-
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+1023)<<20;
-
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
-
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
-
- rem=(bet + bet*eps)+al*eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (res == (res+cor*err_0)) { retval = res*binexp.x; goto ret; }
- else { retval = __slowexp(x); goto ret; } /*if error is over bound */
- }
-
- if (n <= smallint) { retval = 1.0; goto ret; }
-
- if (n >= badint) {
- if (n > infint) { retval = x+x; goto ret; } /* x is NaN */
- if (n < infint) { retval = (x>0) ? (hhuge*hhuge) : (tiny*tiny); goto ret; }
- /* x is finite, cause either overflow or underflow */
- if (junk1.i[LOW_HALF] != 0) { retval = x+x; goto ret; } /* x is NaN */
- retval = (x>0)?inf.x:zero; /* |x| = inf; return either inf or 0 */
- goto ret;
- }
-
- y = x*log2e.x + three51.x;
+ n = m & hugeint;
+
+ if (n > smallint && n < bigint)
+ {
+ y = x * log2e.x + three51.x;
+ bexp = y - three51.x; /* multiply the result by 2**bexp */
+
+ junk1.x = y;
+
+ eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */
+ t = x - bexp * ln_two1.x;
+
+ y = t + three33.x;
+ base = y - three33.x; /* t rounded to a multiple of 2**-18 */
+ junk2.x = y;
+ del = (t - base) - eps; /* x = bexp*ln(2) + base + del */
+ eps = del + del * del * (p3.x * del + p2.x);
+
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
+
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+
+ rem = (bet + bet * eps) + al * eps;
+ res = al + rem;
+ cor = (al - res) + rem;
+ if (res == (res + cor * err_0))
+ {
+ retval = res * binexp.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ } /*if error is over bound */
+ }
+
+ if (n <= smallint)
+ {
+ retval = 1.0;
+ goto ret;
+ }
+
+ if (n >= badint)
+ {
+ if (n > infint)
+ {
+ retval = x + x;
+ goto ret;
+ } /* x is NaN */
+ if (n < infint)
+ {
+ retval = (x > 0) ? (hhuge * hhuge) : (tiny * tiny);
+ goto ret;
+ }
+ /* x is finite, cause either overflow or underflow */
+ if (junk1.i[LOW_HALF] != 0)
+ {
+ retval = x + x;
+ goto ret;
+ } /* x is NaN */
+ retval = (x > 0) ? inf.x : zero; /* |x| = inf; return either inf or 0 */
+ goto ret;
+ }
+
+ y = x * log2e.x + three51.x;
bexp = y - three51.x;
junk1.x = y;
- eps = bexp*ln_two2.x;
- t = x - bexp*ln_two1.x;
+ eps = bexp * ln_two2.x;
+ t = x - bexp * ln_two1.x;
y = t + three33.x;
base = y - three33.x;
junk2.x = y;
del = (t - base) - eps;
- eps = del + del*del*(p3.x*del + p2.x);
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
- rem=(bet + bet*eps)+al*eps;
+ eps = del + del * del * (p3.x * del + p2.x);
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+ rem = (bet + bet * eps) + al * eps;
res = al + rem;
cor = (al - res) + rem;
- if (m>>31) {
- ex=junk1.i[LOW_HALF];
- if (res < 1.0) {res+=res; cor+=cor; ex-=1;}
- if (ex >=-1022) {
- binexp.i[HIGH_HALF] = (1023+ex)<<20;
- if (res == (res+cor*err_0)) { retval = res*binexp.x; goto ret; }
- else { retval = __slowexp(x); goto ret; } /*if error is over bound */
- }
- ex = -(1022+ex);
- binexp.i[HIGH_HALF] = (1023-ex)<<20;
- res*=binexp.x;
- cor*=binexp.x;
- eps=1.0000000001+err_0*binexp.x;
- t=1.0+res;
- y = ((1.0-t)+res)+cor;
- res=t+y;
- cor = (t-res)+y;
- if (res == (res + eps*cor))
- { binexp.i[HIGH_HALF] = 0x00100000;
- retval = (res-1.0)*binexp.x;
- goto ret;
- }
- else { retval = __slowexp(x); goto ret; } /* if error is over bound */
- }
- else {
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+767)<<20;
- if (res == (res+cor*err_0)) { retval = res*binexp.x*t256.x; goto ret; }
- else { retval = __slowexp(x); goto ret; }
- }
- ret:
+ if (m >> 31)
+ {
+ ex = junk1.i[LOW_HALF];
+ if (res < 1.0)
+ {
+ res += res;
+ cor += cor;
+ ex -= 1;
+ }
+ if (ex >= -1022)
+ {
+ binexp.i[HIGH_HALF] = (1023 + ex) << 20;
+ if (res == (res + cor * err_0))
+ {
+ retval = res * binexp.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ } /*if error is over bound */
+ }
+ ex = -(1022 + ex);
+ binexp.i[HIGH_HALF] = (1023 - ex) << 20;
+ res *= binexp.x;
+ cor *= binexp.x;
+ eps = 1.0000000001 + err_0 * binexp.x;
+ t = 1.0 + res;
+ y = ((1.0 - t) + res) + cor;
+ res = t + y;
+ cor = (t - res) + y;
+ if (res == (res + eps * cor))
+ {
+ binexp.i[HIGH_HALF] = 0x00100000;
+ retval = (res - 1.0) * binexp.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ } /* if error is over bound */
+ }
+ else
+ {
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
+ if (res == (res + cor * err_0))
+ {
+ retval = res * binexp.x * t256.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ }
+ }
+ret:
return retval;
}
#ifndef __ieee754_exp
strong_alias (__ieee754_exp, __exp_finite)
#endif
-/************************************************************************/
-/* Compute e^(x+xx)(Double-Length number) .The routine also receive */
-/* bound of error of previous calculation .If after computing exp */
-/* error bigger than allows routine return non positive number */
-/*else return e^(x + xx) (always positive ) */
-/************************************************************************/
-
+/* Compute e^(x+xx). The routine also receives bound of error of previous
+ calculation. If after computing exp the error exceeds the allowed bounds,
+ the routine returns a non-positive number. Otherwise it returns the
+ computed result, which is always positive. */
double
SECTION
-__exp1(double x, double xx, double error) {
+__exp1 (double x, double xx, double error)
+{
double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
- mynumber junk1, junk2, binexp = {{0,0}};
- int4 i,j,m,n,ex;
+ mynumber junk1, junk2, binexp = {{0, 0}};
+ int4 i, j, m, n, ex;
junk1.x = x;
m = junk1.i[HIGH_HALF];
- n = m&hugeint; /* no sign */
-
- if (n > smallint && n < bigint) {
- y = x*log2e.x + three51.x;
- bexp = y - three51.x; /* multiply the result by 2**bexp */
-
- junk1.x = y;
-
- eps = bexp*ln_two2.x; /* x = bexp*ln(2) + t - eps */
- t = x - bexp*ln_two1.x;
-
- y = t + three33.x;
- base = y - three33.x; /* t rounded to a multiple of 2**-18 */
- junk2.x = y;
- del = (t - base) + (xx-eps); /* x = bexp*ln(2) + base + del */
- eps = del + del*del*(p3.x*del + p2.x);
-
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+1023)<<20;
-
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
-
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
-
- rem=(bet + bet*eps)+al*eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (res == (res+cor*(1.0+error+err_1))) return res*binexp.x;
- else return -10.0;
- }
-
- if (n <= smallint) return 1.0; /* if x->0 e^x=1 */
-
- if (n >= badint) {
- if (n > infint) return(zero/zero); /* x is NaN, return invalid */
- if (n < infint) return ( (x>0) ? (hhuge*hhuge) : (tiny*tiny) );
- /* x is finite, cause either overflow or underflow */
- if (junk1.i[LOW_HALF] != 0) return (zero/zero); /* x is NaN */
- return ((x>0)?inf.x:zero ); /* |x| = inf; return either inf or 0 */
- }
-
- y = x*log2e.x + three51.x;
+ n = m & hugeint; /* no sign */
+
+ if (n > smallint && n < bigint)
+ {
+ y = x * log2e.x + three51.x;
+ bexp = y - three51.x; /* multiply the result by 2**bexp */
+
+ junk1.x = y;
+
+ eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */
+ t = x - bexp * ln_two1.x;
+
+ y = t + three33.x;
+ base = y - three33.x; /* t rounded to a multiple of 2**-18 */
+ junk2.x = y;
+ del = (t - base) + (xx - eps); /* x = bexp*ln(2) + base + del */
+ eps = del + del * del * (p3.x * del + p2.x);
+
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
+
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+
+ rem = (bet + bet * eps) + al * eps;
+ res = al + rem;
+ cor = (al - res) + rem;
+ if (res == (res + cor * (1.0 + error + err_1)))
+ return res * binexp.x;
+ else
+ return -10.0;
+ }
+
+ if (n <= smallint)
+ return 1.0; /* if x->0 e^x=1 */
+
+ if (n >= badint)
+ {
+ if (n > infint)
+ return (zero / zero); /* x is NaN, return invalid */
+ if (n < infint)
+ return ((x > 0) ? (hhuge * hhuge) : (tiny * tiny));
+ /* x is finite, cause either overflow or underflow */
+ if (junk1.i[LOW_HALF] != 0)
+ return (zero / zero); /* x is NaN */
+ return ((x > 0) ? inf.x : zero); /* |x| = inf; return either inf or 0 */
+ }
+
+ y = x * log2e.x + three51.x;
bexp = y - three51.x;
junk1.x = y;
- eps = bexp*ln_two2.x;
- t = x - bexp*ln_two1.x;
+ eps = bexp * ln_two2.x;
+ t = x - bexp * ln_two1.x;
y = t + three33.x;
base = y - three33.x;
junk2.x = y;
- del = (t - base) + (xx-eps);
- eps = del + del*del*(p3.x*del + p2.x);
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
- rem=(bet + bet*eps)+al*eps;
+ del = (t - base) + (xx - eps);
+ eps = del + del * del * (p3.x * del + p2.x);
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+ rem = (bet + bet * eps) + al * eps;
res = al + rem;
cor = (al - res) + rem;
- if (m>>31) {
- ex=junk1.i[LOW_HALF];
- if (res < 1.0) {res+=res; cor+=cor; ex-=1;}
- if (ex >=-1022) {
- binexp.i[HIGH_HALF] = (1023+ex)<<20;
- if (res == (res+cor*(1.0+error+err_1))) return res*binexp.x;
- else return -10.0;
- }
- ex = -(1022+ex);
- binexp.i[HIGH_HALF] = (1023-ex)<<20;
- res*=binexp.x;
- cor*=binexp.x;
- eps=1.00000000001+(error+err_1)*binexp.x;
- t=1.0+res;
- y = ((1.0-t)+res)+cor;
- res=t+y;
- cor = (t-res)+y;
- if (res == (res + eps*cor))
- {binexp.i[HIGH_HALF] = 0x00100000; return (res-1.0)*binexp.x;}
- else return -10.0;
- }
- else {
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+767)<<20;
- if (res == (res+cor*(1.0+error+err_1)))
- return res*binexp.x*t256.x;
- else return -10.0;
- }
+ if (m >> 31)
+ {
+ ex = junk1.i[LOW_HALF];
+ if (res < 1.0)
+ {
+ res += res;
+ cor += cor;
+ ex -= 1;
+ }
+ if (ex >= -1022)
+ {
+ binexp.i[HIGH_HALF] = (1023 + ex) << 20;
+ if (res == (res + cor * (1.0 + error + err_1)))
+ return res * binexp.x;
+ else
+ return -10.0;
+ }
+ ex = -(1022 + ex);
+ binexp.i[HIGH_HALF] = (1023 - ex) << 20;
+ res *= binexp.x;
+ cor *= binexp.x;
+ eps = 1.00000000001 + (error + err_1) * binexp.x;
+ t = 1.0 + res;
+ y = ((1.0 - t) + res) + cor;
+ res = t + y;
+ cor = (t - res) + y;
+ if (res == (res + eps * cor))
+ {
+ binexp.i[HIGH_HALF] = 0x00100000;
+ return (res - 1.0) * binexp.x;
+ }
+ else
+ return -10.0;
+ }
+ else
+ {
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
+ if (res == (res + cor * (1.0 + error + err_1)))
+ return res * binexp.x * t256.x;
+ else
+ return -10.0;
+ }
}
Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_pow.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_pow.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_pow.c Wed Oct 9 00:02:12 2013
@@ -49,354 +49,407 @@
static const double huge = 1.0e300, tiny = 1.0e-300;
-double __exp1(double x, double xx, double error);
-static double log1(double x, double *delta, double *error);
-static double my_log2(double x, double *delta, double *error);
-double __slowpow(double x, double y,double z);
-static double power1(double x, double y);
-static int checkint(double x);
-
-/***************************************************************************/
-/* An ultimate power routine. Given two IEEE double machine numbers y,x */
-/* it computes the correctly rounded (to nearest) value of X^y. */
-/***************************************************************************/
+double __exp1 (double x, double xx, double error);
+static double log1 (double x, double *delta, double *error);
+static double my_log2 (double x, double *delta, double *error);
+double __slowpow (double x, double y, double z);
+static double power1 (double x, double y);
+static int checkint (double x);
+
+/* An ultimate power routine. Given two IEEE double machine numbers y, x it
+ computes the correctly rounded (to nearest) value of X^y. */
double
SECTION
-__ieee754_pow(double x, double y) {
- double z,a,aa,error, t,a1,a2,y1,y2;
- mynumber u,v;
+__ieee754_pow (double x, double y)
+{
+ double z, a, aa, error, t, a1, a2, y1, y2;
+ mynumber u, v;
int k;
- int4 qx,qy;
- v.x=y;
- u.x=x;
- if (v.i[LOW_HALF] == 0) { /* of y */
- qx = u.i[HIGH_HALF]&0x7fffffff;
- /* Is x a NaN? */
- if (((qx == 0x7ff00000) && (u.i[LOW_HALF] != 0)) || (qx > 0x7ff00000))
- return x;
- if (y == 1.0) return x;
- if (y == 2.0) return x*x;
- if (y == -1.0) return 1.0/x;
- if (y == 0) return 1.0;
- }
+ int4 qx, qy;
+ v.x = y;
+ u.x = x;
+ if (v.i[LOW_HALF] == 0)
+ { /* of y */
+ qx = u.i[HIGH_HALF] & 0x7fffffff;
+ /* Is x a NaN? */
+ if (((qx == 0x7ff00000) && (u.i[LOW_HALF] != 0)) || (qx > 0x7ff00000))
+ return x;
+ if (y == 1.0)
+ return x;
+ if (y == 2.0)
+ return x * x;
+ if (y == -1.0)
+ return 1.0 / x;
+ if (y == 0)
+ return 1.0;
+ }
/* else */
- if(((u.i[HIGH_HALF]>0 && u.i[HIGH_HALF]<0x7ff00000)|| /* x>0 and not x->0 */
- (u.i[HIGH_HALF]==0 && u.i[LOW_HALF]!=0)) &&
- /* 2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
- (v.i[HIGH_HALF]&0x7fffffff) < 0x4ff00000) { /* if y<-1 or y>1 */
- double retval;
-
- SET_RESTORE_ROUND (FE_TONEAREST);
-
- /* Avoid internal underflow for tiny y. The exact value of y does
- not matter if |y| <= 2**-64. */
- if (ABS (y) < 0x1p-64)
- y = y < 0 ? -0x1p-64 : 0x1p-64;
- z = log1(x,&aa,&error); /* x^y =e^(y log (X)) */
- t = y*CN;
- y1 = t - (t-y);
- y2 = y - y1;
- t = z*CN;
- a1 = t - (t-z);
- a2 = (z - a1)+aa;
- a = y1*a1;
- aa = y2*a1 + y*a2;
- a1 = a+aa;
- a2 = (a-a1)+aa;
- error = error*ABS(y);
- t = __exp1(a1,a2,1.9e16*error); /* return -10 or 0 if wasn't computed exactly */
- retval = (t>0)?t:power1(x,y);
-
- return retval;
- }
-
- if (x == 0) {
- if (((v.i[HIGH_HALF] & 0x7fffffff) == 0x7ff00000 && v.i[LOW_HALF] != 0)
- || (v.i[HIGH_HALF] & 0x7fffffff) > 0x7ff00000) /* NaN */
- return y;
- if (ABS(y) > 1.0e20) return (y>0)?0:1.0/0.0;
- k = checkint(y);
- if (k == -1)
- return y < 0 ? 1.0/x : x;
- else
- return y < 0 ? 1.0/0.0 : 0.0; /* return 0 */
- }
-
- qx = u.i[HIGH_HALF]&0x7fffffff; /* no sign */
- qy = v.i[HIGH_HALF]&0x7fffffff; /* no sign */
-
- if (qx >= 0x7ff00000 && (qx > 0x7ff00000 || u.i[LOW_HALF] != 0)) /* NaN */
+ if (((u.i[HIGH_HALF] > 0 && u.i[HIGH_HALF] < 0x7ff00000) || /* x>0 and not x->0 */
+ (u.i[HIGH_HALF] == 0 && u.i[LOW_HALF] != 0)) &&
+ /* 2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
+ (v.i[HIGH_HALF] & 0x7fffffff) < 0x4ff00000)
+ { /* if y<-1 or y>1 */
+ double retval;
+
+ SET_RESTORE_ROUND (FE_TONEAREST);
+
+ /* Avoid internal underflow for tiny y. The exact value of y does
+ not matter if |y| <= 2**-64. */
+ if (ABS (y) < 0x1p-64)
+ y = y < 0 ? -0x1p-64 : 0x1p-64;
+ z = log1 (x, &aa, &error); /* x^y =e^(y log (X)) */
+ t = y * CN;
+ y1 = t - (t - y);
+ y2 = y - y1;
+ t = z * CN;
+ a1 = t - (t - z);
+ a2 = (z - a1) + aa;
+ a = y1 * a1;
+ aa = y2 * a1 + y * a2;
+ a1 = a + aa;
+ a2 = (a - a1) + aa;
+ error = error * ABS (y);
+ t = __exp1 (a1, a2, 1.9e16 * error); /* return -10 or 0 if wasn't computed exactly */
+ retval = (t > 0) ? t : power1 (x, y);
+
+ return retval;
+ }
+
+ if (x == 0)
+ {
+ if (((v.i[HIGH_HALF] & 0x7fffffff) == 0x7ff00000 && v.i[LOW_HALF] != 0)
+ || (v.i[HIGH_HALF] & 0x7fffffff) > 0x7ff00000) /* NaN */
+ return y;
+ if (ABS (y) > 1.0e20)
+ return (y > 0) ? 0 : 1.0 / 0.0;
+ k = checkint (y);
+ if (k == -1)
+ return y < 0 ? 1.0 / x : x;
+ else
+ return y < 0 ? 1.0 / 0.0 : 0.0; /* return 0 */
+ }
+
+ qx = u.i[HIGH_HALF] & 0x7fffffff; /* no sign */
+ qy = v.i[HIGH_HALF] & 0x7fffffff; /* no sign */
+
+ if (qx >= 0x7ff00000 && (qx > 0x7ff00000 || u.i[LOW_HALF] != 0)) /* NaN */
return x;
- if (qy >= 0x7ff00000 && (qy > 0x7ff00000 || v.i[LOW_HALF] != 0)) /* NaN */
+ if (qy >= 0x7ff00000 && (qy > 0x7ff00000 || v.i[LOW_HALF] != 0)) /* NaN */
return x == 1.0 ? 1.0 : y;
/* if x<0 */
- if (u.i[HIGH_HALF] < 0) {
- k = checkint(y);
- if (k==0) {
- if (qy == 0x7ff00000) {
- if (x == -1.0) return 1.0;
- else if (x > -1.0) return v.i[HIGH_HALF] < 0 ? INF.x : 0.0;
- else return v.i[HIGH_HALF] < 0 ? 0.0 : INF.x;
- }
+ if (u.i[HIGH_HALF] < 0)
+ {
+ k = checkint (y);
+ if (k == 0)
+ {
+ if (qy == 0x7ff00000)
+ {
+ if (x == -1.0)
+ return 1.0;
+ else if (x > -1.0)
+ return v.i[HIGH_HALF] < 0 ? INF.x : 0.0;
+ else
+ return v.i[HIGH_HALF] < 0 ? 0.0 : INF.x;
+ }
+ else if (qx == 0x7ff00000)
+ return y < 0 ? 0.0 : INF.x;
+ return (x - x) / (x - x); /* y not integer and x<0 */
+ }
else if (qx == 0x7ff00000)
- return y < 0 ? 0.0 : INF.x;
- return (x - x) / (x - x); /* y not integer and x<0 */
- }
- else if (qx == 0x7ff00000)
- {
- if (k < 0)
- return y < 0 ? nZERO.x : nINF.x;
- else
- return y < 0 ? 0.0 : INF.x;
- }
- return (k==1)?__ieee754_pow(-x,y):-__ieee754_pow(-x,y); /* if y even or odd */
- }
+ {
+ if (k < 0)
+ return y < 0 ? nZERO.x : nINF.x;
+ else
+ return y < 0 ? 0.0 : INF.x;
+ }
+ /* if y even or odd */
+ return (k == 1) ? __ieee754_pow (-x, y) : -__ieee754_pow (-x, y);
+ }
/* x>0 */
- if (qx == 0x7ff00000) /* x= 2^-0x3ff */
+ if (qx == 0x7ff00000) /* x= 2^-0x3ff */
return y > 0 ? x : 0;
- if (qy > 0x45f00000 && qy < 0x7ff00000) {
- if (x == 1.0) return 1.0;
- if (y>0) return (x>1.0)?huge*huge:tiny*tiny;
- if (y<0) return (x<1.0)?huge*huge:tiny*tiny;
- }
-
- if (x == 1.0) return 1.0;
- if (y>0) return (x>1.0)?INF.x:0;
- if (y<0) return (x<1.0)?INF.x:0;
- return 0; /* unreachable, to make the compiler happy */
+ if (qy > 0x45f00000 && qy < 0x7ff00000)
+ {
+ if (x == 1.0)
+ return 1.0;
+ if (y > 0)
+ return (x > 1.0) ? huge * huge : tiny * tiny;
+ if (y < 0)
+ return (x < 1.0) ? huge * huge : tiny * tiny;
+ }
+
+ if (x == 1.0)
+ return 1.0;
+ if (y > 0)
+ return (x > 1.0) ? INF.x : 0;
+ if (y < 0)
+ return (x < 1.0) ? INF.x : 0;
+ return 0; /* unreachable, to make the compiler happy */
}
+
#ifndef __ieee754_pow
strong_alias (__ieee754_pow, __pow_finite)
#endif
-/**************************************************************************/
-/* Computing x^y using more accurate but more slow log routine */
-/**************************************************************************/
+/* Compute x^y using more accurate but more slow log routine. */
static double
SECTION
-power1(double x, double y) {
- double z,a,aa,error, t,a1,a2,y1,y2;
- z = my_log2(x,&aa,&error);
- t = y*CN;
- y1 = t - (t-y);
+power1 (double x, double y)
+{
+ double z, a, aa, error, t, a1, a2, y1, y2;
+ z = my_log2 (x, &aa, &error);
+ t = y * CN;
+ y1 = t - (t - y);
y2 = y - y1;
- t = z*CN;
- a1 = t - (t-z);
+ t = z * CN;
+ a1 = t - (t - z);
a2 = z - a1;
- a = y*z;
- aa = ((y1*a1-a)+y1*a2+y2*a1)+y2*a2+aa*y;
- a1 = a+aa;
- a2 = (a-a1)+aa;
- error = error*ABS(y);
- t = __exp1(a1,a2,1.9e16*error);
- return (t >= 0)?t:__slowpow(x,y,z);
+ a = y * z;
+ aa = ((y1 * a1 - a) + y1 * a2 + y2 * a1) + y2 * a2 + aa * y;
+ a1 = a + aa;
+ a2 = (a - a1) + aa;
+ error = error * ABS (y);
+ t = __exp1 (a1, a2, 1.9e16 * error);
+ return (t >= 0) ? t : __slowpow (x, y, z);
}
-/****************************************************************************/
-/* Computing log(x) (x is left argument). The result is the returned double */
-/* + the parameter delta. */
-/* The result is bounded by error (rightmost argument) */
-/****************************************************************************/
+/* Compute log(x) (x is left argument). The result is the returned double + the
+ parameter DELTA. The result is bounded by ERROR. */
static double
SECTION
-log1(double x, double *delta, double *error) {
- int i,j,m;
- double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
- mynumber u,v;
+log1 (double x, double *delta, double *error)
+{
+ int i, j, m;
+ double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
+ mynumber u, v;
#ifdef BIG_ENDI
- mynumber
-/**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
+ mynumber /**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
#else
-#ifdef LITTLE_ENDI
- mynumber
-/**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
-#endif
+# ifdef LITTLE_ENDI
+ mynumber /**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
+# endif
#endif
u.x = x;
m = u.i[HIGH_HALF];
*error = 0;
*delta = 0;
- if (m < 0x00100000) /* 1<x<2^-1007 */
- { x = x*t52.x; add = -52.0; u.x = x; m = u.i[HIGH_HALF];}
-
- if ((m&0x000fffff) < 0x0006a09e)
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3ff00000; two52.i[LOW_HALF]=(m>>20); }
+ if (m < 0x00100000) /* 1<x<2^-1007 */
+ {
+ x = x * t52.x;
+ add = -52.0;
+ u.x = x;
+ m = u.i[HIGH_HALF];
+ }
+
+ if ((m & 0x000fffff) < 0x0006a09e)
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
+ two52.i[LOW_HALF] = (m >> 20);
+ }
else
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3fe00000; two52.i[LOW_HALF]=(m>>20)+1; }
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
+ two52.i[LOW_HALF] = (m >> 20) + 1;
+ }
v.x = u.x + bigu.x;
uu = v.x - bigu.x;
- i = (v.i[LOW_HALF]&0x000003ff)<<2;
- if (two52.i[LOW_HALF] == 1023) /* nx = 0 */
- {
- if (i > 1192 && i < 1208) /* |x-1| < 1.5*2**-10 */
- {
+ i = (v.i[LOW_HALF] & 0x000003ff) << 2;
+ if (two52.i[LOW_HALF] == 1023) /* nx = 0 */
+ {
+ if (i > 1192 && i < 1208) /* |x-1| < 1.5*2**-10 */
+ {
t = x - 1.0;
- t1 = (t+5.0e6)-5.0e6;
- t2 = t-t1;
- e1 = t - 0.5*t1*t1;
- e2 = t*t*t*(r3+t*(r4+t*(r5+t*(r6+t*(r7+t*r8)))))-0.5*t2*(t+t1);
- res = e1+e2;
- *error = 1.0e-21*ABS(t);
- *delta = (e1-res)+e2;
+ t1 = (t + 5.0e6) - 5.0e6;
+ t2 = t - t1;
+ e1 = t - 0.5 * t1 * t1;
+ e2 = (t * t * t * (r3 + t * (r4 + t * (r5 + t * (r6 + t
+ * (r7 + t * r8)))))
+ - 0.5 * t2 * (t + t1));
+ res = e1 + e2;
+ *error = 1.0e-21 * ABS (t);
+ *delta = (e1 - res) + e2;
return res;
- } /* |x-1| < 1.5*2**-10 */
+ } /* |x-1| < 1.5*2**-10 */
else
- {
- v.x = u.x*(ui.x[i]+ui.x[i+1])+bigv.x;
- vv = v.x-bigv.x;
- j = v.i[LOW_HALF]&0x0007ffff;
- j = j+j+j;
- eps = u.x - uu*vv;
- e1 = eps*ui.x[i];
- e2 = eps*(ui.x[i+1]+vj.x[j]*(ui.x[i]+ui.x[i+1]));
- e = e1+e2;
- e2 = ((e1-e)+e2);
- t=ui.x[i+2]+vj.x[j+1];
- t1 = t+e;
- t2 = (((t-t1)+e)+(ui.x[i+3]+vj.x[j+2]))+e2+e*e*(p2+e*(p3+e*p4));
- res=t1+t2;
+ {
+ v.x = u.x * (ui.x[i] + ui.x[i + 1]) + bigv.x;
+ vv = v.x - bigv.x;
+ j = v.i[LOW_HALF] & 0x0007ffff;
+ j = j + j + j;
+ eps = u.x - uu * vv;
+ e1 = eps * ui.x[i];
+ e2 = eps * (ui.x[i + 1] + vj.x[j] * (ui.x[i] + ui.x[i + 1]));
+ e = e1 + e2;
+ e2 = ((e1 - e) + e2);
+ t = ui.x[i + 2] + vj.x[j + 1];
+ t1 = t + e;
+ t2 = ((((t - t1) + e) + (ui.x[i + 3] + vj.x[j + 2])) + e2 + e * e
+ * (p2 + e * (p3 + e * p4)));
+ res = t1 + t2;
*error = 1.0e-24;
- *delta = (t1-res)+t2;
+ *delta = (t1 - res) + t2;
return res;
- }
- } /* nx = 0 */
- else /* nx != 0 */
- {
+ }
+ } /* nx = 0 */
+ else /* nx != 0 */
+ {
eps = u.x - uu;
- nx = (two52.x - two52e.x)+add;
- e1 = eps*ui.x[i];
- e2 = eps*ui.x[i+1];
- e=e1+e2;
- e2 = (e1-e)+e2;
- t=nx*ln2a.x+ui.x[i+2];
- t1=t+e;
- t2=(((t-t1)+e)+nx*ln2b.x+ui.x[i+3]+e2)+e*e*(q2+e*(q3+e*(q4+e*(q5+e*q6))));
- res = t1+t2;
+ nx = (two52.x - two52e.x) + add;
+ e1 = eps * ui.x[i];
+ e2 = eps * ui.x[i + 1];
+ e = e1 + e2;
+ e2 = (e1 - e) + e2;
+ t = nx * ln2a.x + ui.x[i + 2];
+ t1 = t + e;
+ t2 = ((((t - t1) + e) + nx * ln2b.x + ui.x[i + 3] + e2) + e * e
+ * (q2 + e * (q3 + e * (q4 + e * (q5 + e * q6)))));
+ res = t1 + t2;
*error = 1.0e-21;
- *delta = (t1-res)+t2;
+ *delta = (t1 - res) + t2;
return res;
- } /* nx != 0 */
+ } /* nx != 0 */
}
-/****************************************************************************/
-/* More slow but more accurate routine of log */
-/* Computing log(x)(x is left argument).The result is return double + delta.*/
-/* The result is bounded by error (right argument) */
-/****************************************************************************/
+/* Slower but more accurate routine of log. The returned result is double +
+ DELTA. The result is bounded by ERROR. */
static double
SECTION
-my_log2(double x, double *delta, double *error) {
- int i,j,m;
- double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
- double ou1,ou2,lu1,lu2,ov,lv1,lv2,a,a1,a2;
- double y,yy,z,zz,j1,j2,j7,j8;
+my_log2 (double x, double *delta, double *error)
+{
+ int i, j, m;
+ double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
+ double ou1, ou2, lu1, lu2, ov, lv1, lv2, a, a1, a2;
+ double y, yy, z, zz, j1, j2, j7, j8;
#ifndef DLA_FMS
- double j3,j4,j5,j6;
+ double j3, j4, j5, j6;
#endif
- mynumber u,v;
+ mynumber u, v;
#ifdef BIG_ENDI
- mynumber
-/**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
+ mynumber /**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
#else
-#ifdef LITTLE_ENDI
- mynumber
-/**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
-#endif
+# ifdef LITTLE_ENDI
+ mynumber /**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
+# endif
#endif
u.x = x;
m = u.i[HIGH_HALF];
*error = 0;
*delta = 0;
- add=0;
- if (m<0x00100000) { /* x < 2^-1022 */
- x = x*t52.x; add = -52.0; u.x = x; m = u.i[HIGH_HALF]; }
-
- if ((m&0x000fffff) < 0x0006a09e)
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3ff00000; two52.i[LOW_HALF]=(m>>20); }
+ add = 0;
+ if (m < 0x00100000)
+ { /* x < 2^-1022 */
+ x = x * t52.x;
+ add = -52.0;
+ u.x = x;
+ m = u.i[HIGH_HALF];
+ }
+
+ if ((m & 0x000fffff) < 0x0006a09e)
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
+ two52.i[LOW_HALF] = (m >> 20);
+ }
else
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3fe00000; two52.i[LOW_HALF]=(m>>20)+1; }
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
+ two52.i[LOW_HALF] = (m >> 20) + 1;
+ }
v.x = u.x + bigu.x;
uu = v.x - bigu.x;
- i = (v.i[LOW_HALF]&0x000003ff)<<2;
+ i = (v.i[LOW_HALF] & 0x000003ff) << 2;
/*------------------------------------- |x-1| < 2**-11------------------------------- */
- if ((two52.i[LOW_HALF] == 1023) && (i == 1200))
- {
+ if ((two52.i[LOW_HALF] == 1023) && (i == 1200))
+ {
t = x - 1.0;
- EMULV(t,s3,y,yy,j1,j2,j3,j4,j5);
- ADD2(-0.5,0,y,yy,z,zz,j1,j2);
- MUL2(t,0,z,zz,y,yy,j1,j2,j3,j4,j5,j6,j7,j8);
- MUL2(t,0,y,yy,z,zz,j1,j2,j3,j4,j5,j6,j7,j8);
-
- e1 = t+z;
- e2 = (((t-e1)+z)+zz)+t*t*t*(ss3+t*(s4+t*(s5+t*(s6+t*(s7+t*s8)))));
- res = e1+e2;
- *error = 1.0e-25*ABS(t);
- *delta = (e1-res)+e2;
+ EMULV (t, s3, y, yy, j1, j2, j3, j4, j5);
+ ADD2 (-0.5, 0, y, yy, z, zz, j1, j2);
+ MUL2 (t, 0, z, zz, y, yy, j1, j2, j3, j4, j5, j6, j7, j8);
+ MUL2 (t, 0, y, yy, z, zz, j1, j2, j3, j4, j5, j6, j7, j8);
+
+ e1 = t + z;
+ e2 = ((((t - e1) + z) + zz) + t * t * t
+ * (ss3 + t * (s4 + t * (s5 + t * (s6 + t * (s7 + t * s8))))));
+ res = e1 + e2;
+ *error = 1.0e-25 * ABS (t);
+ *delta = (e1 - res) + e2;
return res;
- }
+ }
/*----------------------------- |x-1| > 2**-11 -------------------------- */
else
- { /*Computing log(x) according to log table */
- nx = (two52.x - two52e.x)+add;
+ { /*Computing log(x) according to log table */
+ nx = (two52.x - two52e.x) + add;
ou1 = ui.x[i];
- ou2 = ui.x[i+1];
- lu1 = ui.x[i+2];
- lu2 = ui.x[i+3];
- v.x = u.x*(ou1+ou2)+bigv.x;
- vv = v.x-bigv.x;
- j = v.i[LOW_HALF]&0x0007ffff;
- j = j+j+j;
- eps = u.x - uu*vv;
- ov = vj.x[j];
- lv1 = vj.x[j+1];
- lv2 = vj.x[j+2];
- a = (ou1+ou2)*(1.0+ov);
- a1 = (a+1.0e10)-1.0e10;
- a2 = a*(1.0-a1*uu*vv);
- e1 = eps*a1;
- e2 = eps*a2;
- e = e1+e2;
- e2 = (e1-e)+e2;
- t=nx*ln2a.x+lu1+lv1;
- t1 = t+e;
- t2 = (((t-t1)+e)+(lu2+lv2+nx*ln2b.x+e2))+e*e*(p2+e*(p3+e*p4));
- res=t1+t2;
+ ou2 = ui.x[i + 1];
+ lu1 = ui.x[i + 2];
+ lu2 = ui.x[i + 3];
+ v.x = u.x * (ou1 + ou2) + bigv.x;
+ vv = v.x - bigv.x;
+ j = v.i[LOW_HALF] & 0x0007ffff;
+ j = j + j + j;
+ eps = u.x - uu * vv;
+ ov = vj.x[j];
+ lv1 = vj.x[j + 1];
+ lv2 = vj.x[j + 2];
+ a = (ou1 + ou2) * (1.0 + ov);
+ a1 = (a + 1.0e10) - 1.0e10;
+ a2 = a * (1.0 - a1 * uu * vv);
+ e1 = eps * a1;
+ e2 = eps * a2;
+ e = e1 + e2;
+ e2 = (e1 - e) + e2;
+ t = nx * ln2a.x + lu1 + lv1;
+ t1 = t + e;
+ t2 = ((((t - t1) + e) + (lu2 + lv2 + nx * ln2b.x + e2)) + e * e
+ * (p2 + e * (p3 + e * p4)));
+ res = t1 + t2;
*error = 1.0e-27;
- *delta = (t1-res)+t2;
+ *delta = (t1 - res) + t2;
return res;
- }
+ }
}
-/**********************************************************************/
-/* Routine receives a double x and checks if it is an integer. If not */
-/* it returns 0, else it returns 1 if even or -1 if odd. */
-/**********************************************************************/
+/* This function receives a double x and checks if it is an integer. If not,
+ it returns 0, else it returns 1 if even or -1 if odd. */
static int
SECTION
-checkint(double x) {
- union {int4 i[2]; double x;} u;
- int k,m,n;
+checkint (double x)
+{
+ union
+ {
+ int4 i[2];
+ double x;
+ } u;
+ int k, m, n;
u.x = x;
- m = u.i[HIGH_HALF]&0x7fffffff; /* no sign */
- if (m >= 0x7ff00000) return 0; /* x is +/-inf or NaN */
- if (m >= 0x43400000) return 1; /* |x| >= 2**53 */
- if (m < 0x40000000) return 0; /* |x| < 2, can not be 0 or 1 */
+ m = u.i[HIGH_HALF] & 0x7fffffff; /* no sign */
+ if (m >= 0x7ff00000)
+ return 0; /* x is +/-inf or NaN */
+ if (m >= 0x43400000)
+ return 1; /* |x| >= 2**53 */
+ if (m < 0x40000000)
+ return 0; /* |x| < 2, can not be 0 or 1 */
n = u.i[LOW_HALF];
- k = (m>>20)-1023; /* 1 <= k <= 52 */
- if (k == 52) return (n&1)? -1:1; /* odd or even*/
- if (k>20) {
- if (n<<(k-20)) return 0; /* if not integer */
- return (n<<(k-21))?-1:1;
- }
- if (n) return 0; /*if not integer*/
- if (k == 20) return (m&1)? -1:1;
- if (m<<(k+12)) return 0;
- return (m<<(k+11))?-1:1;
+ k = (m >> 20) - 1023; /* 1 <= k <= 52 */
+ if (k == 52)
+ return (n & 1) ? -1 : 1; /* odd or even */
+ if (k > 20)
+ {
+ if (n << (k - 20))
+ return 0; /* if not integer */
+ return (n << (k - 21)) ? -1 : 1;
+ }
+ if (n)
+ return 0; /*if not integer */
+ if (k == 20)
+ return (m & 1) ? -1 : 1;
+ if (m << (k + 12))
+ return 0;
+ return (m << (k + 11)) ? -1 : 1;
}
Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_sin.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_sin.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_sin.c Wed Oct 9 00:02:12 2013
@@ -127,10 +127,8 @@
void __dubsin (double x, double dx, double w[]);
void __docos (double x, double dx, double w[]);
-double __mpsin (double x, double dx);
-double __mpcos (double x, double dx);
-double __mpsin1 (double x);
-double __mpcos1 (double x);
+double __mpsin (double x, double dx, bool reduce_range);
+double __mpcos (double x, double dx, bool reduce_range);
static double slow (double x);
static double slow1 (double x);
static double slow2 (double x);
@@ -722,7 +720,7 @@
if (w[0] == w[0] + 1.000000001 * w[1])
return (x > 0) ? w[0] : -w[0];
else
- return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+ return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
}
}
@@ -762,7 +760,7 @@
if (w[0] == w[0] + 1.000000005 * w[1])
return (x > 0) ? w[0] : -w[0];
else
- return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+ return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
}
}
@@ -815,7 +813,7 @@
if (w[0] == w[0] + 1.000000005 * w[1])
return (x > 0) ? w[0] : -w[0];
else
- return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+ return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
}
}
@@ -882,7 +880,7 @@
if (w[0] == w[0] + cor)
return (a > 0) ? w[0] : -w[0];
else
- return __mpsin1 (orig);
+ return __mpsin (orig, 0, true);
}
}
}
@@ -939,7 +937,7 @@
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return __mpsin1 (orig);
+ return __mpsin (orig, 0, true);
}
}
@@ -996,7 +994,7 @@
if (w[0] == w[0] + cor)
return (n & 2) ? -w[0] : w[0];
else
- return __mpsin1 (orig);
+ return __mpsin (orig, 0, true);
}
}
@@ -1028,7 +1026,7 @@
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return (n & 1) ? __mpcos1 (orig) : __mpsin1 (orig);
+ return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
}
}
@@ -1079,7 +1077,7 @@
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return (n & 1) ? __mpcos1 (orig) : __mpsin1 (orig);
+ return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
}
}
@@ -1131,7 +1129,7 @@
if (w[0] == w[0] + cor)
return (n & 2) ? -w[0] : w[0];
else
- return (n & 1) ? __mpsin1 (orig) : __mpcos1 (orig);
+ return (n & 1) ? __mpsin (orig, 0, true) : __mpcos (orig, 0, true);
}
}
@@ -1173,7 +1171,7 @@
if (w[0] == w[0] + 1.000000005 * w[1])
return w[0];
else
- return __mpcos (x, 0);
+ return __mpcos (x, 0, false);
}
}
@@ -1246,7 +1244,7 @@
if (w[0] == w[0] + cor)
return (a > 0) ? w[0] : -w[0];
else
- return __mpcos1 (orig);
+ return __mpcos (orig, 0, true);
}
}
}
@@ -1301,7 +1299,7 @@
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return __mpcos1 (orig);
+ return __mpcos (orig, 0, true);
}
}
@@ -1357,7 +1355,7 @@
if (w[0] == w[0] + cor)
return (n) ? -w[0] : w[0];
else
- return __mpcos1 (orig);
+ return __mpcos (orig, 0, true);
}
}
Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/sincos32.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/sincos32.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/sincos32.c Wed Oct 9 00:02:12 2013
@@ -187,50 +187,119 @@
return (res < res1) ? res : res1;
}
-/* Compute sin(x+dx) as Multi Precision number and return result as double. */
+/* Compute sin() of double-length number (X + DX) as Multi Precision number and
+ return result as double. If REDUCE_RANGE is true, X is assumed to be the
+ original input and DX is ignored. */
double
SECTION
-__mpsin (double x, double dx)
-{
- int p;
+__mpsin (double x, double dx, bool reduce_range)
+{
double y;
- mp_no a, b, c;
- p = 32;
- __dbl_mp (x, &a, p);
- __dbl_mp (dx, &b, p);
- __add (&a, &b, &c, p);
- if (x > 0.8)
- {
- __sub (&hp, &c, &a, p);
- __c32 (&a, &b, &c, p);
- }
- else
- __c32 (&c, &a, &b, p); /* b = sin(x+dx) */
- __mp_dbl (&b, &y, p);
+ mp_no a, b, c, s;
+ int n;
+ int p = 32;
+
+ if (reduce_range)
+ {
+ n = __mpranred (x, &a, p); /* n is 0, 1, 2 or 3. */
+ __c32 (&a, &c, &s, p);
+ }
+ else
+ {
+ n = -1;
+ __dbl_mp (x, &b, p);
+ __dbl_mp (dx, &c, p);
+ __add (&b, &c, &a, p);
+ if (x > 0.8)
+ {
+ __sub (&hp, &a, &b, p);
+ __c32 (&b, &s, &c, p);
+ }
+ else
+ __c32 (&a, &c, &s, p); /* b = sin(x+dx) */
+ }
+
+ /* Convert result based on which quarter of unit circle y is in. */
+ switch (n)
+ {
+ case 1:
+ __mp_dbl (&c, &y, p);
+ break;
+
+ case 3:
+ __mp_dbl (&c, &y, p);
+ y = -y;
+ break;
+
+ case 2:
+ __mp_dbl (&s, &y, p);
+ y = -y;
+ break;
+
+ /* Quadrant not set, so the result must be sin (X + DX), which is also in
+ S. */
+ case 0:
+ default:
+ __mp_dbl (&s, &y, p);
+ }
return y;
}
-/* Compute cos() of double-length number (x+dx) as Multi Precision number and
- return result as double. */
+/* Compute cos() of double-length number (X + DX) as Multi Precision number and
+ return result as double. If REDUCE_RANGE is true, X is assumed to be the
+ original input and DX is ignored. */
double
SECTION
-__mpcos (double x, double dx)
-{
- int p;
+__mpcos (double x, double dx, bool reduce_range)
+{
double y;
- mp_no a, b, c;
- p = 32;
- __dbl_mp (x, &a, p);
- __dbl_mp (dx, &b, p);
- __add (&a, &b, &c, p);
- if (x > 0.8)
- {
- __sub (&hp, &c, &b, p);
- __c32 (&b, &c, &a, p);
- }
- else
- __c32 (&c, &a, &b, p); /* a = cos(x+dx) */
- __mp_dbl (&a, &y, p);
+ mp_no a, b, c, s;
+ int n;
+ int p = 32;
+
+ if (reduce_range)
+ {
+ n = __mpranred (x, &a, p); /* n is 0, 1, 2 or 3. */
+ __c32 (&a, &c, &s, p);
+ }
+ else
+ {
+ n = -1;
+ __dbl_mp (x, &b, p);
+ __dbl_mp (dx, &c, p);
+ __add (&b, &c, &a, p);
+ if (x > 0.8)
+ {
+ __sub (&hp, &a, &b, p);
+ __c32 (&b, &s, &c, p);
+ }
+ else
+ __c32 (&a, &c, &s, p); /* a = cos(x+dx) */
+ }
+
+ /* Convert result based on which quarter of unit circle y is in. */
+ switch (n)
+ {
+ case 1:
+ __mp_dbl (&s, &y, p);
+ y = -y;
+ break;
+
+ case 3:
+ __mp_dbl (&s, &y, p);
+ break;
+
+ case 2:
+ __mp_dbl (&c, &y, p);
+ y = -y;
+ break;
+
+ /* Quadrant not set, so the result must be cos (X + DX), which is also
+ stored in C. */
+ case 0:
+ default:
+ __mp_dbl (&c, &y, p);
+ }
return y;
}
@@ -294,84 +363,3 @@
return (n & 3);
}
}
-
-/* Multi-Precision sin() function subroutine, for p = 32. It is based on the
- routines mpranred() and c32(). */
-double
-SECTION
-__mpsin1 (double x)
-{
- int p;
- int n;
- mp_no u, s, c;
- double y;
- p = 32;
- n = __mpranred (x, &u, p); /* n is 0, 1, 2 or 3. */
- __c32 (&u, &c, &s, p);
- /* Convert result based on which quarter of unit circle y is in. */
- switch (n)
- {
- case 0:
- __mp_dbl (&s, &y, p);
- return y;
- break;
-
- case 2:
- __mp_dbl (&s, &y, p);
- return -y;
- break;
-
- case 1:
- __mp_dbl (&c, &y, p);
- return y;
- break;
-
- case 3:
- __mp_dbl (&c, &y, p);
- return -y;
- break;
- }
- /* Unreachable, to make the compiler happy. */
- return 0;
-}
-
-/* Multi-Precision cos() function subroutine, for p = 32. It is based on the
- routines mpranred() and c32(). */
-double
-SECTION
-__mpcos1 (double x)
-{
- int p;
- int n;
- mp_no u, s, c;
- double y;
-
- p = 32;
- n = __mpranred (x, &u, p); /* n is 0, 1, 2 or 3. */
- __c32 (&u, &c, &s, p);
- /* Convert result based on which quarter of unit circle y is in. */
- switch (n)
- {
- case 0:
- __mp_dbl (&c, &y, p);
- return y;
- break;
-
- case 2:
- __mp_dbl (&c, &y, p);
- return -y;
- break;
-
- case 1:
- __mp_dbl (&s, &y, p);
- return -y;
- break;
-
- case 3:
- __mp_dbl (&s, &y, p);
- return y;
- break;
- }
- /* Unreachable, to make the compiler happy. */
- return 0;
-}
Modified: fsf/trunk/libc/sysdeps/x86_64/memset.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/memset.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/memset.S Wed Oct 9 00:02:12 2013
@@ -18,10 +18,6 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
.text
#if !defined NOT_IN_libc
@@ -71,12 +67,12 @@
L(return):
rep
ret
- ALIGN (4)
+ .p2align 4
L(between_32_64_bytes):
movdqu %xmm8, 16(%rdi)
movdqu %xmm8, -32(%rdi,%rdx)
ret
- ALIGN (4)
+ .p2align 4
L(loop_start):
leaq 64(%rdi), %rcx
movdqu %xmm8, (%rdi)
@@ -92,7 +88,7 @@
andq $-64, %rdx
cmpq %rdx, %rcx
je L(return)
- ALIGN (4)
+ .p2align 4
L(loop):
movdqa %xmm8, (%rcx)
movdqa %xmm8, 16(%rcx)
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S Wed Oct 9 00:02:12 2013
@@ -23,10 +23,6 @@
# ifndef MEMCMP
# define MEMCMP __memcmp_sse4_1
-# endif
-
-# ifndef ALIGN
-# define ALIGN(n) .p2align n
# endif
# define JMPTBL(I, B) (I - B)
@@ -60,7 +56,7 @@
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(firstbyte):
movzbl (%rdi), %eax
movzbl (%rsi), %ecx
@@ -68,7 +64,7 @@
ret
# endif
- ALIGN (4)
+ .p2align 4
L(79bytesormore):
movdqu (%rsi), %xmm1
movdqu (%rdi), %xmm2
@@ -316,7 +312,7 @@
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(512bytesormore):
# ifdef DATA_CACHE_SIZE_HALF
mov $DATA_CACHE_SIZE_HALF, %R8_LP
@@ -329,7 +325,7 @@
cmp %r8, %rdx
ja L(L2_L3_cache_unaglined)
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(64bytesormore_loop):
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
@@ -361,7 +357,7 @@
L(L2_L3_cache_unaglined):
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(L2_L3_unaligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
@@ -396,7 +392,7 @@
/*
* This case is for machines which are sensitive for unaligned instructions.
*/
- ALIGN (4)
+ .p2align 4
L(2aligned):
cmp $128, %rdx
ja L(128bytesormorein2aligned)
@@ -444,7 +440,7 @@
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(128bytesormorein2aligned):
cmp $512, %rdx
ja L(512bytesormorein2aligned)
@@ -519,7 +515,7 @@
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(256bytesormorein2aligned):
sub $256, %rdx
@@ -632,7 +628,7 @@
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(512bytesormorein2aligned):
# ifdef DATA_CACHE_SIZE_HALF
mov $DATA_CACHE_SIZE_HALF, %R8_LP
@@ -646,7 +642,7 @@
ja L(L2_L3_cache_aglined)
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(64bytesormore_loopin2aligned):
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
@@ -678,7 +674,7 @@
L(L2_L3_cache_aglined):
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(L2_L3_aligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
@@ -711,7 +707,7 @@
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(64bytesormore_loop_end):
add $16, %rdi
add $16, %rsi
@@ -806,7 +802,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(12bytes):
mov -12(%rdi), %rax
mov -12(%rsi), %rcx
@@ -827,7 +823,7 @@
# ifndef USE_AS_WMEMCMP
/* unreal case for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(65bytes):
movdqu -65(%rdi), %xmm1
movdqu -65(%rsi), %xmm2
@@ -864,7 +860,7 @@
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(13bytes):
mov -13(%rdi), %rax
mov -13(%rsi), %rcx
@@ -877,7 +873,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(5bytes):
mov -5(%rdi), %eax
mov -5(%rsi), %ecx
@@ -888,7 +884,7 @@
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(66bytes):
movdqu -66(%rdi), %xmm1
movdqu -66(%rsi), %xmm2
@@ -929,7 +925,7 @@
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(14bytes):
mov -14(%rdi), %rax
mov -14(%rsi), %rcx
@@ -942,7 +938,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(6bytes):
mov -6(%rdi), %eax
mov -6(%rsi), %ecx
@@ -958,7 +954,7 @@
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(67bytes):
movdqu -67(%rdi), %xmm2
movdqu -67(%rsi), %xmm1
@@ -997,7 +993,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(15bytes):
mov -15(%rdi), %rax
mov -15(%rsi), %rcx
@@ -1010,7 +1006,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(7bytes):
mov -7(%rdi), %eax
mov -7(%rsi), %ecx
@@ -1023,7 +1019,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(3bytes):
movzwl -3(%rdi), %eax
movzwl -3(%rsi), %ecx
@@ -1036,7 +1032,7 @@
ret
# endif
- ALIGN (4)
+ .p2align 4
L(68bytes):
movdqu -68(%rdi), %xmm2
movdqu -68(%rsi), %xmm1
@@ -1079,7 +1075,7 @@
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(69bytes):
movdqu -69(%rsi), %xmm1
movdqu -69(%rdi), %xmm2
@@ -1115,7 +1111,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(70bytes):
movdqu -70(%rsi), %xmm1
movdqu -70(%rdi), %xmm2
@@ -1151,7 +1147,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(71bytes):
movdqu -71(%rsi), %xmm1
movdqu -71(%rdi), %xmm2
@@ -1188,7 +1184,7 @@
ret
# endif
- ALIGN (4)
+ .p2align 4
L(72bytes):
movdqu -72(%rsi), %xmm1
movdqu -72(%rdi), %xmm2
@@ -1227,7 +1223,7 @@
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(73bytes):
movdqu -73(%rsi), %xmm1
movdqu -73(%rdi), %xmm2
@@ -1265,7 +1261,7 @@
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(74bytes):
movdqu -74(%rsi), %xmm1
movdqu -74(%rdi), %xmm2
@@ -1302,7 +1298,7 @@
movzwl -2(%rsi), %ecx
jmp L(diffin2bytes)
- ALIGN (4)
+ .p2align 4
L(75bytes):
movdqu -75(%rsi), %xmm1
movdqu -75(%rdi), %xmm2
@@ -1342,7 +1338,7 @@
xor %eax, %eax
ret
# endif
- ALIGN (4)
+ .p2align 4
L(76bytes):
movdqu -76(%rsi), %xmm1
movdqu -76(%rdi), %xmm2
@@ -1388,7 +1384,7 @@
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(77bytes):
movdqu -77(%rsi), %xmm1
movdqu -77(%rdi), %xmm2
@@ -1430,7 +1426,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(78bytes):
movdqu -78(%rsi), %xmm1
movdqu -78(%rdi), %xmm2
@@ -1470,7 +1466,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(79bytes):
movdqu -79(%rsi), %xmm1
movdqu -79(%rdi), %xmm2
@@ -1510,7 +1506,7 @@
xor %eax, %eax
ret
# endif
- ALIGN (4)
+ .p2align 4
L(64bytes):
movdqu -64(%rdi), %xmm2
movdqu -64(%rsi), %xmm1
@@ -1548,7 +1544,7 @@
/*
* Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block.
*/
- ALIGN (3)
+ .p2align 3
L(less16bytes):
movsbq %dl, %rdx
mov (%rsi, %rdx), %rcx
@@ -1585,7 +1581,7 @@
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(end):
and $0xff, %eax
and $0xff, %ecx
@@ -1599,7 +1595,7 @@
neg %eax
ret
- ALIGN (4)
+ .p2align 4
L(nequal_bigger):
ret
@@ -1611,7 +1607,7 @@
END (MEMCMP)
.section .rodata.sse4.1,"a",@progbits
- ALIGN (3)
+ .p2align 3
# ifndef USE_AS_WMEMCMP
L(table_64bytes):
.int JMPTBL (L(0bytes), L(table_64bytes))
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S Wed Oct 9 00:02:12 2013
@@ -23,10 +23,6 @@
# ifndef MEMCMP
# define MEMCMP __memcmp_ssse3
-# endif
-
-# ifndef ALIGN
-# define ALIGN(n) .p2align n
# endif
/* Warning!
@@ -50,7 +46,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
/* ECX >= 32. */
L(48bytesormore):
movdqu (%rdi), %xmm3
@@ -90,7 +86,7 @@
je L(shr_6)
jmp L(shr_7)
- ALIGN (2)
+ .p2align 2
L(next_unaligned_table):
cmp $8, %edx
je L(shr_8)
@@ -117,7 +113,7 @@
jmp L(shr_12)
# endif
- ALIGN (4)
+ .p2align 4
L(shr_0):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -137,7 +133,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_0_gobble):
movdqa (%rsi), %xmm0
xor %eax, %eax
@@ -180,7 +176,7 @@
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_1):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -207,7 +203,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_1_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -258,7 +254,7 @@
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_2):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -285,7 +281,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_2_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -335,7 +331,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_3):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -362,7 +358,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_3_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -414,7 +410,7 @@
# endif
- ALIGN (4)
+ .p2align 4
L(shr_4):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -441,7 +437,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_4_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -493,7 +489,7 @@
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_5):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -520,7 +516,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_5_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -570,7 +566,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_6):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -597,7 +593,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_6_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -647,7 +643,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_7):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -674,7 +670,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_7_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -726,7 +722,7 @@
# endif
- ALIGN (4)
+ .p2align 4
L(shr_8):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -753,7 +749,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_8_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -805,7 +801,7 @@
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_9):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -832,7 +828,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_9_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -882,7 +878,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_10):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -909,7 +905,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_10_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -959,7 +955,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_11):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -986,7 +982,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_11_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1038,7 +1034,7 @@
# endif
- ALIGN (4)
+ .p2align 4
L(shr_12):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1065,7 +1061,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_12_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1117,7 +1113,7 @@
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_13):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1144,7 +1140,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_13_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1194,7 +1190,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_14):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1221,7 +1217,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_14_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1271,7 +1267,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_15):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1298,7 +1294,7 @@
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_15_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1348,7 +1344,7 @@
add %rcx, %rdi
jmp L(less48bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(exit):
pmovmskb %xmm1, %r8d
sub $0xffff, %r8d
@@ -1389,56 +1385,56 @@
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte16):
movzbl -16(%rdi), %eax
movzbl -16(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte17):
movzbl -15(%rdi), %eax
movzbl -15(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte18):
movzbl -14(%rdi), %eax
movzbl -14(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte19):
movzbl -13(%rdi), %eax
movzbl -13(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte20):
movzbl -12(%rdi), %eax
movzbl -12(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte21):
movzbl -11(%rdi), %eax
movzbl -11(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte22):
movzbl -10(%rdi), %eax
movzbl -10(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(next_24_bytes):
lea 8(%rdi), %rdi
lea 8(%rsi), %rsi
@@ -1479,14 +1475,14 @@
jne L(find_diff)
ret
- ALIGN (4)
+ .p2align 4
L(second_double_word):
mov -12(%rdi), %eax
cmp -12(%rsi), %eax
jne L(find_diff)
ret
- ALIGN (4)
+ .p2align 4
L(next_two_double_words):
and $15, %dh
jz L(fourth_double_word)
@@ -1495,7 +1491,7 @@
jne L(find_diff)
ret
- ALIGN (4)
+ .p2align 4
L(fourth_double_word):
mov -4(%rdi), %eax
cmp -4(%rsi), %eax
@@ -1503,7 +1499,7 @@
ret
# endif
- ALIGN (4)
+ .p2align 4
L(less48bytes):
cmp $8, %ecx
jae L(more8bytes)
@@ -1527,7 +1523,7 @@
jmp L(4bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more8bytes):
cmp $16, %ecx
jae L(more16bytes)
@@ -1551,7 +1547,7 @@
jmp L(12bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more16bytes):
cmp $24, %ecx
jae L(more24bytes)
@@ -1575,7 +1571,7 @@
jmp L(20bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more24bytes):
cmp $32, %ecx
jae L(more32bytes)
@@ -1599,7 +1595,7 @@
jmp L(28bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more32bytes):
cmp $40, %ecx
jae L(more40bytes)
@@ -1623,7 +1619,7 @@
jmp L(36bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more40bytes):
cmp $40, %ecx
je L(40bytes)
@@ -1642,7 +1638,7 @@
je L(46bytes)
jmp L(47bytes)
- ALIGN (4)
+ .p2align 4
L(44bytes):
movl -44(%rdi), %eax
movl -44(%rsi), %ecx
@@ -1702,7 +1698,7 @@
xor %eax, %eax
ret
# else
- ALIGN (4)
+ .p2align 4
L(44bytes):
movl -44(%rdi), %eax
cmp -44(%rsi), %eax
@@ -1753,7 +1749,7 @@
# endif
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(45bytes):
movl -45(%rdi), %eax
movl -45(%rsi), %ecx
@@ -1816,7 +1812,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(46bytes):
movl -46(%rdi), %eax
movl -46(%rsi), %ecx
@@ -1882,7 +1878,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(47bytes):
movl -47(%rdi), %eax
movl -47(%rsi), %ecx
@@ -1951,7 +1947,7 @@
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(find_diff):
cmpb %cl, %al
jne L(set)
@@ -1973,19 +1969,19 @@
# else
/* for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(find_diff):
mov $1, %eax
jg L(find_diff_bigger)
neg %eax
ret
- ALIGN (4)
+ .p2align 4
L(find_diff_bigger):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(equal):
xor %eax, %eax
ret
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S Wed Oct 9 00:02:12 2013
@@ -20,10 +20,6 @@
#include "asm-syntax.h"
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
ENTRY(__memcpy_sse2_unaligned)
movq %rsi, %rax
@@ -44,7 +40,7 @@
movq %rdi, %rax
ret
.p2align 4,,10
- ALIGN(4)
+ .p2align 4
.L31:
movdqu 16(%rsi), %xmm8
cmpq $64, %rdx
@@ -77,7 +73,7 @@
leaq 32(%r10), %r8
leaq 48(%r10), %rax
.p2align 4,,10
- ALIGN(4)
+ .p2align 4
L(loop):
movdqu (%rcx,%r10), %xmm8
movdqa %xmm8, (%rcx)
@@ -151,7 +147,7 @@
.L3:
leaq -1(%rdx), %rax
.p2align 4,,10
- ALIGN(4)
+ .p2align 4
.L11:
movzbl (%rsi,%rax), %edx
movb %dl, (%rdi,%rax)
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S Wed Oct 9 00:02:12 2013
@@ -29,10 +29,6 @@
#ifndef MEMCPY
# define MEMCPY __memcpy_ssse3_back
# define MEMCPY_CHK __memcpy_chk_ssse3_back
-#endif
-
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
#endif
#define JMPTBL(I, B) I - B
@@ -87,7 +83,7 @@
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
- ALIGN (4)
+ .p2align 4
L(144bytesormore):
#ifndef USE_AS_MEMMOVE
@@ -119,7 +115,7 @@
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(copy_backward):
#ifdef DATA_CACHE_SIZE
mov $DATA_CACHE_SIZE, %RCX_LP
@@ -149,7 +145,7 @@
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(shl_0):
mov %rdx, %r9
@@ -162,7 +158,7 @@
#endif
jae L(gobble_mem_fwd)
sub $0x80, %rdx
- ALIGN (4)
+ .p2align 4
L(shl_0_loop):
movdqa (%rsi), %xmm1
movdqa %xmm1, (%rdi)
@@ -190,7 +186,7 @@
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_bwd):
sub $0x80, %rdx
L(copy_backward_loop):
@@ -221,7 +217,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1):
sub $0x80, %rdx
movaps -0x01(%rsi), %xmm1
@@ -258,7 +254,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1_bwd):
movaps -0x01(%rsi), %xmm1
@@ -304,7 +300,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2):
sub $0x80, %rdx
movaps -0x02(%rsi), %xmm1
@@ -341,7 +337,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2_bwd):
movaps -0x02(%rsi), %xmm1
@@ -387,7 +383,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3):
sub $0x80, %rdx
movaps -0x03(%rsi), %xmm1
@@ -424,7 +420,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3_bwd):
movaps -0x03(%rsi), %xmm1
@@ -470,7 +466,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4):
sub $0x80, %rdx
movaps -0x04(%rsi), %xmm1
@@ -507,7 +503,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4_bwd):
movaps -0x04(%rsi), %xmm1
@@ -553,7 +549,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5):
sub $0x80, %rdx
movaps -0x05(%rsi), %xmm1
@@ -590,7 +586,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5_bwd):
movaps -0x05(%rsi), %xmm1
@@ -636,7 +632,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6):
sub $0x80, %rdx
movaps -0x06(%rsi), %xmm1
@@ -673,7 +669,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6_bwd):
movaps -0x06(%rsi), %xmm1
@@ -719,7 +715,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7):
sub $0x80, %rdx
movaps -0x07(%rsi), %xmm1
@@ -756,7 +752,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7_bwd):
movaps -0x07(%rsi), %xmm1
@@ -802,7 +798,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8):
sub $0x80, %rdx
movaps -0x08(%rsi), %xmm1
@@ -839,7 +835,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8_bwd):
movaps -0x08(%rsi), %xmm1
@@ -886,7 +882,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9):
sub $0x80, %rdx
movaps -0x09(%rsi), %xmm1
@@ -923,7 +919,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9_bwd):
movaps -0x09(%rsi), %xmm1
@@ -969,7 +965,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10):
sub $0x80, %rdx
movaps -0x0a(%rsi), %xmm1
@@ -1006,7 +1002,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10_bwd):
movaps -0x0a(%rsi), %xmm1
@@ -1052,7 +1048,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11):
sub $0x80, %rdx
movaps -0x0b(%rsi), %xmm1
@@ -1089,7 +1085,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11_bwd):
movaps -0x0b(%rsi), %xmm1
@@ -1135,7 +1131,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12):
sub $0x80, %rdx
movdqa -0x0c(%rsi), %xmm1
@@ -1173,7 +1169,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12_bwd):
movaps -0x0c(%rsi), %xmm1
@@ -1219,7 +1215,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13):
sub $0x80, %rdx
movaps -0x0d(%rsi), %xmm1
@@ -1256,7 +1252,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13_bwd):
movaps -0x0d(%rsi), %xmm1
@@ -1302,7 +1298,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14):
sub $0x80, %rdx
movaps -0x0e(%rsi), %xmm1
@@ -1339,7 +1335,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14_bwd):
movaps -0x0e(%rsi), %xmm1
@@ -1385,7 +1381,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15):
sub $0x80, %rdx
movaps -0x0f(%rsi), %xmm1
@@ -1422,7 +1418,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15_bwd):
movaps -0x0f(%rsi), %xmm1
@@ -1468,7 +1464,7 @@
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(gobble_mem_fwd):
movdqu (%rsi), %xmm1
movdqu %xmm0, (%r8)
@@ -1570,7 +1566,7 @@
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(gobble_mem_bwd):
add %rdx, %rsi
add %rdx, %rdi
@@ -2833,7 +2829,7 @@
END (MEMCPY)
.section .rodata.ssse3,"a",@progbits
- ALIGN (3)
+ .p2align 3
L(table_144_bytes_bwd):
.int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
.int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
@@ -2980,7 +2976,7 @@
.int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
.int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
- ALIGN (3)
+ .p2align 3
L(table_144_bytes_fwd):
.int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
.int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
@@ -3127,7 +3123,7 @@
.int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
.int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
- ALIGN (3)
+ .p2align 3
L(shl_table_fwd):
.int JMPTBL (L(shl_0), L(shl_table_fwd))
.int JMPTBL (L(shl_1), L(shl_table_fwd))
@@ -3146,7 +3142,7 @@
.int JMPTBL (L(shl_14), L(shl_table_fwd))
.int JMPTBL (L(shl_15), L(shl_table_fwd))
- ALIGN (3)
+ .p2align 3
L(shl_table_bwd):
.int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
.int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S Wed Oct 9 00:02:12 2013
@@ -29,10 +29,6 @@
#ifndef MEMCPY
# define MEMCPY __memcpy_ssse3
# define MEMCPY_CHK __memcpy_chk_ssse3
-#endif
-
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
#endif
#define JMPTBL(I, B) I - B
@@ -80,7 +76,7 @@
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(80bytesormore):
#ifndef USE_AS_MEMMOVE
cmp %dil, %sil
@@ -113,7 +109,7 @@
#endif
BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %r9, 4)
- ALIGN (4)
+ .p2align 4
L(copy_backward):
movdqu -16(%rsi, %rdx), %xmm0
add %rdx, %rsi
@@ -144,7 +140,7 @@
#endif
BRANCH_TO_JMPTBL_ENTRY (L(shl_table_bwd), %r9, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0):
sub $16, %rdx
movdqa (%rsi), %xmm1
@@ -172,7 +168,7 @@
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble):
#ifdef DATA_CACHE_SIZE_HALF
cmp $DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -228,7 +224,7 @@
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble_mem_loop):
prefetcht0 0x1c0(%rsi)
prefetcht0 0x280(%rsi)
@@ -287,7 +283,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_bwd):
sub $16, %rdx
movdqa -0x10(%rsi), %xmm1
@@ -313,7 +309,7 @@
L(shl_0_less_64bytes_bwd):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble_bwd):
#ifdef DATA_CACHE_SIZE_HALF
cmp $DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -367,7 +363,7 @@
L(shl_0_gobble_bwd_less_64bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble_mem_bwd_loop):
prefetcht0 -0x1c0(%rsi)
prefetcht0 -0x280(%rsi)
@@ -423,7 +419,7 @@
L(shl_0_mem_bwd_less_32bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1):
lea (L(shl_1_loop_L1)-L(shl_1))(%r9), %r9
cmp %rcx, %rdx
@@ -466,7 +462,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1_bwd):
lea (L(shl_1_bwd_loop_L1)-L(shl_1_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -508,7 +504,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2):
lea (L(shl_2_loop_L1)-L(shl_2))(%r9), %r9
cmp %rcx, %rdx
@@ -551,7 +547,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2_bwd):
lea (L(shl_2_bwd_loop_L1)-L(shl_2_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -593,7 +589,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3):
lea (L(shl_3_loop_L1)-L(shl_3))(%r9), %r9
cmp %rcx, %rdx
@@ -636,7 +632,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3_bwd):
lea (L(shl_3_bwd_loop_L1)-L(shl_3_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -678,7 +674,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4):
lea (L(shl_4_loop_L1)-L(shl_4))(%r9), %r9
cmp %rcx, %rdx
@@ -721,7 +717,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4_bwd):
lea (L(shl_4_bwd_loop_L1)-L(shl_4_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -763,7 +759,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5):
lea (L(shl_5_loop_L1)-L(shl_5))(%r9), %r9
cmp %rcx, %rdx
@@ -806,7 +802,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5_bwd):
lea (L(shl_5_bwd_loop_L1)-L(shl_5_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -848,7 +844,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6):
lea (L(shl_6_loop_L1)-L(shl_6))(%r9), %r9
cmp %rcx, %rdx
@@ -891,7 +887,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6_bwd):
lea (L(shl_6_bwd_loop_L1)-L(shl_6_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -933,7 +929,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7):
lea (L(shl_7_loop_L1)-L(shl_7))(%r9), %r9
cmp %rcx, %rdx
@@ -976,7 +972,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7_bwd):
lea (L(shl_7_bwd_loop_L1)-L(shl_7_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1018,7 +1014,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8):
lea (L(shl_8_loop_L1)-L(shl_8))(%r9), %r9
cmp %rcx, %rdx
@@ -1051,7 +1047,7 @@
movaps %xmm5, -0x10(%rdi)
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(shl_8_end):
lea 64(%rdx), %rdx
movaps %xmm4, -0x20(%rdi)
@@ -1061,7 +1057,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8_bwd):
lea (L(shl_8_bwd_loop_L1)-L(shl_8_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1103,7 +1099,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9):
lea (L(shl_9_loop_L1)-L(shl_9))(%r9), %r9
cmp %rcx, %rdx
@@ -1146,7 +1142,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9_bwd):
lea (L(shl_9_bwd_loop_L1)-L(shl_9_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1188,7 +1184,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10):
lea (L(shl_10_loop_L1)-L(shl_10))(%r9), %r9
cmp %rcx, %rdx
@@ -1231,7 +1227,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10_bwd):
lea (L(shl_10_bwd_loop_L1)-L(shl_10_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1273,7 +1269,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11):
lea (L(shl_11_loop_L1)-L(shl_11))(%r9), %r9
cmp %rcx, %rdx
@@ -1316,7 +1312,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11_bwd):
lea (L(shl_11_bwd_loop_L1)-L(shl_11_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1358,7 +1354,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12):
lea (L(shl_12_loop_L1)-L(shl_12))(%r9), %r9
cmp %rcx, %rdx
@@ -1401,7 +1397,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12_bwd):
lea (L(shl_12_bwd_loop_L1)-L(shl_12_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1443,7 +1439,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13):
lea (L(shl_13_loop_L1)-L(shl_13))(%r9), %r9
cmp %rcx, %rdx
@@ -1486,7 +1482,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13_bwd):
lea (L(shl_13_bwd_loop_L1)-L(shl_13_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1528,7 +1524,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14):
lea (L(shl_14_loop_L1)-L(shl_14))(%r9), %r9
cmp %rcx, %rdx
@@ -1571,7 +1567,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14_bwd):
lea (L(shl_14_bwd_loop_L1)-L(shl_14_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1613,7 +1609,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15):
lea (L(shl_15_loop_L1)-L(shl_15))(%r9), %r9
cmp %rcx, %rdx
@@ -1656,7 +1652,7 @@
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15_bwd):
lea (L(shl_15_bwd_loop_L1)-L(shl_15_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1698,7 +1694,7 @@
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(write_72bytes):
movdqu -72(%rsi), %xmm0
movdqu -56(%rsi), %xmm1
@@ -1716,7 +1712,7 @@
mov %rcx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_64bytes):
movdqu -64(%rsi), %xmm0
mov -48(%rsi), %rcx
@@ -1734,7 +1730,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_56bytes):
movdqu -56(%rsi), %xmm0
mov -40(%rsi), %r8
@@ -1750,7 +1746,7 @@
mov %rcx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_48bytes):
mov -48(%rsi), %rcx
mov -40(%rsi), %r8
@@ -1766,7 +1762,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_40bytes):
mov -40(%rsi), %r8
mov -32(%rsi), %r9
@@ -1780,7 +1776,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_32bytes):
mov -32(%rsi), %r9
mov -24(%rsi), %r10
@@ -1792,7 +1788,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_24bytes):
mov -24(%rsi), %r10
mov -16(%rsi), %r11
@@ -1802,7 +1798,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_16bytes):
mov -16(%rsi), %r11
mov -8(%rsi), %rdx
@@ -1810,14 +1806,14 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_8bytes):
mov -8(%rsi), %rdx
mov %rdx, -8(%rdi)
L(write_0bytes):
ret
- ALIGN (4)
+ .p2align 4
L(write_73bytes):
movdqu -73(%rsi), %xmm0
movdqu -57(%rsi), %xmm1
@@ -1837,7 +1833,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_65bytes):
movdqu -65(%rsi), %xmm0
movdqu -49(%rsi), %xmm1
@@ -1855,7 +1851,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_57bytes):
movdqu -57(%rsi), %xmm0
mov -41(%rsi), %r8
@@ -1873,7 +1869,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_49bytes):
movdqu -49(%rsi), %xmm0
mov -33(%rsi), %r9
@@ -1889,7 +1885,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_41bytes):
mov -41(%rsi), %r8
mov -33(%rsi), %r9
@@ -1905,7 +1901,7 @@
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_33bytes):
mov -33(%rsi), %r9
mov -25(%rsi), %r10
@@ -1919,7 +1915,7 @@
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_25bytes):
mov -25(%rsi), %r10
mov -17(%rsi), %r11
@@ -1931,7 +1927,7 @@
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_17bytes):
mov -17(%rsi), %r11
mov -9(%rsi), %rcx
@@ -1941,7 +1937,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_9bytes):
mov -9(%rsi), %rcx
mov -4(%rsi), %edx
@@ -1949,13 +1945,13 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_1bytes):
mov -1(%rsi), %dl
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_74bytes):
movdqu -74(%rsi), %xmm0
movdqu -58(%rsi), %xmm1
@@ -1975,7 +1971,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_66bytes):
movdqu -66(%rsi), %xmm0
movdqu -50(%rsi), %xmm1
@@ -1995,7 +1991,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_58bytes):
movdqu -58(%rsi), %xmm1
mov -42(%rsi), %r8
@@ -2013,7 +2009,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_50bytes):
movdqu -50(%rsi), %xmm0
mov -34(%rsi), %r9
@@ -2029,7 +2025,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_42bytes):
mov -42(%rsi), %r8
mov -34(%rsi), %r9
@@ -2045,7 +2041,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_34bytes):
mov -34(%rsi), %r9
mov -26(%rsi), %r10
@@ -2059,7 +2055,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_26bytes):
mov -26(%rsi), %r10
mov -18(%rsi), %r11
@@ -2071,7 +2067,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_18bytes):
mov -18(%rsi), %r11
mov -10(%rsi), %rcx
@@ -2081,7 +2077,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_10bytes):
mov -10(%rsi), %rcx
mov -4(%rsi), %edx
@@ -2089,13 +2085,13 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_2bytes):
mov -2(%rsi), %dx
mov %dx, -2(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_75bytes):
movdqu -75(%rsi), %xmm0
movdqu -59(%rsi), %xmm1
@@ -2115,7 +2111,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_67bytes):
movdqu -67(%rsi), %xmm0
movdqu -59(%rsi), %xmm1
@@ -2135,7 +2131,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_59bytes):
movdqu -59(%rsi), %xmm0
mov -43(%rsi), %r8
@@ -2153,7 +2149,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_51bytes):
movdqu -51(%rsi), %xmm0
mov -35(%rsi), %r9
@@ -2169,7 +2165,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_43bytes):
mov -43(%rsi), %r8
mov -35(%rsi), %r9
@@ -2185,7 +2181,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_35bytes):
mov -35(%rsi), %r9
mov -27(%rsi), %r10
@@ -2199,7 +2195,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_27bytes):
mov -27(%rsi), %r10
mov -19(%rsi), %r11
@@ -2211,7 +2207,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_19bytes):
mov -19(%rsi), %r11
mov -11(%rsi), %rcx
@@ -2221,7 +2217,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_11bytes):
mov -11(%rsi), %rcx
mov -4(%rsi), %edx
@@ -2229,7 +2225,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_3bytes):
mov -3(%rsi), %dx
mov -2(%rsi), %cx
@@ -2237,7 +2233,7 @@
mov %cx, -2(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_76bytes):
movdqu -76(%rsi), %xmm0
movdqu -60(%rsi), %xmm1
@@ -2257,7 +2253,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_68bytes):
movdqu -68(%rsi), %xmm0
movdqu -52(%rsi), %xmm1
@@ -2275,7 +2271,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_60bytes):
movdqu -60(%rsi), %xmm0
mov -44(%rsi), %r8
@@ -2293,7 +2289,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_52bytes):
movdqu -52(%rsi), %xmm0
mov -36(%rsi), %r9
@@ -2309,7 +2305,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_44bytes):
mov -44(%rsi), %r8
mov -36(%rsi), %r9
@@ -2325,7 +2321,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_36bytes):
mov -36(%rsi), %r9
mov -28(%rsi), %r10
@@ -2339,7 +2335,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_28bytes):
mov -28(%rsi), %r10
mov -20(%rsi), %r11
@@ -2351,7 +2347,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_20bytes):
mov -20(%rsi), %r11
mov -12(%rsi), %rcx
@@ -2361,7 +2357,7 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_12bytes):
mov -12(%rsi), %rcx
mov -4(%rsi), %edx
@@ -2369,13 +2365,13 @@
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_4bytes):
mov -4(%rsi), %edx
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_77bytes):
movdqu -77(%rsi), %xmm0
movdqu -61(%rsi), %xmm1
@@ -2395,7 +2391,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_69bytes):
movdqu -69(%rsi), %xmm0
movdqu -53(%rsi), %xmm1
@@ -2413,7 +2409,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_61bytes):
movdqu -61(%rsi), %xmm0
mov -45(%rsi), %r8
@@ -2431,7 +2427,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_53bytes):
movdqu -53(%rsi), %xmm0
mov -45(%rsi), %r8
@@ -2448,7 +2444,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_45bytes):
mov -45(%rsi), %r8
mov -37(%rsi), %r9
@@ -2464,7 +2460,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_37bytes):
mov -37(%rsi), %r9
mov -29(%rsi), %r10
@@ -2478,7 +2474,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_29bytes):
mov -29(%rsi), %r10
mov -21(%rsi), %r11
@@ -2490,7 +2486,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_21bytes):
mov -21(%rsi), %r11
mov -13(%rsi), %rcx
@@ -2500,7 +2496,7 @@
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_13bytes):
mov -13(%rsi), %rcx
[... 426 lines stripped ...]
_______________________________________________
Commits mailing list
Commits@xxxxxxxxxx
http://eglibc.org/cgi-bin/mailman/listinfo/commits