[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commits] r24205 - in /fsf/trunk/libc: ./ locale/ locale/programs/ localedata/locales/ manual/ ports/ ports/sysdeps/m68k/ sysdeps/gene...



Author: eglibc
Date: Wed Oct  9 00:02:12 2013
New Revision: 24205

Log:
Import glibc-mainline for 2013-10-09

Modified:
    fsf/trunk/libc/ChangeLog
    fsf/trunk/libc/NEWS
    fsf/trunk/libc/locale/loadlocale.c
    fsf/trunk/libc/locale/programs/3level.h
    fsf/trunk/libc/locale/programs/ld-collate.c
    fsf/trunk/libc/locale/programs/ld-ctype.c
    fsf/trunk/libc/locale/programs/locfile.c
    fsf/trunk/libc/localedata/locales/pap_AN
    fsf/trunk/libc/localedata/locales/pap_AW
    fsf/trunk/libc/localedata/locales/pap_CW
    fsf/trunk/libc/manual/socket.texi
    fsf/trunk/libc/ports/ChangeLog.m68k
    fsf/trunk/libc/ports/sysdeps/m68k/start.S
    fsf/trunk/libc/sysdeps/generic/math_private.h
    fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_exp.c
    fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_pow.c
    fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_sin.c
    fsf/trunk/libc/sysdeps/ieee754/dbl-64/sincos32.c
    fsf/trunk/libc/sysdeps/x86_64/memset.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
    fsf/trunk/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
    fsf/trunk/libc/sysdeps/x86_64/strchr.S
    fsf/trunk/libc/sysdeps/x86_64/strrchr.S

Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Wed Oct  9 00:02:12 2013
@@ -1,3 +1,66 @@
+2013-10-08  Yogesh Chaudhari  <mr.yogesh@xxxxxxxxx>
+
+	[BZ #156]
+	* manual/socket.texi: Added statement about buffer
+	for gethostbyname2_r.
+
+2013-10-08  Ondřej Bílka  <neleai@xxxxxxxxx>
+
+	* sysdeps/x86_64/memset.S (ALIGN): Macro removed.
+	Use .p2align directive instead, throughout.
+	* sysdeps/x86_64/multiarch/memcmp-sse4.S: Likewise.
+	* sysdeps/x86_64/multiarch/memcmp-ssse3.S: Likewise.
+	* sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: Likewise.
+	* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise.
+	* sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
+	* sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S: Likewise.
+	* sysdeps/x86_64/strchr.S: Likewise.
+	* sysdeps/x86_64/strrchr.S: Likewise.
+
+2013-10-08  Siddhesh Poyarekar  <siddhesh@xxxxxxxxxx>
+
+	* sysdeps/ieee754/dbl-64/e_pow.c: Fix code formatting.
+
+	* sysdeps/ieee754/dbl-64/e_exp.c: Fix code formatting.
+
+	* sysdeps/generic/math_private.h (__mpsin1): Remove
+	declaration.
+	(__mpcos1): Likewise.
+	(__mpsin): New argument __range_reduce.
+	(__mpcos): Likewise.
+	* sysdeps/ieee754/dbl-64/s_sin.c: Likewise.
+	(slow): Use __mpsin and __mpcos.
+	(slow1): Likewise.
+	(slow2): Likewise.
+	(sloww): Likewise.
+	(sloww1): Likewise.
+	(sloww2): Likewise.
+	(bsloww): Likewise.
+	(bsloww1): Likewise.
+	(bsloww2): Likewise.
+	(cslow2): Likewise.
+	(csloww): Likewise.
+	(csloww1): Likewise.
+	(csloww2): Likewise.
+	* sysdeps/ieee754/dbl-64/sincos32.c (__mpsin): Add argument
+	range_reduce.  Merge in __mpsin1.
+	(__mpcos): Likewise.
+	(__mpsin1): Remove.
+	(__mpcos1): Likewise.
+
+2013-10-07  Joseph Myers  <joseph@xxxxxxxxxxxxxxxx>
+
+	* locale/loadlocale.c (_nl_intern_locale_data): Use
+	LOCFILE_ALIGNED_P.
+	* locale/programs/3level.h (CONCAT(add_locale_,TABLE)): Use
+	LOCFILE_ALIGN_UP and LOCFILE_ALIGN.
+	* locale/programs/ld-collate.c (obstack_int32_grow): Assert that
+	obstack data is appropriately aligned.
+	(obstack_int32_grow_fast): Likewise.
+	* locale/programs/ld-ctype.c (ctype_output): Use LOCFILE_ALIGN.
+	* locale/programs/locfile.c (add_locale_uint32): Likewise.
+	(add_locale_uint32_array): Likewise.
+
 2013-10-07  Siddhesh Poyarekar  <siddhesh@xxxxxxxxxx>
 
 	* benchtests/Makefile: Remove ARGLIST and RET variables.

Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Wed Oct  9 00:02:12 2013
@@ -9,11 +9,10 @@
 
 * The following bugs are resolved with this release:
 
-  431, 13982, 13985, 14155, 14547, 14699, 15048, 15400, 15427, 15522,
+  156, 431, 13982, 13985, 14155, 14547, 14699, 15048, 15400, 15427, 15522,
   15531, 15532, 15608, 15609, 15610, 15632, 15640, 15680, 15681, 15723,
   15734, 15735, 15736, 15748, 15749, 15754, 15760, 15797, 15844, 15849,
   15855, 15856, 15857, 15859, 15867, 15886, 15887, 15890, 15892, 15893,
-  15895, 15897, 15905, 15909, 15919, 15921, 15887, 15890, 15892, 15893,
   15895, 15897, 15905, 15909, 15919, 15921, 15923, 15939, 15963, 15966,
   15988.
 

Modified: fsf/trunk/libc/locale/loadlocale.c
==============================================================================
--- fsf/trunk/libc/locale/loadlocale.c (original)
+++ fsf/trunk/libc/locale/loadlocale.c Wed Oct  9 00:02:12 2013
@@ -148,7 +148,7 @@
 	newdata->values[cnt].string = newdata->filedata + idx;
       else
 	{
-	  if (idx % __alignof__ (u_int32_t) != 0)
+	  if (!LOCFILE_ALIGNED_P (idx))
 	    goto puntdata;
 	  newdata->values[cnt].word =
 	    *((const u_int32_t *) (newdata->filedata + idx));

Modified: fsf/trunk/libc/locale/programs/3level.h
==============================================================================
--- fsf/trunk/libc/locale/programs/3level.h (original)
+++ fsf/trunk/libc/locale/programs/3level.h Wed Oct  9 00:02:12 2013
@@ -270,7 +270,7 @@
     + t->level1_size * sizeof (uint32_t)
     + (t->level2_size << t->q) * sizeof (uint32_t)
     + (t->level3_size << t->p) * sizeof (ELEMENT);
-  t->result_size = (last_offset + 3) & ~3ul;
+  t->result_size = LOCFILE_ALIGN_UP (last_offset);
 
   level2_offset =
     5 * sizeof (uint32_t)
@@ -308,7 +308,7 @@
 			     t->level3_size << t->p);
   else
     abort ();
-  align_locale_data (file, 4);
+  align_locale_data (file, LOCFILE_ALIGN);
   end_locale_structure (file);
 
   if (t->level1_alloc > 0)

Modified: fsf/trunk/libc/locale/programs/ld-collate.c
==============================================================================
--- fsf/trunk/libc/locale/programs/ld-collate.c (original)
+++ fsf/trunk/libc/locale/programs/ld-collate.c Wed Oct  9 00:02:12 2013
@@ -44,6 +44,7 @@
 __attribute ((always_inline))
 obstack_int32_grow (struct obstack *obstack, int32_t data)
 {
+  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
   data = maybe_swap_uint32 (data);
   if (sizeof (int32_t) == sizeof (int))
     obstack_int_grow (obstack, data);
@@ -55,6 +56,7 @@
 __attribute ((always_inline))
 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
 {
+  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
   data = maybe_swap_uint32 (data);
   if (sizeof (int32_t) == sizeof (int))
     obstack_int_grow_fast (obstack, data);

Modified: fsf/trunk/libc/locale/programs/ld-ctype.c
==============================================================================
--- fsf/trunk/libc/locale/programs/ld-ctype.c (original)
+++ fsf/trunk/libc/locale/programs/ld-ctype.c Wed Oct  9 00:02:12 2013
@@ -1032,7 +1032,7 @@
 	    for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 	      add_locale_string (&file, ctype->classnames[cnt]);
 	    add_locale_char (&file, 0);
-	    align_locale_data (&file, 4);
+	    align_locale_data (&file, LOCFILE_ALIGN);
 	    end_locale_structure (&file);
 	    break;
 
@@ -1042,7 +1042,7 @@
 	    for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 	      add_locale_string (&file, ctype->mapnames[cnt]);
 	    add_locale_char (&file, 0);
-	    align_locale_data (&file, 4);
+	    align_locale_data (&file, LOCFILE_ALIGN);
 	    end_locale_structure (&file);
 	    break;
 

Modified: fsf/trunk/libc/locale/programs/locfile.c
==============================================================================
--- fsf/trunk/libc/locale/programs/locfile.c (original)
+++ fsf/trunk/libc/locale/programs/locfile.c Wed Oct  9 00:02:12 2013
@@ -627,7 +627,7 @@
 void
 add_locale_uint32 (struct locale_file *file, uint32_t value)
 {
-  align_locale_data (file, sizeof (uint32_t));
+  align_locale_data (file, LOCFILE_ALIGN);
   record_offset (file);
   value = maybe_swap_uint32 (value);
   obstack_grow (&file->data, &value, sizeof (value));
@@ -639,7 +639,7 @@
 add_locale_uint32_array (struct locale_file *file,
 			 const uint32_t *data, size_t n_elems)
 {
-  align_locale_data (file, sizeof (uint32_t));
+  align_locale_data (file, LOCFILE_ALIGN);
   record_offset (file);
   obstack_grow (&file->data, data, n_elems * sizeof (uint32_t));
   maybe_swap_uint32_obstack (&file->data, n_elems);

Modified: fsf/trunk/libc/localedata/locales/pap_AN
==============================================================================
--- fsf/trunk/libc/localedata/locales/pap_AN (original)
+++ fsf/trunk/libc/localedata/locales/pap_AN Wed Oct  9 00:02:12 2013
@@ -150,8 +150,10 @@
 LC_TELEPHONE
 tel_int_fmt "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025><U006C>"
 % FIXME FIND tel_dom_fmt
-int_select "00"
-int_prefix "599"
+% 00
+int_select  "<U0030><U0030>"
+% 599
+int_prefix  "<U0035><U0039><U0039>"
 END LC_TELEPHONE
 
 LC_NAME

Modified: fsf/trunk/libc/localedata/locales/pap_AW
==============================================================================
--- fsf/trunk/libc/localedata/locales/pap_AW (original)
+++ fsf/trunk/libc/localedata/locales/pap_AW Wed Oct  9 00:02:12 2013
@@ -160,8 +160,10 @@
 LC_TELEPHONE
 tel_int_fmt "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025><U006C>"
 % FIXME FIND tel_dom_fmt
-int_select "00"
-int_prefix "599"
+% 00
+int_select  "<U0030><U0030>"
+% 599
+int_prefix  "<U0035><U0039><U0039>"
 END LC_TELEPHONE
 
 LC_NAME

Modified: fsf/trunk/libc/localedata/locales/pap_CW
==============================================================================
--- fsf/trunk/libc/localedata/locales/pap_CW (original)
+++ fsf/trunk/libc/localedata/locales/pap_CW Wed Oct  9 00:02:12 2013
@@ -160,8 +160,10 @@
 LC_TELEPHONE
 tel_int_fmt "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025><U006C>"
 % FIXME FIND tel_dom_fmt
-int_select "00"
-int_prefix "599"
+% 00
+int_select  "<U0030><U0030>"
+% 599
+int_prefix  "<U0035><U0039><U0039>"
 END LC_TELEPHONE
 
 LC_NAME

Modified: fsf/trunk/libc/manual/socket.texi
==============================================================================
--- fsf/trunk/libc/manual/socket.texi (original)
+++ fsf/trunk/libc/manual/socket.texi Wed Oct  9 00:02:12 2013
@@ -1290,14 +1290,17 @@
 parameters.
 
 A pointer to the buffer, in which the result is stored, is available in
-@code{*@var{result}} after the function call successfully returned.  If
-an error occurs or if no entry is found, the pointer @code{*@var{result}}
-is a null pointer.  Success is signalled by a zero return value.  If the
-function failed the return value is an error number.  In addition to the
-errors defined for @code{gethostbyname} it can also be @code{ERANGE}.
-In this case the call should be repeated with a larger buffer.
-Additional error information is not stored in the global variable
-@code{h_errno} but instead in the object pointed to by @var{h_errnop}.
+@code{*@var{result}} after the function call successfully returned. The
+buffer passed as the @var{buf} parameter can be freed only once the caller
+has finished with the result hostent struct, or has copied it including all
+the other memory that it points to. If an error occurs or if no entry is
+found, the pointer @code{*@var{result}} is a null pointer. Success is
+signalled by a zero return value.  If the function failed the return value
+is an error number.  In addition to the errors defined for
+@code{gethostbyname} it can also be @code{ERANGE}. In this case the call
+should be repeated with a larger buffer. Additional error information is
+not stored in the global variable @code{h_errno} but instead in the object
+pointed to by @var{h_errnop}.
 
 Here's a small example:
 @smallexample

Modified: fsf/trunk/libc/ports/ChangeLog.m68k
==============================================================================
--- fsf/trunk/libc/ports/ChangeLog.m68k (original)
+++ fsf/trunk/libc/ports/ChangeLog.m68k Wed Oct  9 00:02:12 2013
@@ -1,3 +1,7 @@
+2013-10-08  Andreas Schwab  <schwab@xxxxxxx>
+
+	* sysdeps/m68k/start.S [SHARED]: Use PIC.
+
 2013-09-20  Andreas Schwab  <schwab@xxxxxxxxxxxxxx>
 
 	* sysdeps/m68k/ffs.c (__ffs): Define as hidden.

Modified: fsf/trunk/libc/ports/sysdeps/m68k/start.S
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/m68k/start.S (original)
+++ fsf/trunk/libc/ports/sysdeps/m68k/start.S Wed Oct  9 00:02:12 2013
@@ -52,6 +52,8 @@
 					NULL
 */
 
+#include <sysdep.h>
+
 	.text
 	.globl _start
 	.type _start,@function
@@ -74,6 +76,24 @@
 	pea (%a1)		/* Push address of the shared library
 				   termination function.  */
 
+#ifdef SHARED
+	/* Load PIC register.  */
+	LOAD_GOT (%a5)
+
+	/* Push the address of our own entry points to `.fini' and
+	   `.init'.  */
+	move.l __libc_csu_fini@GOT(%a5), -(%sp)
+	move.l __libc_csu_init@GOT(%a5), -(%sp)
+
+	pea (%a0)		/* Push second argument: argv.  */
+	move.l %d0, -(%sp)	/* Push first argument: argc.  */
+
+	move.l main@GOT(%a5), -(%sp)
+
+	/* Call the user's main function, and exit with its value.  But
+	   let the libc call main.  */
+	jbsr __libc_start_main@PLTPC
+#else
 	/* Push the address of our own entry points to `.fini' and
 	   `.init'.  */
 	pea __libc_csu_fini
@@ -87,6 +107,7 @@
 	/* Call the user's main function, and exit with its value.  But
 	   let the libc call main.  */
 	jbsr __libc_start_main
+#endif
 
 	illegal			/* Crash if somehow `exit' does return.  */
 

Modified: fsf/trunk/libc/sysdeps/generic/math_private.h
==============================================================================
--- fsf/trunk/libc/sysdeps/generic/math_private.h (original)
+++ fsf/trunk/libc/sysdeps/generic/math_private.h Wed Oct  9 00:02:12 2013
@@ -356,10 +356,8 @@
 extern double __halfulp (double __x, double __y);
 extern double __sin32 (double __x, double __res, double __res1);
 extern double __cos32 (double __x, double __res, double __res1);
-extern double __mpsin (double __x, double __dx);
-extern double __mpcos (double __x, double __dx);
-extern double __mpsin1 (double __x);
-extern double __mpcos1 (double __x);
+extern double __mpsin (double __x, double __dx, bool __range_reduce);
+extern double __mpcos (double __x, double __dx, bool __range_reduce);
 extern double __slowexp (double __x);
 extern double __slowpow (double __x, double __y, double __z);
 extern void __docos (double __x, double __dx, double __v[]);

Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_exp.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_exp.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_exp.c Wed Oct  9 00:02:12 2013
@@ -44,221 +44,299 @@
 # define SECTION
 #endif
 
-double __slowexp(double);
-
-/***************************************************************************/
-/* An ultimate exp routine. Given an IEEE double machine number x          */
-/* it computes the correctly rounded (to nearest) value of e^x             */
-/***************************************************************************/
+double __slowexp (double);
+
+/* An ultimate exp routine. Given an IEEE double machine number x it computes
+   the correctly rounded (to nearest) value of e^x.  */
 double
 SECTION
-__ieee754_exp(double x) {
+__ieee754_exp (double x)
+{
   double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
-  mynumber junk1, junk2, binexp  = {{0,0}};
-  int4 i,j,m,n,ex;
+  mynumber junk1, junk2, binexp = {{0, 0}};
+  int4 i, j, m, n, ex;
   double retval;
 
   SET_RESTORE_ROUND (FE_TONEAREST);
 
   junk1.x = x;
   m = junk1.i[HIGH_HALF];
-  n = m&hugeint;
-
-  if (n > smallint && n < bigint) {
-
-    y = x*log2e.x + three51.x;
-    bexp = y - three51.x;      /*  multiply the result by 2**bexp        */
-
-    junk1.x = y;
-
-    eps = bexp*ln_two2.x;      /* x = bexp*ln(2) + t - eps               */
-    t = x - bexp*ln_two1.x;
-
-    y = t + three33.x;
-    base = y - three33.x;      /* t rounded to a multiple of 2**-18      */
-    junk2.x = y;
-    del = (t - base) - eps;    /*  x = bexp*ln(2) + base + del           */
-    eps = del + del*del*(p3.x*del + p2.x);
-
-    binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+1023)<<20;
-
-    i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
-    j = (junk2.i[LOW_HALF]&511)<<1;
-
-    al = coar.x[i]*fine.x[j];
-    bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
-
-    rem=(bet + bet*eps)+al*eps;
-    res = al + rem;
-    cor = (al - res) + rem;
-    if  (res == (res+cor*err_0)) { retval = res*binexp.x; goto ret; }
-    else { retval = __slowexp(x); goto ret; } /*if error is over bound */
-  }
-
-  if (n <= smallint) { retval = 1.0; goto ret; }
-
-  if (n >= badint) {
-    if (n > infint) { retval = x+x; goto ret; }               /* x is NaN */
-    if (n < infint) { retval = (x>0) ? (hhuge*hhuge) : (tiny*tiny); goto ret; }
-    /* x is finite,  cause either overflow or underflow  */
-    if (junk1.i[LOW_HALF] != 0) { retval = x+x; goto ret; } /*  x is NaN  */
-    retval = (x>0)?inf.x:zero;             /* |x| = inf;  return either inf or 0 */
-    goto ret;
-  }
-
-  y = x*log2e.x + three51.x;
+  n = m & hugeint;
+
+  if (n > smallint && n < bigint)
+    {
+      y = x * log2e.x + three51.x;
+      bexp = y - three51.x;	/*  multiply the result by 2**bexp        */
+
+      junk1.x = y;
+
+      eps = bexp * ln_two2.x;	/* x = bexp*ln(2) + t - eps               */
+      t = x - bexp * ln_two1.x;
+
+      y = t + three33.x;
+      base = y - three33.x;	/* t rounded to a multiple of 2**-18      */
+      junk2.x = y;
+      del = (t - base) - eps;	/*  x = bexp*ln(2) + base + del           */
+      eps = del + del * del * (p3.x * del + p2.x);
+
+      binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
+
+      i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+      j = (junk2.i[LOW_HALF] & 511) << 1;
+
+      al = coar.x[i] * fine.x[j];
+      bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+	     + coar.x[i + 1] * fine.x[j + 1]);
+
+      rem = (bet + bet * eps) + al * eps;
+      res = al + rem;
+      cor = (al - res) + rem;
+      if (res == (res + cor * err_0))
+	{
+	  retval = res * binexp.x;
+	  goto ret;
+	}
+      else
+	{
+	  retval = __slowexp (x);
+	  goto ret;
+	}			/*if error is over bound */
+    }
+
+  if (n <= smallint)
+    {
+      retval = 1.0;
+      goto ret;
+    }
+
+  if (n >= badint)
+    {
+      if (n > infint)
+	{
+	  retval = x + x;
+	  goto ret;
+	}			/* x is NaN */
+      if (n < infint)
+	{
+	  retval = (x > 0) ? (hhuge * hhuge) : (tiny * tiny);
+	  goto ret;
+	}
+      /* x is finite,  cause either overflow or underflow  */
+      if (junk1.i[LOW_HALF] != 0)
+	{
+	  retval = x + x;
+	  goto ret;
+	}			/*  x is NaN  */
+      retval = (x > 0) ? inf.x : zero;	/* |x| = inf;  return either inf or 0 */
+      goto ret;
+    }
+
+  y = x * log2e.x + three51.x;
   bexp = y - three51.x;
   junk1.x = y;
-  eps = bexp*ln_two2.x;
-  t = x - bexp*ln_two1.x;
+  eps = bexp * ln_two2.x;
+  t = x - bexp * ln_two1.x;
   y = t + three33.x;
   base = y - three33.x;
   junk2.x = y;
   del = (t - base) - eps;
-  eps = del + del*del*(p3.x*del + p2.x);
-  i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
-  j = (junk2.i[LOW_HALF]&511)<<1;
-  al = coar.x[i]*fine.x[j];
-  bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
-  rem=(bet + bet*eps)+al*eps;
+  eps = del + del * del * (p3.x * del + p2.x);
+  i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+  j = (junk2.i[LOW_HALF] & 511) << 1;
+  al = coar.x[i] * fine.x[j];
+  bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+	 + coar.x[i + 1] * fine.x[j + 1]);
+  rem = (bet + bet * eps) + al * eps;
   res = al + rem;
   cor = (al - res) + rem;
-  if (m>>31) {
-    ex=junk1.i[LOW_HALF];
-    if (res < 1.0) {res+=res; cor+=cor; ex-=1;}
-    if (ex >=-1022) {
-      binexp.i[HIGH_HALF] = (1023+ex)<<20;
-      if  (res == (res+cor*err_0)) { retval = res*binexp.x; goto ret; }
-      else { retval = __slowexp(x); goto ret; } /*if error is over bound */
-    }
-    ex = -(1022+ex);
-    binexp.i[HIGH_HALF] = (1023-ex)<<20;
-    res*=binexp.x;
-    cor*=binexp.x;
-    eps=1.0000000001+err_0*binexp.x;
-    t=1.0+res;
-    y = ((1.0-t)+res)+cor;
-    res=t+y;
-    cor = (t-res)+y;
-    if (res == (res + eps*cor))
-    { binexp.i[HIGH_HALF] = 0x00100000;
-      retval = (res-1.0)*binexp.x;
-      goto ret;
-    }
-    else { retval = __slowexp(x); goto ret; } /*   if error is over bound    */
-  }
-  else {
-    binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+767)<<20;
-    if (res == (res+cor*err_0)) { retval = res*binexp.x*t256.x; goto ret; }
-    else { retval = __slowexp(x); goto ret; }
-  }
- ret:
+  if (m >> 31)
+    {
+      ex = junk1.i[LOW_HALF];
+      if (res < 1.0)
+	{
+	  res += res;
+	  cor += cor;
+	  ex -= 1;
+	}
+      if (ex >= -1022)
+	{
+	  binexp.i[HIGH_HALF] = (1023 + ex) << 20;
+	  if (res == (res + cor * err_0))
+	    {
+	      retval = res * binexp.x;
+	      goto ret;
+	    }
+	  else
+	    {
+	      retval = __slowexp (x);
+	      goto ret;
+	    }			/*if error is over bound */
+	}
+      ex = -(1022 + ex);
+      binexp.i[HIGH_HALF] = (1023 - ex) << 20;
+      res *= binexp.x;
+      cor *= binexp.x;
+      eps = 1.0000000001 + err_0 * binexp.x;
+      t = 1.0 + res;
+      y = ((1.0 - t) + res) + cor;
+      res = t + y;
+      cor = (t - res) + y;
+      if (res == (res + eps * cor))
+	{
+	  binexp.i[HIGH_HALF] = 0x00100000;
+	  retval = (res - 1.0) * binexp.x;
+	  goto ret;
+	}
+      else
+	{
+	  retval = __slowexp (x);
+	  goto ret;
+	}			/*   if error is over bound    */
+    }
+  else
+    {
+      binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
+      if (res == (res + cor * err_0))
+	{
+	  retval = res * binexp.x * t256.x;
+	  goto ret;
+	}
+      else
+	{
+	  retval = __slowexp (x);
+	  goto ret;
+	}
+    }
+ret:
   return retval;
 }
 #ifndef __ieee754_exp
 strong_alias (__ieee754_exp, __exp_finite)
 #endif
 
-/************************************************************************/
-/* Compute e^(x+xx)(Double-Length number) .The routine also receive     */
-/* bound of error of previous calculation .If after computing exp       */
-/* error bigger than allows routine return non positive number          */
-/*else return   e^(x + xx)   (always positive )                         */
-/************************************************************************/
-
+/* Compute e^(x+xx).  The routine also receives bound of error of previous
+   calculation.  If after computing exp the error exceeds the allowed bounds,
+   the routine returns a non-positive number.  Otherwise it returns the
+   computed result, which is always positive.  */
 double
 SECTION
-__exp1(double x, double xx, double error) {
+__exp1 (double x, double xx, double error)
+{
   double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
-  mynumber junk1, junk2, binexp  = {{0,0}};
-  int4 i,j,m,n,ex;
+  mynumber junk1, junk2, binexp = {{0, 0}};
+  int4 i, j, m, n, ex;
 
   junk1.x = x;
   m = junk1.i[HIGH_HALF];
-  n = m&hugeint;                 /* no sign */
-
-  if (n > smallint && n < bigint) {
-    y = x*log2e.x + three51.x;
-    bexp = y - three51.x;      /*  multiply the result by 2**bexp        */
-
-    junk1.x = y;
-
-    eps = bexp*ln_two2.x;      /* x = bexp*ln(2) + t - eps               */
-    t = x - bexp*ln_two1.x;
-
-    y = t + three33.x;
-    base = y - three33.x;      /* t rounded to a multiple of 2**-18      */
-    junk2.x = y;
-    del = (t - base) + (xx-eps);    /*  x = bexp*ln(2) + base + del      */
-    eps = del + del*del*(p3.x*del + p2.x);
-
-    binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+1023)<<20;
-
-    i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
-    j = (junk2.i[LOW_HALF]&511)<<1;
-
-    al = coar.x[i]*fine.x[j];
-    bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
-
-    rem=(bet + bet*eps)+al*eps;
-    res = al + rem;
-    cor = (al - res) + rem;
-    if  (res == (res+cor*(1.0+error+err_1))) return res*binexp.x;
-    else return -10.0;
-  }
-
-  if (n <= smallint) return 1.0; /*  if x->0 e^x=1 */
-
-  if (n >= badint) {
-    if (n > infint) return(zero/zero);    /* x is NaN,  return invalid */
-    if (n < infint) return ( (x>0) ? (hhuge*hhuge) : (tiny*tiny) );
-    /* x is finite,  cause either overflow or underflow  */
-    if (junk1.i[LOW_HALF] != 0)  return (zero/zero);        /*  x is NaN  */
-    return ((x>0)?inf.x:zero );   /* |x| = inf;  return either inf or 0 */
-  }
-
-  y = x*log2e.x + three51.x;
+  n = m & hugeint;		/* no sign */
+
+  if (n > smallint && n < bigint)
+    {
+      y = x * log2e.x + three51.x;
+      bexp = y - three51.x;	/*  multiply the result by 2**bexp        */
+
+      junk1.x = y;
+
+      eps = bexp * ln_two2.x;	/* x = bexp*ln(2) + t - eps               */
+      t = x - bexp * ln_two1.x;
+
+      y = t + three33.x;
+      base = y - three33.x;	/* t rounded to a multiple of 2**-18      */
+      junk2.x = y;
+      del = (t - base) + (xx - eps);	/*  x = bexp*ln(2) + base + del      */
+      eps = del + del * del * (p3.x * del + p2.x);
+
+      binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
+
+      i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+      j = (junk2.i[LOW_HALF] & 511) << 1;
+
+      al = coar.x[i] * fine.x[j];
+      bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+	     + coar.x[i + 1] * fine.x[j + 1]);
+
+      rem = (bet + bet * eps) + al * eps;
+      res = al + rem;
+      cor = (al - res) + rem;
+      if (res == (res + cor * (1.0 + error + err_1)))
+	return res * binexp.x;
+      else
+	return -10.0;
+    }
+
+  if (n <= smallint)
+    return 1.0;			/*  if x->0 e^x=1 */
+
+  if (n >= badint)
+    {
+      if (n > infint)
+	return (zero / zero);	/* x is NaN,  return invalid */
+      if (n < infint)
+	return ((x > 0) ? (hhuge * hhuge) : (tiny * tiny));
+      /* x is finite,  cause either overflow or underflow  */
+      if (junk1.i[LOW_HALF] != 0)
+	return (zero / zero);	/*  x is NaN  */
+      return ((x > 0) ? inf.x : zero);	/* |x| = inf;  return either inf or 0 */
+    }
+
+  y = x * log2e.x + three51.x;
   bexp = y - three51.x;
   junk1.x = y;
-  eps = bexp*ln_two2.x;
-  t = x - bexp*ln_two1.x;
+  eps = bexp * ln_two2.x;
+  t = x - bexp * ln_two1.x;
   y = t + three33.x;
   base = y - three33.x;
   junk2.x = y;
-  del = (t - base) + (xx-eps);
-  eps = del + del*del*(p3.x*del + p2.x);
-  i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
-  j = (junk2.i[LOW_HALF]&511)<<1;
-  al = coar.x[i]*fine.x[j];
-  bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
-  rem=(bet + bet*eps)+al*eps;
+  del = (t - base) + (xx - eps);
+  eps = del + del * del * (p3.x * del + p2.x);
+  i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+  j = (junk2.i[LOW_HALF] & 511) << 1;
+  al = coar.x[i] * fine.x[j];
+  bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+	 + coar.x[i + 1] * fine.x[j + 1]);
+  rem = (bet + bet * eps) + al * eps;
   res = al + rem;
   cor = (al - res) + rem;
-  if (m>>31) {
-    ex=junk1.i[LOW_HALF];
-    if (res < 1.0) {res+=res; cor+=cor; ex-=1;}
-    if (ex >=-1022) {
-      binexp.i[HIGH_HALF] = (1023+ex)<<20;
-      if  (res == (res+cor*(1.0+error+err_1))) return res*binexp.x;
-      else return -10.0;
-    }
-    ex = -(1022+ex);
-    binexp.i[HIGH_HALF] = (1023-ex)<<20;
-    res*=binexp.x;
-    cor*=binexp.x;
-    eps=1.00000000001+(error+err_1)*binexp.x;
-    t=1.0+res;
-    y = ((1.0-t)+res)+cor;
-    res=t+y;
-    cor = (t-res)+y;
-    if (res == (res + eps*cor))
-      {binexp.i[HIGH_HALF] = 0x00100000; return (res-1.0)*binexp.x;}
-    else return -10.0;
-  }
-  else {
-    binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+767)<<20;
-    if  (res == (res+cor*(1.0+error+err_1)))
-      return res*binexp.x*t256.x;
-    else return -10.0;
-  }
+  if (m >> 31)
+    {
+      ex = junk1.i[LOW_HALF];
+      if (res < 1.0)
+	{
+	  res += res;
+	  cor += cor;
+	  ex -= 1;
+	}
+      if (ex >= -1022)
+	{
+	  binexp.i[HIGH_HALF] = (1023 + ex) << 20;
+	  if (res == (res + cor * (1.0 + error + err_1)))
+	    return res * binexp.x;
+	  else
+	    return -10.0;
+	}
+      ex = -(1022 + ex);
+      binexp.i[HIGH_HALF] = (1023 - ex) << 20;
+      res *= binexp.x;
+      cor *= binexp.x;
+      eps = 1.00000000001 + (error + err_1) * binexp.x;
+      t = 1.0 + res;
+      y = ((1.0 - t) + res) + cor;
+      res = t + y;
+      cor = (t - res) + y;
+      if (res == (res + eps * cor))
+	{
+	  binexp.i[HIGH_HALF] = 0x00100000;
+	  return (res - 1.0) * binexp.x;
+	}
+      else
+	return -10.0;
+    }
+  else
+    {
+      binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
+      if (res == (res + cor * (1.0 + error + err_1)))
+	return res * binexp.x * t256.x;
+      else
+	return -10.0;
+    }
 }

Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_pow.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_pow.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/e_pow.c Wed Oct  9 00:02:12 2013
@@ -49,354 +49,407 @@
 
 static const double huge = 1.0e300, tiny = 1.0e-300;
 
-double __exp1(double x, double xx, double error);
-static double log1(double x, double *delta, double *error);
-static double my_log2(double x, double *delta, double *error);
-double __slowpow(double x, double y,double z);
-static double power1(double x, double y);
-static int checkint(double x);
-
-/***************************************************************************/
-/* An ultimate power routine. Given two IEEE double machine numbers y,x    */
-/* it computes the correctly rounded (to nearest) value of X^y.            */
-/***************************************************************************/
+double __exp1 (double x, double xx, double error);
+static double log1 (double x, double *delta, double *error);
+static double my_log2 (double x, double *delta, double *error);
+double __slowpow (double x, double y, double z);
+static double power1 (double x, double y);
+static int checkint (double x);
+
+/* An ultimate power routine. Given two IEEE double machine numbers y, x it
+   computes the correctly rounded (to nearest) value of X^y.  */
 double
 SECTION
-__ieee754_pow(double x, double y) {
-  double z,a,aa,error, t,a1,a2,y1,y2;
-  mynumber u,v;
+__ieee754_pow (double x, double y)
+{
+  double z, a, aa, error, t, a1, a2, y1, y2;
+  mynumber u, v;
   int k;
-  int4 qx,qy;
-  v.x=y;
-  u.x=x;
-  if (v.i[LOW_HALF] == 0) { /* of y */
-    qx = u.i[HIGH_HALF]&0x7fffffff;
-    /* Is x a NaN?  */
-    if (((qx == 0x7ff00000) && (u.i[LOW_HALF] != 0)) || (qx > 0x7ff00000))
-      return x;
-    if (y == 1.0) return x;
-    if (y == 2.0) return x*x;
-    if (y == -1.0) return 1.0/x;
-    if (y == 0) return 1.0;
-  }
+  int4 qx, qy;
+  v.x = y;
+  u.x = x;
+  if (v.i[LOW_HALF] == 0)
+    {				/* of y */
+      qx = u.i[HIGH_HALF] & 0x7fffffff;
+      /* Is x a NaN?  */
+      if (((qx == 0x7ff00000) && (u.i[LOW_HALF] != 0)) || (qx > 0x7ff00000))
+	return x;
+      if (y == 1.0)
+	return x;
+      if (y == 2.0)
+	return x * x;
+      if (y == -1.0)
+	return 1.0 / x;
+      if (y == 0)
+	return 1.0;
+    }
   /* else */
-  if(((u.i[HIGH_HALF]>0 && u.i[HIGH_HALF]<0x7ff00000)||        /* x>0 and not x->0 */
-       (u.i[HIGH_HALF]==0 && u.i[LOW_HALF]!=0))  &&
-				      /*   2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
-      (v.i[HIGH_HALF]&0x7fffffff) < 0x4ff00000) {              /* if y<-1 or y>1   */
-    double retval;
-
-    SET_RESTORE_ROUND (FE_TONEAREST);
-
-    /* Avoid internal underflow for tiny y.  The exact value of y does
-       not matter if |y| <= 2**-64.  */
-    if (ABS (y) < 0x1p-64)
-      y = y < 0 ? -0x1p-64 : 0x1p-64;
-    z = log1(x,&aa,&error);                                 /* x^y  =e^(y log (X)) */
-    t = y*CN;
-    y1 = t - (t-y);
-    y2 = y - y1;
-    t = z*CN;
-    a1 = t - (t-z);
-    a2 = (z - a1)+aa;
-    a = y1*a1;
-    aa = y2*a1 + y*a2;
-    a1 = a+aa;
-    a2 = (a-a1)+aa;
-    error = error*ABS(y);
-    t = __exp1(a1,a2,1.9e16*error);     /* return -10 or 0 if wasn't computed exactly */
-    retval = (t>0)?t:power1(x,y);
-
-    return retval;
-  }
-
-  if (x == 0) {
-    if (((v.i[HIGH_HALF] & 0x7fffffff) == 0x7ff00000 && v.i[LOW_HALF] != 0)
-	|| (v.i[HIGH_HALF] & 0x7fffffff) > 0x7ff00000) /* NaN */
-      return y;
-    if (ABS(y) > 1.0e20) return (y>0)?0:1.0/0.0;
-    k = checkint(y);
-    if (k == -1)
-      return y < 0 ? 1.0/x : x;
-    else
-      return y < 0 ? 1.0/0.0 : 0.0;                               /* return 0 */
-  }
-
-  qx = u.i[HIGH_HALF]&0x7fffffff;  /*   no sign   */
-  qy = v.i[HIGH_HALF]&0x7fffffff;  /*   no sign   */
-
-  if (qx >= 0x7ff00000 && (qx > 0x7ff00000 || u.i[LOW_HALF] != 0)) /* NaN */
+  if (((u.i[HIGH_HALF] > 0 && u.i[HIGH_HALF] < 0x7ff00000) ||	/* x>0 and not x->0 */
+       (u.i[HIGH_HALF] == 0 && u.i[LOW_HALF] != 0)) &&
+      /*   2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
+      (v.i[HIGH_HALF] & 0x7fffffff) < 0x4ff00000)
+    {				/* if y<-1 or y>1   */
+      double retval;
+
+      SET_RESTORE_ROUND (FE_TONEAREST);
+
+      /* Avoid internal underflow for tiny y.  The exact value of y does
+         not matter if |y| <= 2**-64.  */
+      if (ABS (y) < 0x1p-64)
+	y = y < 0 ? -0x1p-64 : 0x1p-64;
+      z = log1 (x, &aa, &error);	/* x^y  =e^(y log (X)) */
+      t = y * CN;
+      y1 = t - (t - y);
+      y2 = y - y1;
+      t = z * CN;
+      a1 = t - (t - z);
+      a2 = (z - a1) + aa;
+      a = y1 * a1;
+      aa = y2 * a1 + y * a2;
+      a1 = a + aa;
+      a2 = (a - a1) + aa;
+      error = error * ABS (y);
+      t = __exp1 (a1, a2, 1.9e16 * error);	/* return -10 or 0 if wasn't computed exactly */
+      retval = (t > 0) ? t : power1 (x, y);
+
+      return retval;
+    }
+
+  if (x == 0)
+    {
+      if (((v.i[HIGH_HALF] & 0x7fffffff) == 0x7ff00000 && v.i[LOW_HALF] != 0)
+	  || (v.i[HIGH_HALF] & 0x7fffffff) > 0x7ff00000)	/* NaN */
+	return y;
+      if (ABS (y) > 1.0e20)
+	return (y > 0) ? 0 : 1.0 / 0.0;
+      k = checkint (y);
+      if (k == -1)
+	return y < 0 ? 1.0 / x : x;
+      else
+	return y < 0 ? 1.0 / 0.0 : 0.0;	/* return 0 */
+    }
+
+  qx = u.i[HIGH_HALF] & 0x7fffffff;	/*   no sign   */
+  qy = v.i[HIGH_HALF] & 0x7fffffff;	/*   no sign   */
+
+  if (qx >= 0x7ff00000 && (qx > 0x7ff00000 || u.i[LOW_HALF] != 0))	/* NaN */
     return x;
-  if (qy >= 0x7ff00000 && (qy > 0x7ff00000 || v.i[LOW_HALF] != 0)) /* NaN */
+  if (qy >= 0x7ff00000 && (qy > 0x7ff00000 || v.i[LOW_HALF] != 0))	/* NaN */
     return x == 1.0 ? 1.0 : y;
 
   /* if x<0 */
-  if (u.i[HIGH_HALF] < 0) {
-    k = checkint(y);
-    if (k==0) {
-      if (qy == 0x7ff00000) {
-	if (x == -1.0) return 1.0;
-	else if (x > -1.0) return v.i[HIGH_HALF] < 0 ? INF.x : 0.0;
-	else return v.i[HIGH_HALF] < 0 ? 0.0 : INF.x;
-      }
+  if (u.i[HIGH_HALF] < 0)
+    {
+      k = checkint (y);
+      if (k == 0)
+	{
+	  if (qy == 0x7ff00000)
+	    {
+	      if (x == -1.0)
+		return 1.0;
+	      else if (x > -1.0)
+		return v.i[HIGH_HALF] < 0 ? INF.x : 0.0;
+	      else
+		return v.i[HIGH_HALF] < 0 ? 0.0 : INF.x;
+	    }
+	  else if (qx == 0x7ff00000)
+	    return y < 0 ? 0.0 : INF.x;
+	  return (x - x) / (x - x);	/* y not integer and x<0 */
+	}
       else if (qx == 0x7ff00000)
-	return y < 0 ? 0.0 : INF.x;
-      return (x - x) / (x - x);                   /* y not integer and x<0 */
-    }
-    else if (qx == 0x7ff00000)
-      {
-	if (k < 0)
-	  return y < 0 ? nZERO.x : nINF.x;
-	else
-	  return y < 0 ? 0.0 : INF.x;
-      }
-    return (k==1)?__ieee754_pow(-x,y):-__ieee754_pow(-x,y); /* if y even or odd */
-  }
+	{
+	  if (k < 0)
+	    return y < 0 ? nZERO.x : nINF.x;
+	  else
+	    return y < 0 ? 0.0 : INF.x;
+	}
+      /* if y even or odd */
+      return (k == 1) ? __ieee754_pow (-x, y) : -__ieee754_pow (-x, y);
+    }
   /* x>0 */
 
-  if (qx == 0x7ff00000)                              /* x= 2^-0x3ff */
+  if (qx == 0x7ff00000)		/* x= 2^-0x3ff */
     return y > 0 ? x : 0;
 
-  if (qy > 0x45f00000 && qy < 0x7ff00000) {
-    if (x == 1.0) return 1.0;
-    if (y>0) return (x>1.0)?huge*huge:tiny*tiny;
-    if (y<0) return (x<1.0)?huge*huge:tiny*tiny;
-  }
-
-  if (x == 1.0) return 1.0;
-  if (y>0) return (x>1.0)?INF.x:0;
-  if (y<0) return (x<1.0)?INF.x:0;
-  return 0;     /* unreachable, to make the compiler happy */
+  if (qy > 0x45f00000 && qy < 0x7ff00000)
+    {
+      if (x == 1.0)
+	return 1.0;
+      if (y > 0)
+	return (x > 1.0) ? huge * huge : tiny * tiny;
+      if (y < 0)
+	return (x < 1.0) ? huge * huge : tiny * tiny;
+    }
+
+  if (x == 1.0)
+    return 1.0;
+  if (y > 0)
+    return (x > 1.0) ? INF.x : 0;
+  if (y < 0)
+    return (x < 1.0) ? INF.x : 0;
+  return 0;			/* unreachable, to make the compiler happy */
 }
+
 #ifndef __ieee754_pow
 strong_alias (__ieee754_pow, __pow_finite)
 #endif
 
-/**************************************************************************/
-/* Computing x^y using more accurate but more slow log routine            */
-/**************************************************************************/
+/* Compute x^y using more accurate but more slow log routine.  */
 static double
 SECTION
-power1(double x, double y) {
-  double z,a,aa,error, t,a1,a2,y1,y2;
-  z = my_log2(x,&aa,&error);
-  t = y*CN;
-  y1 = t - (t-y);
+power1 (double x, double y)
+{
+  double z, a, aa, error, t, a1, a2, y1, y2;
+  z = my_log2 (x, &aa, &error);
+  t = y * CN;
+  y1 = t - (t - y);
   y2 = y - y1;
-  t = z*CN;
-  a1 = t - (t-z);
+  t = z * CN;
+  a1 = t - (t - z);
   a2 = z - a1;
-  a = y*z;
-  aa = ((y1*a1-a)+y1*a2+y2*a1)+y2*a2+aa*y;
-  a1 = a+aa;
-  a2 = (a-a1)+aa;
-  error = error*ABS(y);
-  t = __exp1(a1,a2,1.9e16*error);
-  return (t >= 0)?t:__slowpow(x,y,z);
+  a = y * z;
+  aa = ((y1 * a1 - a) + y1 * a2 + y2 * a1) + y2 * a2 + aa * y;
+  a1 = a + aa;
+  a2 = (a - a1) + aa;
+  error = error * ABS (y);
+  t = __exp1 (a1, a2, 1.9e16 * error);
+  return (t >= 0) ? t : __slowpow (x, y, z);
 }
 
-/****************************************************************************/
-/* Computing log(x) (x is left argument). The result is the returned double */
-/* + the parameter delta.                                                   */
-/* The result is bounded by error (rightmost argument)                      */
-/****************************************************************************/
+/* Compute log(x) (x is left argument). The result is the returned double + the
+   parameter DELTA.  The result is bounded by ERROR.  */
 static double
 SECTION
-log1(double x, double *delta, double *error) {
-  int i,j,m;
-  double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
-  mynumber u,v;
+log1 (double x, double *delta, double *error)
+{
+  int i, j, m;
+  double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
+  mynumber u, v;
 #ifdef BIG_ENDI
-  mynumber
-/**/ two52          = {{0x43300000, 0x00000000}}; /* 2**52         */
+  mynumber /**/ two52 = {{0x43300000, 0x00000000}};	/* 2**52  */
 #else
-#ifdef LITTLE_ENDI
-  mynumber
-/**/ two52          = {{0x00000000, 0x43300000}}; /* 2**52         */
-#endif
+# ifdef LITTLE_ENDI
+  mynumber /**/ two52 = {{0x00000000, 0x43300000}};	/* 2**52  */
+# endif
 #endif
 
   u.x = x;
   m = u.i[HIGH_HALF];
   *error = 0;
   *delta = 0;
-  if (m < 0x00100000)             /*  1<x<2^-1007 */
-    { x = x*t52.x; add = -52.0; u.x = x; m = u.i[HIGH_HALF];}
-
-  if ((m&0x000fffff) < 0x0006a09e)
-    {u.i[HIGH_HALF] = (m&0x000fffff)|0x3ff00000; two52.i[LOW_HALF]=(m>>20); }
+  if (m < 0x00100000)		/*  1<x<2^-1007 */
+    {
+      x = x * t52.x;
+      add = -52.0;
+      u.x = x;
+      m = u.i[HIGH_HALF];
+    }
+
+  if ((m & 0x000fffff) < 0x0006a09e)
+    {
+      u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
+      two52.i[LOW_HALF] = (m >> 20);
+    }
   else
-    {u.i[HIGH_HALF] = (m&0x000fffff)|0x3fe00000; two52.i[LOW_HALF]=(m>>20)+1; }
+    {
+      u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
+      two52.i[LOW_HALF] = (m >> 20) + 1;
+    }
 
   v.x = u.x + bigu.x;
   uu = v.x - bigu.x;
-  i = (v.i[LOW_HALF]&0x000003ff)<<2;
-  if (two52.i[LOW_HALF] == 1023)         /* nx = 0              */
-  {
-      if (i > 1192 && i < 1208)          /* |x-1| < 1.5*2**-10  */
-      {
+  i = (v.i[LOW_HALF] & 0x000003ff) << 2;
+  if (two52.i[LOW_HALF] == 1023)	/* nx = 0              */
+    {
+      if (i > 1192 && i < 1208)	/* |x-1| < 1.5*2**-10  */
+	{
 	  t = x - 1.0;
-	  t1 = (t+5.0e6)-5.0e6;
-	  t2 = t-t1;
-	  e1 = t - 0.5*t1*t1;
-	  e2 = t*t*t*(r3+t*(r4+t*(r5+t*(r6+t*(r7+t*r8)))))-0.5*t2*(t+t1);
-	  res = e1+e2;
-	  *error = 1.0e-21*ABS(t);
-	  *delta = (e1-res)+e2;
+	  t1 = (t + 5.0e6) - 5.0e6;
+	  t2 = t - t1;
+	  e1 = t - 0.5 * t1 * t1;
+	  e2 = (t * t * t * (r3 + t * (r4 + t * (r5 + t * (r6 + t
+							   * (r7 + t * r8)))))
+		- 0.5 * t2 * (t + t1));
+	  res = e1 + e2;
+	  *error = 1.0e-21 * ABS (t);
+	  *delta = (e1 - res) + e2;
 	  return res;
-      }                  /* |x-1| < 1.5*2**-10  */
+	}			/* |x-1| < 1.5*2**-10  */
       else
-      {
-	  v.x = u.x*(ui.x[i]+ui.x[i+1])+bigv.x;
-	  vv = v.x-bigv.x;
-	  j = v.i[LOW_HALF]&0x0007ffff;
-	  j = j+j+j;
-	  eps = u.x - uu*vv;
-	  e1 = eps*ui.x[i];
-	  e2 = eps*(ui.x[i+1]+vj.x[j]*(ui.x[i]+ui.x[i+1]));
-	  e = e1+e2;
-	  e2 =  ((e1-e)+e2);
-	  t=ui.x[i+2]+vj.x[j+1];
-	  t1 = t+e;
-	  t2 = (((t-t1)+e)+(ui.x[i+3]+vj.x[j+2]))+e2+e*e*(p2+e*(p3+e*p4));
-	  res=t1+t2;
+	{
+	  v.x = u.x * (ui.x[i] + ui.x[i + 1]) + bigv.x;
+	  vv = v.x - bigv.x;
+	  j = v.i[LOW_HALF] & 0x0007ffff;
+	  j = j + j + j;
+	  eps = u.x - uu * vv;
+	  e1 = eps * ui.x[i];
+	  e2 = eps * (ui.x[i + 1] + vj.x[j] * (ui.x[i] + ui.x[i + 1]));
+	  e = e1 + e2;
+	  e2 = ((e1 - e) + e2);
+	  t = ui.x[i + 2] + vj.x[j + 1];
+	  t1 = t + e;
+	  t2 = ((((t - t1) + e) + (ui.x[i + 3] + vj.x[j + 2])) + e2 + e * e
+		* (p2 + e * (p3 + e * p4)));
+	  res = t1 + t2;
 	  *error = 1.0e-24;
-	  *delta = (t1-res)+t2;
+	  *delta = (t1 - res) + t2;
 	  return res;
-      }
-  }   /* nx = 0 */
-  else                            /* nx != 0   */
-  {
+	}
+    }				/* nx = 0 */
+  else				/* nx != 0   */
+    {
       eps = u.x - uu;
-      nx = (two52.x - two52e.x)+add;
-      e1 = eps*ui.x[i];
-      e2 = eps*ui.x[i+1];
-      e=e1+e2;
-      e2 = (e1-e)+e2;
-      t=nx*ln2a.x+ui.x[i+2];
-      t1=t+e;
-      t2=(((t-t1)+e)+nx*ln2b.x+ui.x[i+3]+e2)+e*e*(q2+e*(q3+e*(q4+e*(q5+e*q6))));
-      res = t1+t2;
+      nx = (two52.x - two52e.x) + add;
+      e1 = eps * ui.x[i];
+      e2 = eps * ui.x[i + 1];
+      e = e1 + e2;
+      e2 = (e1 - e) + e2;
+      t = nx * ln2a.x + ui.x[i + 2];
+      t1 = t + e;
+      t2 = ((((t - t1) + e) + nx * ln2b.x + ui.x[i + 3] + e2) + e * e
+	    * (q2 + e * (q3 + e * (q4 + e * (q5 + e * q6)))));
+      res = t1 + t2;
       *error = 1.0e-21;
-      *delta = (t1-res)+t2;
+      *delta = (t1 - res) + t2;
       return res;
-  }                                /* nx != 0   */
+    }				/* nx != 0   */
 }
 
-/****************************************************************************/
-/* More slow but more accurate routine of log                               */
-/* Computing log(x)(x is left argument).The result is return double + delta.*/
-/* The result is bounded by error (right argument)                           */
-/****************************************************************************/
+/* Slower but more accurate routine of log.  The returned result is double +
+   DELTA.  The result is bounded by ERROR.  */
 static double
 SECTION
-my_log2(double x, double *delta, double *error) {
-  int i,j,m;
-  double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
-  double ou1,ou2,lu1,lu2,ov,lv1,lv2,a,a1,a2;
-  double y,yy,z,zz,j1,j2,j7,j8;
+my_log2 (double x, double *delta, double *error)
+{
+  int i, j, m;
+  double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
+  double ou1, ou2, lu1, lu2, ov, lv1, lv2, a, a1, a2;
+  double y, yy, z, zz, j1, j2, j7, j8;
 #ifndef DLA_FMS
-  double j3,j4,j5,j6;
+  double j3, j4, j5, j6;
 #endif
-  mynumber u,v;
+  mynumber u, v;
 #ifdef BIG_ENDI
-  mynumber
-/**/ two52          = {{0x43300000, 0x00000000}}; /* 2**52         */
+  mynumber /**/ two52 = {{0x43300000, 0x00000000}};	/* 2**52  */
 #else
-#ifdef LITTLE_ENDI
-  mynumber
-/**/ two52          = {{0x00000000, 0x43300000}}; /* 2**52         */
-#endif
+# ifdef LITTLE_ENDI
+  mynumber /**/ two52 = {{0x00000000, 0x43300000}};	/* 2**52  */
+# endif
 #endif
 
   u.x = x;
   m = u.i[HIGH_HALF];
   *error = 0;
   *delta = 0;
-  add=0;
-  if (m<0x00100000) {  /* x < 2^-1022 */
-    x = x*t52.x;  add = -52.0; u.x = x; m = u.i[HIGH_HALF]; }
-
-  if ((m&0x000fffff) < 0x0006a09e)
-    {u.i[HIGH_HALF] = (m&0x000fffff)|0x3ff00000; two52.i[LOW_HALF]=(m>>20); }
+  add = 0;
+  if (m < 0x00100000)
+    {				/* x < 2^-1022 */
+      x = x * t52.x;
+      add = -52.0;
+      u.x = x;
+      m = u.i[HIGH_HALF];
+    }
+
+  if ((m & 0x000fffff) < 0x0006a09e)
+    {
+      u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
+      two52.i[LOW_HALF] = (m >> 20);
+    }
   else
-    {u.i[HIGH_HALF] = (m&0x000fffff)|0x3fe00000; two52.i[LOW_HALF]=(m>>20)+1; }
+    {
+      u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
+      two52.i[LOW_HALF] = (m >> 20) + 1;
+    }
 
   v.x = u.x + bigu.x;
   uu = v.x - bigu.x;
-  i = (v.i[LOW_HALF]&0x000003ff)<<2;
+  i = (v.i[LOW_HALF] & 0x000003ff) << 2;
   /*------------------------------------- |x-1| < 2**-11-------------------------------  */
-  if ((two52.i[LOW_HALF] == 1023)  && (i == 1200))
-  {
+  if ((two52.i[LOW_HALF] == 1023) && (i == 1200))
+    {
       t = x - 1.0;
-      EMULV(t,s3,y,yy,j1,j2,j3,j4,j5);
-      ADD2(-0.5,0,y,yy,z,zz,j1,j2);
-      MUL2(t,0,z,zz,y,yy,j1,j2,j3,j4,j5,j6,j7,j8);
-      MUL2(t,0,y,yy,z,zz,j1,j2,j3,j4,j5,j6,j7,j8);
-
-      e1 = t+z;
-      e2 = (((t-e1)+z)+zz)+t*t*t*(ss3+t*(s4+t*(s5+t*(s6+t*(s7+t*s8)))));
-      res = e1+e2;
-      *error = 1.0e-25*ABS(t);
-      *delta = (e1-res)+e2;
+      EMULV (t, s3, y, yy, j1, j2, j3, j4, j5);
+      ADD2 (-0.5, 0, y, yy, z, zz, j1, j2);
+      MUL2 (t, 0, z, zz, y, yy, j1, j2, j3, j4, j5, j6, j7, j8);
+      MUL2 (t, 0, y, yy, z, zz, j1, j2, j3, j4, j5, j6, j7, j8);
+
+      e1 = t + z;
+      e2 = ((((t - e1) + z) + zz) + t * t * t
+	    * (ss3 + t * (s4 + t * (s5 + t * (s6 + t * (s7 + t * s8))))));
+      res = e1 + e2;
+      *error = 1.0e-25 * ABS (t);
+      *delta = (e1 - res) + e2;
       return res;
-  }
+    }
   /*----------------------------- |x-1| > 2**-11  --------------------------  */
   else
-  {          /*Computing log(x) according to log table                        */
-      nx = (two52.x - two52e.x)+add;
+    {				/*Computing log(x) according to log table                        */
+      nx = (two52.x - two52e.x) + add;
       ou1 = ui.x[i];
-      ou2 = ui.x[i+1];
-      lu1 = ui.x[i+2];
-      lu2 = ui.x[i+3];
-      v.x = u.x*(ou1+ou2)+bigv.x;
-      vv = v.x-bigv.x;
-      j = v.i[LOW_HALF]&0x0007ffff;
-      j = j+j+j;
-      eps = u.x - uu*vv;
-      ov  = vj.x[j];
-      lv1 = vj.x[j+1];
-      lv2 = vj.x[j+2];
-      a = (ou1+ou2)*(1.0+ov);
-      a1 = (a+1.0e10)-1.0e10;
-      a2 = a*(1.0-a1*uu*vv);
-      e1 = eps*a1;
-      e2 = eps*a2;
-      e = e1+e2;
-      e2 = (e1-e)+e2;
-      t=nx*ln2a.x+lu1+lv1;
-      t1 = t+e;
-      t2 = (((t-t1)+e)+(lu2+lv2+nx*ln2b.x+e2))+e*e*(p2+e*(p3+e*p4));
-      res=t1+t2;
+      ou2 = ui.x[i + 1];
+      lu1 = ui.x[i + 2];
+      lu2 = ui.x[i + 3];
+      v.x = u.x * (ou1 + ou2) + bigv.x;
+      vv = v.x - bigv.x;
+      j = v.i[LOW_HALF] & 0x0007ffff;
+      j = j + j + j;
+      eps = u.x - uu * vv;
+      ov = vj.x[j];
+      lv1 = vj.x[j + 1];
+      lv2 = vj.x[j + 2];
+      a = (ou1 + ou2) * (1.0 + ov);
+      a1 = (a + 1.0e10) - 1.0e10;
+      a2 = a * (1.0 - a1 * uu * vv);
+      e1 = eps * a1;
+      e2 = eps * a2;
+      e = e1 + e2;
+      e2 = (e1 - e) + e2;
+      t = nx * ln2a.x + lu1 + lv1;
+      t1 = t + e;
+      t2 = ((((t - t1) + e) + (lu2 + lv2 + nx * ln2b.x + e2)) + e * e
+	    * (p2 + e * (p3 + e * p4)));
+      res = t1 + t2;
       *error = 1.0e-27;
-      *delta = (t1-res)+t2;
+      *delta = (t1 - res) + t2;
       return res;
-  }
+    }
 }
 
-/**********************************************************************/
-/* Routine receives a double x and checks if it is an integer. If not */
-/* it returns 0, else it returns 1 if even or -1 if odd.              */
-/**********************************************************************/
+/* This function receives a double x and checks if it is an integer.  If not,
+   it returns 0, else it returns 1 if even or -1 if odd.  */
 static int
 SECTION
-checkint(double x) {
-  union {int4 i[2]; double x;} u;
-  int k,m,n;
+checkint (double x)
+{
+  union
+  {
+    int4 i[2];
+    double x;
+  } u;
+  int k, m, n;
   u.x = x;
-  m = u.i[HIGH_HALF]&0x7fffffff;    /* no sign */
-  if (m >= 0x7ff00000) return 0;    /*  x is +/-inf or NaN  */
-  if (m >= 0x43400000) return 1;    /*  |x| >= 2**53   */
-  if (m < 0x40000000) return 0;     /* |x| < 2,  can not be 0 or 1  */
+  m = u.i[HIGH_HALF] & 0x7fffffff;	/* no sign */
+  if (m >= 0x7ff00000)
+    return 0;			/*  x is +/-inf or NaN  */
+  if (m >= 0x43400000)
+    return 1;			/*  |x| >= 2**53   */
+  if (m < 0x40000000)
+    return 0;			/* |x| < 2,  can not be 0 or 1  */
   n = u.i[LOW_HALF];
-  k = (m>>20)-1023;                 /*  1 <= k <= 52   */
-  if (k == 52) return (n&1)? -1:1;  /* odd or even*/
-  if (k>20) {
-    if (n<<(k-20)) return 0;        /* if not integer */
-    return (n<<(k-21))?-1:1;
-  }
-  if (n) return 0;                  /*if  not integer*/
-  if (k == 20) return (m&1)? -1:1;
-  if (m<<(k+12)) return 0;
-  return (m<<(k+11))?-1:1;
+  k = (m >> 20) - 1023;		/*  1 <= k <= 52   */
+  if (k == 52)
+    return (n & 1) ? -1 : 1;	/* odd or even */
+  if (k > 20)
+    {
+      if (n << (k - 20))
+	return 0;		/* if not integer */
+      return (n << (k - 21)) ? -1 : 1;
+    }
+  if (n)
+    return 0;			/*if  not integer */
+  if (k == 20)
+    return (m & 1) ? -1 : 1;
+  if (m << (k + 12))
+    return 0;
+  return (m << (k + 11)) ? -1 : 1;
 }

Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_sin.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_sin.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_sin.c Wed Oct  9 00:02:12 2013
@@ -127,10 +127,8 @@
 
 void __dubsin (double x, double dx, double w[]);
 void __docos (double x, double dx, double w[]);
-double __mpsin (double x, double dx);
-double __mpcos (double x, double dx);
-double __mpsin1 (double x);
-double __mpcos1 (double x);
+double __mpsin (double x, double dx, bool reduce_range);
+double __mpcos (double x, double dx, bool reduce_range);
 static double slow (double x);
 static double slow1 (double x);
 static double slow2 (double x);
@@ -722,7 +720,7 @@
       if (w[0] == w[0] + 1.000000001 * w[1])
 	return (x > 0) ? w[0] : -w[0];
       else
-	return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+	return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
     }
 }
 
@@ -762,7 +760,7 @@
       if (w[0] == w[0] + 1.000000005 * w[1])
 	return (x > 0) ? w[0] : -w[0];
       else
-	return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+	return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
     }
 }
 
@@ -815,7 +813,7 @@
       if (w[0] == w[0] + 1.000000005 * w[1])
 	return (x > 0) ? w[0] : -w[0];
       else
-	return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+	return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
     }
 }
 
@@ -882,7 +880,7 @@
 	  if (w[0] == w[0] + cor)
 	    return (a > 0) ? w[0] : -w[0];
 	  else
-	    return __mpsin1 (orig);
+	    return __mpsin (orig, 0, true);
 	}
     }
 }
@@ -939,7 +937,7 @@
       if (w[0] == w[0] + cor)
 	return (x > 0) ? w[0] : -w[0];
       else
-	return __mpsin1 (orig);
+	return __mpsin (orig, 0, true);
     }
 }
 
@@ -996,7 +994,7 @@
       if (w[0] == w[0] + cor)
 	return (n & 2) ? -w[0] : w[0];
       else
-	return __mpsin1 (orig);
+	return __mpsin (orig, 0, true);
     }
 }
 
@@ -1028,7 +1026,7 @@
       if (w[0] == w[0] + cor)
 	return (x > 0) ? w[0] : -w[0];
       else
-	return (n & 1) ? __mpcos1 (orig) : __mpsin1 (orig);
+	return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
     }
 }
 
@@ -1079,7 +1077,7 @@
       if (w[0] == w[0] + cor)
 	return (x > 0) ? w[0] : -w[0];
       else
-	return (n & 1) ? __mpcos1 (orig) : __mpsin1 (orig);
+	return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
     }
 }
 
@@ -1131,7 +1129,7 @@
       if (w[0] == w[0] + cor)
 	return (n & 2) ? -w[0] : w[0];
       else
-	return (n & 1) ? __mpsin1 (orig) : __mpcos1 (orig);
+	return (n & 1) ? __mpsin (orig, 0, true) : __mpcos (orig, 0, true);
     }
 }
 
@@ -1173,7 +1171,7 @@
       if (w[0] == w[0] + 1.000000005 * w[1])
 	return w[0];
       else
-	return __mpcos (x, 0);
+	return __mpcos (x, 0, false);
     }
 }
 
@@ -1246,7 +1244,7 @@
 	  if (w[0] == w[0] + cor)
 	    return (a > 0) ? w[0] : -w[0];
 	  else
-	    return __mpcos1 (orig);
+	    return __mpcos (orig, 0, true);
 	}
     }
 }
@@ -1301,7 +1299,7 @@
       if (w[0] == w[0] + cor)
 	return (x > 0) ? w[0] : -w[0];
       else
-	return __mpcos1 (orig);
+	return __mpcos (orig, 0, true);
     }
 }
 
@@ -1357,7 +1355,7 @@
       if (w[0] == w[0] + cor)
 	return (n) ? -w[0] : w[0];
       else
-	return __mpcos1 (orig);
+	return __mpcos (orig, 0, true);
     }
 }
 

Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/sincos32.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/sincos32.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/sincos32.c Wed Oct  9 00:02:12 2013
@@ -187,50 +187,119 @@
     return (res < res1) ? res : res1;
 }
 
-/* Compute sin(x+dx) as Multi Precision number and return result as double.  */
+/* Compute sin() of double-length number (X + DX) as Multi Precision number and
+   return result as double.  If REDUCE_RANGE is true, X is assumed to be the
+   original input and DX is ignored.  */
 double
 SECTION
-__mpsin (double x, double dx)
-{
-  int p;
+__mpsin (double x, double dx, bool reduce_range)
+{
   double y;
-  mp_no a, b, c;
-  p = 32;
-  __dbl_mp (x, &a, p);
-  __dbl_mp (dx, &b, p);
-  __add (&a, &b, &c, p);
-  if (x > 0.8)
-    {
-      __sub (&hp, &c, &a, p);
-      __c32 (&a, &b, &c, p);
-    }
-  else
-    __c32 (&c, &a, &b, p);	/* b = sin(x+dx)  */
-  __mp_dbl (&b, &y, p);
+  mp_no a, b, c, s;
+  int n;
+  int p = 32;
+
+  if (reduce_range)
+    {
+      n = __mpranred (x, &a, p);	/* n is 0, 1, 2 or 3.  */
+      __c32 (&a, &c, &s, p);
+    }
+  else
+    {
+      n = -1;
+      __dbl_mp (x, &b, p);
+      __dbl_mp (dx, &c, p);
+      __add (&b, &c, &a, p);
+      if (x > 0.8)
+        {
+          __sub (&hp, &a, &b, p);
+          __c32 (&b, &s, &c, p);
+        }
+      else
+        __c32 (&a, &c, &s, p);	/* b = sin(x+dx)  */
+    }
+
+  /* Convert result based on which quarter of unit circle y is in.  */
+  switch (n)
+    {
+    case 1:
+      __mp_dbl (&c, &y, p);
+      break;
+
+    case 3:
+      __mp_dbl (&c, &y, p);
+      y = -y;
+      break;
+
+    case 2:
+      __mp_dbl (&s, &y, p);
+      y = -y;
+      break;
+
+    /* Quadrant not set, so the result must be sin (X + DX), which is also in
+       S.  */
+    case 0:
+    default:
+      __mp_dbl (&s, &y, p);
+    }
   return y;
 }
 
-/* Compute cos() of double-length number (x+dx) as Multi Precision number and
-   return result as double.  */
+/* Compute cos() of double-length number (X + DX) as Multi Precision number and
+   return result as double.  If REDUCE_RANGE is true, X is assumed to be the
+   original input and DX is ignored.  */
 double
 SECTION
-__mpcos (double x, double dx)
-{
-  int p;
+__mpcos (double x, double dx, bool reduce_range)
+{
   double y;
-  mp_no a, b, c;
-  p = 32;
-  __dbl_mp (x, &a, p);
-  __dbl_mp (dx, &b, p);
-  __add (&a, &b, &c, p);
-  if (x > 0.8)
-    {
-      __sub (&hp, &c, &b, p);
-      __c32 (&b, &c, &a, p);
-    }
-  else
-    __c32 (&c, &a, &b, p);	/* a = cos(x+dx)     */
-  __mp_dbl (&a, &y, p);
+  mp_no a, b, c, s;
+  int n;
+  int p = 32;
+
+  if (reduce_range)
+    {
+      n = __mpranred (x, &a, p);	/* n is 0, 1, 2 or 3.  */
+      __c32 (&a, &c, &s, p);
+    }
+  else
+    {
+      n = -1;
+      __dbl_mp (x, &b, p);
+      __dbl_mp (dx, &c, p);
+      __add (&b, &c, &a, p);
+      if (x > 0.8)
+        {
+          __sub (&hp, &a, &b, p);
+          __c32 (&b, &s, &c, p);
+        }
+      else
+        __c32 (&a, &c, &s, p);	/* a = cos(x+dx)     */
+    }
+
+  /* Convert result based on which quarter of unit circle y is in.  */
+  switch (n)
+    {
+    case 1:
+      __mp_dbl (&s, &y, p);
+      y = -y;
+      break;
+
+    case 3:
+      __mp_dbl (&s, &y, p);
+      break;
+
+    case 2:
+      __mp_dbl (&c, &y, p);
+      y = -y;
+      break;
+
+    /* Quadrant not set, so the result must be cos (X + DX), which is also
+       stored in C.  */
+    case 0:
+    default:
+      __mp_dbl (&c, &y, p);
+    }
   return y;
 }
 
@@ -294,84 +363,3 @@
       return (n & 3);
     }
 }
-
-/* Multi-Precision sin() function subroutine, for p = 32.  It is based on the
-   routines mpranred() and c32().  */
-double
-SECTION
-__mpsin1 (double x)
-{
-  int p;
-  int n;
-  mp_no u, s, c;
-  double y;
-  p = 32;
-  n = __mpranred (x, &u, p);	/* n is 0, 1, 2 or 3.  */
-  __c32 (&u, &c, &s, p);
-  /* Convert result based on which quarter of unit circle y is in.  */
-  switch (n)
-    {
-    case 0:
-      __mp_dbl (&s, &y, p);
-      return y;
-      break;
-
-    case 2:
-      __mp_dbl (&s, &y, p);
-      return -y;
-      break;
-
-    case 1:
-      __mp_dbl (&c, &y, p);
-      return y;
-      break;
-
-    case 3:
-      __mp_dbl (&c, &y, p);
-      return -y;
-      break;
-    }
-  /* Unreachable, to make the compiler happy.  */
-  return 0;
-}
-
-/* Multi-Precision cos() function subroutine, for p = 32.  It is based on the
-   routines mpranred() and c32().  */
-double
-SECTION
-__mpcos1 (double x)
-{
-  int p;
-  int n;
-  mp_no u, s, c;
-  double y;
-
-  p = 32;
-  n = __mpranred (x, &u, p);	/* n is 0, 1, 2 or 3.  */
-  __c32 (&u, &c, &s, p);
-  /* Convert result based on which quarter of unit circle y is in.  */
-  switch (n)
-    {
-    case 0:
-      __mp_dbl (&c, &y, p);
-      return y;
-      break;
-
-    case 2:
-      __mp_dbl (&c, &y, p);
-      return -y;
-      break;
-
-    case 1:
-      __mp_dbl (&s, &y, p);
-      return -y;
-      break;
-
-    case 3:
-      __mp_dbl (&s, &y, p);
-      return y;
-      break;
-    }
-  /* Unreachable, to make the compiler happy.  */
-  return 0;
-}

Modified: fsf/trunk/libc/sysdeps/x86_64/memset.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/memset.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/memset.S Wed Oct  9 00:02:12 2013
@@ -18,10 +18,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
 
 	.text
 #if !defined NOT_IN_libc
@@ -71,12 +67,12 @@
 L(return):
 	rep
 	ret
-	ALIGN (4)
+	.p2align 4
 L(between_32_64_bytes):
 	movdqu	%xmm8, 16(%rdi)
 	movdqu	%xmm8, -32(%rdi,%rdx)
 	ret
-	ALIGN (4)
+	.p2align 4
 L(loop_start):
 	leaq	64(%rdi), %rcx
 	movdqu	%xmm8, (%rdi)
@@ -92,7 +88,7 @@
 	andq	$-64, %rdx
 	cmpq	%rdx, %rcx
 	je	L(return)
-	ALIGN (4)
+	.p2align 4
 L(loop):
 	movdqa	%xmm8, (%rcx)
 	movdqa	%xmm8, 16(%rcx)

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S Wed Oct  9 00:02:12 2013
@@ -23,10 +23,6 @@
 
 # ifndef MEMCMP
 #  define MEMCMP	__memcmp_sse4_1
-# endif
-
-# ifndef ALIGN
-#  define ALIGN(n)	.p2align n
 # endif
 
 # define JMPTBL(I, B)	(I - B)
@@ -60,7 +56,7 @@
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
 # ifndef USE_AS_WMEMCMP
-	ALIGN (4)
+	.p2align 4
 L(firstbyte):
 	movzbl	(%rdi), %eax
 	movzbl	(%rsi), %ecx
@@ -68,7 +64,7 @@
 	ret
 # endif
 
-	ALIGN (4)
+	.p2align 4
 L(79bytesormore):
 	movdqu	(%rsi), %xmm1
 	movdqu	(%rdi), %xmm2
@@ -316,7 +312,7 @@
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(512bytesormore):
 # ifdef DATA_CACHE_SIZE_HALF
 	mov	$DATA_CACHE_SIZE_HALF, %R8_LP
@@ -329,7 +325,7 @@
 	cmp	%r8, %rdx
 	ja	L(L2_L3_cache_unaglined)
 	sub	$64, %rdx
-	ALIGN (4)
+	.p2align 4
 L(64bytesormore_loop):
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
@@ -361,7 +357,7 @@
 
 L(L2_L3_cache_unaglined):
 	sub	$64, %rdx
-	ALIGN (4)
+	.p2align 4
 L(L2_L3_unaligned_128bytes_loop):
 	prefetchnta 0x1c0(%rdi)
 	prefetchnta 0x1c0(%rsi)
@@ -396,7 +392,7 @@
 /*
  * This case is for machines which are sensitive for unaligned instructions.
  */
-	ALIGN (4)
+	.p2align 4
 L(2aligned):
 	cmp	$128, %rdx
 	ja	L(128bytesormorein2aligned)
@@ -444,7 +440,7 @@
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(128bytesormorein2aligned):
 	cmp	$512, %rdx
 	ja	L(512bytesormorein2aligned)
@@ -519,7 +515,7 @@
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(256bytesormorein2aligned):
 
 	sub	$256, %rdx
@@ -632,7 +628,7 @@
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(512bytesormorein2aligned):
 # ifdef DATA_CACHE_SIZE_HALF
 	mov	$DATA_CACHE_SIZE_HALF, %R8_LP
@@ -646,7 +642,7 @@
 	ja	L(L2_L3_cache_aglined)
 
 	sub	$64, %rdx
-	ALIGN (4)
+	.p2align 4
 L(64bytesormore_loopin2aligned):
 	movdqa	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
@@ -678,7 +674,7 @@
 L(L2_L3_cache_aglined):
 	sub	$64, %rdx
 
-	ALIGN (4)
+	.p2align 4
 L(L2_L3_aligned_128bytes_loop):
 	prefetchnta 0x1c0(%rdi)
 	prefetchnta 0x1c0(%rsi)
@@ -711,7 +707,7 @@
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
 
-	ALIGN (4)
+	.p2align 4
 L(64bytesormore_loop_end):
 	add	$16, %rdi
 	add	$16, %rsi
@@ -806,7 +802,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(12bytes):
 	mov	-12(%rdi), %rax
 	mov	-12(%rsi), %rcx
@@ -827,7 +823,7 @@
 
 # ifndef USE_AS_WMEMCMP
 /* unreal case for wmemcmp */
-	ALIGN (4)
+	.p2align 4
 L(65bytes):
 	movdqu	-65(%rdi), %xmm1
 	movdqu	-65(%rsi), %xmm2
@@ -864,7 +860,7 @@
 	sub	%edx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(13bytes):
 	mov	-13(%rdi), %rax
 	mov	-13(%rsi), %rcx
@@ -877,7 +873,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(5bytes):
 	mov	-5(%rdi), %eax
 	mov	-5(%rsi), %ecx
@@ -888,7 +884,7 @@
 	sub	%edx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(66bytes):
 	movdqu	-66(%rdi), %xmm1
 	movdqu	-66(%rsi), %xmm2
@@ -929,7 +925,7 @@
 	sub	%ecx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(14bytes):
 	mov	-14(%rdi), %rax
 	mov	-14(%rsi), %rcx
@@ -942,7 +938,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(6bytes):
 	mov	-6(%rdi), %eax
 	mov	-6(%rsi), %ecx
@@ -958,7 +954,7 @@
 	sub	%ecx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(67bytes):
 	movdqu	-67(%rdi), %xmm2
 	movdqu	-67(%rsi), %xmm1
@@ -997,7 +993,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(15bytes):
 	mov	-15(%rdi), %rax
 	mov	-15(%rsi), %rcx
@@ -1010,7 +1006,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(7bytes):
 	mov	-7(%rdi), %eax
 	mov	-7(%rsi), %ecx
@@ -1023,7 +1019,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(3bytes):
 	movzwl	-3(%rdi), %eax
 	movzwl	-3(%rsi), %ecx
@@ -1036,7 +1032,7 @@
 	ret
 # endif
 
-	ALIGN (4)
+	.p2align 4
 L(68bytes):
 	movdqu	-68(%rdi), %xmm2
 	movdqu	-68(%rsi), %xmm1
@@ -1079,7 +1075,7 @@
 
 # ifndef USE_AS_WMEMCMP
 /* unreal cases for wmemcmp */
-	ALIGN (4)
+	.p2align 4
 L(69bytes):
 	movdqu	-69(%rsi), %xmm1
 	movdqu	-69(%rdi), %xmm2
@@ -1115,7 +1111,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(70bytes):
 	movdqu	-70(%rsi), %xmm1
 	movdqu	-70(%rdi), %xmm2
@@ -1151,7 +1147,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(71bytes):
 	movdqu	-71(%rsi), %xmm1
 	movdqu	-71(%rdi), %xmm2
@@ -1188,7 +1184,7 @@
 	ret
 # endif
 
-	ALIGN (4)
+	.p2align 4
 L(72bytes):
 	movdqu	-72(%rsi), %xmm1
 	movdqu	-72(%rdi), %xmm2
@@ -1227,7 +1223,7 @@
 
 # ifndef USE_AS_WMEMCMP
 /* unreal cases for wmemcmp */
-	ALIGN (4)
+	.p2align 4
 L(73bytes):
 	movdqu	-73(%rsi), %xmm1
 	movdqu	-73(%rdi), %xmm2
@@ -1265,7 +1261,7 @@
 	sub	%ecx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(74bytes):
 	movdqu	-74(%rsi), %xmm1
 	movdqu	-74(%rdi), %xmm2
@@ -1302,7 +1298,7 @@
 	movzwl	-2(%rsi), %ecx
 	jmp	L(diffin2bytes)
 
-	ALIGN (4)
+	.p2align 4
 L(75bytes):
 	movdqu	-75(%rsi), %xmm1
 	movdqu	-75(%rdi), %xmm2
@@ -1342,7 +1338,7 @@
 	xor	%eax, %eax
 	ret
 # endif
-	ALIGN (4)
+	.p2align 4
 L(76bytes):
 	movdqu	-76(%rsi), %xmm1
 	movdqu	-76(%rdi), %xmm2
@@ -1388,7 +1384,7 @@
 
 # ifndef USE_AS_WMEMCMP
 /* unreal cases for wmemcmp */
-	ALIGN (4)
+	.p2align 4
 L(77bytes):
 	movdqu	-77(%rsi), %xmm1
 	movdqu	-77(%rdi), %xmm2
@@ -1430,7 +1426,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(78bytes):
 	movdqu	-78(%rsi), %xmm1
 	movdqu	-78(%rdi), %xmm2
@@ -1470,7 +1466,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(79bytes):
 	movdqu	-79(%rsi), %xmm1
 	movdqu	-79(%rdi), %xmm2
@@ -1510,7 +1506,7 @@
 	xor	%eax, %eax
 	ret
 # endif
-	ALIGN (4)
+	.p2align 4
 L(64bytes):
 	movdqu	-64(%rdi), %xmm2
 	movdqu	-64(%rsi), %xmm1
@@ -1548,7 +1544,7 @@
 /*
  * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block.
  */
-	ALIGN (3)
+	.p2align 3
 L(less16bytes):
 	movsbq	%dl, %rdx
 	mov	(%rsi, %rdx), %rcx
@@ -1585,7 +1581,7 @@
 	sub	%ecx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(end):
 	and	$0xff, %eax
 	and	$0xff, %ecx
@@ -1599,7 +1595,7 @@
 	neg	%eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(nequal_bigger):
 	ret
 
@@ -1611,7 +1607,7 @@
 END (MEMCMP)
 
 	.section .rodata.sse4.1,"a",@progbits
-	ALIGN (3)
+	.p2align 3
 # ifndef USE_AS_WMEMCMP
 L(table_64bytes):
 	.int	JMPTBL (L(0bytes), L(table_64bytes))

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S Wed Oct  9 00:02:12 2013
@@ -23,10 +23,6 @@
 
 # ifndef MEMCMP
 #  define MEMCMP	__memcmp_ssse3
-# endif
-
-# ifndef ALIGN
-#  define ALIGN(n)	.p2align n
 # endif
 
 /* Warning!
@@ -50,7 +46,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 /* ECX >= 32.  */
 L(48bytesormore):
 	movdqu	(%rdi), %xmm3
@@ -90,7 +86,7 @@
 	je	L(shr_6)
 	jmp	L(shr_7)
 
-	ALIGN	(2)
+	.p2align 2
 L(next_unaligned_table):
 	cmp	$8, %edx
 	je	L(shr_8)
@@ -117,7 +113,7 @@
 	jmp	L(shr_12)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_0):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -137,7 +133,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_0_gobble):
 	movdqa	(%rsi), %xmm0
 	xor	%eax, %eax
@@ -180,7 +176,7 @@
 
 # ifndef USE_AS_WMEMCMP
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_1):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -207,7 +203,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_1_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -258,7 +254,7 @@
 	jmp	L(less48bytes)
 
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_2):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -285,7 +281,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_2_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -335,7 +331,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_3):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -362,7 +358,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_3_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -414,7 +410,7 @@
 
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_4):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -441,7 +437,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_4_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -493,7 +489,7 @@
 
 # ifndef USE_AS_WMEMCMP
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_5):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -520,7 +516,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_5_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -570,7 +566,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_6):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -597,7 +593,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_6_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -647,7 +643,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_7):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -674,7 +670,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_7_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -726,7 +722,7 @@
 
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_8):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -753,7 +749,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_8_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -805,7 +801,7 @@
 
 # ifndef USE_AS_WMEMCMP
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_9):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -832,7 +828,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_9_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -882,7 +878,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_10):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -909,7 +905,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_10_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -959,7 +955,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_11):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -986,7 +982,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_11_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1038,7 +1034,7 @@
 
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_12):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -1065,7 +1061,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_12_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1117,7 +1113,7 @@
 
 # ifndef USE_AS_WMEMCMP
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_13):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -1144,7 +1140,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_13_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1194,7 +1190,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_14):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -1221,7 +1217,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_14_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1271,7 +1267,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_15):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -1298,7 +1294,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_15_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1348,7 +1344,7 @@
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 # endif
-	ALIGN	(4)
+	.p2align 4
 L(exit):
 	pmovmskb %xmm1, %r8d
 	sub	$0xffff, %r8d
@@ -1389,56 +1385,56 @@
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte16):
 	movzbl	-16(%rdi), %eax
 	movzbl	-16(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte17):
 	movzbl	-15(%rdi), %eax
 	movzbl	-15(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte18):
 	movzbl	-14(%rdi), %eax
 	movzbl	-14(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte19):
 	movzbl	-13(%rdi), %eax
 	movzbl	-13(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte20):
 	movzbl	-12(%rdi), %eax
 	movzbl	-12(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte21):
 	movzbl	-11(%rdi), %eax
 	movzbl	-11(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte22):
 	movzbl	-10(%rdi), %eax
 	movzbl	-10(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(next_24_bytes):
 	lea	8(%rdi), %rdi
 	lea	8(%rsi), %rsi
@@ -1479,14 +1475,14 @@
 	jne	L(find_diff)
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(second_double_word):
 	mov	-12(%rdi), %eax
 	cmp	-12(%rsi), %eax
 	jne	L(find_diff)
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(next_two_double_words):
 	and	$15, %dh
 	jz	L(fourth_double_word)
@@ -1495,7 +1491,7 @@
 	jne	L(find_diff)
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(fourth_double_word):
 	mov	-4(%rdi), %eax
 	cmp	-4(%rsi), %eax
@@ -1503,7 +1499,7 @@
 	ret
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(less48bytes):
 	cmp	$8, %ecx
 	jae	L(more8bytes)
@@ -1527,7 +1523,7 @@
 	jmp	L(4bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more8bytes):
 	cmp	$16, %ecx
 	jae	L(more16bytes)
@@ -1551,7 +1547,7 @@
 	jmp	L(12bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more16bytes):
 	cmp	$24, %ecx
 	jae	L(more24bytes)
@@ -1575,7 +1571,7 @@
 	jmp	L(20bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more24bytes):
 	cmp	$32, %ecx
 	jae	L(more32bytes)
@@ -1599,7 +1595,7 @@
 	jmp	L(28bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more32bytes):
 	cmp	$40, %ecx
 	jae	L(more40bytes)
@@ -1623,7 +1619,7 @@
 	jmp	L(36bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more40bytes):
 	cmp	$40, %ecx
 	je	L(40bytes)
@@ -1642,7 +1638,7 @@
 	je	L(46bytes)
 	jmp	L(47bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(44bytes):
 	movl	-44(%rdi), %eax
 	movl	-44(%rsi), %ecx
@@ -1702,7 +1698,7 @@
 	xor	%eax, %eax
 	ret
 # else
-	ALIGN	(4)
+	.p2align 4
 L(44bytes):
 	movl	-44(%rdi), %eax
 	cmp	-44(%rsi), %eax
@@ -1753,7 +1749,7 @@
 # endif
 
 # ifndef USE_AS_WMEMCMP
-	ALIGN	(4)
+	.p2align 4
 L(45bytes):
 	movl	-45(%rdi), %eax
 	movl	-45(%rsi), %ecx
@@ -1816,7 +1812,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(46bytes):
 	movl	-46(%rdi), %eax
 	movl	-46(%rsi), %ecx
@@ -1882,7 +1878,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(47bytes):
 	movl	-47(%rdi), %eax
 	movl	-47(%rsi), %ecx
@@ -1951,7 +1947,7 @@
 	xor	%eax, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(find_diff):
 	cmpb	%cl, %al
 	jne	L(set)
@@ -1973,19 +1969,19 @@
 # else
 
 /* for wmemcmp */
-	ALIGN	(4)
+	.p2align 4
 L(find_diff):
 	mov	$1, %eax
 	jg	L(find_diff_bigger)
 	neg	%eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(find_diff_bigger):
 	ret
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(equal):
 	xor	%eax, %eax
 	ret

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S Wed Oct  9 00:02:12 2013
@@ -20,10 +20,6 @@
 
 #include "asm-syntax.h"
 
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
-#endif
-
 
 ENTRY(__memcpy_sse2_unaligned)
 	movq	%rsi, %rax
@@ -44,7 +40,7 @@
 	movq	%rdi, %rax
 	ret
 	.p2align 4,,10
-	ALIGN(4)
+	.p2align 4
 .L31:
 	movdqu	16(%rsi), %xmm8
 	cmpq	$64, %rdx
@@ -77,7 +73,7 @@
 	leaq	32(%r10), %r8
 	leaq	48(%r10), %rax
 	.p2align 4,,10
-	ALIGN(4)
+	.p2align 4
 L(loop):
 	movdqu	(%rcx,%r10), %xmm8
 	movdqa	%xmm8, (%rcx)
@@ -151,7 +147,7 @@
 .L3:
 	leaq	-1(%rdx), %rax
 	.p2align 4,,10
-	ALIGN(4)
+	.p2align 4
 .L11:
 	movzbl	(%rsi,%rax), %edx
 	movb	%dl, (%rdi,%rax)

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S Wed Oct  9 00:02:12 2013
@@ -29,10 +29,6 @@
 #ifndef MEMCPY
 # define MEMCPY		__memcpy_ssse3_back
 # define MEMCPY_CHK	__memcpy_chk_ssse3_back
-#endif
-
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
 #endif
 
 #define JMPTBL(I, B)	I - B
@@ -87,7 +83,7 @@
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 #endif
 
-	ALIGN (4)
+	.p2align 4
 L(144bytesormore):
 
 #ifndef USE_AS_MEMMOVE
@@ -119,7 +115,7 @@
 	jmp	*%r9
 	ud2
 
-	ALIGN (4)
+	.p2align 4
 L(copy_backward):
 #ifdef DATA_CACHE_SIZE
 	mov	$DATA_CACHE_SIZE, %RCX_LP
@@ -149,7 +145,7 @@
 	jmp	*%r9
 	ud2
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0):
 
 	mov	%rdx, %r9
@@ -162,7 +158,7 @@
 #endif
 	jae	L(gobble_mem_fwd)
 	sub	$0x80, %rdx
-	ALIGN (4)
+	.p2align 4
 L(shl_0_loop):
 	movdqa	(%rsi), %xmm1
 	movdqa	%xmm1, (%rdi)
@@ -190,7 +186,7 @@
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_bwd):
 	sub	$0x80, %rdx
 L(copy_backward_loop):
@@ -221,7 +217,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_1):
 	sub	$0x80, %rdx
 	movaps	-0x01(%rsi), %xmm1
@@ -258,7 +254,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_1_bwd):
 	movaps	-0x01(%rsi), %xmm1
 
@@ -304,7 +300,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_2):
 	sub	$0x80, %rdx
 	movaps	-0x02(%rsi), %xmm1
@@ -341,7 +337,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_2_bwd):
 	movaps	-0x02(%rsi), %xmm1
 
@@ -387,7 +383,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_3):
 	sub	$0x80, %rdx
 	movaps -0x03(%rsi), %xmm1
@@ -424,7 +420,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_3_bwd):
 	movaps	-0x03(%rsi), %xmm1
 
@@ -470,7 +466,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_4):
 	sub	$0x80, %rdx
 	movaps	-0x04(%rsi), %xmm1
@@ -507,7 +503,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_4_bwd):
 	movaps	-0x04(%rsi), %xmm1
 
@@ -553,7 +549,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_5):
 	sub	$0x80, %rdx
 	movaps	-0x05(%rsi), %xmm1
@@ -590,7 +586,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_5_bwd):
 	movaps	-0x05(%rsi), %xmm1
 
@@ -636,7 +632,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_6):
 	sub	$0x80, %rdx
 	movaps	-0x06(%rsi), %xmm1
@@ -673,7 +669,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_6_bwd):
 	movaps	-0x06(%rsi), %xmm1
 
@@ -719,7 +715,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_7):
 	sub	$0x80, %rdx
 	movaps	-0x07(%rsi), %xmm1
@@ -756,7 +752,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_7_bwd):
 	movaps	-0x07(%rsi), %xmm1
 
@@ -802,7 +798,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_8):
 	sub	$0x80, %rdx
 	movaps	-0x08(%rsi), %xmm1
@@ -839,7 +835,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_8_bwd):
 	movaps	-0x08(%rsi), %xmm1
 
@@ -886,7 +882,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_9):
 	sub	$0x80, %rdx
 	movaps	-0x09(%rsi), %xmm1
@@ -923,7 +919,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_9_bwd):
 	movaps	-0x09(%rsi), %xmm1
 
@@ -969,7 +965,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_10):
 	sub	$0x80, %rdx
 	movaps	-0x0a(%rsi), %xmm1
@@ -1006,7 +1002,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_10_bwd):
 	movaps	-0x0a(%rsi), %xmm1
 
@@ -1052,7 +1048,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_11):
 	sub	$0x80, %rdx
 	movaps	-0x0b(%rsi), %xmm1
@@ -1089,7 +1085,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_11_bwd):
 	movaps	-0x0b(%rsi), %xmm1
 
@@ -1135,7 +1131,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_12):
 	sub	$0x80, %rdx
 	movdqa	-0x0c(%rsi), %xmm1
@@ -1173,7 +1169,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_12_bwd):
 	movaps	-0x0c(%rsi), %xmm1
 
@@ -1219,7 +1215,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_13):
 	sub	$0x80, %rdx
 	movaps	-0x0d(%rsi), %xmm1
@@ -1256,7 +1252,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_13_bwd):
 	movaps	-0x0d(%rsi), %xmm1
 
@@ -1302,7 +1298,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_14):
 	sub	$0x80, %rdx
 	movaps	-0x0e(%rsi), %xmm1
@@ -1339,7 +1335,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_14_bwd):
 	movaps	-0x0e(%rsi), %xmm1
 
@@ -1385,7 +1381,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_15):
 	sub	$0x80, %rdx
 	movaps	-0x0f(%rsi), %xmm1
@@ -1422,7 +1418,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_15_bwd):
 	movaps	-0x0f(%rsi), %xmm1
 
@@ -1468,7 +1464,7 @@
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(gobble_mem_fwd):
 	movdqu	(%rsi), %xmm1
 	movdqu	%xmm0, (%r8)
@@ -1570,7 +1566,7 @@
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(gobble_mem_bwd):
 	add	%rdx, %rsi
 	add	%rdx, %rdi
@@ -2833,7 +2829,7 @@
 END (MEMCPY)
 
 	.section .rodata.ssse3,"a",@progbits
-	ALIGN (3)
+	.p2align 3
 L(table_144_bytes_bwd):
 	.int	JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
 	.int	JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
@@ -2980,7 +2976,7 @@
 	.int	JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
 	.int	JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
 
-	ALIGN (3)
+	.p2align 3
 L(table_144_bytes_fwd):
 	.int	JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
 	.int	JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
@@ -3127,7 +3123,7 @@
 	.int	JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
 	.int	JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
 
-	ALIGN (3)
+	.p2align 3
 L(shl_table_fwd):
 	.int	JMPTBL (L(shl_0), L(shl_table_fwd))
 	.int	JMPTBL (L(shl_1), L(shl_table_fwd))
@@ -3146,7 +3142,7 @@
 	.int	JMPTBL (L(shl_14), L(shl_table_fwd))
 	.int	JMPTBL (L(shl_15), L(shl_table_fwd))
 
-	ALIGN (3)
+	.p2align 3
 L(shl_table_bwd):
 	.int	JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
 	.int	JMPTBL (L(shl_1_bwd), L(shl_table_bwd))

Modified: fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
==============================================================================
--- fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S (original)
+++ fsf/trunk/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S Wed Oct  9 00:02:12 2013
@@ -29,10 +29,6 @@
 #ifndef MEMCPY
 # define MEMCPY		__memcpy_ssse3
 # define MEMCPY_CHK	__memcpy_chk_ssse3
-#endif
-
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
 #endif
 
 #define JMPTBL(I, B)	I - B
@@ -80,7 +76,7 @@
 	jmp	*%r9
 	ud2
 
-	ALIGN (4)
+	.p2align 4
 L(80bytesormore):
 #ifndef USE_AS_MEMMOVE
 	cmp	%dil, %sil
@@ -113,7 +109,7 @@
 #endif
 	BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %r9, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(copy_backward):
 	movdqu	-16(%rsi, %rdx), %xmm0
 	add	%rdx, %rsi
@@ -144,7 +140,7 @@
 #endif
 	BRANCH_TO_JMPTBL_ENTRY (L(shl_table_bwd), %r9, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0):
 	sub	$16, %rdx
 	movdqa	(%rsi), %xmm1
@@ -172,7 +168,7 @@
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_gobble):
 #ifdef DATA_CACHE_SIZE_HALF
 	cmp	$DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -228,7 +224,7 @@
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_gobble_mem_loop):
 	prefetcht0 0x1c0(%rsi)
 	prefetcht0 0x280(%rsi)
@@ -287,7 +283,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_bwd):
 	sub	$16, %rdx
 	movdqa	-0x10(%rsi), %xmm1
@@ -313,7 +309,7 @@
 L(shl_0_less_64bytes_bwd):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_gobble_bwd):
 #ifdef DATA_CACHE_SIZE_HALF
 	cmp	$DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -367,7 +363,7 @@
 L(shl_0_gobble_bwd_less_64bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_gobble_mem_bwd_loop):
 	prefetcht0 -0x1c0(%rsi)
 	prefetcht0 -0x280(%rsi)
@@ -423,7 +419,7 @@
 L(shl_0_mem_bwd_less_32bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_1):
 	lea	(L(shl_1_loop_L1)-L(shl_1))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -466,7 +462,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_1_bwd):
 	lea	(L(shl_1_bwd_loop_L1)-L(shl_1_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -508,7 +504,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_2):
 	lea	(L(shl_2_loop_L1)-L(shl_2))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -551,7 +547,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_2_bwd):
 	lea	(L(shl_2_bwd_loop_L1)-L(shl_2_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -593,7 +589,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_3):
 	lea	(L(shl_3_loop_L1)-L(shl_3))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -636,7 +632,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_3_bwd):
 	lea	(L(shl_3_bwd_loop_L1)-L(shl_3_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -678,7 +674,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_4):
 	lea	(L(shl_4_loop_L1)-L(shl_4))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -721,7 +717,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_4_bwd):
 	lea	(L(shl_4_bwd_loop_L1)-L(shl_4_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -763,7 +759,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_5):
 	lea	(L(shl_5_loop_L1)-L(shl_5))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -806,7 +802,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_5_bwd):
 	lea	(L(shl_5_bwd_loop_L1)-L(shl_5_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -848,7 +844,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_6):
 	lea	(L(shl_6_loop_L1)-L(shl_6))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -891,7 +887,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_6_bwd):
 	lea	(L(shl_6_bwd_loop_L1)-L(shl_6_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -933,7 +929,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_7):
 	lea	(L(shl_7_loop_L1)-L(shl_7))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -976,7 +972,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_7_bwd):
 	lea	(L(shl_7_bwd_loop_L1)-L(shl_7_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1018,7 +1014,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_8):
 	lea	(L(shl_8_loop_L1)-L(shl_8))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1051,7 +1047,7 @@
 	movaps	%xmm5, -0x10(%rdi)
 	jmp	*%r9
 	ud2
-	ALIGN (4)
+	.p2align 4
 L(shl_8_end):
 	lea	64(%rdx), %rdx
 	movaps	%xmm4, -0x20(%rdi)
@@ -1061,7 +1057,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_8_bwd):
 	lea	(L(shl_8_bwd_loop_L1)-L(shl_8_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1103,7 +1099,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_9):
 	lea	(L(shl_9_loop_L1)-L(shl_9))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1146,7 +1142,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_9_bwd):
 	lea	(L(shl_9_bwd_loop_L1)-L(shl_9_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1188,7 +1184,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_10):
 	lea	(L(shl_10_loop_L1)-L(shl_10))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1231,7 +1227,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_10_bwd):
 	lea	(L(shl_10_bwd_loop_L1)-L(shl_10_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1273,7 +1269,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_11):
 	lea	(L(shl_11_loop_L1)-L(shl_11))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1316,7 +1312,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_11_bwd):
 	lea	(L(shl_11_bwd_loop_L1)-L(shl_11_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1358,7 +1354,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_12):
 	lea	(L(shl_12_loop_L1)-L(shl_12))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1401,7 +1397,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_12_bwd):
 	lea	(L(shl_12_bwd_loop_L1)-L(shl_12_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1443,7 +1439,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_13):
 	lea	(L(shl_13_loop_L1)-L(shl_13))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1486,7 +1482,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_13_bwd):
 	lea	(L(shl_13_bwd_loop_L1)-L(shl_13_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1528,7 +1524,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_14):
 	lea	(L(shl_14_loop_L1)-L(shl_14))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1571,7 +1567,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_14_bwd):
 	lea	(L(shl_14_bwd_loop_L1)-L(shl_14_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1613,7 +1609,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_15):
 	lea	(L(shl_15_loop_L1)-L(shl_15))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1656,7 +1652,7 @@
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_15_bwd):
 	lea	(L(shl_15_bwd_loop_L1)-L(shl_15_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1698,7 +1694,7 @@
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(write_72bytes):
 	movdqu	-72(%rsi), %xmm0
 	movdqu	-56(%rsi), %xmm1
@@ -1716,7 +1712,7 @@
 	mov	 %rcx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_64bytes):
 	movdqu	-64(%rsi), %xmm0
 	mov	-48(%rsi), %rcx
@@ -1734,7 +1730,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_56bytes):
 	movdqu	-56(%rsi), %xmm0
 	mov	-40(%rsi), %r8
@@ -1750,7 +1746,7 @@
 	mov	 %rcx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_48bytes):
 	mov	-48(%rsi), %rcx
 	mov	-40(%rsi), %r8
@@ -1766,7 +1762,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_40bytes):
 	mov	-40(%rsi), %r8
 	mov	-32(%rsi), %r9
@@ -1780,7 +1776,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_32bytes):
 	mov	-32(%rsi), %r9
 	mov	-24(%rsi), %r10
@@ -1792,7 +1788,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_24bytes):
 	mov	-24(%rsi), %r10
 	mov	-16(%rsi), %r11
@@ -1802,7 +1798,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_16bytes):
 	mov	-16(%rsi), %r11
 	mov	-8(%rsi), %rdx
@@ -1810,14 +1806,14 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_8bytes):
 	mov	-8(%rsi), %rdx
 	mov	 %rdx, -8(%rdi)
 L(write_0bytes):
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_73bytes):
 	movdqu	-73(%rsi), %xmm0
 	movdqu	-57(%rsi), %xmm1
@@ -1837,7 +1833,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_65bytes):
 	movdqu	-65(%rsi), %xmm0
 	movdqu	-49(%rsi), %xmm1
@@ -1855,7 +1851,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_57bytes):
 	movdqu	-57(%rsi), %xmm0
 	mov	-41(%rsi), %r8
@@ -1873,7 +1869,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_49bytes):
 	movdqu	-49(%rsi), %xmm0
 	mov	-33(%rsi), %r9
@@ -1889,7 +1885,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_41bytes):
 	mov	-41(%rsi), %r8
 	mov	-33(%rsi), %r9
@@ -1905,7 +1901,7 @@
 	mov	 %dl, -1(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_33bytes):
 	mov	-33(%rsi), %r9
 	mov	-25(%rsi), %r10
@@ -1919,7 +1915,7 @@
 	mov	 %dl, -1(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_25bytes):
 	mov	-25(%rsi), %r10
 	mov	-17(%rsi), %r11
@@ -1931,7 +1927,7 @@
 	mov	 %dl, -1(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_17bytes):
 	mov	-17(%rsi), %r11
 	mov	-9(%rsi), %rcx
@@ -1941,7 +1937,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_9bytes):
 	mov	-9(%rsi), %rcx
 	mov	-4(%rsi), %edx
@@ -1949,13 +1945,13 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_1bytes):
 	mov	-1(%rsi), %dl
 	mov	 %dl, -1(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_74bytes):
 	movdqu	-74(%rsi), %xmm0
 	movdqu	-58(%rsi), %xmm1
@@ -1975,7 +1971,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_66bytes):
 	movdqu	-66(%rsi), %xmm0
 	movdqu	-50(%rsi), %xmm1
@@ -1995,7 +1991,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_58bytes):
 	movdqu	-58(%rsi), %xmm1
 	mov	-42(%rsi), %r8
@@ -2013,7 +2009,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_50bytes):
 	movdqu	-50(%rsi), %xmm0
 	mov	-34(%rsi), %r9
@@ -2029,7 +2025,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_42bytes):
 	mov	-42(%rsi), %r8
 	mov	-34(%rsi), %r9
@@ -2045,7 +2041,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_34bytes):
 	mov	-34(%rsi), %r9
 	mov	-26(%rsi), %r10
@@ -2059,7 +2055,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_26bytes):
 	mov	-26(%rsi), %r10
 	mov	-18(%rsi), %r11
@@ -2071,7 +2067,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_18bytes):
 	mov	-18(%rsi), %r11
 	mov	-10(%rsi), %rcx
@@ -2081,7 +2077,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_10bytes):
 	mov	-10(%rsi), %rcx
 	mov	-4(%rsi), %edx
@@ -2089,13 +2085,13 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_2bytes):
 	mov	-2(%rsi), %dx
 	mov	 %dx, -2(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_75bytes):
 	movdqu	-75(%rsi), %xmm0
 	movdqu	-59(%rsi), %xmm1
@@ -2115,7 +2111,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_67bytes):
 	movdqu	-67(%rsi), %xmm0
 	movdqu	-59(%rsi), %xmm1
@@ -2135,7 +2131,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_59bytes):
 	movdqu	-59(%rsi), %xmm0
 	mov	-43(%rsi), %r8
@@ -2153,7 +2149,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_51bytes):
 	movdqu	-51(%rsi), %xmm0
 	mov	-35(%rsi), %r9
@@ -2169,7 +2165,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_43bytes):
 	mov	-43(%rsi), %r8
 	mov	-35(%rsi), %r9
@@ -2185,7 +2181,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_35bytes):
 	mov	-35(%rsi), %r9
 	mov	-27(%rsi), %r10
@@ -2199,7 +2195,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_27bytes):
 	mov	-27(%rsi), %r10
 	mov	-19(%rsi), %r11
@@ -2211,7 +2207,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_19bytes):
 	mov	-19(%rsi), %r11
 	mov	-11(%rsi), %rcx
@@ -2221,7 +2217,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_11bytes):
 	mov	-11(%rsi), %rcx
 	mov	-4(%rsi), %edx
@@ -2229,7 +2225,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_3bytes):
 	mov	-3(%rsi), %dx
 	mov	-2(%rsi), %cx
@@ -2237,7 +2233,7 @@
 	mov	 %cx, -2(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_76bytes):
 	movdqu	-76(%rsi), %xmm0
 	movdqu	-60(%rsi), %xmm1
@@ -2257,7 +2253,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_68bytes):
 	movdqu	-68(%rsi), %xmm0
 	movdqu	-52(%rsi), %xmm1
@@ -2275,7 +2271,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_60bytes):
 	movdqu	-60(%rsi), %xmm0
 	mov	-44(%rsi), %r8
@@ -2293,7 +2289,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_52bytes):
 	movdqu	-52(%rsi), %xmm0
 	mov	-36(%rsi), %r9
@@ -2309,7 +2305,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_44bytes):
 	mov	-44(%rsi), %r8
 	mov	-36(%rsi), %r9
@@ -2325,7 +2321,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_36bytes):
 	mov	-36(%rsi), %r9
 	mov	-28(%rsi), %r10
@@ -2339,7 +2335,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_28bytes):
 	mov	-28(%rsi), %r10
 	mov	-20(%rsi), %r11
@@ -2351,7 +2347,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_20bytes):
 	mov	-20(%rsi), %r11
 	mov	-12(%rsi), %rcx
@@ -2361,7 +2357,7 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_12bytes):
 	mov	-12(%rsi), %rcx
 	mov	-4(%rsi), %edx
@@ -2369,13 +2365,13 @@
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_4bytes):
 	mov	-4(%rsi), %edx
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_77bytes):
 	movdqu	-77(%rsi), %xmm0
 	movdqu	-61(%rsi), %xmm1
@@ -2395,7 +2391,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_69bytes):
 	movdqu	-69(%rsi), %xmm0
 	movdqu	-53(%rsi), %xmm1
@@ -2413,7 +2409,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_61bytes):
 	movdqu	-61(%rsi), %xmm0
 	mov	-45(%rsi), %r8
@@ -2431,7 +2427,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_53bytes):
 	movdqu	-53(%rsi), %xmm0
 	mov	-45(%rsi), %r8
@@ -2448,7 +2444,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_45bytes):
 	mov	-45(%rsi), %r8
 	mov	-37(%rsi), %r9
@@ -2464,7 +2460,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_37bytes):
 	mov	-37(%rsi), %r9
 	mov	-29(%rsi), %r10
@@ -2478,7 +2474,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_29bytes):
 	mov	-29(%rsi), %r10
 	mov	-21(%rsi), %r11
@@ -2490,7 +2486,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_21bytes):
 	mov	-21(%rsi), %r11
 	mov	-13(%rsi), %rcx
@@ -2500,7 +2496,7 @@
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_13bytes):
 	mov	-13(%rsi), %rcx

[... 426 lines stripped ...]
_______________________________________________
Commits mailing list
Commits@xxxxxxxxxx
http://eglibc.org/cgi-bin/mailman/listinfo/commits