[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commits] r21548 - in /fsf/trunk/libc: ./ math/ ports/ ports/sysdeps/tile/ ports/sysdeps/tile/nptl/ ports/sysdeps/tile/tilegx/ sysdeps...
- To: commits@xxxxxxxxxx
- Subject: [Commits] r21548 - in /fsf/trunk/libc: ./ math/ ports/ ports/sysdeps/tile/ ports/sysdeps/tile/nptl/ ports/sysdeps/tile/tilegx/ sysdeps...
- From: eglibc@xxxxxxxxxx
- Date: Tue, 06 Nov 2012 16:19:41 -0000
Author: eglibc
Date: Tue Nov 6 16:19:40 2012
New Revision: 21548
Log:
Import glibc-mainline for 2012-11-06
Added:
fsf/trunk/libc/ports/sysdeps/tile/memcopy.h
fsf/trunk/libc/ports/sysdeps/tile/nptl/pthread_spin_unlock.c
fsf/trunk/libc/ports/sysdeps/tile/tilegx/Makefile
fsf/trunk/libc/ports/sysdeps/tile/wordcopy.c
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/NEWS
fsf/trunk/libc/math/libm-test.inc
fsf/trunk/libc/ports/ChangeLog.tile
fsf/trunk/libc/ports/sysdeps/tile/crti.S
fsf/trunk/libc/ports/sysdeps/tile/dl-runtime.c
fsf/trunk/libc/ports/sysdeps/tile/math_private.h
fsf/trunk/libc/ports/sysdeps/tile/start.S
fsf/trunk/libc/ports/sysdeps/tile/tilegx/memcpy.c
fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_fma.c
fsf/trunk/libc/sysdeps/ieee754/ldbl-128/s_fmal.c
fsf/trunk/libc/sysdeps/ieee754/ldbl-96/s_fmal.c
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Tue Nov 6 16:19:40 2012
@@ -1,3 +1,16 @@
+2012-11-06 Joseph Myers <joseph@xxxxxxxxxxxxxxxx>
+
+ [BZ #14793]
+ * sysdeps/ieee754/dbl-64/s_fma.c (__fma): In case of large z
+ exponent and small x and y exponents, scale x or y up. Increase
+ by 2 the exponent used in scaling up.
+ * sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise.
+ * sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise.
+ * math/libm-test.inc (fma_test): Add more tests.
+ (fma_test_towardzero): Likewise.
+ (fma_test_downward): Likewise.
+ (fma_test_upward): Likewise.
+
2012-11-05 Joseph Myers <joseph@xxxxxxxxxxxxxxxx>
[BZ #14805]
Modified: fsf/trunk/libc/NEWS
==============================================================================
--- fsf/trunk/libc/NEWS (original)
+++ fsf/trunk/libc/NEWS Tue Nov 6 16:19:40 2012
@@ -18,7 +18,7 @@
14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557, 14562,
14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621, 14638,
14645, 14648, 14652, 14660, 14661, 14669, 14683, 14694, 14716, 14743,
- 14767, 14783, 14784, 14785, 14796, 14797, 14801, 14805.
+ 14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805.
* Support for STT_GNU_IFUNC symbols added for s390 and s390x.
Optimized versions of memcpy, memset, and memcmp added for System z10 and
Modified: fsf/trunk/libc/math/libm-test.inc
==============================================================================
--- fsf/trunk/libc/math/libm-test.inc (original)
+++ fsf/trunk/libc/math/libm-test.inc Tue Nov 6 16:19:40 2012
@@ -4662,6 +4662,14 @@
TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x1p103);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x1p103);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13);
@@ -4712,6 +4720,14 @@
TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x1p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x1p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L);
@@ -4748,6 +4764,14 @@
TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.bb2de33e02ccbbfa6e245a7c1f71p-2584L, -0x1.6b500daf0580d987f1bc0cadfcddp-13777L, 0x1.613cd91d9fed34b33820e5ab9d8dp-16378L, -0x1.3a79fb50eb9ce887cffa0f09bd9fp-16360L);
@@ -4791,6 +4815,14 @@
TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
#endif
END (fma);
@@ -4884,6 +4916,14 @@
TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x0.ffffffp127);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x0.ffffffp127);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x0.ffffffp103);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x0.ffffffp103);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
@@ -4914,6 +4954,14 @@
TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x0.fffffffffffff8p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x0.fffffffffffff8p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x0.fffffffffffff8p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x0.fffffffffffff8p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
@@ -4944,6 +4992,14 @@
TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x0.ffffffffffffffffp16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x0.ffffffffffffffffp16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x0.ffffffffffffffffp16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x0.ffffffffffffffffp16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
@@ -4974,6 +5030,14 @@
TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x0.ffffffffffffffffffffffffffff8p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x0.ffffffffffffffffffffffffffff8p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x0.ffffffffffffffffffffffffffff8p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x0.ffffffffffffffffffffffffffff8p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
#endif
}
@@ -5070,6 +5134,14 @@
TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x0.ffffffp127);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1.000002p127);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x0.ffffffp103);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x1p103);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1.000002p103);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
@@ -5100,6 +5172,14 @@
TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x0.fffffffffffff8p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1.0000000000001p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x0.fffffffffffff8p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x1p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1.0000000000001p970);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
@@ -5130,6 +5210,14 @@
TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x0.ffffffffffffffffp16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1.0000000000000002p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x0.ffffffffffffffffp16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1.0000000000000002p16319L);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
@@ -5160,6 +5248,14 @@
TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x0.ffffffffffffffffffffffffffff8p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1.0000000000000000000000000001p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x0.ffffffffffffffffffffffffffff8p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1.0000000000000000000000000001p16319L);
#endif
}
@@ -5256,6 +5352,14 @@
TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1.000002p127);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x0.ffffffp127);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1.000002p103);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x1p103);
+ TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x0.ffffffp103);
+ TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
@@ -5286,6 +5390,14 @@
TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1.0000000000001p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x0.fffffffffffff8p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1.0000000000001p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x1p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x0.fffffffffffff8p970);
+ TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
@@ -5316,6 +5428,14 @@
TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1.0000000000000002p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x0.ffffffffffffffffp16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1.0000000000000002p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x0.ffffffffffffffffp16319L);
+ TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
@@ -5346,6 +5466,14 @@
TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1.0000000000000000000000000001p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x0.ffffffffffffffffffffffffffff8p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1.0000000000000000000000000001p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x0.ffffffffffffffffffffffffffff8p16319L);
+ TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
#endif
}
Modified: fsf/trunk/libc/ports/ChangeLog.tile
==============================================================================
--- fsf/trunk/libc/ports/ChangeLog.tile (original)
+++ fsf/trunk/libc/ports/ChangeLog.tile Tue Nov 6 16:19:40 2012
@@ -1,3 +1,26 @@
+2012-11-06 Chris Metcalf <cmetcalf@xxxxxxxxxx>
+
+ * sysdeps/tile/nptl/pthread_spin_unlock.c: New file.
+
+2012-11-05 Chris Metcalf <cmetcalf@xxxxxxxxxx>
+
+ * sysdeps/tile/math_private.h: Provide additional no-op defines
+ for exception and rounding macros.
+
+ * sysdeps/tile/tilegx/Makefile: Generate Makefile fragment to determine
+ whether to build elf-init.c and gmon-start.c with -mcmodel=large.
+ * sysdeps/tile/crti.S: Support large memory model.
+ * sysdeps/tile/start.S: Likewise.
+
+2012-11-02 Chris Metcalf <cmetcalf@xxxxxxxxxx>
+
+ * sysdeps/tile/dl-runtime.c (_dl_after_load): Handle simulator
+ notification better for dlopen() of relative paths.
+
+ * sysdeps/tile/tilegx/memcpy.c (__memcpy): Optimize.
+ * sysdeps/tile/memcopy.h: New file.
+ * sysdeps/tile/wordcopy.c: New file.
+
2012-11-03 Joseph Myers <joseph@xxxxxxxxxxxxxxxx>
[BZ #3439]
Modified: fsf/trunk/libc/ports/sysdeps/tile/crti.S
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/tile/crti.S (original)
+++ fsf/trunk/libc/ports/sysdeps/tile/crti.S Tue Nov 6 16:19:40 2012
@@ -70,16 +70,17 @@
#if PREINIT_FUNCTION_WEAK
lnk r2
0:
-#ifdef __tilegx__
+# ifdef __tilegx__
+ moveli r1, hw2_last(_GLOBAL_OFFSET_TABLE_ - 0b)
{
- moveli r1, hw1_last(_GLOBAL_OFFSET_TABLE_ - 0b)
+ shl16insli r1, r1, hw1(_GLOBAL_OFFSET_TABLE_ - 0b)
moveli r0, hw1_last_got(PREINIT_FUNCTION)
}
{
shl16insli r1, r1, hw0(_GLOBAL_OFFSET_TABLE_ - 0b)
shl16insli r0, r0, hw0_got(PREINIT_FUNCTION)
}
-#else
+# else
{
moveli r1, lo16(_GLOBAL_OFFSET_TABLE_ - 0b)
moveli r0, got_lo16(PREINIT_FUNCTION)
@@ -88,13 +89,25 @@
auli r1, r1, ha16(_GLOBAL_OFFSET_TABLE_ - 0b)
auli r0, r0, got_ha16(PREINIT_FUNCTION)
}
-#endif
+# endif
ADD_PTR r0, r0, r1
ADD_PTR r0, r0, r2
LD_PTR r0, r0
BEQZ r0, .Lno_weak_fn
+ jalr r0
+#elif defined(__tilegx__)
+ /* Since we are calling from the start of the object to the PLT,
+ call by loading the full address into a register. */
+ lnk r2
+0:
+ moveli r0, hw2_last_plt(PREINIT_FUNCTION - 0b)
+ shl16insli r0, r0, hw1_plt(PREINIT_FUNCTION - 0b)
+ shl16insli r0, r0, hw0_plt(PREINIT_FUNCTION - 0b)
+ add r0, r0, r2
+ jalr r0
+#else
+ jal plt(PREINIT_FUNCTION)
#endif
- jal plt(PREINIT_FUNCTION)
.Lno_weak_fn:
.section .fini,"ax",@progbits
Modified: fsf/trunk/libc/ports/sysdeps/tile/dl-runtime.c
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/tile/dl-runtime.c (original)
+++ fsf/trunk/libc/ports/sysdeps/tile/dl-runtime.c Tue Nov 6 16:19:40 2012
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@xxxxxxxxxx>, 2011.
@@ -28,44 +28,119 @@
#include <sys/mman.h>
#include <arch/sim.h>
-/* Support notifying the simulator about new objects. */
+/* Like realpath(), but simplified: no dynamic memory use, no lstat(),
+ no set_errno(), no valid "rpath" on error, etc. This handles some
+ simple cases where the simulator might not have a valid entry for
+ a loaded Elf object, in particular dlopen() with a relative path.
+ For this relatively rare case, one could also imagine using
+ link_map.l_origin to avoid the getcwd() here, but the simpler code
+ here seems like a better solution. */
+static char *
+dl_realpath (const char *name, char *rpath)
+{
+ char *dest;
+ const char *start, *end;
+
+ if (name[0] != '/')
+ {
+ if (!__getcwd (rpath, PATH_MAX))
+ return NULL;
+ dest = __rawmemchr (rpath, '\0');
+ }
+ else
+ {
+ rpath[0] = '/';
+ dest = rpath + 1;
+ }
+
+ for (start = end = name; *start; start = end)
+ {
+ /* Skip sequence of multiple path-separators. */
+ while (*start == '/')
+ ++start;
+
+ /* Find end of path component. */
+ for (end = start; *end && *end != '/'; ++end)
+ /* Nothing. */;
+
+ if (end - start == 0)
+ break;
+ else if (end - start == 1 && start[0] == '.')
+ /* nothing */;
+ else if (end - start == 2 && start[0] == '.' && start[1] == '.')
+ {
+ /* Back up to previous component, ignore if at root already. */
+ if (dest > rpath + 1)
+ while ((--dest)[-1] != '/');
+ }
+ else
+ {
+ if (dest[-1] != '/')
+ *dest++ = '/';
+
+ if (dest + (end - start) >= rpath + PATH_MAX)
+ return NULL;
+
+ dest = __mempcpy (dest, start, end - start);
+ *dest = '\0';
+ }
+ }
+ if (dest > rpath + 1 && dest[-1] == '/')
+ --dest;
+ *dest = '\0';
+
+ return rpath;
+}
+
+/* Support notifying the simulator about new objects. */
void internal_function
_dl_after_load (struct link_map *l)
{
int shift;
+ char pathbuf[PATH_MAX];
+ char *path;
-#define DLPUTC(c) __insn_mtspr(SPR_SIM_CONTROL, \
- (SIM_CONTROL_DLOPEN \
- | ((c) << _SIM_CONTROL_OPERATOR_BITS)))
+ /* Don't bother if not in the simulator. */
+ if (__insn_mfspr (SPR_SIM_CONTROL) == 0)
+ return;
- /* Write the library address in hex. */
+#define DLPUTC(c) __insn_mtspr (SPR_SIM_CONTROL, \
+ (SIM_CONTROL_DLOPEN \
+ | ((c) << _SIM_CONTROL_OPERATOR_BITS)))
+
+ /* Write the library address in hex. */
DLPUTC ('0');
DLPUTC ('x');
for (shift = (int) sizeof (unsigned long) * 8 - 4; shift >= 0; shift -= 4)
DLPUTC ("0123456789abcdef"[(l->l_map_start >> shift) & 0xF]);
DLPUTC (':');
- /* Write the library path, including the terminating '\0'. */
+ /* Write the library path, including the terminating '\0'. */
+ path = dl_realpath (l->l_name, pathbuf) ?: l->l_name;
for (size_t i = 0;; i++)
{
- DLPUTC (l->l_name[i]);
- if (l->l_name[i] == '\0')
+ DLPUTC (path[i]);
+ if (path[i] == '\0')
break;
}
#undef DLPUTC
}
-/* Support notifying the simulator about removed objects prior to munmap(). */
+/* Support notifying the simulator about removed objects prior to munmap(). */
void internal_function
_dl_unmap (struct link_map *l)
{
int shift;
-#define DLPUTC(c) __insn_mtspr(SPR_SIM_CONTROL, \
- (SIM_CONTROL_DLCLOSE \
- | ((c) << _SIM_CONTROL_OPERATOR_BITS)))
+ /* Don't bother if not in the simulator. */
+ if (__insn_mfspr (SPR_SIM_CONTROL) == 0)
+ return;
- /* Write the library address in hex. */
+#define DLPUTC(c) __insn_mtspr (SPR_SIM_CONTROL, \
+ (SIM_CONTROL_DLCLOSE \
+ | ((c) << _SIM_CONTROL_OPERATOR_BITS)))
+
+ /* Write the library address in hex. */
DLPUTC ('0');
DLPUTC ('x');
for (shift = (int) sizeof (unsigned long) * 8 - 4; shift >= 0; shift -= 4)
Modified: fsf/trunk/libc/ports/sysdeps/tile/math_private.h
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/tile/math_private.h (original)
+++ fsf/trunk/libc/ports/sysdeps/tile/math_private.h Tue Nov 6 16:19:40 2012
@@ -1,13 +1,31 @@
#ifndef _MATH_PRIVATE_H
+
+/* Internally, we suppress any use of exception or rounding other
+ than what is supported by the hardware. This does mean that some
+ code will silently fail to report exceptions, set rounding mode
+ as expected, etc., but it allows math code to compile that otherwise
+ wouldn't (such as math/s_fma.c) and so is valuable.
+
+ We intentionally ignore the "exception" arguments of functions that
+ take an exception, since we can't even evaluate the argument
+ without causing a build failure. The extra level of statement
+ expression wrapping avoids "statement with no effect" warnings.
+ Since the callers don't check for errors anyway, we just claim
+ success in every case.
+
+ The overrides for libc_ functions must happen before we include
+ the generic math_private.h, and the overrides for regular
+ <fenv.h> functions must happen afterwards, to avoid clashing with
+ the declarations of those functions. */
+
+#define libc_fesetround(rnd) ({ 0; })
+#define libc_fetestexcept(exc) ({ 0; })
+#define libc_feholdexcept_setround(env, exc) ({ (void) (env); 0; })
+#define libc_feupdateenv_test(env, exc) ({ (void) (env); 0; })
#include_next <math_private.h>
-/* We have no exception support, so feraiseexcept() must be a no-op.
- And since we don't define FE_INVALID, FE_DIVBYZERO, etc., we
- must ignore the argument of feraiseexcept() as well. we return
- "1" to indicate we failed to raise an exception, though none of
- the callers in glibc actually care. The extra level of statement
- expression wrapping avoids "statement with no effect" warnings. */
-#define feraiseexcept(excepts) ({ 1; })
+#define feraiseexcept(excepts) ({ 0; })
+#define feclearexcept(exc) ({ 0; })
#endif
Added: fsf/trunk/libc/ports/sysdeps/tile/memcopy.h
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/tile/memcopy.h (added)
+++ fsf/trunk/libc/ports/sysdeps/tile/memcopy.h Tue Nov 6 16:19:40 2012
@@ -1,0 +1,27 @@
+/* memcopy.h -- definitions for memory copy functions. Tile version.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdeps/generic/memcopy.h>
+#include <bits/wordsize.h>
+
+/* Support more efficient copying on tilegx32, which supports
+ long long as a native 64-bit type. */
+#if defined (__tilegx__) && __WORDSIZE == 32
+# undef op_t
+# define op_t unsigned long long int
+#endif
Added: fsf/trunk/libc/ports/sysdeps/tile/nptl/pthread_spin_unlock.c
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/tile/nptl/pthread_spin_unlock.c (added)
+++ fsf/trunk/libc/ports/sysdeps/tile/nptl/pthread_spin_unlock.c Tue Nov 6 16:19:40 2012
@@ -1,0 +1,33 @@
+/* pthread_spin_unlock -- unlock a spin lock. Tile version.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "pthreadP.h"
+#include <atomic.h>
+
+int
+pthread_spin_unlock (pthread_spinlock_t *lock)
+{
+#ifdef __tilegx__
+ /* Use exchange() to bypass the write buffer. */
+ atomic_exchange_rel (lock, 0);
+#else
+ atomic_full_barrier ();
+ *lock = 0;
+#endif
+ return 0;
+}
Modified: fsf/trunk/libc/ports/sysdeps/tile/start.S
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/tile/start.S (original)
+++ fsf/trunk/libc/ports/sysdeps/tile/start.S Tue Nov 6 16:19:40 2012
@@ -126,27 +126,37 @@
moveli r0, hw2_last(main - .Lmy_pc)
}
{
+ shl16insli r0, r0, hw1(main - .Lmy_pc)
moveli r3, hw2_last(__libc_csu_init - .Lmy_pc)
- shl16insli r0, r0, hw1(main - .Lmy_pc)
}
{
+ shl16insli r0, r0, hw0(main - .Lmy_pc)
shl16insli r3, r3, hw1(__libc_csu_init - .Lmy_pc)
- shl16insli r0, r0, hw0(main - .Lmy_pc)
}
{
+ ADD_PTR r0, r0, r13
shl16insli r3, r3, hw0(__libc_csu_init - .Lmy_pc)
+ }
+ {
+ moveli r12, hw2_last_plt(__libc_start_main - .Lmy_pc)
+ ADD_PTR r3, r3, r13
+ }
+ {
+ shl16insli r12, r12, hw1_plt(__libc_start_main - .Lmy_pc)
moveli r4, hw2_last(__libc_csu_fini - .Lmy_pc)
}
{
- ADD_PTR r0, r0, r13
+ shl16insli r12, r12, hw0_plt(__libc_start_main - .Lmy_pc)
shl16insli r4, r4, hw1(__libc_csu_fini - .Lmy_pc)
}
{
- ADD_PTR r3, r3, r13
+ ADD_PTR r12, r12, r13
shl16insli r4, r4, hw0(__libc_csu_fini - .Lmy_pc)
}
{
ADD_PTR r4, r4, r13
+ jalr r12
+ }
#else
addli r0, r13, lo16(main - .Lmy_pc)
}
@@ -160,13 +170,12 @@
}
{
auli r4, r4, ha16(__libc_csu_fini - .Lmy_pc)
-
-#endif
-
/* Call the user's main function, and exit with its value.
But let the libc call main. */
j plt(__libc_start_main)
}
+#endif
+
{
/* Tell backtracer to give up (_start has no caller). */
info INFO_OP_CANNOT_BACKTRACE
Added: fsf/trunk/libc/ports/sysdeps/tile/tilegx/Makefile
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/tile/tilegx/Makefile (added)
+++ fsf/trunk/libc/ports/sysdeps/tile/tilegx/Makefile Tue Nov 6 16:19:40 2012
@@ -1,0 +1,18 @@
+include $(common-objpfx)cflags-mcmodel-large.mk
+
+$(common-objpfx)cflags-mcmodel-large.mk: $(common-objpfx)config.make
+ mcmodel=no; \
+ $(CC) -S -o /dev/null -xc /dev/null -mcmodel=large && mcmodel=yes; \
+ echo "cflags-mcmodel-large = $$mcmodel" > $@
+
+ifeq ($(subdir),csu)
+ifeq (yes,$(cflags-mcmodel-large))
+# elf-init.c is in libc_nonshared.o (the end of the shared object) but
+# must reach the _init symbol at the very start of the shared object.
+CFLAGS-elf-init.c += -mcmodel=large
+
+# __gmon_start__ is at the very start of the shared object when linked
+# with profiling, but calls to libc.so via the PLT at the very end.
+CFLAGS-gmon-start.c += -mcmodel=large
+endif
+endif
Modified: fsf/trunk/libc/ports/sysdeps/tile/tilegx/memcpy.c
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/tile/tilegx/memcpy.c (original)
+++ fsf/trunk/libc/ports/sysdeps/tile/tilegx/memcpy.c Tue Nov 6 16:19:40 2012
@@ -19,10 +19,8 @@
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
+#include <memcopy.h>
#include <arch/chip.h>
-
-/* Must be 8 bytes in size. */
-#define word_t uint64_t
/* How many cache lines ahead should we prefetch? */
#define PREFETCH_LINES_AHEAD 3
@@ -34,8 +32,8 @@
const char *__restrict src1 = (const char *) srcv;
const char *__restrict src1_end;
const char *__restrict prefetch;
- word_t *__restrict dst8; /* 8-byte pointer to destination memory. */
- word_t final; /* Final bytes to write to trailing word, if any */
+ op_t *__restrict dst8; /* 8-byte pointer to destination memory. */
+ op_t final; /* Final bytes to write to trailing word, if any */
long i;
if (n < 16)
@@ -55,101 +53,169 @@
{
__insn_prefetch (prefetch);
prefetch += CHIP_L2_LINE_SIZE ();
- prefetch = (prefetch > src1_end) ? prefetch : src1;
+ prefetch = (prefetch < src1_end) ? prefetch : src1;
}
/* Copy bytes until dst is word-aligned. */
- for (; (uintptr_t) dst1 & (sizeof (word_t) - 1); n--)
+ for (; (uintptr_t) dst1 & (sizeof (op_t) - 1); n--)
*dst1++ = *src1++;
/* 8-byte pointer to destination memory. */
- dst8 = (word_t *) dst1;
-
- if (__builtin_expect ((uintptr_t) src1 & (sizeof (word_t) - 1), 0))
- {
- /* Misaligned copy. Copy 8 bytes at a time, but don't bother
- with other fanciness.
- TODO: Consider prefetching and using wh64 as well. */
-
- /* Create an aligned src8. */
- const word_t *__restrict src8 =
- (const word_t *) ((uintptr_t) src1 & -sizeof (word_t));
- word_t b;
-
- word_t a = *src8++;
- for (; n >= sizeof (word_t); n -= sizeof (word_t))
- {
- b = *src8++;
- a = __insn_dblalign (a, b, src1);
- *dst8++ = a;
- a = b;
- }
-
+ dst8 = (op_t *) dst1;
+
+ if (__builtin_expect ((uintptr_t) src1 & (sizeof (op_t) - 1), 0))
+ {
+ /* Misaligned copy. Use glibc's _wordcopy_fwd_dest_aligned, but
+ inline it to avoid prologue/epilogue. TODO: Consider
+ prefetching and using wh64 as well. */
+ void * srci;
+ op_t a0, a1, a2, a3;
+ long int dstp = (long int) dst1;
+ long int srcp = (long int) src1;
+ long int len = n / OPSIZ;
+
+ /* Save the initial source pointer so we know the number of
+ bytes to shift for merging two unaligned results. */
+ srci = (void *) srcp;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the
+ `op_t' it points in the middle of. */
+ srcp &= -OPSIZ;
+
+ switch (len % 4)
+ {
+ case 2:
+ a1 = ((op_t *) srcp)[0];
+ a2 = ((op_t *) srcp)[1];
+ len += 2;
+ srcp += 2 * OPSIZ;
+ goto do1;
+ case 3:
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ len += 1;
+ srcp += 2 * OPSIZ;
+ goto do2;
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return dstv;
+ a3 = ((op_t *) srcp)[0];
+ a0 = ((op_t *) srcp)[1];
+ len += 0;
+ srcp += 2 * OPSIZ;
+ goto do3;
+ case 1:
+ a2 = ((op_t *) srcp)[0];
+ a3 = ((op_t *) srcp)[1];
+ srcp += 2 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do4; /* No-op. */
+ }
+
+ do
+ {
+ do4:
+ a0 = ((op_t *) srcp)[0];
+ a2 = __insn_dblalign (a2, a3, srci);
+ ((op_t *) dstp)[0] = a2;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do3:
+ a1 = ((op_t *) srcp)[0];
+ a3 = __insn_dblalign (a3, a0, srci);
+ ((op_t *) dstp)[0] = a3;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do2:
+ a2 = ((op_t *) srcp)[0];
+ a0 = __insn_dblalign (a0, a1, srci);
+ ((op_t *) dstp)[0] = a0;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do1:
+ a3 = ((op_t *) srcp)[0];
+ a1 = __insn_dblalign (a1, a2, srci);
+ ((op_t *) dstp)[0] = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = __insn_dblalign (a2, a3, srci);
+
+ n = n % OPSIZ;
if (n == 0)
- return dstv;
-
- b = ((const char *) src8 <= src1_end) ? *src8 : 0;
-
- /* Final source bytes to write to trailing partial word, if any. */
- final = __insn_dblalign (a, b, src1);
+ return dstv;
+
+ a0 = ((const char *) srcp <= src1_end) ? ((op_t *) srcp)[0] : 0;
+
+ final = __insn_dblalign (a3, a0, srci);
+ dst8 = (op_t *)(dstp + OPSIZ);
}
else
{
/* Aligned copy. */
- const word_t *__restrict src8 = (const word_t *) src1;
+ const op_t *__restrict src8 = (const op_t *) src1;
/* src8 and dst8 are both word-aligned. */
if (n >= CHIP_L2_LINE_SIZE ())
{
/* Copy until 'dst' is cache-line-aligned. */
for (; (uintptr_t) dst8 & (CHIP_L2_LINE_SIZE () - 1);
- n -= sizeof (word_t))
+ n -= sizeof (op_t))
*dst8++ = *src8++;
- /* If copying to self, return. The test is cheap enough
- that we do it despite the fact that the memcpy() contract
- doesn't require us to support overlapping dst and src.
- This is the most common case of overlap, and any close
- overlap will cause corruption due to the wh64 below.
- This case is particularly important since the compiler
- will emit memcpy() calls for aggregate copies even if it
- can't prove that src != dst. */
- if (__builtin_expect (dst8 == src8, 0))
- return dstv;
-
for (; n >= CHIP_L2_LINE_SIZE ();)
- {
- __insn_wh64 (dst8);
-
- /* Prefetch and advance to next line to prefetch, but
- don't go past the end. */
- __insn_prefetch (prefetch);
- prefetch += CHIP_L2_LINE_SIZE ();
- prefetch = (prefetch > src1_end) ? prefetch :
- (const char *) src8;
-
- /* Copy an entire cache line. Manually unrolled to
- avoid idiosyncracies of compiler unrolling. */
-#define COPY_WORD(offset) ({ dst8[offset] = src8[offset]; n -= 8; })
- COPY_WORD (0);
- COPY_WORD (1);
- COPY_WORD (2);
- COPY_WORD (3);
- COPY_WORD (4);
- COPY_WORD (5);
- COPY_WORD (6);
- COPY_WORD (7);
+ {
+ op_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+
+ /* Prefetch and advance to next line to prefetch, but
+ don't go past the end. */
+ __insn_prefetch (prefetch);
+ prefetch += CHIP_L2_LINE_SIZE ();
+ prefetch = (prefetch < src1_end) ? prefetch :
+ (const char *) src8;
+
+ /* Do all the loads before wh64. This is necessary if
+ [src8, src8+7] and [dst8, dst8+7] share the same
+ cache line and dst8 <= src8, as can be the case when
+ called from memmove, or with code tested on x86 whose
+ memcpy always works with forward copies. */
+ tmp0 = *src8++;
+ tmp1 = *src8++;
+ tmp2 = *src8++;
+ tmp3 = *src8++;
+ tmp4 = *src8++;
+ tmp5 = *src8++;
+ tmp6 = *src8++;
+ tmp7 = *src8++;
+
+ __insn_wh64 (dst8);
+
+ *dst8++ = tmp0;
+ *dst8++ = tmp1;
+ *dst8++ = tmp2;
+ *dst8++ = tmp3;
+ *dst8++ = tmp4;
+ *dst8++ = tmp5;
+ *dst8++ = tmp6;
+ *dst8++ = tmp7;
+
+ n -= 64;
+ }
#if CHIP_L2_LINE_SIZE() != 64
# error "Fix code that assumes particular L2 cache line size."
#endif
-
- dst8 += CHIP_L2_LINE_SIZE () / sizeof (word_t);
- src8 += CHIP_L2_LINE_SIZE () / sizeof (word_t);
- }
}
- for (; n >= sizeof (word_t); n -= sizeof (word_t))
+ for (; n >= sizeof (op_t); n -= sizeof (op_t))
*dst8++ = *src8++;
if (__builtin_expect (n == 0, 1))
Added: fsf/trunk/libc/ports/sysdeps/tile/wordcopy.c
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/tile/wordcopy.c (added)
+++ fsf/trunk/libc/ports/sysdeps/tile/wordcopy.c Tue Nov 6 16:19:40 2012
@@ -1,0 +1,449 @@
+/* wordcopy.c -- subroutines for memory copy functions. Tile version.
+ Copyright (C) 1991-2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* To optimize for tile, we make the following changes from the
+ default glibc version:
+ - Use the double align instruction instead of the MERGE macro.
+ - Since we don't have offset addressing mode, make sure the loads /
+ stores in the inner loop always have indices of 0.
+ - Use post-increment addresses in the inner loops, which yields
+ better scheduling. */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <stddef.h>
+#include <memcopy.h>
+
+/* Provide the appropriate dblalign builtin to shift two registers
+ based on the alignment of a pointer held in a third register. */
+#ifdef __tilegx__
+#define DBLALIGN __insn_dblalign
+#else
+#define DBLALIGN __insn_dword_align
+#endif
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+void
+_wordcopy_fwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ switch (len % 8)
+ {
+ case 2:
+ a0 = ((op_t *) srcp)[0];
+ srcp += OPSIZ;
+ len += 6;
+ goto do1;
+ case 3:
+ a1 = ((op_t *) srcp)[0];
+ srcp += OPSIZ;
+ len += 5;
+ goto do2;
+ case 4:
+ a0 = ((op_t *) srcp)[0];
+ srcp += OPSIZ;
+ len += 4;
+ goto do3;
+ case 5:
+ a1 = ((op_t *) srcp)[0];
+ srcp += OPSIZ;
+ len += 3;
+ goto do4;
+ case 6:
+ a0 = ((op_t *) srcp)[0];
+ srcp += OPSIZ;
+ len += 2;
+ goto do5;
+ case 7:
+ a1 = ((op_t *) srcp)[0];
+ srcp += OPSIZ;
+ len += 1;
+ goto do6;
+
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ a0 = ((op_t *) srcp)[0];
+ srcp += OPSIZ;
+ goto do7;
+ case 1:
+ a1 = ((op_t *) srcp)[0];
+ srcp += OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do8; /* No-op. */
+ }
+
+ do
+ {
+ do8:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do7:
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a0;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do6:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do5:
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a0;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do4:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do3:
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a0;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do2:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do1:
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a0;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+
+ len -= 8;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = a1;
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+void
+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ void * srci;
+ op_t a0, a1, a2, a3;
+
+ /* Save the initial source pointer so we know the number of bytes to
+ shift for merging two unaligned results. */
+ srci = (void *) srcp;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+
+ switch (len % 4)
+ {
+ case 2:
+ a1 = ((op_t *) srcp)[0];
+ a2 = ((op_t *) srcp)[1];
+ len += 2;
+ srcp += 2 * OPSIZ;
+ goto do1;
+ case 3:
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ len += 1;
+ srcp += 2 * OPSIZ;
+ goto do2;
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ a3 = ((op_t *) srcp)[0];
+ a0 = ((op_t *) srcp)[1];
+ len += 0;
+ srcp += 2 * OPSIZ;
+ goto do3;
+ case 1:
+ a2 = ((op_t *) srcp)[0];
+ a3 = ((op_t *) srcp)[1];
+ srcp += 2 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do4; /* No-op. */
+ }
+
+ do
+ {
+ do4:
+ a0 = ((op_t *) srcp)[0];
+ a2 = DBLALIGN (a2, a3, srci);
+ ((op_t *) dstp)[0] = a2;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do3:
+ a1 = ((op_t *) srcp)[0];
+ a3 = DBLALIGN (a3, a0, srci);
+ ((op_t *) dstp)[0] = a3;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do2:
+ a2 = ((op_t *) srcp)[0];
+ a0 = DBLALIGN (a0, a1, srci);
+ ((op_t *) dstp)[0] = a0;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ do1:
+ a3 = ((op_t *) srcp)[0];
+ a1 = DBLALIGN (a1, a2, srci);
+ ((op_t *) dstp)[0] = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = DBLALIGN (a2, a3, srci);
+}
+
+/* _wordcopy_bwd_aligned -- Copy block finishing right before
+ SRCP to block finishing right before DSTP with LEN `op_t' words
+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
+ operations on `op_t's. */
+
+void
+_wordcopy_bwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+ long int srcp1;
+
+ srcp1 = srcp - 1 * OPSIZ;
+ srcp -= 2 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+
+ switch (len % 8)
+ {
+ case 2:
+ a0 = ((op_t *) srcp1)[0];
+ len += 6;
+ goto do1;
+ case 3:
+ a1 = ((op_t *) srcp1)[0];
+ len += 5;
+ goto do2;
+ case 4:
+ a0 = ((op_t *) srcp1)[0];
+ len += 4;
+ goto do3;
+ case 5:
+ a1 = ((op_t *) srcp1)[0];
+ len += 3;
+ goto do4;
+ case 6:
+ a0 = ((op_t *) srcp1)[0];
+ len += 2;
+ goto do5;
+ case 7:
+ a1 = ((op_t *) srcp1)[0];
+ len += 1;
+ goto do6;
+
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ a0 = ((op_t *) srcp1)[0];
+ goto do7;
+ case 1:
+ a1 = ((op_t *) srcp1)[0];
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do8; /* No-op. */
+ }
+
+ do
+ {
+ do8:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do7:
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a0;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do6:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do5:
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a0;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do4:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do3:
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a0;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do2:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do1:
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a0;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+
+ len -= 8;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = a1;
+}
+
+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
+ before SRCP to block finishing right before DSTP with LEN `op_t'
+ words (not LEN bytes!). DSTP should be aligned for memory
+ operations on `op_t', but SRCP must *not* be aligned. */
+
+void
+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ void * srci;
+ op_t a0, a1, a2, a3;
+ op_t b0, b1, b2, b3;
+
+ /* Save the initial source pointer so we know the number of bytes to
+ shift for merging two unaligned results. */
+ srci = (void *) srcp;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the op_t
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ srcp += OPSIZ;
+
+ switch (len % 4)
+ {
+ case 2:
+ srcp -= 3 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ b2 = ((op_t *) srcp)[2];
+ b1 = a1 = ((op_t *) srcp)[1];
+ len += 2;
+ goto do1;
+ case 3:
+ srcp -= 3 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ b3 = ((op_t *) srcp)[2];
+ b2 = a2 = ((op_t *) srcp)[1];
+ len += 1;
+ goto do2;
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ srcp -= 3 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ b0 = ((op_t *) srcp)[2];
+ b3 = a3 = ((op_t *) srcp)[1];
+ goto do3;
+ case 1:
+ srcp -= 3 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ b1 = ((op_t *) srcp)[2];
+ b0 = a0 = ((op_t *) srcp)[1];
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do4; /* No-op. */
+ }
+
+ do
+ {
+ do4:
+ b3 = a3 = ((op_t *) srcp)[0];
+ a0 = DBLALIGN (a0, b1, srci);
+ ((op_t *) dstp)[0] = a0;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do3:
+ b2 = a2 = ((op_t *) srcp)[0];
+ a3 = DBLALIGN (a3, b0, srci);
+ ((op_t *) dstp)[0] = a3;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do2:
+ b1 = a1 = ((op_t *) srcp)[0];
+ a2 = DBLALIGN (a2, b3, srci);
+ ((op_t *) dstp)[0] = a2;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ do1:
+ b0 = a0 = ((op_t *) srcp)[0];
+ a1 = DBLALIGN (a1, b2, srci);
+ ((op_t *) dstp)[0] = a1;
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ a0 = DBLALIGN (a0, b1, srci);
+ ((op_t *) dstp)[0] = a0;
+}
Modified: fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_fma.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_fma.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/dbl-64/s_fma.c Tue Nov 6 16:19:40 2012
@@ -114,8 +114,17 @@
{
/* Similarly.
If z exponent is very large and x and y exponents are
- very small, it doesn't matter if we don't adjust it. */
- if (u.ieee.exponent > v.ieee.exponent)
+ very small, adjust them up to avoid spurious underflows,
+ rather than down. */
+ if (u.ieee.exponent + v.ieee.exponent
+ <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG)
+ {
+ if (u.ieee.exponent > v.ieee.exponent)
+ u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
+ else
+ v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
+ }
+ else if (u.ieee.exponent > v.ieee.exponent)
{
if (u.ieee.exponent > DBL_MANT_DIG)
u.ieee.exponent -= DBL_MANT_DIG;
@@ -145,15 +154,15 @@
<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
{
if (u.ieee.exponent > v.ieee.exponent)
- u.ieee.exponent += 2 * DBL_MANT_DIG;
- else
- v.ieee.exponent += 2 * DBL_MANT_DIG;
- if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
+ u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
+ else
+ v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
+ if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 6)
{
if (w.ieee.exponent)
- w.ieee.exponent += 2 * DBL_MANT_DIG;
+ w.ieee.exponent += 2 * DBL_MANT_DIG + 2;
else
- w.d *= 0x1p106;
+ w.d *= 0x1p108;
adjust = -1;
}
/* Otherwise x * y should just affect inexact
@@ -238,19 +247,19 @@
/* If a1 + u.d is exact, the only rounding happens during
scaling down. */
if (j == 0)
- return v.d * 0x1p-106;
+ return v.d * 0x1p-108;
/* If result rounded to zero is not subnormal, no double
rounding will occur. */
- if (v.ieee.exponent > 106)
- return (a1 + u.d) * 0x1p-106;
- /* If v.d * 0x1p-106 with round to zero is a subnormal above
- or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
+ if (v.ieee.exponent > 108)
+ return (a1 + u.d) * 0x1p-108;
+ /* If v.d * 0x1p-108 with round to zero is a subnormal above
+ or equal to DBL_MIN / 2, then v.d * 0x1p-108 shifts mantissa
down just by 1 bit, which means v.ieee.mantissa1 |= j would
change the round bit, not sticky or guard bit.
- v.d * 0x1p-106 never normalizes by shifting up,
+ v.d * 0x1p-108 never normalizes by shifting up,
so round bit plus sticky bit should be already enough
for proper rounding. */
- if (v.ieee.exponent == 106)
+ if (v.ieee.exponent == 108)
{
/* If the exponent would be in the normal range when
rounding to normal precision with unbounded exponent
@@ -260,8 +269,8 @@
if (TININESS_AFTER_ROUNDING)
{
w.d = a1 + u.d;
- if (w.ieee.exponent == 107)
- return w.d * 0x1p-106;
+ if (w.ieee.exponent == 109)
+ return w.d * 0x1p-108;
}
/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa1 & 1 is the round bit and j is our sticky
@@ -270,12 +279,12 @@
w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
w.ieee.negative = v.ieee.negative;
v.ieee.mantissa1 &= ~3U;
- v.d *= 0x1p-106;
+ v.d *= 0x1p-108;
w.d *= 0x1p-2;
return v.d + w.d;
}
v.ieee.mantissa1 |= j;
- return v.d * 0x1p-106;
+ return v.d * 0x1p-108;
}
}
#ifndef __fma
Modified: fsf/trunk/libc/sysdeps/ieee754/ldbl-128/s_fmal.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/ldbl-128/s_fmal.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/ldbl-128/s_fmal.c Tue Nov 6 16:19:40 2012
@@ -118,8 +118,17 @@
{
/* Similarly.
If z exponent is very large and x and y exponents are
- very small, it doesn't matter if we don't adjust it. */
- if (u.ieee.exponent > v.ieee.exponent)
+ very small, adjust them up to avoid spurious underflows,
+ rather than down. */
+ if (u.ieee.exponent + v.ieee.exponent
+ <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG)
+ {
+ if (u.ieee.exponent > v.ieee.exponent)
+ u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
+ else
+ v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
+ }
+ else if (u.ieee.exponent > v.ieee.exponent)
{
if (u.ieee.exponent > LDBL_MANT_DIG)
u.ieee.exponent -= LDBL_MANT_DIG;
@@ -149,15 +158,15 @@
<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
{
if (u.ieee.exponent > v.ieee.exponent)
- u.ieee.exponent += 2 * LDBL_MANT_DIG;
- else
- v.ieee.exponent += 2 * LDBL_MANT_DIG;
- if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
+ u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
+ else
+ v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
+ if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
{
if (w.ieee.exponent)
- w.ieee.exponent += 2 * LDBL_MANT_DIG;
+ w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
else
- w.d *= 0x1p226L;
+ w.d *= 0x1p228L;
adjust = -1;
}
/* Otherwise x * y should just affect inexact
@@ -242,19 +251,19 @@
/* If a1 + u.d is exact, the only rounding happens during
scaling down. */
if (j == 0)
- return v.d * 0x1p-226L;
+ return v.d * 0x1p-228L;
/* If result rounded to zero is not subnormal, no double
rounding will occur. */
- if (v.ieee.exponent > 226)
- return (a1 + u.d) * 0x1p-226L;
- /* If v.d * 0x1p-226L with round to zero is a subnormal above
- or equal to LDBL_MIN / 2, then v.d * 0x1p-226L shifts mantissa
+ if (v.ieee.exponent > 228)
+ return (a1 + u.d) * 0x1p-228L;
+ /* If v.d * 0x1p-228L with round to zero is a subnormal above
+ or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
down just by 1 bit, which means v.ieee.mantissa3 |= j would
change the round bit, not sticky or guard bit.
- v.d * 0x1p-226L never normalizes by shifting up,
+ v.d * 0x1p-228L never normalizes by shifting up,
so round bit plus sticky bit should be already enough
for proper rounding. */
- if (v.ieee.exponent == 226)
+ if (v.ieee.exponent == 228)
{
/* If the exponent would be in the normal range when
rounding to normal precision with unbounded exponent
@@ -264,8 +273,8 @@
if (TININESS_AFTER_ROUNDING)
{
w.d = a1 + u.d;
- if (w.ieee.exponent == 227)
- return w.d * 0x1p-226L;
+ if (w.ieee.exponent == 229)
+ return w.d * 0x1p-228L;
}
/* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa3 & 1 is the round bit and j is our sticky
@@ -274,12 +283,12 @@
w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
w.ieee.negative = v.ieee.negative;
v.ieee.mantissa3 &= ~3U;
- v.d *= 0x1p-226L;
+ v.d *= 0x1p-228L;
w.d *= 0x1p-2L;
return v.d + w.d;
}
v.ieee.mantissa3 |= j;
- return v.d * 0x1p-226L;
+ return v.d * 0x1p-228L;
}
}
weak_alias (__fmal, fmal)
Modified: fsf/trunk/libc/sysdeps/ieee754/ldbl-96/s_fmal.c
==============================================================================
--- fsf/trunk/libc/sysdeps/ieee754/ldbl-96/s_fmal.c (original)
+++ fsf/trunk/libc/sysdeps/ieee754/ldbl-96/s_fmal.c Tue Nov 6 16:19:40 2012
@@ -116,8 +116,17 @@
{
/* Similarly.
If z exponent is very large and x and y exponents are
- very small, it doesn't matter if we don't adjust it. */
- if (u.ieee.exponent > v.ieee.exponent)
+ very small, adjust them up to avoid spurious underflows,
+ rather than down. */
+ if (u.ieee.exponent + v.ieee.exponent
+ <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG)
+ {
+ if (u.ieee.exponent > v.ieee.exponent)
+ u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
+ else
+ v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
+ }
+ else if (u.ieee.exponent > v.ieee.exponent)
{
if (u.ieee.exponent > LDBL_MANT_DIG)
u.ieee.exponent -= LDBL_MANT_DIG;
@@ -147,15 +156,15 @@
<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
{
if (u.ieee.exponent > v.ieee.exponent)
- u.ieee.exponent += 2 * LDBL_MANT_DIG;
- else
- v.ieee.exponent += 2 * LDBL_MANT_DIG;
- if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
+ u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
+ else
+ v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
+ if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
{
if (w.ieee.exponent)
- w.ieee.exponent += 2 * LDBL_MANT_DIG;
+ w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
else
- w.d *= 0x1p128L;
+ w.d *= 0x1p130L;
adjust = -1;
}
/* Otherwise x * y should just affect inexact
@@ -240,19 +249,19 @@
/* If a1 + u.d is exact, the only rounding happens during
scaling down. */
if (j == 0)
- return v.d * 0x1p-128L;
+ return v.d * 0x1p-130L;
/* If result rounded to zero is not subnormal, no double
rounding will occur. */
- if (v.ieee.exponent > 128)
- return (a1 + u.d) * 0x1p-128L;
- /* If v.d * 0x1p-128L with round to zero is a subnormal above
- or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa
+ if (v.ieee.exponent > 130)
+ return (a1 + u.d) * 0x1p-130L;
+ /* If v.d * 0x1p-130L with round to zero is a subnormal above
+ or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa
down just by 1 bit, which means v.ieee.mantissa1 |= j would
change the round bit, not sticky or guard bit.
- v.d * 0x1p-128L never normalizes by shifting up,
+ v.d * 0x1p-130L never normalizes by shifting up,
so round bit plus sticky bit should be already enough
for proper rounding. */
- if (v.ieee.exponent == 128)
+ if (v.ieee.exponent == 130)
{
/* If the exponent would be in the normal range when
rounding to normal precision with unbounded exponent
@@ -262,8 +271,8 @@
if (TININESS_AFTER_ROUNDING)
{
w.d = a1 + u.d;
- if (w.ieee.exponent == 129)
- return w.d * 0x1p-128L;
+ if (w.ieee.exponent == 131)
+ return w.d * 0x1p-130L;
}
/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa1 & 1 is the round bit and j is our sticky
@@ -272,12 +281,12 @@
w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
w.ieee.negative = v.ieee.negative;
v.ieee.mantissa1 &= ~3U;
- v.d *= 0x1p-128L;
+ v.d *= 0x1p-130L;
w.d *= 0x1p-2L;
return v.d + w.d;
}
v.ieee.mantissa1 |= j;
- return v.d * 0x1p-128L;
+ return v.d * 0x1p-130L;
}
}
weak_alias (__fmal, fmal)
_______________________________________________
Commits mailing list
Commits@xxxxxxxxxx
http://eglibc.org/cgi-bin/mailman/listinfo/commits