[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commits] r23322 - in /fsf/trunk/libc: ./ elf/ ports/ ports/sysdeps/arm/ ports/sysdeps/arm/armv7/multiarch/
- To: commits@xxxxxxxxxx
- Subject: [Commits] r23322 - in /fsf/trunk/libc: ./ elf/ ports/ ports/sysdeps/arm/ ports/sysdeps/arm/armv7/multiarch/
- From: eglibc@xxxxxxxxxx
- Date: Wed, 19 Jun 2013 00:02:08 -0000
Author: eglibc
Date: Wed Jun 19 00:02:07 2013
New Revision: 23322
Log:
Import glibc-mainline for 2013-06-19
Added:
fsf/trunk/libc/ports/sysdeps/arm/test-fpucw.c
Modified:
fsf/trunk/libc/ChangeLog
fsf/trunk/libc/elf/rtld-Rules
fsf/trunk/libc/ports/ChangeLog.arm
fsf/trunk/libc/ports/sysdeps/arm/arm-features.h
fsf/trunk/libc/ports/sysdeps/arm/arm-mcount.S
fsf/trunk/libc/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
fsf/trunk/libc/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
fsf/trunk/libc/ports/sysdeps/arm/fpu_control.h
Modified: fsf/trunk/libc/ChangeLog
==============================================================================
--- fsf/trunk/libc/ChangeLog (original)
+++ fsf/trunk/libc/ChangeLog Wed Jun 19 00:02:07 2013
@@ -1,3 +1,9 @@
+2013-06-18 Roland McGrath <roland@xxxxxxxxxxxxx>
+
+ * elf/rtld-Rules (rtld-compile-command.S): New variable.
+ (rtld-compile-command.s, rtld-compile-command.c): New variables.
+ ($(objpfx)rtld-%.os rules): Use them.
+
2013-06-17 Adhemerval Zanella <azanella@xxxxxxxxxxxxxxxxxx>
* nptl/sysdeps/powerpc/tls.h (tcbhead_t): Add Event-Based Branch
Modified: fsf/trunk/libc/elf/rtld-Rules
==============================================================================
--- fsf/trunk/libc/elf/rtld-Rules (original)
+++ fsf/trunk/libc/elf/rtld-Rules Wed Jun 19 00:02:07 2013
@@ -88,29 +88,39 @@
# Some other subdir's Makefile has provided all its normal rules,
# and we just provide some additional definitions.
+rtld-compile-command.S = $(compile-command.S) $(rtld-CPPFLAGS)
+rtld-compile-command.s = $(compile-command.s) $(rtld-CPPFLAGS)
+rtld-compile-command.c = $(compile-command.c) $(rtld-CPPFLAGS)
+
# These are the basic compilation rules corresponding to the Makerules ones.
# The sysd-rules generated makefile already defines pattern rules for rtld-%
# targets built from sysdeps source files.
$(objpfx)rtld-%.os: rtld-%.S $(before-compile)
- $(compile-command.S) $(rtld-CPPFLAGS)
+ $(rtld-compile-command.S)
$(objpfx)rtld-%.os: rtld-%.s $(before-compile)
- $(compile-command.s) $(rtld-CPPFLAGS)
+ $(rtld-compile-command.s)
$(objpfx)rtld-%.os: rtld-%.c $(before-compile)
- $(compile-command.c) $(rtld-CPPFLAGS)
+ $(rtld-compile-command.c)
$(objpfx)rtld-%.os: %.S $(before-compile)
- $(compile-command.S) $(rtld-CPPFLAGS)
+ $(rtld-compile-command.S)
$(objpfx)rtld-%.os: %.s $(before-compile)
- $(compile-command.s) $(rtld-CPPFLAGS)
+ $(rtld-compile-command.s)
$(objpfx)rtld-%.os: %.c $(before-compile)
- $(compile-command.c) $(rtld-CPPFLAGS)
+ $(rtld-compile-command.c)
# The rules for generated source files.
-$(objpfx)rtld-%.os: $(objpfx)rtld-%.S $(before-compile); $(compile-command.S)
-$(objpfx)rtld-%.os: $(objpfx)rtld-%.s $(before-compile); $(compile-command.s)
-$(objpfx)rtld-%.os: $(objpfx)rtld-%.c $(before-compile); $(compile-command.c)
-$(objpfx)rtld-%.os: $(objpfx)%.S $(before-compile); $(compile-command.S)
-$(objpfx)rtld-%.os: $(objpfx)%.s $(before-compile); $(compile-command.s)
-$(objpfx)rtld-%.os: $(objpfx)%.c $(before-compile); $(compile-command.c)
+$(objpfx)rtld-%.os: $(objpfx)rtld-%.S $(before-compile)
+ $(rtld-compile-command.S)
+$(objpfx)rtld-%.os: $(objpfx)rtld-%.s $(before-compile)
+ $(rtld-compile-command.s)
+$(objpfx)rtld-%.os: $(objpfx)rtld-%.c $(before-compile)
+ $(rtld-compile-command.c)
+$(objpfx)rtld-%.os: $(objpfx)%.S $(before-compile)
+ $(rtld-compile-command.S)
+$(objpfx)rtld-%.os: $(objpfx)%.s $(before-compile)
+ $(rtld-compile-command.s)
+$(objpfx)rtld-%.os: $(objpfx)%.c $(before-compile)
+ $(rtld-compile-command.c)
# The command line setting of rtld-modules (see above) tells us
# what we need to build, and that tells us what dependency files we need.
Modified: fsf/trunk/libc/ports/ChangeLog.arm
==============================================================================
--- fsf/trunk/libc/ports/ChangeLog.arm (original)
+++ fsf/trunk/libc/ports/ChangeLog.arm Wed Jun 19 00:02:07 2013
@@ -1,3 +1,30 @@
+2013-06-18 Roland McGrath <roland@xxxxxxxxxxxxx>
+
+ * sysdeps/arm/arm-mcount.S: Comment typo fix.
+
+ * sysdeps/arm/arm-features.h (ARM_BX_NINSNS): New macro.
+ * sysdeps/arm/armv7/multiarch/memcpy_impl.S: Macroize the
+ computed-jump dispatch sections. Use sfi_breg throughout.
+ [ARM_ALWAYS_BX]: Define a different version of the dispatch macros
+ that uses bx rather than add-to-pc, and respects ARM_BX_ALIGN_LOG2.
+ [!USE_NEON] (D_l, D_h): Use r10, r11 rather than r8, r9.
+ (tmp2): Use r8 rather than r10.
+
+ * sysdeps/arm/armv7/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list)
+ [__ARM_NEON__]: Do not refer to HWCAP_ARM_NEON.
+ [!__SOFTFP__]: Do not refer to HWCAP_ARM_VFP.
+
+2013-06-18 Joseph Myers <joseph@xxxxxxxxxxxxxxxx>
+
+ * sysdeps/arm/fpu_control.h [!(_LIBC && !_LIBC_TEST) &&
+ __SOFTFP__] (_FPU_GETCW): Define to (cw) = 0.
+ [!(_LIBC && !_LIBC_TEST) && __SOFTFP__] (_FPU_SETCW): Define to
+ (void) (cw).
+
+ * sysdeps/arm/fpu_control.h [!_LIBC && __SOFTFP__]: Change
+ condition to [!(_LIBC && !_LIBC_TEST) && __SOFTFP__].
+ * sysdeps/arm/test-fpucw.c: New file.
+
2013-06-17 Joseph Myers <joseph@xxxxxxxxxxxxxxxx>
[BZ #14907]
Modified: fsf/trunk/libc/ports/sysdeps/arm/arm-features.h
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/arm/arm-features.h (original)
+++ fsf/trunk/libc/ports/sysdeps/arm/arm-features.h Wed Jun 19 00:02:07 2013
@@ -53,6 +53,14 @@
# define ARM_BX_ALIGN_LOG2 2
#endif
+/* The number of instructions that 'bx' expands to. A more-specific
+ arm-features.h that defines 'bx' as a macro should define this to the
+ number of instructions it expands to. This is used only in a context
+ where the 'bx' expansion won't cross an ARM_BX_ALIGN_LOG2 boundary. */
+#ifndef ARM_BX_NINSNS
+# define ARM_BX_NINSNS 1
+#endif
+
/* An OS-specific arm-features.h file may define ARM_NO_INDEX_REGISTER to
indicate that the two-register addressing modes must never be used. */
Modified: fsf/trunk/libc/ports/sysdeps/arm/arm-mcount.S
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/arm/arm-mcount.S (original)
+++ fsf/trunk/libc/ports/sysdeps/arm/arm-mcount.S Wed Jun 19 00:02:07 2013
@@ -39,7 +39,7 @@
The calling sequence looks something like:
func:
push {lr}
- bl __gnu_mount_nc
+ bl __gnu_mcount_nc
<function body>
*/
Modified: fsf/trunk/libc/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c (original)
+++ fsf/trunk/libc/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c Wed Jun 19 00:02:07 2013
@@ -16,6 +16,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <stdbool.h>
#include <string.h>
#include <ldsodefs.h>
#include <sysdep.h>
@@ -29,21 +30,25 @@
size_t max)
{
size_t i = 0;
- int hwcap;
- hwcap = GLRO(dl_hwcap);
+ bool use_neon = true;
+#ifdef __ARM_NEON__
+# define __memcpy_neon memcpy
+#else
+ use_neon = (GLRO(dl_hwcap) & HWCAP_ARM_NEON) != 0;
+#endif
+
+#ifndef __ARM_NEON__
+ bool use_vfp = true;
+# ifdef __SOFTFP__
+ use_vfp = (GLRO(dl_hwcap) & HWCAP_ARM_VFP) != 0;
+# endif
+#endif
IFUNC_IMPL (i, name, memcpy,
- IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_ARM_NEON,
-#ifdef __ARM_NEON__
- memcpy
-#else
- __memcpy_neon
-#endif
- )
+ IFUNC_IMPL_ADD (array, i, memcpy, use_neon, __memcpy_neon)
#ifndef __ARM_NEON__
- IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_ARM_VFP,
- __memcpy_vfp)
+ IFUNC_IMPL_ADD (array, i, memcpy, use_vfp, __memcpy_vfp)
#endif
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_arm));
Modified: fsf/trunk/libc/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S (original)
+++ fsf/trunk/libc/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S Wed Jun 19 00:02:07 2013
@@ -33,6 +33,7 @@
#define NO_THUMB
#endif
#include <sysdep.h>
+#include <arm-features.h>
.syntax unified
/* This implementation requires ARM state. */
@@ -71,7 +72,139 @@
/* Locals. */
#define tmp1 r3
#define dst ip
-#define tmp2 r10
+#define tmp2 r8
+
+/* These two macros both work by repeated invocation of the macro
+ dispatch_step (not defined here). That macro performs one "step",
+ doing one load instruction and one store instruction to copy one
+ "unit". On entry, TMP1 contains the number of bytes to be copied,
+ a multiple of the unit size. The macro clobbers TMP1 in the
+ process of doing a computed jump to the tail containing the
+ appropriate number of steps.
+
+ In dispatch_7_dword, dispatch_step is invoked seven times, with an
+ argument that is 7 for the first and 1 for the last. Units are
+ double-words (8 bytes). TMP1 is at most 56.
+
+ In dispatch_15_word, dispatch_step is invoked fifteen times,
+ with an argument that is 15 for the first and 1 for the last.
+ Units are words (4 bytes). TMP1 is at most 60. */
+
+#ifndef ARM_ALWAYS_BX
+# if ARM_BX_ALIGN_LOG2 != 2
+# error case not handled
+# endif
+ .macro dispatch_7_dword
+ rsb tmp1, tmp1, #((7 * 8) - PC_OFS + INSN_SIZE)
+ add pc, pc, tmp1
+ dispatch_step 7
+ dispatch_step 6
+ dispatch_step 5
+ dispatch_step 4
+ dispatch_step 3
+ dispatch_step 2
+ dispatch_step 1
+ .purgem dispatch_step
+ .endm
+
+ .macro dispatch_15_word
+ rsb tmp1, tmp1, #((15 * 4) - PC_OFS/2 + INSN_SIZE/2)
+ add pc, pc, tmp1, lsl #1
+ dispatch_step 15
+ dispatch_step 14
+ dispatch_step 13
+ dispatch_step 12
+ dispatch_step 11
+ dispatch_step 10
+ dispatch_step 9
+ dispatch_step 8
+ dispatch_step 7
+ dispatch_step 6
+ dispatch_step 5
+ dispatch_step 4
+ dispatch_step 3
+ dispatch_step 2
+ dispatch_step 1
+ .purgem dispatch_step
+ .endm
+#else
+# if ARM_BX_ALIGN_LOG2 < 4
+# error case not handled
+# endif
+ .macro dispatch_helper steps, log2_bytes_per_step
+ .p2align ARM_BX_ALIGN_LOG2
+ /* TMP1 gets (max_bytes - bytes_to_copy), where max_bytes is
+ (STEPS << LOG2_BYTES_PER_STEP).
+ So this is (steps_to_skip << LOG2_BYTES_PER_STEP). */
+ rsb tmp1, tmp1, #(\steps << \log2_bytes_per_step)
+ /* Pad so that the add;bx pair immediately precedes an alignment
+ boundary. Hence, TMP1=0 will run all the steps. */
+ .rept (1 << (ARM_BX_ALIGN_LOG2 - 2)) - (2 + ARM_BX_NINSNS)
+ nop
+ .endr
+ /* Shifting down LOG2_BYTES_PER_STEP gives us the number of
+ steps to skip, then shifting up ARM_BX_ALIGN_LOG2 gives us
+ the (byte) distance to add to the PC. */
+ add tmp1, pc, tmp1, lsl #(ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step)
+ bx tmp1
+ .endm
+
+ .macro dispatch_7_dword
+ dispatch_helper 7, 3
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 7
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 6
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 5
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 4
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 3
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 2
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 1
+ .p2align ARM_BX_ALIGN_LOG2
+ .purgem dispatch_step
+ .endm
+
+ .macro dispatch_15_word
+ dispatch_helper 15, 2
+ dispatch_step 15
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 14
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 13
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 12
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 11
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 10
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 9
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 8
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 7
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 6
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 5
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 4
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 3
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 2
+ .p2align ARM_BX_ALIGN_LOG2
+ dispatch_step 1
+ .p2align ARM_BX_ALIGN_LOG2
+ .purgem dispatch_step
+ .endm
+
+#endif
#ifndef USE_NEON
/* For bulk copies using GP registers. */
@@ -81,8 +214,9 @@
#define B_h r5
#define C_l r6
#define C_h r7
-#define D_l r8
-#define D_h r9
+/* Don't use the pair r8,r9 because in some EABI variants r9 is reserved. */
+#define D_l r10
+#define D_h r11
#endif
/* Number of lines ahead to pre-fetch data. If you change this the code
@@ -92,40 +226,71 @@
#ifdef USE_VFP
.macro cpy_line_vfp vreg, base
- vstr \vreg, [dst, #\base]
- vldr \vreg, [src, #\base]
- vstr d0, [dst, #\base + 8]
- vldr d0, [src, #\base + 8]
- vstr d1, [dst, #\base + 16]
- vldr d1, [src, #\base + 16]
- vstr d2, [dst, #\base + 24]
- vldr d2, [src, #\base + 24]
- vstr \vreg, [dst, #\base + 32]
- vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
- vstr d0, [dst, #\base + 40]
- vldr d0, [src, #\base + 40]
- vstr d1, [dst, #\base + 48]
- vldr d1, [src, #\base + 48]
- vstr d2, [dst, #\base + 56]
- vldr d2, [src, #\base + 56]
+ sfi_breg dst, \
+ vstr \vreg, [\B, #\base]
+ sfi_breg src, \
+ vldr \vreg, [\B, #\base]
+ sfi_breg dst, \
+ vstr d0, [\B, #\base + 8]
+ sfi_breg src, \
+ vldr d0, [\B, #\base + 8]
+ sfi_breg dst, \
+ vstr d1, [\B, #\base + 16]
+ sfi_breg src, \
+ vldr d1, [\B, #\base + 16]
+ sfi_breg dst, \
+ vstr d2, [\B, #\base + 24]
+ sfi_breg src, \
+ vldr d2, [\B, #\base + 24]
+ sfi_breg dst, \
+ vstr \vreg, [\B, #\base + 32]
+ sfi_breg src, \
+ vldr \vreg, [\B, #\base + prefetch_lines * 64 - 32]
+ sfi_breg dst, \
+ vstr d0, [\B, #\base + 40]
+ sfi_breg src, \
+ vldr d0, [\B, #\base + 40]
+ sfi_breg dst, \
+ vstr d1, [\B, #\base + 48]
+ sfi_breg src, \
+ vldr d1, [\B, #\base + 48]
+ sfi_breg dst, \
+ vstr d2, [\B, #\base + 56]
+ sfi_breg src, \
+ vldr d2, [\B, #\base + 56]
.endm
.macro cpy_tail_vfp vreg, base
- vstr \vreg, [dst, #\base]
- vldr \vreg, [src, #\base]
- vstr d0, [dst, #\base + 8]
- vldr d0, [src, #\base + 8]
- vstr d1, [dst, #\base + 16]
- vldr d1, [src, #\base + 16]
- vstr d2, [dst, #\base + 24]
- vldr d2, [src, #\base + 24]
- vstr \vreg, [dst, #\base + 32]
- vstr d0, [dst, #\base + 40]
- vldr d0, [src, #\base + 40]
- vstr d1, [dst, #\base + 48]
- vldr d1, [src, #\base + 48]
- vstr d2, [dst, #\base + 56]
- vldr d2, [src, #\base + 56]
+ sfi_breg dst, \
+ vstr \vreg, [\B, #\base]
+ sfi_breg src, \
+ vldr \vreg, [\B, #\base]
+ sfi_breg dst, \
+ vstr d0, [\B, #\base + 8]
+ sfi_breg src, \
+ vldr d0, [\B, #\base + 8]
+ sfi_breg dst, \
+ vstr d1, [\B, #\base + 16]
+ sfi_breg src, \
+ vldr d1, [\B, #\base + 16]
+ sfi_breg dst, \
+ vstr d2, [\B, #\base + 24]
+ sfi_breg src, \
+ vldr d2, [\B, #\base + 24]
+ sfi_breg dst, \
+ vstr \vreg, [\B, #\base + 32]
+ sfi_breg dst, \
+ vstr d0, [\B, #\base + 40]
+ sfi_breg src, \
+ vldr d0, [\B, #\base + 40]
+ sfi_breg dst, \
+ vstr d1, [\B, #\base + 48]
+ sfi_breg src, \
+ vldr d1, [\B, #\base + 48]
+ sfi_breg dst, \
+ vstr d2, [\B, #\base + 56]
+ sfi_breg src, \
+ vldr d2, [\B, #\base + 56]
.endm
#endif
@@ -140,81 +305,62 @@
.Ltail63unaligned:
#ifdef USE_NEON
+ /* These need an extra layer of macro just to work around a
+ bug in the assembler's parser when an operand starts with
+ a {...}. http://sourceware.org/bugzilla/show_bug.cgi?id=15647
+ tracks that bug; it was not fixed as of binutils-2.23.2. */
+ .macro neon_load_d0 reg
+ vld1.8 {d0}, [\reg]!
+ .endm
+ .macro neon_store_d0 reg
+ vst1.8 {d0}, [\reg]!
+ .endm
+
+ /* These are used by the NaCl sfi_breg macro. */
+ .macro _sfi_breg_dmask_neon_load_d0 reg
+ _sfi_dmask \reg
+ .endm
+ .macro _sfi_breg_dmask_neon_store_d0 reg
+ _sfi_dmask \reg
+ .endm
+
and tmp1, count, #0x38
- rsb tmp1, tmp1, #(56 - PC_OFS + INSN_SIZE)
- add pc, pc, tmp1
- vld1.8 {d0}, [src]! /* 14 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 12 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 10 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 8 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 6 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 4 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 2 words to go. */
- vst1.8 {d0}, [dst]!
+ .macro dispatch_step i
+ sfi_breg src, neon_load_d0 \B
+ sfi_breg dst, neon_store_d0 \B
+ .endm
+ dispatch_7_dword
tst count, #4
- ldrne tmp1, [src], #4
- strne tmp1, [dst], #4
+ sfi_breg src, \
+ ldrne tmp1, [\B], #4
+ sfi_breg dst, \
+ strne tmp1, [\B], #4
#else
/* Copy up to 15 full words of data. May not be aligned. */
/* Cannot use VFP for unaligned data. */
and tmp1, count, #0x3c
add dst, dst, tmp1
add src, src, tmp1
- rsb tmp1, tmp1, #(60 - PC_OFS/2 + INSN_SIZE/2)
/* Jump directly into the sequence below at the correct offset. */
- add pc, pc, tmp1, lsl #1
-
- ldr tmp1, [src, #-60] /* 15 words to go. */
- str tmp1, [dst, #-60]
-
- ldr tmp1, [src, #-56] /* 14 words to go. */
- str tmp1, [dst, #-56]
- ldr tmp1, [src, #-52]
- str tmp1, [dst, #-52]
-
- ldr tmp1, [src, #-48] /* 12 words to go. */
- str tmp1, [dst, #-48]
- ldr tmp1, [src, #-44]
- str tmp1, [dst, #-44]
-
- ldr tmp1, [src, #-40] /* 10 words to go. */
- str tmp1, [dst, #-40]
- ldr tmp1, [src, #-36]
- str tmp1, [dst, #-36]
-
- ldr tmp1, [src, #-32] /* 8 words to go. */
- str tmp1, [dst, #-32]
- ldr tmp1, [src, #-28]
- str tmp1, [dst, #-28]
-
- ldr tmp1, [src, #-24] /* 6 words to go. */
- str tmp1, [dst, #-24]
- ldr tmp1, [src, #-20]
- str tmp1, [dst, #-20]
-
- ldr tmp1, [src, #-16] /* 4 words to go. */
- str tmp1, [dst, #-16]
- ldr tmp1, [src, #-12]
- str tmp1, [dst, #-12]
-
- ldr tmp1, [src, #-8] /* 2 words to go. */
- str tmp1, [dst, #-8]
- ldr tmp1, [src, #-4]
- str tmp1, [dst, #-4]
+ .macro dispatch_step i
+ sfi_breg src, \
+ ldr tmp1, [\B, #-(\i * 4)]
+ sfi_breg dst, \
+ str tmp1, [\B, #-(\i * 4)]
+ .endm
+ dispatch_15_word
#endif
lsls count, count, #31
- ldrhcs tmp1, [src], #2
- ldrbne src, [src] /* Src is dead, use as a scratch. */
- strhcs tmp1, [dst], #2
- strbne src, [dst]
+ sfi_breg src, \
+ ldrhcs tmp1, [\B], #2
+ sfi_breg src, \
+ ldrbne src, [\B] /* Src is dead, use as a scratch. */
+ sfi_breg dst, \
+ strhcs tmp1, [\B], #2
+ sfi_breg dst, \
+ strbne src, [\B]
bx lr
.Lcpy_not_short:
@@ -242,13 +388,19 @@
beq 1f
rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29
- ldrmi tmp1, [src], #4
- strmi tmp1, [dst], #4
+ sfi_breg src, \
+ ldrmi tmp1, [\B], #4
+ sfi_breg dst, \
+ strmi tmp1, [\B], #4
lsls tmp2, tmp2, #2
- ldrhcs tmp1, [src], #2
- ldrbne tmp2, [src], #1
- strhcs tmp1, [dst], #2
- strbne tmp2, [dst], #1
+ sfi_breg src, \
+ ldrhcs tmp1, [\B], #2
+ sfi_breg src, \
+ ldrbne tmp2, [\B], #1
+ sfi_breg dst, \
+ strhcs tmp1, [\B], #2
+ sfi_breg dst, \
+ strbne tmp2, [\B], #1
1:
subs tmp2, count, #64 /* Use tmp2 for count. */
@@ -260,24 +412,40 @@
.Lcpy_body_medium: /* Count in tmp2. */
#ifdef USE_VFP
1:
- vldr d0, [src, #0]
+ sfi_breg src, \
+ vldr d0, [\B, #0]
subs tmp2, tmp2, #64
- vldr d1, [src, #8]
- vstr d0, [dst, #0]
- vldr d0, [src, #16]
- vstr d1, [dst, #8]
- vldr d1, [src, #24]
- vstr d0, [dst, #16]
- vldr d0, [src, #32]
- vstr d1, [dst, #24]
- vldr d1, [src, #40]
- vstr d0, [dst, #32]
- vldr d0, [src, #48]
- vstr d1, [dst, #40]
- vldr d1, [src, #56]
- vstr d0, [dst, #48]
+ sfi_breg src, \
+ vldr d1, [\B, #8]
+ sfi_breg dst, \
+ vstr d0, [\B, #0]
+ sfi_breg src, \
+ vldr d0, [\B, #16]
+ sfi_breg dst, \
+ vstr d1, [\B, #8]
+ sfi_breg src, \
+ vldr d1, [\B, #24]
+ sfi_breg dst, \
+ vstr d0, [\B, #16]
+ sfi_breg src, \
+ vldr d0, [\B, #32]
+ sfi_breg dst, \
+ vstr d1, [\B, #24]
+ sfi_breg src, \
+ vldr d1, [\B, #40]
+ sfi_breg dst, \
+ vstr d0, [\B, #32]
+ sfi_breg src, \
+ vldr d0, [\B, #48]
+ sfi_breg dst, \
+ vstr d1, [\B, #40]
+ sfi_breg src, \
+ vldr d1, [\B, #56]
+ sfi_breg dst, \
+ vstr d0, [\B, #48]
add src, src, #64
- vstr d1, [dst, #56]
+ sfi_breg dst, \
+ vstr d1, [\B, #56]
add dst, dst, #64
bge 1b
tst tmp2, #0x3f
@@ -287,43 +455,49 @@
and tmp1, tmp2, #0x38
add dst, dst, tmp1
add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFS + INSN_SIZE)
- add pc, pc, tmp1
-
- vldr d0, [src, #-56] /* 14 words to go. */
- vstr d0, [dst, #-56]
- vldr d0, [src, #-48] /* 12 words to go. */
- vstr d0, [dst, #-48]
- vldr d0, [src, #-40] /* 10 words to go. */
- vstr d0, [dst, #-40]
- vldr d0, [src, #-32] /* 8 words to go. */
- vstr d0, [dst, #-32]
- vldr d0, [src, #-24] /* 6 words to go. */
- vstr d0, [dst, #-24]
- vldr d0, [src, #-16] /* 4 words to go. */
- vstr d0, [dst, #-16]
- vldr d0, [src, #-8] /* 2 words to go. */
- vstr d0, [dst, #-8]
+ .macro dispatch_step i
+ sfi_breg src, \
+ vldr d0, [\B, #-(\i * 8)]
+ sfi_breg dst, \
+ vstr d0, [\B, #-(\i * 8)]
+ .endm
+ dispatch_7_dword
#else
sub src, src, #8
sub dst, dst, #8
1:
- ldrd A_l, A_h, [src, #8]
- strd A_l, A_h, [dst, #8]
- ldrd A_l, A_h, [src, #16]
- strd A_l, A_h, [dst, #16]
- ldrd A_l, A_h, [src, #24]
- strd A_l, A_h, [dst, #24]
- ldrd A_l, A_h, [src, #32]
- strd A_l, A_h, [dst, #32]
- ldrd A_l, A_h, [src, #40]
- strd A_l, A_h, [dst, #40]
- ldrd A_l, A_h, [src, #48]
- strd A_l, A_h, [dst, #48]
- ldrd A_l, A_h, [src, #56]
- strd A_l, A_h, [dst, #56]
- ldrd A_l, A_h, [src, #64]!
- strd A_l, A_h, [dst, #64]!
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #8]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #8]
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #16]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #16]
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #24]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #24]
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #32]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #32]
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #40]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #40]
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #48]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #48]
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #56]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #56]
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #64]!
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #64]!
subs tmp2, tmp2, #64
bge 1b
tst tmp2, #0x3f
@@ -349,32 +523,29 @@
and tmp1, tmp2, #0x38
add dst, dst, tmp1
add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFS + INSN_SIZE)
- add pc, pc, tmp1
- ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
- strd A_l, A_h, [dst, #-56]
- ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
- strd A_l, A_h, [dst, #-48]
- ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
- strd A_l, A_h, [dst, #-40]
- ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
- strd A_l, A_h, [dst, #-32]
- ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
- strd A_l, A_h, [dst, #-24]
- ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
- strd A_l, A_h, [dst, #-16]
- ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
- strd A_l, A_h, [dst, #-8]
-
+ .macro dispatch_step i
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #-(\i * 8)]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #-(\i * 8)]
+ .endm
+ dispatch_7_dword
#endif
+
tst tmp2, #4
- ldrne tmp1, [src], #4
- strne tmp1, [dst], #4
+ sfi_breg src, \
+ ldrne tmp1, [\B], #4
+ sfi_breg dst, \
+ strne tmp1, [\B], #4
lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
- ldrhcs tmp1, [src], #2
- ldrbne tmp2, [src]
- strhcs tmp1, [dst], #2
- strbne tmp2, [dst]
+ sfi_breg src, \
+ ldrhcs tmp1, [\B], #2
+ sfi_breg src, \
+ ldrbne tmp2, [\B]
+ sfi_breg dst, \
+ strhcs tmp1, [\B], #2
+ sfi_breg dst, \
+ strbne tmp2, [\B]
.Ldone:
ldr tmp2, [sp], #FRAME_SIZE
@@ -394,15 +565,23 @@
copy position into a register. This should act like a PLD
operation but we won't have to repeat the transfer. */
- vldr d3, [src, #0]
- vldr d4, [src, #64]
- vldr d5, [src, #128]
- vldr d6, [src, #192]
- vldr d7, [src, #256]
-
- vldr d0, [src, #8]
- vldr d1, [src, #16]
- vldr d2, [src, #24]
+ sfi_breg src, \
+ vldr d3, [\B, #0]
+ sfi_breg src, \
+ vldr d4, [\B, #64]
+ sfi_breg src, \
+ vldr d5, [\B, #128]
+ sfi_breg src, \
+ vldr d6, [\B, #192]
+ sfi_breg src, \
+ vldr d7, [\B, #256]
+
+ sfi_breg src, \
+ vldr d0, [\B, #8]
+ sfi_breg src, \
+ vldr d1, [\B, #16]
+ sfi_breg src, \
+ vldr d2, [\B, #24]
add src, src, #32
subs tmp2, tmp2, #prefetch_lines * 64 * 2
@@ -427,19 +606,31 @@
add src, src, #3 * 64
add dst, dst, #3 * 64
cpy_tail_vfp d6, 0
- vstr d7, [dst, #64]
- vldr d7, [src, #64]
- vstr d0, [dst, #64 + 8]
- vldr d0, [src, #64 + 8]
- vstr d1, [dst, #64 + 16]
- vldr d1, [src, #64 + 16]
- vstr d2, [dst, #64 + 24]
- vldr d2, [src, #64 + 24]
- vstr d7, [dst, #64 + 32]
+ sfi_breg dst, \
+ vstr d7, [\B, #64]
+ sfi_breg src, \
+ vldr d7, [\B, #64]
+ sfi_breg dst, \
+ vstr d0, [\B, #64 + 8]
+ sfi_breg src, \
+ vldr d0, [\B, #64 + 8]
+ sfi_breg dst, \
+ vstr d1, [\B, #64 + 16]
+ sfi_breg src, \
+ vldr d1, [\B, #64 + 16]
+ sfi_breg dst, \
+ vstr d2, [\B, #64 + 24]
+ sfi_breg src, \
+ vldr d2, [\B, #64 + 24]
+ sfi_breg dst, \
+ vstr d7, [\B, #64 + 32]
add src, src, #96
- vstr d0, [dst, #64 + 40]
- vstr d1, [dst, #64 + 48]
- vstr d2, [dst, #64 + 56]
+ sfi_breg dst, \
+ vstr d0, [\B, #64 + 40]
+ sfi_breg dst, \
+ vstr d1, [\B, #64 + 48]
+ sfi_breg dst, \
+ vstr d2, [\B, #64 + 56]
add dst, dst, #128
add tmp2, tmp2, #prefetch_lines * 64
b .Lcpy_body_medium
@@ -450,59 +641,83 @@
/* Pre-bias src and dst. */
sub src, src, #8
sub dst, dst, #8
- pld [src, #8]
- pld [src, #72]
+ sfi_pld src, #8
+ sfi_pld src, #72
subs tmp2, tmp2, #64
- pld [src, #136]
- ldrd A_l, A_h, [src, #8]
+ sfi_pld src, #136
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #8]
strd B_l, B_h, [sp, #8]
cfi_rel_offset (B_l, 8)
cfi_rel_offset (B_h, 12)
- ldrd B_l, B_h, [src, #16]
+ sfi_breg src, \
+ ldrd B_l, B_h, [\B, #16]
strd C_l, C_h, [sp, #16]
cfi_rel_offset (C_l, 16)
cfi_rel_offset (C_h, 20)
- ldrd C_l, C_h, [src, #24]
+ sfi_breg src, \
+ ldrd C_l, C_h, [\B, #24]
strd D_l, D_h, [sp, #24]
cfi_rel_offset (D_l, 24)
cfi_rel_offset (D_h, 28)
- pld [src, #200]
- ldrd D_l, D_h, [src, #32]!
+ sfi_pld src, #200
+ sfi_breg src, \
+ ldrd D_l, D_h, [\B, #32]!
b 1f
.p2align 6
2:
- pld [src, #232]
- strd A_l, A_h, [dst, #40]
- ldrd A_l, A_h, [src, #40]
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [src, #48]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [src, #56]
- strd D_l, D_h, [dst, #64]!
- ldrd D_l, D_h, [src, #64]!
+ sfi_pld src, #232
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #40]
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #40]
+ sfi_breg dst, \
+ strd B_l, B_h, [\B, #48]
+ sfi_breg src, \
+ ldrd B_l, B_h, [\B, #48]
+ sfi_breg dst, \
+ strd C_l, C_h, [\B, #56]
+ sfi_breg src, \
+ ldrd C_l, C_h, [\B, #56]
+ sfi_breg dst, \
+ strd D_l, D_h, [\B, #64]!
+ sfi_breg src, \
+ ldrd D_l, D_h, [\B, #64]!
subs tmp2, tmp2, #64
1:
- strd A_l, A_h, [dst, #8]
- ldrd A_l, A_h, [src, #8]
- strd B_l, B_h, [dst, #16]
- ldrd B_l, B_h, [src, #16]
- strd C_l, C_h, [dst, #24]
- ldrd C_l, C_h, [src, #24]
- strd D_l, D_h, [dst, #32]
- ldrd D_l, D_h, [src, #32]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #8]
+ sfi_breg src, \
+ ldrd A_l, A_h, [\B, #8]
+ sfi_breg dst, \
+ strd B_l, B_h, [\B, #16]
+ sfi_breg src, \
+ ldrd B_l, B_h, [\B, #16]
+ sfi_breg dst, \
+ strd C_l, C_h, [\B, #24]
+ sfi_breg src, \
+ ldrd C_l, C_h, [\B, #24]
+ sfi_breg dst, \
+ strd D_l, D_h, [\B, #32]
+ sfi_breg src, \
+ ldrd D_l, D_h, [\B, #32]
bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */
- strd A_l, A_h, [dst, #40]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #40]
add src, src, #40
- strd B_l, B_h, [dst, #48]
+ sfi_breg dst, \
+ strd B_l, B_h, [\B, #48]
ldrd B_l, B_h, [sp, #8]
cfi_restore (B_l)
cfi_restore (B_h)
- strd C_l, C_h, [dst, #56]
+ sfi_breg dst, \
+ strd C_l, C_h, [\B, #56]
ldrd C_l, C_h, [sp, #16]
cfi_restore (C_l)
cfi_restore (C_h)
- strd D_l, D_h, [dst, #64]
+ sfi_breg dst, \
+ strd D_l, D_h, [\B, #64]
ldrd D_l, D_h, [sp, #24]
cfi_restore (D_l)
cfi_restore (D_h)
@@ -519,113 +734,173 @@
cfi_remember_state
.Lcpy_notaligned:
- pld [src]
- pld [src, #64]
+ sfi_pld src
+ sfi_pld src, #64
/* There's at least 64 bytes to copy, but there is no mutual
alignment. */
/* Bring DST to 64-bit alignment. */
lsls tmp2, dst, #29
- pld [src, #(2 * 64)]
+ sfi_pld src, #(2 * 64)
beq 1f
rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29
- ldrmi tmp1, [src], #4
- strmi tmp1, [dst], #4
+ sfi_breg src, \
+ ldrmi tmp1, [\B], #4
+ sfi_breg dst, \
+ strmi tmp1, [\B], #4
lsls tmp2, tmp2, #2
- ldrbne tmp1, [src], #1
- ldrhcs tmp2, [src], #2
- strbne tmp1, [dst], #1
- strhcs tmp2, [dst], #2
+ sfi_breg src, \
+ ldrbne tmp1, [\B], #1
+ sfi_breg src, \
+ ldrhcs tmp2, [\B], #2
+ sfi_breg dst, \
+ strbne tmp1, [\B], #1
+ sfi_breg dst, \
+ strhcs tmp2, [\B], #2
1:
- pld [src, #(3 * 64)]
+ sfi_pld src, #(3 * 64)
subs count, count, #64
ldrmi tmp2, [sp], #FRAME_SIZE
bmi .Ltail63unaligned
- pld [src, #(4 * 64)]
+ sfi_pld src, #(4 * 64)
#ifdef USE_NEON
- vld1.8 {d0-d3}, [src]!
- vld1.8 {d4-d7}, [src]!
+ /* These need an extra layer of macro just to work around a
+ bug in the assembler's parser when an operand starts with
+ a {...}. */
+ .macro neon_load_multi reglist, basereg
+ vld1.8 {\reglist}, [\basereg]!
+ .endm
+ .macro neon_store_multi reglist, basereg
+ vst1.8 {\reglist}, [ALIGN (\basereg, 64)]!
+ .endm
+
+ /* These are used by the NaCl sfi_breg macro. */
+ .macro _sfi_breg_dmask_neon_load_multi reg
+ _sfi_dmask \reg
+ .endm
+ .macro _sfi_breg_dmask_neon_store_multi reg
+ _sfi_dmask \reg
+ .endm
+
+ sfi_breg src, neon_load_multi d0-d3, \B
+ sfi_breg src, neon_load_multi d4-d7, \B
subs count, count, #64
bmi 2f
1:
- pld [src, #(4 * 64)]
- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
- vld1.8 {d0-d3}, [src]!
- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
- vld1.8 {d4-d7}, [src]!
+ sfi_pld src, #(4 * 64)
+ sfi_breg dst, neon_store_multi d0-d3, \B
+ sfi_breg src, neon_load_multi d0-d3, \B
+ sfi_breg dst, neon_store_multi d4-d7, \B
+ sfi_breg src, neon_load_multi d4-d7, \B
subs count, count, #64
bpl 1b
2:
- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
+ sfi_breg dst, neon_store_multi d0-d3, \B
+ sfi_breg dst, neon_store_multi d4-d7, \B
ands count, count, #0x3f
#else
/* Use an SMS style loop to maximize the I/O bandwidth. */
sub src, src, #4
sub dst, dst, #8
subs tmp2, count, #64 /* Use tmp2 for count. */
- ldr A_l, [src, #4]
- ldr A_h, [src, #8]
+ sfi_breg src, \
+ ldr A_l, [\B, #4]
+ sfi_breg src, \
+ ldr A_h, [\B, #8]
strd B_l, B_h, [sp, #8]
cfi_rel_offset (B_l, 8)
cfi_rel_offset (B_h, 12)
- ldr B_l, [src, #12]
- ldr B_h, [src, #16]
+ sfi_breg src, \
+ ldr B_l, [\B, #12]
+ sfi_breg src, \
+ ldr B_h, [\B, #16]
strd C_l, C_h, [sp, #16]
cfi_rel_offset (C_l, 16)
cfi_rel_offset (C_h, 20)
- ldr C_l, [src, #20]
- ldr C_h, [src, #24]
+ sfi_breg src, \
+ ldr C_l, [\B, #20]
+ sfi_breg src, \
+ ldr C_h, [\B, #24]
strd D_l, D_h, [sp, #24]
cfi_rel_offset (D_l, 24)
cfi_rel_offset (D_h, 28)
- ldr D_l, [src, #28]
- ldr D_h, [src, #32]!
+ sfi_breg src, \
+ ldr D_l, [\B, #28]
+ sfi_breg src, \
+ ldr D_h, [\B, #32]!
b 1f
.p2align 6
2:
- pld [src, #(5 * 64) - (32 - 4)]
- strd A_l, A_h, [dst, #40]
- ldr A_l, [src, #36]
- ldr A_h, [src, #40]
- strd B_l, B_h, [dst, #48]
- ldr B_l, [src, #44]
- ldr B_h, [src, #48]
- strd C_l, C_h, [dst, #56]
- ldr C_l, [src, #52]
- ldr C_h, [src, #56]
- strd D_l, D_h, [dst, #64]!
- ldr D_l, [src, #60]
- ldr D_h, [src, #64]!
+ sfi_pld src, #(5 * 64) - (32 - 4)
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #40]
+ sfi_breg src, \
+ ldr A_l, [\B, #36]
+ sfi_breg src, \
+ ldr A_h, [\B, #40]
+ sfi_breg dst, \
+ strd B_l, B_h, [\B, #48]
+ sfi_breg src, \
+ ldr B_l, [\B, #44]
+ sfi_breg src, \
+ ldr B_h, [\B, #48]
+ sfi_breg dst, \
+ strd C_l, C_h, [\B, #56]
+ sfi_breg src, \
+ ldr C_l, [\B, #52]
+ sfi_breg src, \
+ ldr C_h, [\B, #56]
+ sfi_breg dst, \
+ strd D_l, D_h, [\B, #64]!
+ sfi_breg src, \
+ ldr D_l, [\B, #60]
+ sfi_breg src, \
+ ldr D_h, [\B, #64]!
subs tmp2, tmp2, #64
1:
- strd A_l, A_h, [dst, #8]
- ldr A_l, [src, #4]
- ldr A_h, [src, #8]
- strd B_l, B_h, [dst, #16]
- ldr B_l, [src, #12]
- ldr B_h, [src, #16]
- strd C_l, C_h, [dst, #24]
- ldr C_l, [src, #20]
- ldr C_h, [src, #24]
- strd D_l, D_h, [dst, #32]
- ldr D_l, [src, #28]
- ldr D_h, [src, #32]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #8]
+ sfi_breg src, \
+ ldr A_l, [\B, #4]
+ sfi_breg src, \
+ ldr A_h, [\B, #8]
+ sfi_breg dst, \
+ strd B_l, B_h, [\B, #16]
+ sfi_breg src, \
+ ldr B_l, [\B, #12]
+ sfi_breg src, \
+ ldr B_h, [\B, #16]
+ sfi_breg dst, \
+ strd C_l, C_h, [\B, #24]
+ sfi_breg src, \
+ ldr C_l, [\B, #20]
+ sfi_breg src, \
+ ldr C_h, [\B, #24]
+ sfi_breg dst, \
+ strd D_l, D_h, [\B, #32]
+ sfi_breg src, \
+ ldr D_l, [\B, #28]
+ sfi_breg src, \
+ ldr D_h, [\B, #32]
bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */
- strd A_l, A_h, [dst, #40]
+ sfi_breg dst, \
+ strd A_l, A_h, [\B, #40]
add src, src, #36
- strd B_l, B_h, [dst, #48]
+ sfi_breg dst, \
+ strd B_l, B_h, [\B, #48]
ldrd B_l, B_h, [sp, #8]
cfi_restore (B_l)
cfi_restore (B_h)
- strd C_l, C_h, [dst, #56]
+ sfi_breg dst, \
+ strd C_l, C_h, [\B, #56]
ldrd C_l, C_h, [sp, #16]
cfi_restore (C_l)
cfi_restore (C_h)
- strd D_l, D_h, [dst, #64]
+ sfi_breg dst, \
+ strd D_l, D_h, [\B, #64]
ldrd D_l, D_h, [sp, #24]
cfi_restore (D_l)
cfi_restore (D_h)
Modified: fsf/trunk/libc/ports/sysdeps/arm/fpu_control.h
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/arm/fpu_control.h (original)
+++ fsf/trunk/libc/ports/sysdeps/arm/fpu_control.h Wed Jun 19 00:02:07 2013
@@ -19,13 +19,13 @@
#ifndef _FPU_CONTROL_H
#define _FPU_CONTROL_H
-#if !defined(_LIBC) && defined(__SOFTFP__)
+#if !(defined(_LIBC) && !defined(_LIBC_TEST)) && defined(__SOFTFP__)
#define _FPU_RESERVED 0xffffffff
#define _FPU_DEFAULT 0x00000000
typedef unsigned int fpu_control_t;
-#define _FPU_GETCW(cw) 0
-#define _FPU_SETCW(cw) do { } while (0)
+#define _FPU_GETCW(cw) (cw) = 0
+#define _FPU_SETCW(cw) (void) (cw)
extern fpu_control_t __fpu_control;
#else
Added: fsf/trunk/libc/ports/sysdeps/arm/test-fpucw.c
==============================================================================
--- fsf/trunk/libc/ports/sysdeps/arm/test-fpucw.c (added)
+++ fsf/trunk/libc/ports/sysdeps/arm/test-fpucw.c Wed Jun 19 00:02:07 2013
@@ -1,0 +1,5 @@
+/* Defining _LIBC_TEST stops fpu_control.h from defining the
+ hard-float versions of macros (for use with dynamic VFP detection)
+ when compiling for soft-float. */
+#define _LIBC_TEST
+#include <math/test-fpucw.c>
_______________________________________________
Commits mailing list
Commits@xxxxxxxxxx
http://eglibc.org/cgi-bin/mailman/listinfo/commits