[patches] [patch] ARM Thumb-2 support
- To: patches@xxxxxxxxxx
- Subject: [patches] [patch] ARM Thumb-2 support
- From: Paul Brook <paul@xxxxxxxxxxxxxxxx>
- Date: Wed, 28 Mar 2007 20:36:11 +0100
The attached patch allows ARM glibc to be built as Thumb-2 code.
There are still odd bits of standalone ARM assembly code, but all the C code
and inline assembly are Thumb capable.
Tested on arm-none-linux-gnueabi in both ARM and Thumb mode.
Ok for trunk and eglibc2_5?
Paul
2007-03-28 Paul Brook <paul@xxxxxxxxxxxxxxxx>
* sysdeps/arm/machine-gmon.h (MCOUNT): Add Thumb-2 implementation.
* sysdeps/arm/dl-machine.h (elf_machine_dynamic): Ditto.
* sysdeps/arm/tls-macros.h: Add alignment for Thumb-2.
* sysdeps/arm/elf/start.S: Switch to thumb mode for Thumb-2.
* sysdeps/unix/sysv/linux/arm/eabi/sysdep.h (INTERNAL_SYSCALL_RAW):
Add Thumb implementation.
* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c: Enforce
alignment for Thumb-2.
* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c: Ditto.
* sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h (atomic_full_barrier,
__arch_compare_and_exchange_val_32_acq): Add Thumb-2 implementation.
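Background for reviewers, not part of the patch: most of the .align 2
additions below fix the same problem. Thumb-2 instructions are 2-byte
units, so a .word literal that follows Thumb code is only word aligned if
the assembler is told so explicitly, and the pc-relative ldr that fetches
it needs 4-byte alignment. A minimal standalone sketch of the pattern,
with a made-up constant and function name:

/* Load a 32-bit literal placed in the instruction stream.  The .align 2
   is a no-op when assembling ARM code but is required once the
   surrounding code is Thumb, otherwise the .word below may land on a
   halfword boundary.  */
static inline unsigned int
load_literal_example (void)
{
  unsigned int result;
  asm ("ldr %0, 1f\n\t"
       "b 2f\n\t"
       ".align 2\n"
       "1: .word 0x12345678\n"
       "2:"
       : "=r" (result));
  return result;
}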
Index: sysdeps/arm/machine-gmon.h
===================================================================
--- sysdeps/arm/machine-gmon.h (revision 166940)
+++ sysdeps/arm/machine-gmon.h (working copy)
@@ -50,6 +50,28 @@ static void mcount_internal (u_long from
}
*/
+#ifdef __thumb2__
+
+#define MCOUNT \
+void _mcount (void) \
+{ \
+ __asm__("push {r0, r1, r2, r3};" \
+ "movs fp, fp;" \
+ "it eq;" \
+ "moveq r1, #0;" \
+ "itttt ne;" \
+ "ldrne r1, [fp, $-4];" \
+ "ldrne r0, [fp, $-12];" \
+ "movnes r0, r0;" \
+ "ldrne r0, [r0, $-4];" \
+ "movs r0, r0;" \
+ "it ne;" \
+ "blne mcount_internal;" \
+ "pop {r0, r1, r2, r3}"); \
+}
+
+#else
+
#define MCOUNT \
void _mcount (void) \
{ \
@@ -65,3 +87,4 @@ void _mcount (void) \
"ldmia sp!, {r0, r1, r2, r3}"); \
}
+#endif
Index: sysdeps/arm/tls-macros.h
===================================================================
--- sysdeps/arm/tls-macros.h (revision 166940)
+++ sysdeps/arm/tls-macros.h (working copy)
@@ -4,6 +4,7 @@
asm ("ldr %0, 1f; " \
"add %0, %1, %0; " \
"b 2f; " \
+ ".align 2; " \
"1: .word " #x "(tpoff); " \
"2: " \
: "=&r" (__result) : "r" (tp)); \
@@ -16,6 +17,7 @@
"3: ldr %0, [pc, %0];" \
"add %0, %1, %0; " \
"b 2f; " \
+ ".align 2; " \
"1: .word " #x "(gottpoff) + (. - 3b - 8); " \
"2: " \
: "=&r" (__result) : "r" (tp)); \
@@ -28,12 +30,14 @@
asm ("ldr %0, 2f; " \
"1: add %0, pc, %0; " \
"b 3f; " \
+ ".align 2; " \
"2: .word " #x "(tlsldm) + (. - 1b - 8); " \
"3: " \
: "=r" (__result)); \
__result = (char *)__tls_get_addr (__result); \
asm ("ldr %0, 1f; " \
"b 2f; " \
+ ".align 2; " \
"1: .word " #x "(tlsldo); " \
"2: " \
: "=r" (__offset)); \
@@ -45,6 +49,7 @@
asm ("ldr %0, 2f; " \
"1: add %0, pc, %0; " \
"b 3f; " \
+ ".align 2; " \
"2: .word " #x "(tlsgd) + (. - 1b - 8); " \
"3: " \
: "=r" (__result)); \
Index: sysdeps/arm/dl-machine.h
===================================================================
--- sysdeps/arm/dl-machine.h (revision 166940)
+++ sysdeps/arm/dl-machine.h (working copy)
@@ -53,11 +53,22 @@ static inline Elf32_Addr __attribute__ (
elf_machine_dynamic (void)
{
Elf32_Addr dynamic;
+#ifdef __thumb2__
+ long tmp;
+ asm ("ldr\t%0, 1f\n\t"
+ "adr\t%1, 1f\n\t"
+ "add\t%0, %1\n\t"
+ "b 2f\n"
+ ".align 2\n"
+ "1: .word _GLOBAL_OFFSET_TABLE_ - 1b\n"
+ "2:" : "=r" (dynamic), "=r"(tmp));
+#else
asm ("ldr %0, 2f\n"
"1: ldr %0, [pc, %0]\n"
"b 3f\n"
"2: .word _GLOBAL_OFFSET_TABLE_ - (1b+8)\n"
"3:" : "=r" (dynamic));
+#endif
return dynamic;
}
Index: sysdeps/arm/elf/start.S
===================================================================
--- sysdeps/arm/elf/start.S (revision 166940)
+++ sysdeps/arm/elf/start.S (working copy)
@@ -58,6 +58,10 @@
...
NULL
*/
+#if defined(__thumb2__)
+ .thumb
+ .syntax unified
+#endif
.text
.globl _start
Index: sysdeps/unix/sysv/linux/arm/eabi/sysdep.h
===================================================================
--- sysdeps/unix/sysv/linux/arm/eabi/sysdep.h (revision 166940)
+++ sysdeps/unix/sysv/linux/arm/eabi/sysdep.h (working copy)
@@ -42,6 +42,30 @@
argument; otherwise the (optional) compatibility code for APCS binaries
may be invoked. */
+#ifdef __thumb__
+/* Hide the use of r7 from the compiler, this would be a lot
+ easier but for the fact that the syscalls can exceed 255.
+ For the moment the LOAD_ARGS_7 is sacrificed.
+ We can't use push/pop inside the asm because that breaks
+ unwinding (ie. thread cancellation). */
+#undef LOAD_ARGS_7
+#undef INTERNAL_SYSCALL_RAW
+#define INTERNAL_SYSCALL_RAW(name, err, nr, args...) \
+ ({ \
+ int _sys_buf[2]; \
+ register int _a1 asm ("a1"); \
+ register int *_r6 asm ("r6") = _sys_buf; \
+ *_r6 = name; \
+ LOAD_ARGS_##nr (args) \
+ asm volatile ("str r7, [r6, #4]\n\t" \
+ "ldr r7, [r6]\n\t" \
+ "swi 0 @ syscall " #name "\n\t" \
+ "ldr r7, [r6, #4]" \
+ : "=r" (_a1) \
+ : "r" (_r6) ASM_ARGS_##nr \
+ : "memory"); \
+ _a1; })
+#else /* ARM */
#undef INTERNAL_SYSCALL_RAW
#define INTERNAL_SYSCALL_RAW(name, err, nr, args...) \
({ \
@@ -53,6 +77,7 @@
: "r" (_nr) ASM_ARGS_##nr \
: "memory"); \
_a1; })
+#endif
/* For EABI, non-constant syscalls are actually pretty easy... */
#undef INTERNAL_SYSCALL_NCS
Index: sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c
===================================================================
--- sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c (revision 166940)
+++ sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c (working copy)
@@ -66,6 +66,7 @@ asm (
"4: bl init\n"
" ldr r3, [r4, r5]\n"
" b 5b\n"
+" .align 2\n"
"1: .word _GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
"2: .word libgcc_s_resume(GOTOFF)\n"
" .size _Unwind_Resume, .-_Unwind_Resume\n"
Index: sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c
===================================================================
--- sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c (revision 166940)
+++ sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c (working copy)
@@ -80,6 +80,7 @@ asm (
"4: bl pthread_cancel_init\n"
" ldr r3, [r4, r5]\n"
" b 5b\n"
+" .align 2\n"
"1: .word _GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
"2: .word libgcc_s_resume(GOTOFF)\n"
" .size _Unwind_Resume, .-_Unwind_Resume\n"
Index: sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h
===================================================================
--- sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h (revision 166940)
+++ sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h (working copy)
@@ -60,12 +60,21 @@ void __arm_link_error (void);
#else
+#ifdef __thumb2__
+#define atomic_full_barrier() \
+ __asm__ __volatile__ \
+ ("movw\tip, #0x0fa0\n\t" \
+ "movt\tip, #0xffff\n\t" \
+ "blx\tip" \
+ : : : "ip", "lr", "cc", "memory");
+#else
#define atomic_full_barrier() \
__asm__ __volatile__ \
("mov\tip, #0xffff0fff\n\t" \
"mov\tlr, pc\n\t" \
"add\tpc, ip, #(0xffff0fa0 - 0xffff0fff)" \
: : : "ip", "lr", "cc", "memory");
+#endif
#endif
@@ -84,6 +93,9 @@ void __arm_link_error (void);
specify one to work around GCC PR rtl-optimization/21223. Otherwise
it may cause a_oldval or a_tmp to be moved to a different register. */
+#ifdef __thumb2__
+/* Thumb-2 has ldrex/strex. However it does not have barrier instructions,
+ so we still need to use the kernel helper. */
#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
({ register __typeof (oldval) a_oldval asm ("r0"); \
register __typeof (oldval) a_newval asm ("r1") = (newval); \
@@ -91,20 +103,45 @@ void __arm_link_error (void);
register __typeof (oldval) a_tmp asm ("r3"); \
register __typeof (oldval) a_oldval2 asm ("r4") = (oldval); \
__asm__ __volatile__ \
- ("0:\tldr\t%1,[%3]\n\t" \
- "cmp\t%1, %4\n\t" \
+ ("0:\tldr\t%[tmp],[%[ptr]]\n\t" \
+ "cmp\t%[tmp], %[old2]\n\t" \
"bne\t1f\n\t" \
- "mov\t%0, %4\n\t" \
- "mov\t%1, #0xffff0fff\n\t" \
+ "mov\t%[old], %[old2]\n\t" \
+ "movw\t%[tmp], #0x0fc0\n\t" \
+ "movt\t%[tmp], #0xffff\n\t" \
+ "blx\t%[tmp]\n\t" \
+ "bcc\t0b\n\t" \
+ "mov\t%[tmp], %[old2]\n\t" \
+ "1:" \
+ : [old] "=&r" (a_oldval), [tmp] "=&r" (a_tmp) \
+ : [new] "r" (a_newval), [ptr] "r" (a_ptr), \
+ [old2] "r" (a_oldval2) \
+ : "ip", "lr", "cc", "memory"); \
+ a_tmp; })
+#else
+#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+ ({ register __typeof (oldval) a_oldval asm ("r0"); \
+ register __typeof (oldval) a_newval asm ("r1") = (newval); \
+ register __typeof (mem) a_ptr asm ("r2") = (mem); \
+ register __typeof (oldval) a_tmp asm ("r3"); \
+ register __typeof (oldval) a_oldval2 asm ("r4") = (oldval); \
+ __asm__ __volatile__ \
+ ("0:\tldr\t%[tmp],[%[ptr]]\n\t" \
+ "cmp\t%[tmp], %[old2]\n\t" \
+ "bne\t1f\n\t" \
+ "mov\t%[old], %[old2]\n\t" \
+ "mov\t%[tmp], #0xffff0fff\n\t" \
"mov\tlr, pc\n\t" \
- "add\tpc, %1, #(0xffff0fc0 - 0xffff0fff)\n\t" \
+ "add\tpc, %[tmp], #(0xffff0fc0 - 0xffff0fff)\n\t" \
"bcc\t0b\n\t" \
- "mov\t%1, %4\n\t" \
+ "mov\t%[tmp], %[old2]\n\t" \
"1:" \
- : "=&r" (a_oldval), "=&r" (a_tmp) \
- : "r" (a_newval), "r" (a_ptr), "r" (a_oldval2) \
+ : [old] "=&r" (a_oldval), [tmp] "=&r" (a_tmp) \
+ : [new] "r" (a_newval), [ptr] "r" (a_ptr), \
+ [old2] "r" (a_oldval2) \
: "ip", "lr", "cc", "memory"); \
a_tmp; })
+#endif
#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
({ __arm_link_error (); oldval; })
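For reference, again not part of the patch: the magic addresses in the
atomic.h hunks are the ARM Linux kernel user helpers in the vector page.
0xffff0fa0 is the memory barrier helper and 0xffff0fc0 is __kernel_cmpxchg,
which returns zero (and sets the carry flag) on success. The old ARM-only
"mov lr, pc; add pc, ..." call sequence does not work from Thumb code, so
the Thumb-2 variants materialise the address with movw/movt and call it
with blx. A rough C equivalent of what the new sequences do, with
illustrative function names:

/* Sketch only, ARM Linux specific: call the kuser helpers through
   function pointers; an indirect call makes the compiler emit the same
   interworking blx that the hand-written sequences use.  */
typedef void (*kuser_barrier_t) (void);
typedef int (*kuser_cmpxchg_t) (int oldval, int newval, volatile int *ptr);

static inline void
kuser_memory_barrier (void)
{
  ((kuser_barrier_t) 0xffff0fa0) ();
}

static inline int
kuser_compare_and_swap (volatile int *ptr, int oldval, int newval)
{
  /* Mirror the loop in the patch: return the observed value if it does
     not match oldval, retry on spurious failure, and return oldval once
     the helper reports success.  */
  int cur;
  for (;;)
    {
      cur = *ptr;
      if (cur != oldval)
        return cur;
      if (((kuser_cmpxchg_t) 0xffff0fc0) (oldval, newval, ptr) == 0)
        return oldval;
    }
}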