[patches] [patch] ARM Thumb-2 support



The attached patch allows ARM glibc to be built as Thumb-2 code.
There are still odd bits of standalone ARM assembly code, but all the C code
and inline assembly is Thumb capable.
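
For illustration, the main thing the inline assembly needs is explicit
alignment of literal pools: Thumb-2 code is only 2-byte aligned, so a
".word" placed straight after the instructions can end up misaligned.
A rough standalone sketch of the pattern (the symbol and function names
below are made up for the example, they are not from the patch):

int some_symbol;	/* Stand-in for whatever the literal refers to.  */

static inline void *
get_some_symbol_addr (void)
{
  void *result;
  /* Load a PC-relative literal, then branch over it.  The ".align 2"
     keeps the ".word" 4-byte aligned when the surrounding code is
     assembled as Thumb-2.  */
  asm ("ldr %0, 1f\n\t"
       "b 2f\n\t"
       ".align 2\n"
       "1: .word some_symbol\n"
       "2:"
       : "=r" (result));
  return result;
}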

Tested on arm-none-linux-gnueabi in both ARM and Thumb mode.
Ok for trunk and eglibc2_5?

Paul

2006-03-28  Paul Brook  <paul@xxxxxxxxxxxxxxxx>

	* sysdeps/arm/machine-gmon.h (MCOUNT): Add Thumb-2 implementation.
	* sysdeps/arm/dl-machine.h (elf_machine_dynamic): Ditto.
	* sysdeps/arm/tls-macros.h: Add alignment for Thumb-2.
	* sysdeps/arm/elf/start.S: Switch to Thumb mode for Thumb-2.
	* sysdeps/unix/sysv/linux/arm/eabi/sysdep.h (INTERNAL_SYSCALL_RAW):
	Add Thumb implementation.
	* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c: Enforce
	alignment for Thumb-2.
	* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c: Ditto.
	* sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h (atomic_full_barrier,
	__arch_compare_and_exchange_val_32_acq): Add Thumb-2 implementation.

Index: sysdeps/arm/machine-gmon.h
===================================================================
--- sysdeps/arm/machine-gmon.h	(revision 166940)
+++ sysdeps/arm/machine-gmon.h	(working copy)
@@ -50,6 +50,28 @@ static void mcount_internal (u_long from
    }
 */
 
+#ifdef __thumb2__
+
+#define MCOUNT								\
+void _mcount (void)							\
+{									\
+  __asm__("push		{r0, r1, r2, r3};"				\
+	  "movs		fp, fp;"				      	\
+	  "it		eq;"						\
+          "moveq	r1, #0;"					\
+	  "itttt	ne;"						\
+	  "ldrne	r1, [fp, $-4];"					\
+	  "ldrne	r0, [fp, $-12];"				\
+	  "movnes	r0, r0;"					\
+	  "ldrne	r0, [r0, $-4];"					\
+	  "movs		r0, r0;"					\
+	  "it		ne;"						\
+	  "blne		mcount_internal;"				\
+	  "pop		{r0, r1, r2, r3}");				\
+}
+
+#else
+
 #define MCOUNT								\
 void _mcount (void)							\
 {									\
@@ -65,3 +87,4 @@ void _mcount (void)							\
 	  "ldmia	sp!, {r0, r1, r2, r3}");			\
 }
 
+#endif
Index: sysdeps/arm/tls-macros.h
===================================================================
--- sysdeps/arm/tls-macros.h	(revision 166940)
+++ sysdeps/arm/tls-macros.h	(working copy)
@@ -4,6 +4,7 @@
      asm ("ldr %0, 1f; "				\
 	  "add %0, %1, %0; "				\
 	  "b 2f; "					\
+	  ".align 2; "					\
 	  "1: .word " #x "(tpoff); "			\
 	  "2: "						\
 	  : "=&r" (__result) : "r" (tp));		\
@@ -16,6 +17,7 @@
 	  "3: ldr %0, [pc, %0];"			\
 	  "add %0, %1, %0; "				\
 	  "b 2f; "					\
+	  ".align 2; "					\
 	  "1: .word " #x "(gottpoff) + (. - 3b - 8); "	\
 	  "2: "						\
 	  : "=&r" (__result) : "r" (tp));		\
@@ -28,12 +30,14 @@
      asm ("ldr %0, 2f; "				\
 	  "1: add %0, pc, %0; "				\
 	  "b 3f; "					\
+	  ".align 2; "					\
 	  "2: .word " #x "(tlsldm) + (. - 1b - 8); "	\
 	  "3: "						\
 	  : "=r" (__result));				\
      __result = (char *)__tls_get_addr (__result);	\
      asm ("ldr %0, 1f; "				\
 	  "b 2f; "					\
+	  ".align 2; "					\
 	  "1: .word " #x "(tlsldo); "			\
 	  "2: "						\
 	  : "=r" (__offset));				\
@@ -45,6 +49,7 @@
      asm ("ldr %0, 2f; "				\
 	  "1: add %0, pc, %0; "				\
 	  "b 3f; "					\
+	  ".align 2; "					\
 	  "2: .word " #x "(tlsgd) + (. - 1b - 8); "	\
 	  "3: "						\
 	  : "=r" (__result));				\
Index: sysdeps/arm/dl-machine.h
===================================================================
--- sysdeps/arm/dl-machine.h	(revision 166940)
+++ sysdeps/arm/dl-machine.h	(working copy)
@@ -53,11 +53,22 @@ static inline Elf32_Addr __attribute__ (
 elf_machine_dynamic (void)
 {
   Elf32_Addr dynamic;
+#ifdef __thumb2__
+  long tmp;
+  asm ("ldr\t%0, 1f\n\t"
+       "adr\t%1, 1f\n\t"
+       "add\t%0, %1\n\t"
+       "b 2f\n"
+       ".align 2\n"
+       "1: .word _GLOBAL_OFFSET_TABLE_ - 1b\n"
+       "2:" : "=r" (dynamic), "=r"(tmp));
+#else
   asm ("ldr %0, 2f\n"
        "1: ldr %0, [pc, %0]\n"
        "b 3f\n"
        "2: .word _GLOBAL_OFFSET_TABLE_ - (1b+8)\n"
        "3:" : "=r" (dynamic));
+#endif
   return dynamic;
 }
 
Index: sysdeps/arm/elf/start.S
===================================================================
--- sysdeps/arm/elf/start.S	(revision 166940)
+++ sysdeps/arm/elf/start.S	(working copy)
@@ -58,6 +58,10 @@
 		...
 					NULL
 */
+#if defined(__thumb2__)
+	.thumb
+	.syntax unified
+#endif
 
 	.text
 	.globl _start
Index: sysdeps/unix/sysv/linux/arm/eabi/sysdep.h
===================================================================
--- sysdeps/unix/sysv/linux/arm/eabi/sysdep.h	(revision 166940)
+++ sysdeps/unix/sysv/linux/arm/eabi/sysdep.h	(working copy)
@@ -42,6 +42,30 @@
    argument; otherwise the (optional) compatibility code for APCS binaries
    may be invoked.  */
 
+#ifdef __thumb__
+/* Hide the use of r7 from the compiler; this would be a lot
+   easier but for the fact that syscall numbers can exceed 255.
+   For the moment LOAD_ARGS_7 is sacrificed.
+   We can't use push/pop inside the asm because that breaks
+   unwinding (i.e. thread cancellation).  */
+#undef LOAD_ARGS_7
+#undef INTERNAL_SYSCALL_RAW
+#define INTERNAL_SYSCALL_RAW(name, err, nr, args...)		\
+  ({								\
+      int _sys_buf[2];						\
+      register int _a1 asm ("a1");				\
+      register int *_r6 asm ("r6") = _sys_buf;			\
+      *_r6 = name;						\
+      LOAD_ARGS_##nr (args)					\
+      asm volatile ("str        r7, [r6, #4]\n\t"		\
+                    "ldr      r7, [r6]\n\t"			\
+                    "swi      0       @ syscall " #name "\n\t"	\
+                    "ldr      r7, [r6, #4]"			\
+                   : "=r" (_a1)					\
+                    : "r" (_r6) ASM_ARGS_##nr			\
+                    : "memory");				\
+       _a1; })
+#else /* ARM */
 #undef INTERNAL_SYSCALL_RAW
 #define INTERNAL_SYSCALL_RAW(name, err, nr, args...)		\
   ({								\
@@ -53,6 +77,7 @@
 		     : "r" (_nr) ASM_ARGS_##nr			\
 		     : "memory");				\
        _a1; })
+#endif
 
 /* For EABI, non-constant syscalls are actually pretty easy...  */
 #undef INTERNAL_SYSCALL_NCS
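
To make the r7 trick above concrete, here is roughly what the Thumb
INTERNAL_SYSCALL_RAW boils down to for a no-argument call such as
getpid (a standalone approximation for illustration only; the function
name is made up and the real macro also handles argument loading):

#include <asm/unistd.h>

static inline int
thumb_getpid (void)
{
  int buf[2];
  register int a1 asm ("a1");		/* Result comes back in r0.  */
  register int *r6 asm ("r6") = buf;	/* Scratch buffer addressed via r6.  */
  *r6 = __NR_getpid;			/* Syscall number passed through memory.  */
  asm volatile ("str	r7, [r6, #4]\n\t"	/* Save the caller's r7.  */
		"ldr	r7, [r6]\n\t"		/* Load the syscall number.  */
		"swi	0\n\t"
		"ldr	r7, [r6, #4]"		/* Restore r7.  */
		: "=r" (a1)
		: "r" (r6)
		: "memory");
  return a1;
}

The point is that r7 can be the Thumb frame pointer, so it can't simply
be listed as a clobber, and no push/pop appears inside the asm, which
keeps the unwind information valid across the syscall.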
Index: sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c
===================================================================
--- sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c	(revision 166940)
+++ sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c	(working copy)
@@ -66,6 +66,7 @@ asm (
 "4:	bl	init\n"
 "	ldr	r3, [r4, r5]\n"
 "	b	5b\n"
+"	.align 2\n"
 "1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
 "2:	.word	libgcc_s_resume(GOTOFF)\n"
 "	.size	_Unwind_Resume, .-_Unwind_Resume\n"
Index: sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c
===================================================================
--- sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c	(revision 166940)
+++ sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c	(working copy)
@@ -80,6 +80,7 @@ asm (
 "4:	bl	pthread_cancel_init\n"
 "	ldr	r3, [r4, r5]\n"
 "	b	5b\n"
+"	.align 2\n"
 "1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
 "2:	.word	libgcc_s_resume(GOTOFF)\n"
 "	.size	_Unwind_Resume, .-_Unwind_Resume\n"
Index: sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h
===================================================================
--- sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h	(revision 166940)
+++ sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h	(working copy)
@@ -60,12 +60,21 @@ void __arm_link_error (void);
 
 #else
 
+#ifdef __thumb2__
+#define atomic_full_barrier() \
+     __asm__ __volatile__						      \
+	     ("movw\tip, #0x0fa0\n\t"					      \
+	      "movt\tip, #0xffff\n\t"					      \
+	      "blx\tip"							      \
+	      : : : "ip", "lr", "cc", "memory");
+#else
 #define atomic_full_barrier() \
      __asm__ __volatile__						      \
 	     ("mov\tip, #0xffff0fff\n\t"				      \
 	      "mov\tlr, pc\n\t"						      \
 	      "add\tpc, ip, #(0xffff0fa0 - 0xffff0fff)"			      \
 	      : : : "ip", "lr", "cc", "memory");
+#endif
 
 #endif
 
@@ -84,6 +93,9 @@ void __arm_link_error (void);
    specify one to work around GCC PR rtl-optimization/21223.  Otherwise
    it may cause a_oldval or a_tmp to be moved to a different register.  */
 
+#ifdef __thumb2__
+/* Thumb-2 has ldrex/strex.  However it does not have barrier instructions,
+   so we still need to use the kernel helper.  */
 #define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
   ({ register __typeof (oldval) a_oldval asm ("r0");			      \
      register __typeof (oldval) a_newval asm ("r1") = (newval);		      \
@@ -91,20 +103,45 @@ void __arm_link_error (void);
      register __typeof (oldval) a_tmp asm ("r3");			      \
      register __typeof (oldval) a_oldval2 asm ("r4") = (oldval);	      \
      __asm__ __volatile__						      \
-	     ("0:\tldr\t%1,[%3]\n\t"					      \
-	      "cmp\t%1, %4\n\t"						      \
+	     ("0:\tldr\t%[tmp],[%[ptr]]\n\t"				      \
+	      "cmp\t%[tmp], %[old2]\n\t"				      \
 	      "bne\t1f\n\t"						      \
-	      "mov\t%0, %4\n\t"						      \
-	      "mov\t%1, #0xffff0fff\n\t"				      \
+	      "mov\t%[old], %[old2]\n\t"				      \
+	      "movw\t%[tmp], #0x0fc0\n\t"				      \
+	      "movt\t%[tmp], #0xffff\n\t"				      \
+	      "blx\t%[tmp]\n\t"						      \
+	      "bcc\t0b\n\t"						      \
+	      "mov\t%[tmp], %[old2]\n\t"				      \
+	      "1:"							      \
+	      : [old] "=&r" (a_oldval), [tmp] "=&r" (a_tmp)		      \
+	      : [new] "r" (a_newval), [ptr] "r" (a_ptr),		      \
+		[old2] "r" (a_oldval2)					      \
+	      : "ip", "lr", "cc", "memory");				      \
+     a_tmp; })
+#else
+#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+  ({ register __typeof (oldval) a_oldval asm ("r0");			      \
+     register __typeof (oldval) a_newval asm ("r1") = (newval);		      \
+     register __typeof (mem) a_ptr asm ("r2") = (mem);			      \
+     register __typeof (oldval) a_tmp asm ("r3");			      \
+     register __typeof (oldval) a_oldval2 asm ("r4") = (oldval);	      \
+     __asm__ __volatile__						      \
+	     ("0:\tldr\t%[tmp],[%[ptr]]\n\t"				      \
+	      "cmp\t%[tmp], %[old2]\n\t"				      \
+	      "bne\t1f\n\t"						      \
+	      "mov\t%[old], %[old2]\n\t"				      \
+	      "mov\t%[tmp], #0xffff0fff\n\t"				      \
 	      "mov\tlr, pc\n\t"						      \
-	      "add\tpc, %1, #(0xffff0fc0 - 0xffff0fff)\n\t"		      \
+	      "add\tpc, %[tmp], #(0xffff0fc0 - 0xffff0fff)\n\t"		      \
 	      "bcc\t0b\n\t"						      \
-	      "mov\t%1, %4\n\t"						      \
+	      "mov\t%[tmp], %[old2]\n\t"				      \
 	      "1:"							      \
-	      : "=&r" (a_oldval), "=&r" (a_tmp)				      \
-	      : "r" (a_newval), "r" (a_ptr), "r" (a_oldval2)		      \
+	      : [old] "=&r" (a_oldval), [tmp] "=&r" (a_tmp)		      \
+	      : [new] "r" (a_newval), [ptr] "r" (a_ptr),		      \
+		[old2] "r" (a_oldval2)					      \
 	      : "ip", "lr", "cc", "memory");				      \
      a_tmp; })
+#endif
 
 #define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
   ({ __arm_link_error (); oldval; })
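
A note on the magic numbers in the last hunk: 0xffff0fc0 is the address
of the kernel's __kernel_cmpxchg helper in the vector page (0xffff0fa0
is the memory barrier helper).  The existing ARM sequence only works
when the caller is ARM code: executed from Thumb, "mov lr, pc" leaves
the Thumb bit clear in the return address and the write to pc can't
interwork, so the Thumb-2 variants build the address with movw/movt and
call it with blx instead.  In plain C the equivalent call would look
something like this (illustrative only, not part of the patch):

typedef int (*kernel_cmpxchg_t) (int oldval, int newval, volatile int *ptr);

static int
call_kernel_cmpxchg (int oldval, int newval, volatile int *ptr)
{
  /* Documented helper address in the ARM kernel vector page; returns
     zero if *ptr was atomically updated from oldval to newval.  */
  kernel_cmpxchg_t helper = (kernel_cmpxchg_t) 0xffff0fc0;
  return helper (oldval, newval, ptr);
}

The indirect call compiles to blx through a register, which interworks
correctly whether the caller is ARM or Thumb; the asm versions do the
same thing by hand so that the register usage stays under control.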