[PATCH] Clean up read/write lock assembly

- Move the slow path fallbacks to their own assembly files.
  This makes them much easier to read and is needed for the next change.
- Add CFI annotations for unwinding (XXX: needs review)
- Remove the constant case, which can never happen with out-of-line
  spinlocks.
- Use patchable LOCK prefixes (see the sketch after the sign-off).
- Don't use lock sections anymore for inline code, because they cannot
  be expressed by the unwinder (this adds one taken jump to the lock
  fast path; the biased-counter scheme behind it is sketched below).
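
For reference, the scheme behind these fast and slow paths is a biased
32-bit counter: the lock starts at RW_LOCK_BIAS, readers subtract 1,
writers subtract the whole bias, and the sign bit signals contention.
Below is a minimal user-space C11 sketch of the trylock semantics
(illustrative only: the struct and function names are made up here; the
kernel uses the inline asm in rwlock.h plus the spinning slow paths in
rwlock.S):

	#include <stdatomic.h>
	#include <stdbool.h>

	#define RW_LOCK_BIAS 0x01000000

	struct sketch_rwlock { atomic_int lock; }; /* starts at RW_LOCK_BIAS */

	static bool sketch_read_trylock(struct sketch_rwlock *rw)
	{
		/* Fast path: take one reader slot; negative => writer holds it */
		if (atomic_fetch_sub(&rw->lock, 1) - 1 >= 0)
			return true;
		/* Back out, as __read_lock_failed does before spinning */
		atomic_fetch_add(&rw->lock, 1);
		return false;
	}

	static bool sketch_write_trylock(struct sketch_rwlock *rw)
	{
		/* Fast path: claim the whole bias; zero => no readers/writers */
		if (atomic_fetch_sub(&rw->lock, RW_LOCK_BIAS) - RW_LOCK_BIAS == 0)
			return true;
		/* Back out, as __write_lock_failed does before spinning */
		atomic_fetch_add(&rw->lock, RW_LOCK_BIAS);
		return false;
	}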

Cc: jbeulich@novell.com

Signed-off-by: Andi Kleen <ak@suse.de>
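---

Note on the patchable LOCK prefixes: LOCK_PREFIX (asm/alternative.h for
C, asm/alternative-asm.i for .S files) emits the lock prefix together
with a .smp_locks entry recording its address, so boot-time patching can
NOP it out on uniprocessor kernels. The C side looks roughly like this
(quoted from memory, so treat it as a sketch rather than the exact
definition):

	#ifdef CONFIG_SMP
	#define LOCK_PREFIX \
			".section .smp_locks,\"a\"\n" \
			"  .align 8\n" \
			"  .quad 661f\n" /* address of the prefix */ \
			".previous\n" \
			"661:\n\tlock; "
	#else
	#define LOCK_PREFIX ""
	#endif
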
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile
index ccef6ae..b78d417 100644
--- a/arch/x86_64/lib/Makefile
+++ b/arch/x86_64/lib/Makefile
@@ -9,4 +9,4 @@
 lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
 	usercopy.o getuser.o putuser.o  \
 	thunk.o clear_page.o copy_page.o bitstr.o bitops.o
-lib-y += memcpy.o memmove.o memset.o copy_user.o
+lib-y += memcpy.o memmove.o memset.o copy_user.o rwlock.o
diff --git a/arch/x86_64/lib/rwlock.S b/arch/x86_64/lib/rwlock.S
new file mode 100644
index 0000000..0cde1f8
--- /dev/null
+++ b/arch/x86_64/lib/rwlock.S
@@ -0,0 +1,38 @@
+/* Slow paths of read/write spinlocks. */
+
+#include <linux/linkage.h>
+#include <asm/rwlock.h>
+#include <asm/alternative-asm.i>
+#include <asm/dwarf2.h>
+
+/* rdi:	pointer to rwlock_t */
+ENTRY(__write_lock_failed)
+	CFI_STARTPROC
+	LOCK_PREFIX
+	addl $RW_LOCK_BIAS,(%rdi)
+1:	rep
+	nop
+	cmpl $RW_LOCK_BIAS,(%rdi)
+	jne 1b
+	LOCK_PREFIX
+	subl $RW_LOCK_BIAS,(%rdi)
+	jnz  __write_lock_failed
+	ret
+	CFI_ENDPROC
+END(__write_lock_failed)
+
+/* rdi:	pointer to rwlock_t */
+ENTRY(__read_lock_failed)
+	CFI_STARTPROC
+	LOCK_PREFIX
+	incl (%rdi)
+1:	rep
+	nop
+	cmpl $1,(%rdi)
+	js 1b
+	LOCK_PREFIX
+	decl (%rdi)
+	js __read_lock_failed
+	ret
+	CFI_ENDPROC
+END(__read_lock_failed)
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index 332ea5d..6cff27c 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -67,33 +67,3 @@
 	RESTORE_ARGS 1
 	ret
 	CFI_ENDPROC
-
-#ifdef CONFIG_SMP
-/* Support for read/write spinlocks. */
-	.text
-/* rax:	pointer to rwlock_t */	
-ENTRY(__write_lock_failed)
-	lock
-	addl $RW_LOCK_BIAS,(%rax)
-1:	rep
-	nop
-	cmpl $RW_LOCK_BIAS,(%rax)
-	jne 1b
-	lock 
-	subl $RW_LOCK_BIAS,(%rax)
-	jnz  __write_lock_failed
-	ret
-
-/* rax:	pointer to rwlock_t */	
-ENTRY(__read_lock_failed)
-	lock
-	incl (%rax)
-1:	rep
-	nop
-	cmpl $1,(%rax)
-	js 1b
-	lock
-	decl (%rax)
-	js __read_lock_failed
-	ret
-#endif
diff --git a/include/asm-x86_64/rwlock.h b/include/asm-x86_64/rwlock.h
index dea0e94..28a080d 100644
--- a/include/asm-x86_64/rwlock.h
+++ b/include/asm-x86_64/rwlock.h
@@ -18,69 +18,21 @@
 #ifndef _ASM_X86_64_RWLOCK_H
 #define _ASM_X86_64_RWLOCK_H
 
-#include <linux/stringify.h>
-
 #define RW_LOCK_BIAS		 0x01000000
-#define RW_LOCK_BIAS_STR	"0x01000000"
+#define RW_LOCK_BIAS_STR	 "0x01000000"
 
-#define __build_read_lock_ptr(rw, helper)   \
+#define __build_read_lock(rw)   \
 	asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t" \
-		     "js 2f\n" \
+		     "jns 1f\n" \
+		     "call __read_lock_failed\n" \
 		     "1:\n" \
-		    LOCK_SECTION_START("") \
-		     "2:\tcall " helper "\n\t" \
-		     "jmp 1b\n" \
-		    LOCK_SECTION_END \
-		     ::"a" (rw) : "memory")
+		     ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory")
 
-#define __build_read_lock_const(rw, helper)   \
-	asm volatile(LOCK_PREFIX "subl $1,%0\n\t" \
-		     "js 2f\n" \
+#define __build_write_lock(rw) \
+	asm volatile(LOCK_PREFIX "subl %1,(%0)\n\t" \
+		     "jz 1f\n" \
+		     "\tcall __write_lock_failed\n\t" \
 		     "1:\n" \
-		    LOCK_SECTION_START("") \
-		     "2:\tpushq %%rax\n\t" \
-		     "leaq %0,%%rax\n\t" \
-		     "call " helper "\n\t" \
-		     "popq %%rax\n\t" \
-		     "jmp 1b\n" \
-		    LOCK_SECTION_END \
-		     :"=m" (*((volatile int *)rw))::"memory")
-
-#define __build_read_lock(rw, helper)	do { \
-						if (__builtin_constant_p(rw)) \
-							__build_read_lock_const(rw, helper); \
-						else \
-							__build_read_lock_ptr(rw, helper); \
-					} while (0)
-
-#define __build_write_lock_ptr(rw, helper) \
-	asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
-		     "jnz 2f\n" \
-		     "1:\n" \
-		     LOCK_SECTION_START("") \
-		     "2:\tcall " helper "\n\t" \
-		     "jmp 1b\n" \
-		     LOCK_SECTION_END \
-		     ::"a" (rw) : "memory")
-
-#define __build_write_lock_const(rw, helper) \
-	asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
-		     "jnz 2f\n" \
-		     "1:\n" \
-		    LOCK_SECTION_START("") \
-		     "2:\tpushq %%rax\n\t" \
-		     "leaq %0,%%rax\n\t" \
-		     "call " helper "\n\t" \
-		     "popq %%rax\n\t" \
-		     "jmp 1b\n" \
-		    LOCK_SECTION_END \
-		     :"=m" (*((volatile long *)rw))::"memory")
-
-#define __build_write_lock(rw, helper)	do { \
-						if (__builtin_constant_p(rw)) \
-							__build_write_lock_const(rw, helper); \
-						else \
-							__build_write_lock_ptr(rw, helper); \
-					} while (0)
+		     ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory")
 
 #endif
diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h
index 248a79f..a8e3d89 100644
--- a/include/asm-x86_64/spinlock.h
+++ b/include/asm-x86_64/spinlock.h
@@ -79,13 +79,6 @@
  *
  * On x86, we implement read-write locks as a 32-bit counter
  * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores.  See
- * semaphore.h for details.  -ben
- *
- * the helpers are in arch/i386/kernel/semaphore.c
  */
 
 #define __raw_read_can_lock(x)		((int)(x)->lock > 0)
@@ -93,12 +86,12 @@
 
 static inline void __raw_read_lock(raw_rwlock_t *rw)
 {
-	__build_read_lock(rw, "__read_lock_failed");
+	__build_read_lock(rw);
 }
 
 static inline void __raw_write_lock(raw_rwlock_t *rw)
 {
-	__build_write_lock(rw, "__write_lock_failed");
+	__build_write_lock(rw);
 }
 
 static inline int __raw_read_trylock(raw_rwlock_t *lock)