[S390] Inline assembly cleanup.

Major cleanup of all s390 inline assemblies. They now have a common
coding style. Quite a few have been shortened, mainly by using register
asm variables. Use of the EX_TABLE macro helps  as well. The atomic ops,
bit ops and locking inlines new use the Q-constraint if a newer gcc
is used.  That results in slightly better code.

Thanks to Christian Borntraeger for proof reading the changes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h
index e2047b0..72ae4ef 100644
--- a/include/asm-s390/uaccess.h
+++ b/include/asm-s390/uaccess.h
@@ -38,25 +38,14 @@
 #define get_ds()        (KERNEL_DS)
 #define get_fs()        (current->thread.mm_segment)
 
-#ifdef __s390x__
 #define set_fs(x) \
 ({									\
 	unsigned long __pto;						\
 	current->thread.mm_segment = (x);				\
 	__pto = current->thread.mm_segment.ar4 ?			\
 		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
-	asm volatile ("lctlg 7,7,%0" : : "m" (__pto) );			\
+	__ctl_load(__pto, 7, 7);					\
 })
-#else /* __s390x__ */
-#define set_fs(x) \
-({									\
-	unsigned long __pto;						\
-	current->thread.mm_segment = (x);				\
-	__pto = current->thread.mm_segment.ar4 ?			\
-		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
-	asm volatile ("lctl  7,7,%0" : : "m" (__pto) );			\
-})
-#endif /* __s390x__ */
 
 #define segment_eq(a,b) ((a).ar4 == (b).ar4)