[S390] Inline assembly cleanup.

Major cleanup of all s390 inline assemblies. They now have a common
coding style. Quite a few have been shortened, mainly by using register
asm variables. Use of the EX_TABLE macro helps as well. The atomic ops,
bit ops and locking inlines now use the Q-constraint if a newer gcc
is used. That results in slightly better code.

Thanks to Christian Borntraeger for proofreading the changes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index efd836c..2b13708 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -105,63 +105,6 @@
 };
 
 /*
- * Standard fixup and ex_table sections for crypt_s390 inline functions.
- * label 0: the s390 crypto operation
- * label 1: just after 1 to catch illegal operation exception
- *          (unsupported model)
- * label 6: the return point after fixup
- * label 7: set error value if exception _in_ crypto operation
- * label 8: set error value if illegal operation exception
- * [ret] is the variable to receive the error code
- * [ERR] is the error code value
- */
-#ifndef CONFIG_64BIT
-#define __crypt_s390_fixup \
-	".section .fixup,\"ax\" \n"	\
-	"7:	lhi	%0,%h[e1] \n"	\
-	"	bras	1,9f \n"	\
-	"	.long	6b \n"		\
-	"8:	lhi	%0,%h[e2] \n"	\
-	"	bras	1,9f \n"	\
-	"	.long	6b \n"		\
-	"9:	l	1,0(1) \n"	\
-	"	br	1 \n"		\
-	".previous \n"			\
-	".section __ex_table,\"a\" \n"	\
-	"	.align	4 \n"		\
-	"	.long	0b,7b \n"	\
-	"	.long	1b,8b \n"	\
-	".previous"
-#else /* CONFIG_64BIT */
-#define __crypt_s390_fixup \
-	".section .fixup,\"ax\" \n"	\
-	"7:	lhi	%0,%h[e1] \n"	\
-	"	jg	6b \n"		\
-	"8:	lhi	%0,%h[e2] \n"	\
-	"	jg	6b \n"		\
-	".previous\n"			\
-	".section __ex_table,\"a\" \n"	\
-	"	.align	8 \n"		\
-	"	.quad	0b,7b \n"	\
-	"	.quad	1b,8b \n"	\
-	".previous"
-#endif /* CONFIG_64BIT */
-
-/*
- * Standard code for setting the result of s390 crypto instructions.
- * %0: the register which will receive the result
- * [result]: the register containing the result (e.g. second operand length
- * to compute number of processed bytes].
- */
-#ifndef CONFIG_64BIT
-#define __crypt_s390_set_result \
-	"	lr	%0,%[result] \n"
-#else /* CONFIG_64BIT */
-#define __crypt_s390_set_result \
-	"	lgr	%0,%[result] \n"
-#endif
-
-/*
  * Executes the KM (CIPHER MESSAGE) operation of the CPU.
  * @param func: the function code passed to KM; see crypt_s390_km_func
  * @param param: address of parameter block; see POP for details on each func
@@ -176,28 +119,24 @@
 {
 	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
 	register void* __param asm("1") = param;
-	register u8* __dest asm("4") = dest;
 	register const u8* __src asm("2") = src;
 	register long __src_len asm("3") = src_len;
+	register u8* __dest asm("4") = dest;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB92E0000,%1,%2 \n" /* KM opcode */
+	asm volatile(
+		"0:	.insn	rre,0xb92e0000,%3,%1 \n" /* KM opcode */
 		"1:	brc	1,0b \n" /* handle partial completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__dest), "+a" (__src),
-		  [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){
-		ret = src_len - ret;
-	}
-	return ret;
+		"	ahi	%0,%h7\n"
+		"2:	ahi	%0,%h8\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /*
@@ -215,28 +154,24 @@
 {
 	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
 	register void* __param asm("1") = param;
-	register u8* __dest asm("4") = dest;
 	register const u8* __src asm("2") = src;
 	register long __src_len asm("3") = src_len;
+	register u8* __dest asm("4") = dest;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB92F0000,%1,%2 \n" /* KMC opcode */
+	asm volatile(
+		"0:	.insn	rre,0xb92f0000,%3,%1 \n" /* KMC opcode */
 		"1:	brc	1,0b \n" /* handle partial completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__dest), "+a" (__src),
-		  [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){
-		ret = src_len - ret;
-	}
-	return ret;
+		"	ahi	%0,%h7\n"
+		"2:	ahi	%0,%h8\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /*
@@ -258,22 +193,19 @@
 	register long __src_len asm("3") = src_len;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB93E0000,%1,%1 \n" /* KIMD opcode */
-		"1:	brc	1,0b \n" /* handle partical completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__src), [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && (func & CRYPT_S390_FUNC_MASK)){
-		ret = src_len - ret;
-	}
-	return ret;
+	asm volatile(
+		"0:	.insn	rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	ahi	%0,%h6\n"
+		"2:	ahi	%0,%h7\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /*
@@ -294,22 +226,19 @@
 	register long __src_len asm("3") = src_len;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB93F0000,%1,%1 \n" /* KLMD opcode */
-		"1:	brc	1,0b \n" /* handle partical completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__src), [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){
-		ret = src_len - ret;
-	}
-	return ret;
+	asm volatile(
+		"0:	.insn	rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	ahi	%0,%h6\n"
+		"2:	ahi	%0,%h7\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /*
@@ -331,22 +260,19 @@
 	register long __src_len asm("3") = src_len;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB91E0000,%5,%5 \n" /* KMAC opcode */
-		"1:	brc	1,0b \n" /* handle partical completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__src), [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){
-		ret = src_len - ret;
-	}
-	return ret;
+	asm volatile(
+		"0:	.insn	rre,0xb91e0000,%1,%1 \n" /* KMAC opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	ahi	%0,%h6\n"
+		"2:	ahi	%0,%h7\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /**
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 684384f..443fa37 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -333,22 +333,14 @@
 	register unsigned long _subcode asm("0") = subcode;
 	register unsigned long _size asm("1") = size;
 
-	asm volatile ("   diag    %2,%0,0x204\n"
-		      "0: \n" ".section __ex_table,\"a\"\n"
-#ifndef __s390x__
-		      "    .align 4\n"
-		      "    .long  0b,0b\n"
-#else
-		      "    .align 8\n"
-		      "    .quad  0b,0b\n"
-#endif
-		      ".previous":"+d" (_subcode), "+d"(_size)
-		      :"d"(addr)
-		      :"memory");
+	asm volatile(
+		"	diag	%2,%0,0x204\n"
+		"0:\n"
+		EX_TABLE(0b,0b)
+		: "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
 	if (_subcode)
 		return -1;
-	else
-		return _size;
+	return _size;
 }
 
 /*
@@ -491,8 +483,7 @@
 
 static void diag224(void *ptr)
 {
-	asm volatile("   diag    %0,%1,0x224\n"
-		     : :"d" (0), "d"(ptr) : "memory");
+	asm volatile("diag %0,%1,0x224" : :"d" (0), "d"(ptr) : "memory");
 }
 
 static int diag224_get_name_table(void)
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 91b2884..c46e3d4 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -544,10 +544,7 @@
 		current->ptrace &= ~PT_DTRACE;
 		task_unlock(current);
 		current->thread.fp_regs.fpc=0;
-		__asm__ __volatile__
-		        ("sr  0,0\n\t"
-		         "sfpc 0,0\n\t"
-			 : : :"0");
+		asm volatile("sfpc %0,0" : : "d" (0));
 	}
         putname(filename);
 out:
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 4ef44e5..1eae74e 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -25,11 +25,8 @@
  */
 int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 {
-	const int mask = 0x40000000L;
-	unsigned long flags;
-	int return_code;
-	int return_len;
-	int cmdlen;
+	unsigned long flags, cmdlen;
+	int return_code, return_len;
 
 	spin_lock_irqsave(&cpcmd_lock, flags);
 	cmdlen = strlen(cmd);
@@ -38,64 +35,44 @@
 	ASCEBC(cpcmd_buf, cmdlen);
 
 	if (response != NULL && rlen > 0) {
+		register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
+		register unsigned long reg3 asm ("3") = (addr_t) response;
+		register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L;
+		register unsigned long reg5 asm ("5") = rlen;
+
 		memset(response, 0, rlen);
+		asm volatile(
 #ifndef CONFIG_64BIT
-		asm volatile (	"lra	2,0(%2)\n"
-				"lr	4,%3\n"
-				"o	4,%6\n"
-				"lra	3,0(%4)\n"
-				"lr	5,%5\n"
-				"diag	2,4,0x8\n"
-				"brc	8, 1f\n"
-				"ar	5, %5\n"
-				"1: \n"
-				"lr	%0,4\n"
-				"lr	%1,5\n"
-				: "=d" (return_code), "=d" (return_len)
-				: "a" (cpcmd_buf), "d" (cmdlen),
-				"a" (response), "d" (rlen), "m" (mask)
-				: "cc", "2", "3", "4", "5" );
+			"	diag	%2,%0,0x8\n"
+			"	brc	8,1f\n"
+			"	ar	%1,%4\n"
 #else /* CONFIG_64BIT */
-                asm volatile (	"lrag	2,0(%2)\n"
-				"lgr	4,%3\n"
-				"o	4,%6\n"
-				"lrag	3,0(%4)\n"
-				"lgr	5,%5\n"
-				"sam31\n"
-				"diag	2,4,0x8\n"
-				"sam64\n"
-				"brc	8, 1f\n"
-				"agr	5, %5\n"
-				"1: \n"
-				"lgr	%0,4\n"
-				"lgr	%1,5\n"
-				: "=d" (return_code), "=d" (return_len)
-				: "a" (cpcmd_buf), "d" (cmdlen),
-				"a" (response), "d" (rlen), "m" (mask)
-				: "cc", "2", "3", "4", "5" );
+			"	sam31\n"
+			"	diag	%2,%0,0x8\n"
+			"	sam64\n"
+			"	brc	8,1f\n"
+			"	agr	%1,%4\n"
 #endif /* CONFIG_64BIT */
+			"1:\n"
+			: "+d" (reg4), "+d" (reg5)
+			: "d" (reg2), "d" (reg3), "d" (rlen) : "cc");
+		return_code = (int) reg4;
+		return_len = (int) reg5;
                 EBCASC(response, rlen);
         } else {
+		register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
+		register unsigned long reg3 asm ("3") = cmdlen;
 		return_len = 0;
+		asm volatile(
 #ifndef CONFIG_64BIT
-                asm volatile (	"lra	2,0(%1)\n"
-				"lr	3,%2\n"
-				"diag	2,3,0x8\n"
-				"lr	%0,3\n"
-				: "=d" (return_code)
-				: "a" (cpcmd_buf), "d" (cmdlen)
-				: "2", "3"  );
+			"	diag	%1,%0,0x8\n"
 #else /* CONFIG_64BIT */
-                asm volatile (	"lrag	2,0(%1)\n"
-				"lgr	3,%2\n"
-				"sam31\n"
-				"diag	2,3,0x8\n"
-				"sam64\n"
-				"lgr	%0,3\n"
-				: "=d" (return_code)
-				: "a" (cpcmd_buf), "d" (cmdlen)
-				: "2", "3" );
+			"	sam31\n"
+			"	diag	%1,%0,0x8\n"
+			"	sam64\n"
 #endif /* CONFIG_64BIT */
+			: "+d" (reg3) : "d" (reg2) : "cc");
+		return_code = (int) reg3;
         }
 	spin_unlock_irqrestore(&cpcmd_lock, flags);
 	if (response_code != NULL)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 6555cc4..1f5e782 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -120,24 +120,15 @@
 
 static int diag308(unsigned long subcode, void *addr)
 {
-	register unsigned long _addr asm("0") = (unsigned long)addr;
+	register unsigned long _addr asm("0") = (unsigned long) addr;
 	register unsigned long _rc asm("1") = 0;
 
-	asm volatile (
-		"   diag %0,%2,0x308\n"
-		"0: \n"
-		".section __ex_table,\"a\"\n"
-#ifdef CONFIG_64BIT
-		"   .align 8\n"
-		"   .quad 0b, 0b\n"
-#else
-		"   .align 4\n"
-		"   .long 0b, 0b\n"
-#endif
-		".previous\n"
+	asm volatile(
+		"	diag	%0,%2,0x308\n"
+		"0:\n"
+		EX_TABLE(0b,0b)
 		: "+d" (_addr), "+d" (_rc)
-		: "d" (subcode) : "cc", "memory" );
-
+		: "d" (subcode) : "cc", "memory");
 	return _rc;
 }
 
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index d3cbfa3..6603fbb 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -45,7 +45,7 @@
 #include <asm/irq.h>
 #include <asm/timer.h>
 
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
 /*
  * Return saved PC of a blocked thread. used in kernel/sched.
@@ -177,7 +177,8 @@
 
 extern void kernel_thread_starter(void);
 
-__asm__(".align 4\n"
+asm(
+	".align 4\n"
 	"kernel_thread_starter:\n"
 	"    la    2,0(10)\n"
 	"    basr  14,9\n"
diff --git a/arch/s390/kernel/semaphore.c b/arch/s390/kernel/semaphore.c
index 8dfb690..191303f 100644
--- a/arch/s390/kernel/semaphore.c
+++ b/arch/s390/kernel/semaphore.c
@@ -26,17 +26,17 @@
 {
 	int old_val, new_val;
 
-        __asm__ __volatile__("   l     %0,0(%3)\n"
-                             "0: ltr   %1,%0\n"
-			     "   jhe   1f\n"
-			     "   lhi   %1,0\n"
-			     "1: ar    %1,%4\n"
-                             "   cs    %0,%1,0(%3)\n"
-                             "   jl    0b\n"
-                             : "=&d" (old_val), "=&d" (new_val),
-			       "=m" (sem->count)
-			     : "a" (&sem->count), "d" (incr), "m" (sem->count)
-			     : "cc" );
+	asm volatile(
+		"	l	%0,0(%3)\n"
+		"0:	ltr	%1,%0\n"
+		"	jhe	1f\n"
+		"	lhi	%1,0\n"
+		"1:	ar	%1,%4\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b\n"
+		: "=&d" (old_val), "=&d" (new_val), "=m" (sem->count)
+		: "a" (&sem->count), "d" (incr), "m" (sem->count)
+		: "cc");
 	return old_val;
 }
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index e3d9325..a21cfbb 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -101,7 +101,7 @@
         /*
          * Store processor id in lowcore (used e.g. in timer_interrupt)
          */
-        asm volatile ("stidp %0": "=m" (S390_lowcore.cpu_data.cpu_id));
+	asm volatile("stidp %0": "=m" (S390_lowcore.cpu_data.cpu_id));
         S390_lowcore.cpu_data.cpu_addr = addr;
 
         /*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index b2e6f4c..a8e6199 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -63,7 +63,7 @@
 static void smp_ext_bitcall_others(ec_bit_sig);
 
 /*
- * Structure and data for smp_call_function(). This is designed to minimise
+ * Structure and data for smp_call_function(). This is designed to minimise
  * static memory requirements. It also looks cleaner.
  */
 static DEFINE_SPINLOCK(call_lock);
@@ -418,59 +418,49 @@
 /*
  * parameter area for the set/clear control bit callbacks
  */
-typedef struct
-{
-	__u16 start_ctl;
-	__u16 end_ctl;
+struct ec_creg_mask_parms {
 	unsigned long orvals[16];
 	unsigned long andvals[16];
-} ec_creg_mask_parms;
+};
 
 /*
  * callback for setting/clearing control bits
  */
 void smp_ctl_bit_callback(void *info) {
-	ec_creg_mask_parms *pp;
+	struct ec_creg_mask_parms *pp = info;
 	unsigned long cregs[16];
 	int i;
 	
-	pp = (ec_creg_mask_parms *) info;
-	__ctl_store(cregs[pp->start_ctl], pp->start_ctl, pp->end_ctl);
-	for (i = pp->start_ctl; i <= pp->end_ctl; i++)
+	__ctl_store(cregs, 0, 15);
+	for (i = 0; i <= 15; i++)
 		cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i];
-	__ctl_load(cregs[pp->start_ctl], pp->start_ctl, pp->end_ctl);
+	__ctl_load(cregs, 0, 15);
 }
 
 /*
  * Set a bit in a control register of all cpus
  */
-void smp_ctl_set_bit(int cr, int bit) {
-        ec_creg_mask_parms parms;
+void smp_ctl_set_bit(int cr, int bit)
+{
+	struct ec_creg_mask_parms parms;
 
-	parms.start_ctl = cr;
-	parms.end_ctl = cr;
+	memset(&parms.orvals, 0, sizeof(parms.orvals));
+	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
 	parms.orvals[cr] = 1 << bit;
-	parms.andvals[cr] = -1L;
-	preempt_disable();
-	smp_call_function(smp_ctl_bit_callback, &parms, 0, 1);
-        __ctl_set_bit(cr, bit);
-	preempt_enable();
+	on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
 }
 
 /*
  * Clear a bit in a control register of all cpus
  */
-void smp_ctl_clear_bit(int cr, int bit) {
-        ec_creg_mask_parms parms;
+void smp_ctl_clear_bit(int cr, int bit)
+{
+	struct ec_creg_mask_parms parms;
 
-	parms.start_ctl = cr;
-	parms.end_ctl = cr;
-	parms.orvals[cr] = 0;
+	memset(&parms.orvals, 0, sizeof(parms.orvals));
+	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
 	parms.andvals[cr] = ~(1L << bit);
-	preempt_disable();
-	smp_call_function(smp_ctl_bit_callback, &parms, 0, 1);
-        __ctl_clear_bit(cr, bit);
-	preempt_enable();
+	on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
 }
 
 /*
@@ -650,9 +640,9 @@
 	sf->gprs[9] = (unsigned long) sf;
 	cpu_lowcore->save_area[15] = (unsigned long) sf;
 	__ctl_store(cpu_lowcore->cregs_save_area[0], 0, 15);
-	__asm__ __volatile__("stam  0,15,0(%0)"
-			     : : "a" (&cpu_lowcore->access_regs_save_area)
-			     : "memory");
+	asm volatile(
+		"	stam	0,15,0(%0)"
+		: : "a" (&cpu_lowcore->access_regs_save_area) : "memory");
 	cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
         cpu_lowcore->current_task = (unsigned long) idle;
         cpu_lowcore->cpu_data.cpu_nr = cpu;
@@ -708,7 +698,7 @@
 __cpu_disable(void)
 {
 	unsigned long flags;
-	ec_creg_mask_parms cr_parms;
+	struct ec_creg_mask_parms cr_parms;
 	int cpu = smp_processor_id();
 
 	spin_lock_irqsave(&smp_reserve_lock, flags);
@@ -724,30 +714,21 @@
 		pfault_fini();
 #endif
 
-	/* disable all external interrupts */
+	memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals));
+	memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals));
 
-	cr_parms.start_ctl = 0;
-	cr_parms.end_ctl = 0;
+	/* disable all external interrupts */
 	cr_parms.orvals[0] = 0;
 	cr_parms.andvals[0] = ~(1<<15 | 1<<14 | 1<<13 | 1<<12 |
 				1<<11 | 1<<10 | 1<< 6 | 1<< 4);
-	smp_ctl_bit_callback(&cr_parms);
-
 	/* disable all I/O interrupts */
-
-	cr_parms.start_ctl = 6;
-	cr_parms.end_ctl = 6;
 	cr_parms.orvals[6] = 0;
 	cr_parms.andvals[6] = ~(1<<31 | 1<<30 | 1<<29 | 1<<28 |
 				1<<27 | 1<<26 | 1<<25 | 1<<24);
-	smp_ctl_bit_callback(&cr_parms);
-
 	/* disable most machine checks */
-
-	cr_parms.start_ctl = 14;
-	cr_parms.end_ctl = 14;
 	cr_parms.orvals[14] = 0;
 	cr_parms.andvals[14] = ~(1<<28 | 1<<27 | 1<<26 | 1<<25 | 1<<24);
+
 	smp_ctl_bit_callback(&cr_parms);
 
 	spin_unlock_irqrestore(&smp_reserve_lock, flags);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 74e6178..1981c61 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -351,10 +351,12 @@
 	int cc;
 
         /* kick the TOD clock */
-        asm volatile ("STCK 0(%1)\n\t"
-                      "IPM  %0\n\t"
-                      "SRL  %0,28" : "=r" (cc) : "a" (&init_timer_cc) 
-				   : "memory", "cc");
+	asm volatile(
+		"	stck	0(%2)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (cc), "=m" (init_timer_cc)
+		: "a" (&init_timer_cc) : "cc");
         switch (cc) {
         case 0: /* clock in set state: all is fine */
                 break;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index c4982c9..3eb4fab 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -597,8 +597,7 @@
 		local_irq_enable();
 
 	if (MACHINE_HAS_IEEE)
-		__asm__ volatile ("stfpc %0\n\t" 
-				  : "=m" (current->thread.fp_regs.fpc));
+		asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
 
 #ifdef CONFIG_MATHEMU
         else if (regs->psw.mask & PSW_MASK_PSTATE) {
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 468f4ea..027c474 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -27,9 +27,7 @@
          * yield the megahertz number of the cpu. The important function
          * is udelay and that is done using the tod clock. -- martin.
          */
-        __asm__ __volatile__(
-                "0: brct %0,0b"
-                : /* no outputs */ : "r" ((loops/2) + 1));
+	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
 
 /*
@@ -38,13 +36,12 @@
  */
 void __udelay(unsigned long usecs)
 {
-        uint64_t start_cc, end_cc;
+	uint64_t start_cc;
 
         if (usecs == 0)
                 return;
-        asm volatile ("STCK %0" : "=m" (start_cc));
+	start_cc = get_clock();
         do {
 		cpu_relax();
-                asm volatile ("STCK %0" : "=m" (end_cc));
-        } while (((end_cc - start_cc)/4096) < usecs);
+	} while (((get_clock() - start_cc)/4096) < usecs);
 }
diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c
index b4957c8..6b9aec5 100644
--- a/arch/s390/math-emu/math.c
+++ b/arch/s390/math-emu/math.c
@@ -1564,52 +1564,52 @@
 }
 
 static inline void emu_load_regd(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* load reg from fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     ld    0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4),"a" (&current->thread.fp_regs.fprs[reg].d)
-                : "1" );
+	asm volatile(		/* load reg from fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	ld	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4),"a" (&current->thread.fp_regs.fprs[reg].d)
+		: "1");
 }
 
 static inline void emu_load_rege(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* load reg from fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     le    0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
-                : "1" );
+	asm volatile(		/* load reg from fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	le	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
+		: "1");
 }
 
 static inline void emu_store_regd(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* store reg to fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     std   0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].d)
-                : "1" );
+	asm volatile(		/* store reg to fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	std	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].d)
+		: "1");
 }
 
 
 static inline void emu_store_rege(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* store reg to fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     ste   0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
-                : "1" );
+	asm volatile(		/* store reg to fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	ste	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
+		: "1");
 }
 
 int math_emu_b3(__u8 *opcode, struct pt_regs * regs) {
@@ -2089,23 +2089,22 @@
 
         if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
                 /* we got an exception therfore ry can't be in {0,2,4,6} */
-                __asm__ __volatile (       /* load rx from fp_regs.fprs[ry] */
-                        "     bras  1,0f\n"
-                        "     ld    0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" (opc & 0xf0),
-                          "a" (&fp_regs->fprs[opc & 0xf].d)
-                        : "1" );
+		asm volatile(		/* load rx from fp_regs.fprs[ry] */
+			"	bras	1,0f\n"
+			"	ld	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].d)
+			: "1");
         } else if ((opc & 0x9) == 0) {     /* test if ry in {0,2,4,6} */
-                __asm__ __volatile (       /* store ry to fp_regs.fprs[rx] */
-                        "     bras  1,0f\n"
-                        "     std   0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" ((opc & 0xf) << 4),
-                          "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d)
-                        : "1" );
+		asm volatile (		/* store ry to fp_regs.fprs[rx] */
+			"	bras	1,0f\n"
+			"	std	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" ((opc & 0xf) << 4),
+			  "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d)
+			: "1");
         } else  /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */
                 fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf];
 	return 0;
@@ -2120,23 +2119,22 @@
 
         if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
                 /* we got an exception therfore ry can't be in {0,2,4,6} */
-                __asm__ __volatile (       /* load rx from fp_regs.fprs[ry] */
-                        "     bras  1,0f\n"
-                        "     le    0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" (opc & 0xf0),
-                          "a" (&fp_regs->fprs[opc & 0xf].f)
-                        : "1" );
+		asm volatile(		/* load rx from fp_regs.fprs[ry] */
+			"	bras	1,0f\n"
+			"	le	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].f)
+			: "1");
         } else if ((opc & 0x9) == 0) {     /* test if ry in {0,2,4,6} */
-                __asm__ __volatile (       /* store ry to fp_regs.fprs[rx] */
-                        "     bras  1,0f\n"
-                        "     ste   0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" ((opc & 0xf) << 4),
-                          "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f)
-                        : "1" );
+		asm volatile(		/* store ry to fp_regs.fprs[rx] */
+			"	bras	1,0f\n"
+			"	ste	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" ((opc & 0xf) << 4),
+			  "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f)
+			: "1");
         } else  /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */
                 fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf];
 	return 0;
diff --git a/arch/s390/math-emu/sfp-util.h b/arch/s390/math-emu/sfp-util.h
index ab556b6..5b6ca45 100644
--- a/arch/s390/math-emu/sfp-util.h
+++ b/arch/s390/math-emu/sfp-util.h
@@ -4,48 +4,51 @@
 #include <asm/byteorder.h>
 
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) ({		\
-        unsigned int __sh = (ah);			\
-        unsigned int __sl = (al);			\
-        __asm__ ("   alr  %1,%3\n"			\
-                 "   brc  12,0f\n"			\
-                 "   ahi  %0,1\n"			\
-                 "0: alr  %0,%2"			\
-                 : "+&d" (__sh), "+d" (__sl)		\
-                 : "d" (bh), "d" (bl) : "cc" );		\
-        (sh) = __sh;					\
-        (sl) = __sl;					\
+	unsigned int __sh = (ah);			\
+	unsigned int __sl = (al);			\
+	asm volatile(					\
+		"	alr	%1,%3\n"		\
+		"	brc	12,0f\n"		\
+		"	ahi	%0,1\n"			\
+		"0:	alr  %0,%2"			\
+		: "+&d" (__sh), "+d" (__sl)		\
+		: "d" (bh), "d" (bl) : "cc");		\
+	(sh) = __sh;					\
+	(sl) = __sl;					\
 })
 
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) ({		\
-       unsigned int __sh = (ah);			\
-       unsigned int __sl = (al);			\
-       __asm__ ("   slr  %1,%3\n"			\
-                "   brc  3,0f\n"			\
-                "   ahi  %0,-1\n"			\
-                "0: slr  %0,%2"				\
-                : "+&d" (__sh), "+d" (__sl)		\
-                : "d" (bh), "d" (bl) : "cc" );		\
-       (sh) = __sh;					\
-       (sl) = __sl;					\
+	unsigned int __sh = (ah);			\
+	unsigned int __sl = (al);			\
+	asm volatile(					\
+		"	slr	%1,%3\n"		\
+		"	brc	3,0f\n"			\
+		"	ahi	%0,-1\n"		\
+		"0:	slr	%0,%2"			\
+		: "+&d" (__sh), "+d" (__sl)		\
+		: "d" (bh), "d" (bl) : "cc");		\
+	(sh) = __sh;					\
+	(sl) = __sl;					\
 })
 
 /* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? a<<32:0 */
 #define umul_ppmm(wh, wl, u, v) ({			\
-        unsigned int __wh = u;				\
-        unsigned int __wl = v;				\
-        __asm__ ("   ltr  1,%0\n"			\
-                 "   mr   0,%1\n"			\
-                 "   jnm  0f\n"				\
-                 "   alr  0,%1\n"			\
-                 "0: ltr  %1,%1\n"			\
-                 "   jnm  1f\n"				\
-                 "   alr  0,%0\n"			\
-                 "1: lr   %0,0\n"			\
-                 "   lr   %1,1\n"			\
-                 : "+d" (__wh), "+d" (__wl)		\
-                 : : "0", "1", "cc" );			\
-        wh = __wh;					\
-        wl = __wl;					\
+	unsigned int __wh = u;				\
+	unsigned int __wl = v;				\
+	asm volatile(					\
+		"	ltr	1,%0\n"			\
+		"	mr	0,%1\n"			\
+		"	jnm	0f\n"				\
+		"	alr	0,%1\n"			\
+		"0:	ltr	%1,%1\n"			\
+		"	jnm	1f\n"				\
+		"	alr	0,%0\n"			\
+		"1:	lr	%0,0\n"			\
+		"	lr	%1,1\n"			\
+		: "+d" (__wh), "+d" (__wl)		\
+		: : "0", "1", "cc");			\
+	wh = __wh;					\
+	wl = __wl;					\
 })
 
 #define udiv_qrnnd(q, r, n1, n0, d)			\
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 9b11e3e..226275d 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -142,17 +142,17 @@
 
 	rx = (unsigned long) parameter;
 	ry = (unsigned long) func;
-	__asm__ __volatile__(
+	asm volatile(
 #ifdef CONFIG_64BIT
-		"   sam31\n" // switch to 31 bit
-		"   diag    %0,%1,0x64\n"
-		"   sam64\n" // switch back to 64 bit
+		"	sam31\n"
+		"	diag	%0,%1,0x64\n"
+		"	sam64\n"
 #else
-		"   diag    %0,%1,0x64\n"
+		"	diag	%0,%1,0x64\n"
 #endif
-		"   ipm     %2\n"
-		"   srl     %2,28\n"
-		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc" );
+		"	ipm	%2\n"
+		"	srl	%2,28\n"
+		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
 	*ret1 = rx;
 	*ret2 = ry;
 	return rc;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a393c30..f2b9a84 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -424,20 +424,13 @@
 
 	if (pfault_disable)
 		return -1;
-        __asm__ __volatile__(
-                "    diag  %1,%0,0x258\n"
-		"0:  j     2f\n"
-		"1:  la    %0,8\n"
+	asm volatile(
+		"	diag	%1,%0,0x258\n"
+		"0:	j	2f\n"
+		"1:	la	%0,8\n"
 		"2:\n"
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-#ifndef CONFIG_64BIT
-		"   .long  0b,1b\n"
-#else /* CONFIG_64BIT */
-		"   .quad  0b,1b\n"
-#endif /* CONFIG_64BIT */
-		".previous"
-                : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc" );
+		EX_TABLE(0b,1b)
+		: "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
         __ctl_set_bit(0, 9);
         return rc;
 }
@@ -450,18 +443,11 @@
 	if (pfault_disable)
 		return;
 	__ctl_clear_bit(0,9);
-        __asm__ __volatile__(
-                "    diag  %0,0,0x258\n"
+	asm volatile(
+		"	diag	%0,0,0x258\n"
 		"0:\n"
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-#ifndef CONFIG_64BIT
-		"   .long  0b,0b\n"
-#else /* CONFIG_64BIT */
-		"   .quad  0b,0b\n"
-#endif /* CONFIG_64BIT */
-		".previous"
-		: : "a" (&refbk), "m" (refbk) : "cc" );
+		EX_TABLE(0b,0b)
+		: : "a" (&refbk), "m" (refbk) : "cc");
 }
 
 asmlinkage void
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index cfd9b8f..127044e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -45,26 +45,17 @@
 {
         if (addr >= 0x7ff00000)
                 return;
+	asm volatile(
 #ifdef CONFIG_64BIT
-        asm volatile (
-		"   sam31\n"
-		"   diag %0,%0,0x10\n"
-		"0: sam64\n"
-		".section __ex_table,\"a\"\n"
-		"   .align 8\n"
-		"   .quad 0b, 0b\n"
-		".previous\n"
-		: : "a" (addr));
+		"	sam31\n"
+		"	diag	%0,%0,0x10\n"
+		"0:	sam64\n"
 #else
-        asm volatile (
-		"   diag %0,%0,0x10\n"
+		"	diag	%0,%0,0x10\n"
 		"0:\n"
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-		"   .long 0b, 0b\n"
-		".previous\n"
-		: : "a" (addr));
 #endif
+		EX_TABLE(0b,0b)
+		: : "a" (addr));
 }
 
 void show_mem(void)
@@ -156,11 +147,10 @@
 	S390_lowcore.kernel_asce = pgdir_k;
 
         /* enable virtual mapping in kernel mode */
-        __asm__ __volatile__("    LCTL  1,1,%0\n"
-                             "    LCTL  7,7,%0\n"
-                             "    LCTL  13,13,%0\n"
-                             "    SSM   %1" 
-			     : : "m" (pgdir_k), "m" (ssm_mask));
+	__ctl_load(pgdir_k, 1, 1);
+	__ctl_load(pgdir_k, 7, 7);
+	__ctl_load(pgdir_k, 13, 13);
+	__raw_local_irq_ssm(ssm_mask);
 
         local_flush_tlb();
         return;
@@ -241,11 +231,10 @@
 	S390_lowcore.kernel_asce = pgdir_k;
 
         /* enable virtual mapping in kernel mode */
-        __asm__ __volatile__("lctlg 1,1,%0\n\t"
-                             "lctlg 7,7,%0\n\t"
-                             "lctlg 13,13,%0\n\t"
-                             "ssm   %1"
-			     : :"m" (pgdir_k), "m" (ssm_mask));
+	__ctl_load(pgdir_k, 1, 1);
+	__ctl_load(pgdir_k, 7, 7);
+	__ctl_load(pgdir_k, 13, 13);
+	__raw_local_irq_ssm(ssm_mask);
 
         local_flush_tlb();