Enable a suitable ISA for the assembler around ll/sc so that code
builds even for processors that don't support the instructions.
Plus minor formatting fixes.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/kernel/semaphore.c b/arch/mips/kernel/semaphore.c
index 9c40fe5..dbb145e 100644
--- a/arch/mips/kernel/semaphore.c
+++ b/arch/mips/kernel/semaphore.c
@@ -42,24 +42,28 @@
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		__asm__ __volatile__(
-		"1:	ll	%0, %2					\n"
+		"	.set	mips2					\n"
+		"1:	ll	%0, %2		# __sem_update_count	\n"
 		"	sra	%1, %0, 31				\n"
 		"	not	%1					\n"
 		"	and	%1, %0, %1				\n"
-		"	add	%1, %1, %3				\n"
+		"	addu	%1, %1, %3				\n"
 		"	sc	%1, %2					\n"
 		"	beqzl	%1, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
 		: "r" (incr), "m" (sem->count));
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__(
-		"1:	ll	%0, %2					\n"
+		"	.set	mips2					\n"
+		"1:	ll	%0, %2		# __sem_update_count	\n"
 		"	sra	%1, %0, 31				\n"
 		"	not	%1					\n"
 		"	and	%1, %0, %1				\n"
-		"	add	%1, %1, %3				\n"
+		"	addu	%1, %1, %3				\n"
 		"	sc	%1, %2					\n"
 		"	beqz	%1, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
 		: "r" (incr), "m" (sem->count));
 	} else {
diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h
index c0bd8d0..80ea3fb 100644
--- a/include/asm-mips/atomic.h
+++ b/include/asm-mips/atomic.h
@@ -62,20 +62,24 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%0, %1		# atomic_add		\n"
 		"	addu	%0, %2					\n"
 		"	sc	%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
 	} else if (cpu_has_llsc) {
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%0, %1		# atomic_add		\n"
 		"	addu	%0, %2					\n"
 		"	sc	%0, %1					\n"
 		"	beqz	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
 	} else {
@@ -100,20 +104,24 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%0, %1		# atomic_sub		\n"
 		"	subu	%0, %2					\n"
 		"	sc	%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
 	} else if (cpu_has_llsc) {
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%0, %1		# atomic_sub		\n"
 		"	subu	%0, %2					\n"
 		"	sc	%0, %1					\n"
 		"	beqz	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
 	} else {
@@ -136,12 +144,14 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%1, %2		# atomic_add_return	\n"
 		"	addu	%0, %1, %3				\n"
 		"	sc	%0, %2					\n"
 		"	beqzl	%0, 1b					\n"
 		"	addu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -149,12 +159,14 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%1, %2		# atomic_add_return	\n"
 		"	addu	%0, %1, %3				\n"
 		"	sc	%0, %2					\n"
 		"	beqz	%0, 1b					\n"
 		"	addu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -179,12 +191,14 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%1, %2		# atomic_sub_return	\n"
 		"	subu	%0, %1, %3				\n"
 		"	sc	%0, %2					\n"
 		"	beqzl	%0, 1b					\n"
 		"	subu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -192,12 +206,14 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%1, %2		# atomic_sub_return	\n"
 		"	subu	%0, %1, %3				\n"
 		"	sc	%0, %2					\n"
 		"	beqz	%0, 1b					\n"
 		"	subu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -229,6 +245,7 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%1, %2		# atomic_sub_if_positive\n"
 		"	subu	%0, %1, %3				\n"
 		"	bltz	%0, 1f					\n"
@@ -236,6 +253,7 @@
 		"	beqzl	%0, 1b					\n"
 		"	sync						\n"
 		"1:							\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -243,6 +261,7 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%1, %2		# atomic_sub_if_positive\n"
 		"	subu	%0, %1, %3				\n"
 		"	bltz	%0, 1f					\n"
@@ -250,6 +269,7 @@
 		"	beqz	%0, 1b					\n"
 		"	sync						\n"
 		"1:							\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -367,20 +387,24 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%0, %1		# atomic64_add		\n"
 		"	addu	%0, %2					\n"
 		"	scd	%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
 	} else if (cpu_has_llsc) {
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%0, %1		# atomic64_add		\n"
 		"	addu	%0, %2					\n"
 		"	scd	%0, %1					\n"
 		"	beqz	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
 	} else {
@@ -405,20 +429,24 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%0, %1		# atomic64_sub		\n"
 		"	subu	%0, %2					\n"
 		"	scd	%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
 	} else if (cpu_has_llsc) {
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%0, %1		# atomic64_sub		\n"
 		"	subu	%0, %2					\n"
 		"	scd	%0, %1					\n"
 		"	beqz	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
 	} else {
@@ -441,12 +469,14 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%1, %2		# atomic64_add_return	\n"
 		"	addu	%0, %1, %3				\n"
 		"	scd	%0, %2					\n"
 		"	beqzl	%0, 1b					\n"
 		"	addu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -454,12 +484,14 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%1, %2		# atomic64_add_return	\n"
 		"	addu	%0, %1, %3				\n"
 		"	scd	%0, %2					\n"
 		"	beqz	%0, 1b					\n"
 		"	addu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -484,12 +516,14 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%1, %2		# atomic64_sub_return	\n"
 		"	subu	%0, %1, %3				\n"
 		"	scd	%0, %2					\n"
 		"	beqzl	%0, 1b					\n"
 		"	subu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -497,12 +531,14 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%1, %2		# atomic64_sub_return	\n"
 		"	subu	%0, %1, %3				\n"
 		"	scd	%0, %2					\n"
 		"	beqz	%0, 1b					\n"
 		"	subu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -534,6 +570,7 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%1, %2		# atomic64_sub_if_positive\n"
 		"	dsubu	%0, %1, %3				\n"
 		"	bltz	%0, 1f					\n"
@@ -541,6 +578,7 @@
 		"	beqzl	%0, 1b					\n"
 		"	sync						\n"
 		"1:							\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
@@ -548,6 +586,7 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%1, %2		# atomic64_sub_if_positive\n"
 		"	dsubu	%0, %1, %3				\n"
 		"	bltz	%0, 1f					\n"
@@ -555,6 +594,7 @@
 		"	beqz	%0, 1b					\n"
 		"	sync						\n"
 		"1:							\n"
+		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
 		: "memory");
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index eb8d79d..1dc3587 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -18,14 +18,16 @@
 #if (_MIPS_SZLONG == 32)
 #define SZLONG_LOG 5
 #define SZLONG_MASK 31UL
-#define __LL	"ll	"
-#define __SC	"sc	"
+#define __LL		"ll	"
+#define __SC		"sc	"
+#define __SET_MIPS	".set	mips2	"
 #define cpu_to_lelongp(x) cpu_to_le32p((__u32 *) (x))
 #elif (_MIPS_SZLONG == 64)
 #define SZLONG_LOG 6
 #define SZLONG_MASK 63UL
-#define __LL	"lld	"
-#define __SC	"scd	"
+#define __LL		"lld	"
+#define __SC		"scd	"
+#define __SET_MIPS	".set	mips3	"
 #define cpu_to_lelongp(x) cpu_to_le64p((__u64 *) (x))
 #endif
 
@@ -72,18 +74,22 @@
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		__asm__ __volatile__(
+		"	" __SET_MIPS "					\n"
 		"1:	" __LL "%0, %1			# set_bit	\n"
 		"	or	%0, %2					\n"
-		"	"__SC	"%0, %1					\n"
+		"	" __SC	"%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__(
+		"	" __SET_MIPS "					\n"
 		"1:	" __LL "%0, %1			# set_bit	\n"
 		"	or	%0, %2					\n"
-		"	"__SC	"%0, %1					\n"
+		"	" __SC	"%0, %1					\n"
 		"	beqz	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
 	} else {
@@ -132,18 +138,22 @@
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		__asm__ __volatile__(
+		"	" __SET_MIPS "					\n"
 		"1:	" __LL "%0, %1			# clear_bit	\n"
 		"	and	%0, %2					\n"
 		"	" __SC "%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (~(1UL << (nr & SZLONG_MASK))), "m" (*m));
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__(
+		"	" __SET_MIPS "					\n"
 		"1:	" __LL "%0, %1			# clear_bit	\n"
 		"	and	%0, %2					\n"
 		"	" __SC "%0, %1					\n"
 		"	beqz	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (~(1UL << (nr & SZLONG_MASK))), "m" (*m));
 	} else {
@@ -191,10 +201,12 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	" __SET_MIPS "				\n"
 		"1:	" __LL "%0, %1		# change_bit	\n"
 		"	xor	%0, %2				\n"
-		"	"__SC	"%0, %1				\n"
+		"	" __SC	"%0, %1				\n"
 		"	beqzl	%0, 1b				\n"
+		"	.set	mips0				\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
 	} else if (cpu_has_llsc) {
@@ -202,10 +214,12 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
+		"	" __SET_MIPS "				\n"
 		"1:	" __LL "%0, %1		# change_bit	\n"
 		"	xor	%0, %2				\n"
-		"	"__SC	"%0, %1				\n"
+		"	" __SC	"%0, %1				\n"
 		"	beqz	%0, 1b				\n"
+		"	.set	mips0				\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
 	} else {
@@ -253,14 +267,16 @@
 		unsigned long temp, res;
 
 		__asm__ __volatile__(
+		"	" __SET_MIPS "					\n"
 		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
 		"	or	%2, %0, %3				\n"
 		"	" __SC	"%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 #ifdef CONFIG_SMP
-		"sync							\n"
+		"	sync						\n"
 #endif
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
 		: "memory");
@@ -271,16 +287,18 @@
 		unsigned long temp, res;
 
 		__asm__ __volatile__(
-		"	.set	noreorder	# test_and_set_bit	\n"
-		"1:	" __LL "%0, %1					\n"
+		"	.set	push					\n"
+		"	.set	noreorder				\n"
+		"	" __SET_MIPS "					\n"
+		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
 		"	or	%2, %0, %3				\n"
 		"	" __SC	"%2, %1					\n"
 		"	beqz	%2, 1b					\n"
 		"	 and	%2, %0, %3				\n"
 #ifdef CONFIG_SMP
-		"sync							\n"
+		"	sync						\n"
 #endif
-		".set\treorder"
+		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
 		: "memory");
@@ -343,15 +361,17 @@
 		unsigned long temp, res;
 
 		__asm__ __volatile__(
+		"	" __SET_MIPS "					\n"
 		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
 		"	or	%2, %0, %3				\n"
 		"	xor	%2, %3					\n"
-			__SC 	"%2, %1					\n"
+		"	" __SC 	"%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
 		: "memory");
@@ -362,17 +382,19 @@
 		unsigned long temp, res;
 
 		__asm__ __volatile__(
-		"	.set	noreorder	# test_and_clear_bit	\n"
-		"1:	" __LL	"%0, %1					\n"
+		"	.set	push					\n"
+		"	.set	noreorder				\n"
+		"	" __SET_MIPS "					\n"
+		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
 		"	or	%2, %0, %3				\n"
 		"	xor	%2, %3					\n"
-			__SC 	"%2, %1					\n"
+		"	" __SC 	"%2, %1					\n"
 		"	beqz	%2, 1b					\n"
 		"	 and	%2, %0, %3				\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
-		"	.set	reorder					\n"
+		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
 		: "memory");
@@ -435,14 +457,16 @@
 		unsigned long temp, res;
 
 		__asm__ __volatile__(
-		"1:	" __LL	" %0, %1	# test_and_change_bit	\n"
+		"	" __SET_MIPS "					\n"
+		"1:	" __LL	"%0, %1		# test_and_change_bit	\n"
 		"	xor	%2, %0, %3				\n"
-		"	"__SC	"%2, %1					\n"
+		"	" __SC	"%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
 		: "memory");
@@ -453,16 +477,18 @@
 		unsigned long temp, res;
 
 		__asm__ __volatile__(
-		"	.set	noreorder	# test_and_change_bit	\n"
-		"1:	" __LL	" %0, %1				\n"
+		"	.set	push					\n"
+		"	.set	noreorder				\n"
+		"	" __SET_MIPS "					\n"
+		"1:	" __LL	"%0, %1		# test_and_change_bit	\n"
 		"	xor	%2, %0, %3				\n"
-		"	"__SC	"\t%2, %1				\n"
+		"	" __SC	"\t%2, %1				\n"
 		"	beqz	%2, 1b					\n"
 		"	 and	%2, %0, %3				\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
-		"	.set	reorder					\n"
+		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
 		: "memory");
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index cd3a6bc..ec29c93 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -176,6 +176,7 @@
 		unsigned long dummy;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%0, %3			# xchg_u32	\n"
 		"	move	%2, %z4					\n"
 		"	sc	%2, %1					\n"
@@ -184,6 +185,7 @@
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
 		: "memory");
@@ -191,6 +193,7 @@
 		unsigned long dummy;
 
 		__asm__ __volatile__(
+		"	.set	mips2					\n"
 		"1:	ll	%0, %3			# xchg_u32	\n"
 		"	move	%2, %z4					\n"
 		"	sc	%2, %1					\n"
@@ -198,6 +201,7 @@
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
 		: "memory");
@@ -222,6 +226,7 @@
 		unsigned long dummy;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%0, %3			# xchg_u64	\n"
 		"	move	%2, %z4					\n"
 		"	scd	%2, %1					\n"
@@ -230,6 +235,7 @@
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
 		: "memory");
@@ -237,6 +243,7 @@
 		unsigned long dummy;
 
 		__asm__ __volatile__(
+		"	.set	mips3					\n"
 		"1:	lld	%0, %3			# xchg_u64	\n"
 		"	move	%2, %z4					\n"
 		"	scd	%2, %1					\n"
@@ -244,6 +251,7 @@
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
 		: "memory");
@@ -291,7 +299,9 @@
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		__asm__ __volatile__(
+		"	.set	push					\n"
 		"	.set	noat					\n"
+		"	.set	mips2					\n"
 		"1:	ll	%0, %2			# __cmpxchg_u32	\n"
 		"	bne	%0, %z3, 2f				\n"
 		"	move	$1, %z4					\n"
@@ -302,13 +312,15 @@
 		"	sync						\n"
 #endif
 		"2:							\n"
-		"	.set	at					\n"
+		"	.set	pop					\n"
 		: "=&r" (retval), "=m" (*m)
 		: "R" (*m), "Jr" (old), "Jr" (new)
 		: "memory");
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__(
+		"	.set	push					\n"
 		"	.set	noat					\n"
+		"	.set	mips2					\n"
 		"1:	ll	%0, %2			# __cmpxchg_u32	\n"
 		"	bne	%0, %z3, 2f				\n"
 		"	move	$1, %z4					\n"
@@ -318,7 +330,7 @@
 		"	sync						\n"
 #endif
 		"2:							\n"
-		"	.set	at					\n"
+		"	.set	pop					\n"
 		: "=&r" (retval), "=m" (*m)
 		: "R" (*m), "Jr" (old), "Jr" (new)
 		: "memory");
@@ -343,7 +355,9 @@
 
 	if (cpu_has_llsc) {
 		__asm__ __volatile__(
+		"	.set	push					\n"
 		"	.set	noat					\n"
+		"	.set	mips3					\n"
 		"1:	lld	%0, %2			# __cmpxchg_u64	\n"
 		"	bne	%0, %z3, 2f				\n"
 		"	move	$1, %z4					\n"
@@ -354,13 +368,15 @@
 		"	sync						\n"
 #endif
 		"2:							\n"
-		"	.set	at					\n"
+		"	.set	pop					\n"
 		: "=&r" (retval), "=m" (*m)
 		: "R" (*m), "Jr" (old), "Jr" (new)
 		: "memory");
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__(
+		"	.set	push					\n"
 		"	.set	noat					\n"
+		"	.set	mips2					\n"
 		"1:	lld	%0, %2			# __cmpxchg_u64	\n"
 		"	bne	%0, %z3, 2f				\n"
 		"	move	$1, %z4					\n"
@@ -370,7 +386,7 @@
 		"	sync						\n"
 #endif
 		"2:							\n"
-		"	.set	at					\n"
+		"	.set	pop					\n"
 		: "=&r" (retval), "=m" (*m)
 		: "R" (*m), "Jr" (old), "Jr" (new)
 		: "memory");