[PATCH] powerpc: Consolidate asm compatibility macros

This patch consolidates macros used to generate assembly for
compatibility across different CPUs or configs.  A new header,
asm-powerpc/asm-compat.h contains the main compatibility macros.  It
uses some preprocessor magic to make the macros suitable both for use
in .S files, and in inline asm in .c files.  Headers (bitops.h,
uaccess.h, atomic.h, bug.h) which had their own such compatibility
macros are changed to use asm-compat.h.

ppc_asm.h is now for use in .S files *only*, and a #error enforces
that.  As such, we're a lot more careless about namespace pollution
here than in asm-compat.h.

While we're at it, this patch adds a call to the PPC405_ERR77 macro in
futex.h which should have had it already, but didn't.

Built and booted on pSeries, Maple and iSeries (ARCH=powerpc).  Built
for 32-bit powermac (ARCH=powerpc) and Walnut (ARCH=ppc).

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 4d6001f..b780b42 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -41,20 +41,20 @@
 #ifndef CONFIG_SMP
 	LOADBASE(r3, last_task_used_math)
 	toreal(r3)
-	LDL	r4,OFF(last_task_used_math)(r3)
-	CMPI	0,r4,0
+	PPC_LL	r4,OFF(last_task_used_math)(r3)
+	PPC_LCMPI	0,r4,0
 	beq	1f
 	toreal(r4)
 	addi	r4,r4,THREAD		/* want last_task_used_math->thread */
 	SAVE_32FPRS(0, r4)
 	mffs	fr0
 	stfd	fr0,THREAD_FPSCR(r4)
-	LDL	r5,PT_REGS(r4)
+	PPC_LL	r5,PT_REGS(r4)
 	toreal(r5)
-	LDL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 	li	r10,MSR_FP|MSR_FE0|MSR_FE1
 	andc	r4,r4,r10		/* disable FP for previous task */
-	STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 1:
 #endif /* CONFIG_SMP */
 	/* enable use of FP after return */
@@ -77,7 +77,7 @@
 #ifndef CONFIG_SMP
 	subi	r4,r5,THREAD
 	fromreal(r4)
-	STL	r4,OFF(last_task_used_math)(r3)
+	PPC_STL	r4,OFF(last_task_used_math)(r3)
 #endif /* CONFIG_SMP */
 	/* restore registers and return */
 	/* we haven't used ctr or xer or lr */
@@ -97,24 +97,24 @@
 	MTMSRD(r5)			/* enable use of fpu now */
 	SYNC_601
 	isync
-	CMPI	0,r3,0
+	PPC_LCMPI	0,r3,0
 	beqlr-				/* if no previous owner, done */
 	addi	r3,r3,THREAD	        /* want THREAD of task */
-	LDL	r5,PT_REGS(r3)
-	CMPI	0,r5,0
+	PPC_LL	r5,PT_REGS(r3)
+	PPC_LCMPI	0,r5,0
 	SAVE_32FPRS(0, r3)
 	mffs	fr0
 	stfd	fr0,THREAD_FPSCR(r3)
 	beq	1f
-	LDL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 	li	r3,MSR_FP|MSR_FE0|MSR_FE1
 	andc	r4,r4,r3		/* disable FP for previous task */
-	STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 1:
 #ifndef CONFIG_SMP
 	li	r5,0
 	LOADBASE(r4,last_task_used_math)
-	STL	r5,OFF(last_task_used_math)(r4)
+	PPC_STL	r5,OFF(last_task_used_math)(r4)
 #endif /* CONFIG_SMP */
 	blr
 
diff --git a/arch/powerpc/platforms/iseries/misc.S b/arch/powerpc/platforms/iseries/misc.S
index 09f1452..dfe7aa1 100644
--- a/arch/powerpc/platforms/iseries/misc.S
+++ b/arch/powerpc/platforms/iseries/misc.S
@@ -15,6 +15,7 @@
 
 #include <asm/processor.h>
 #include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
 
 	.text
 
diff --git a/arch/powerpc/xmon/setjmp.S b/arch/powerpc/xmon/setjmp.S
index f8e40df..96a91f1 100644
--- a/arch/powerpc/xmon/setjmp.S
+++ b/arch/powerpc/xmon/setjmp.S
@@ -14,61 +14,61 @@
 
 _GLOBAL(xmon_setjmp)
 	mflr	r0
-	STL	r0,0(r3)
-	STL	r1,SZL(r3)
-	STL	r2,2*SZL(r3)
+	PPC_STL	r0,0(r3)
+	PPC_STL	r1,SZL(r3)
+	PPC_STL	r2,2*SZL(r3)
 	mfcr	r0
-	STL	r0,3*SZL(r3)
-	STL	r13,4*SZL(r3)
-	STL	r14,5*SZL(r3)
-	STL	r15,6*SZL(r3)
-	STL	r16,7*SZL(r3)
-	STL	r17,8*SZL(r3)
-	STL	r18,9*SZL(r3)
-	STL	r19,10*SZL(r3)
-	STL	r20,11*SZL(r3)
-	STL	r21,12*SZL(r3)
-	STL	r22,13*SZL(r3)
-	STL	r23,14*SZL(r3)
-	STL	r24,15*SZL(r3)
-	STL	r25,16*SZL(r3)
-	STL	r26,17*SZL(r3)
-	STL	r27,18*SZL(r3)
-	STL	r28,19*SZL(r3)
-	STL	r29,20*SZL(r3)
-	STL	r30,21*SZL(r3)
-	STL	r31,22*SZL(r3)
+	PPC_STL	r0,3*SZL(r3)
+	PPC_STL	r13,4*SZL(r3)
+	PPC_STL	r14,5*SZL(r3)
+	PPC_STL	r15,6*SZL(r3)
+	PPC_STL	r16,7*SZL(r3)
+	PPC_STL	r17,8*SZL(r3)
+	PPC_STL	r18,9*SZL(r3)
+	PPC_STL	r19,10*SZL(r3)
+	PPC_STL	r20,11*SZL(r3)
+	PPC_STL	r21,12*SZL(r3)
+	PPC_STL	r22,13*SZL(r3)
+	PPC_STL	r23,14*SZL(r3)
+	PPC_STL	r24,15*SZL(r3)
+	PPC_STL	r25,16*SZL(r3)
+	PPC_STL	r26,17*SZL(r3)
+	PPC_STL	r27,18*SZL(r3)
+	PPC_STL	r28,19*SZL(r3)
+	PPC_STL	r29,20*SZL(r3)
+	PPC_STL	r30,21*SZL(r3)
+	PPC_STL	r31,22*SZL(r3)
 	li	r3,0
 	blr
 
 _GLOBAL(xmon_longjmp)
-	CMPI	r4,0
+	PPC_LCMPI r4,0
 	bne	1f
 	li	r4,1
-1:	LDL	r13,4*SZL(r3)
-	LDL	r14,5*SZL(r3)
-	LDL	r15,6*SZL(r3)
-	LDL	r16,7*SZL(r3)
-	LDL	r17,8*SZL(r3)
-	LDL	r18,9*SZL(r3)
-	LDL	r19,10*SZL(r3)
-	LDL	r20,11*SZL(r3)
-	LDL	r21,12*SZL(r3)
-	LDL	r22,13*SZL(r3)
-	LDL	r23,14*SZL(r3)
-	LDL	r24,15*SZL(r3)
-	LDL	r25,16*SZL(r3)
-	LDL	r26,17*SZL(r3)
-	LDL	r27,18*SZL(r3)
-	LDL	r28,19*SZL(r3)
-	LDL	r29,20*SZL(r3)
-	LDL	r30,21*SZL(r3)
-	LDL	r31,22*SZL(r3)
-	LDL	r0,3*SZL(r3)
+1:	PPC_LL	r13,4*SZL(r3)
+	PPC_LL	r14,5*SZL(r3)
+	PPC_LL	r15,6*SZL(r3)
+	PPC_LL	r16,7*SZL(r3)
+	PPC_LL	r17,8*SZL(r3)
+	PPC_LL	r18,9*SZL(r3)
+	PPC_LL	r19,10*SZL(r3)
+	PPC_LL	r20,11*SZL(r3)
+	PPC_LL	r21,12*SZL(r3)
+	PPC_LL	r22,13*SZL(r3)
+	PPC_LL	r23,14*SZL(r3)
+	PPC_LL	r24,15*SZL(r3)
+	PPC_LL	r25,16*SZL(r3)
+	PPC_LL	r26,17*SZL(r3)
+	PPC_LL	r27,18*SZL(r3)
+	PPC_LL	r28,19*SZL(r3)
+	PPC_LL	r29,20*SZL(r3)
+	PPC_LL	r30,21*SZL(r3)
+	PPC_LL	r31,22*SZL(r3)
+	PPC_LL	r0,3*SZL(r3)
 	mtcrf	0x38,r0
-	LDL	r0,0(r3)
-	LDL	r1,SZL(r3)
-	LDL	r2,2*SZL(r3)
+	PPC_LL	r0,0(r3)
+	PPC_LL	r1,SZL(r3)
+	PPC_LL	r2,2*SZL(r3)
 	mtlr	r0
 	mr	r3,r4
 	blr
@@ -84,52 +84,52 @@
  * different ABIs, though).
  */
 _GLOBAL(xmon_save_regs)
-	STL	r0,0*SZL(r3)
-	STL	r2,2*SZL(r3)
-	STL	r3,3*SZL(r3)
-	STL	r4,4*SZL(r3)
-	STL	r5,5*SZL(r3)
-	STL	r6,6*SZL(r3)
-	STL	r7,7*SZL(r3)
-	STL	r8,8*SZL(r3)
-	STL	r9,9*SZL(r3)
-	STL	r10,10*SZL(r3)
-	STL	r11,11*SZL(r3)
-	STL	r12,12*SZL(r3)
-	STL	r13,13*SZL(r3)
-	STL	r14,14*SZL(r3)
-	STL	r15,15*SZL(r3)
-	STL	r16,16*SZL(r3)
-	STL	r17,17*SZL(r3)
-	STL	r18,18*SZL(r3)
-	STL	r19,19*SZL(r3)
-	STL	r20,20*SZL(r3)
-	STL	r21,21*SZL(r3)
-	STL	r22,22*SZL(r3)
-	STL	r23,23*SZL(r3)
-	STL	r24,24*SZL(r3)
-	STL	r25,25*SZL(r3)
-	STL	r26,26*SZL(r3)
-	STL	r27,27*SZL(r3)
-	STL	r28,28*SZL(r3)
-	STL	r29,29*SZL(r3)
-	STL	r30,30*SZL(r3)
-	STL	r31,31*SZL(r3)
+	PPC_STL	r0,0*SZL(r3)
+	PPC_STL	r2,2*SZL(r3)
+	PPC_STL	r3,3*SZL(r3)
+	PPC_STL	r4,4*SZL(r3)
+	PPC_STL	r5,5*SZL(r3)
+	PPC_STL	r6,6*SZL(r3)
+	PPC_STL	r7,7*SZL(r3)
+	PPC_STL	r8,8*SZL(r3)
+	PPC_STL	r9,9*SZL(r3)
+	PPC_STL	r10,10*SZL(r3)
+	PPC_STL	r11,11*SZL(r3)
+	PPC_STL	r12,12*SZL(r3)
+	PPC_STL	r13,13*SZL(r3)
+	PPC_STL	r14,14*SZL(r3)
+	PPC_STL	r15,15*SZL(r3)
+	PPC_STL	r16,16*SZL(r3)
+	PPC_STL	r17,17*SZL(r3)
+	PPC_STL	r18,18*SZL(r3)
+	PPC_STL	r19,19*SZL(r3)
+	PPC_STL	r20,20*SZL(r3)
+	PPC_STL	r21,21*SZL(r3)
+	PPC_STL	r22,22*SZL(r3)
+	PPC_STL	r23,23*SZL(r3)
+	PPC_STL	r24,24*SZL(r3)
+	PPC_STL	r25,25*SZL(r3)
+	PPC_STL	r26,26*SZL(r3)
+	PPC_STL	r27,27*SZL(r3)
+	PPC_STL	r28,28*SZL(r3)
+	PPC_STL	r29,29*SZL(r3)
+	PPC_STL	r30,30*SZL(r3)
+	PPC_STL	r31,31*SZL(r3)
 	/* go up one stack frame for SP */
-	LDL	r4,0(r1)
-	STL	r4,1*SZL(r3)
+	PPC_LL	r4,0(r1)
+	PPC_STL	r4,1*SZL(r3)
 	/* get caller's LR */
-	LDL	r0,LRSAVE(r4)
-	STL	r0,_NIP-STACK_FRAME_OVERHEAD(r3)
-	STL	r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+	PPC_LL	r0,LRSAVE(r4)
+	PPC_STL	r0,_NIP-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_LINK-STACK_FRAME_OVERHEAD(r3)
 	mfmsr	r0
-	STL	r0,_MSR-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_MSR-STACK_FRAME_OVERHEAD(r3)
 	mfctr	r0
-	STL	r0,_CTR-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_CTR-STACK_FRAME_OVERHEAD(r3)
 	mfxer	r0
-	STL	r0,_XER-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_XER-STACK_FRAME_OVERHEAD(r3)
 	mfcr	r0
-	STL	r0,_CCR-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_CCR-STACK_FRAME_OVERHEAD(r3)
 	li	r0,0
-	STL	r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
 	blr
diff --git a/arch/ppc/boot/openfirmware/Makefile b/arch/ppc/boot/openfirmware/Makefile
index 0341523..83a6433 100644
--- a/arch/ppc/boot/openfirmware/Makefile
+++ b/arch/ppc/boot/openfirmware/Makefile
@@ -80,8 +80,7 @@
 	$(call if_changed,mknote)
 
 
-$(obj)/coffcrt0.o: EXTRA_AFLAGS := -traditional -DXCOFF
-$(obj)/crt0.o:     EXTRA_AFLAGS := -traditional
+$(obj)/coffcrt0.o: EXTRA_AFLAGS := -DXCOFF
 targets += coffcrt0.o crt0.o
 $(obj)/coffcrt0.o $(obj)/crt0.o: $(common)/crt0.S FORCE
 	$(call if_changed_dep,as_o_S)
diff --git a/include/asm-powerpc/asm-compat.h b/include/asm-powerpc/asm-compat.h
new file mode 100644
index 0000000..8b133ef
--- /dev/null
+++ b/include/asm-powerpc/asm-compat.h
@@ -0,0 +1,55 @@
+#ifndef _ASM_POWERPC_ASM_COMPAT_H
+#define _ASM_POWERPC_ASM_COMPAT_H
+
+#include <linux/config.h>
+#include <asm/types.h>
+
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)	__VA_ARGS__
+#  define ASM_CONST(x)		x
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...)	#__VA_ARGS__
+#  define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+#  define __ASM_CONST(x)	x##UL
+#  define ASM_CONST(x)		__ASM_CONST(x)
+#endif
+
+#ifdef __powerpc64__
+
+/* operations for longs and pointers */
+#define PPC_LL		stringify_in_c(ld)
+#define PPC_STL		stringify_in_c(std)
+#define PPC_LCMPI	stringify_in_c(cmpdi)
+#define PPC_LONG	stringify_in_c(.llong)
+#define PPC_TLNEI	stringify_in_c(tdnei)
+#define PPC_LLARX	stringify_in_c(ldarx)
+#define PPC_STLCX	stringify_in_c(stdcx.)
+#define PPC_CNTLZL	stringify_in_c(cntlzd)
+
+#else /* 32-bit */
+
+/* operations for longs and pointers */
+#define PPC_LL		stringify_in_c(lwz)
+#define PPC_STL		stringify_in_c(stw)
+#define PPC_LCMPI	stringify_in_c(cmpwi)
+#define PPC_LONG	stringify_in_c(.long)
+#define PPC_TLNEI	stringify_in_c(twnei)
+#define PPC_LLARX	stringify_in_c(lwarx)
+#define PPC_STLCX	stringify_in_c(stwcx.)
+#define PPC_CNTLZL	stringify_in_c(cntlzw)
+
+#endif
+
+#ifdef CONFIG_IBM405_ERR77
+/* Erratum #77 on the 405 means we need a sync or dcbt before every
+ * stwcx.  The old ATOMIC_SYNC_FIX covered some but not all of this.
+ */
+#define PPC405_ERR77(ra,rb)	stringify_in_c(dcbt	ra, rb;)
+#define	PPC405_ERR77_SYNC	stringify_in_c(sync;)
+#else
+#define PPC405_ERR77(ra,rb)
+#define PPC405_ERR77_SYNC
+#endif
+
+#endif /* _ASM_POWERPC_ASM_COMPAT_H */
diff --git a/include/asm-powerpc/atomic.h b/include/asm-powerpc/atomic.h
index ed4b345..c5b12fd 100644
--- a/include/asm-powerpc/atomic.h
+++ b/include/asm-powerpc/atomic.h
@@ -9,21 +9,13 @@
 
 #ifdef __KERNEL__
 #include <asm/synch.h>
+#include <asm/asm-compat.h>
 
 #define ATOMIC_INIT(i)		{ (i) }
 
 #define atomic_read(v)		((v)->counter)
 #define atomic_set(v,i)		(((v)->counter) = (i))
 
-/* Erratum #77 on the 405 means we need a sync or dcbt before every stwcx.
- * The old ATOMIC_SYNC_FIX covered some but not all of this.
- */
-#ifdef CONFIG_IBM405_ERR77
-#define PPC405_ERR77(ra,rb)	"dcbt " #ra "," #rb ";"
-#else
-#define PPC405_ERR77(ra,rb)
-#endif
-
 static __inline__ void atomic_add(int a, atomic_t *v)
 {
 	int t;
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index dc25c53..5727229 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -40,6 +40,7 @@
 
 #include <linux/compiler.h>
 #include <asm/atomic.h>
+#include <asm/asm-compat.h>
 #include <asm/synch.h>
 
 /*
@@ -52,16 +53,6 @@
 #define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
 #define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
 
-#ifdef CONFIG_PPC64
-#define LARXL		"ldarx"
-#define STCXL		"stdcx."
-#define CNTLZL		"cntlzd"
-#else
-#define LARXL		"lwarx"
-#define STCXL		"stwcx."
-#define CNTLZL		"cntlzw"
-#endif
-
 static __inline__ void set_bit(int nr, volatile unsigned long *addr)
 {
 	unsigned long old;
@@ -69,10 +60,10 @@
 	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
 
 	__asm__ __volatile__(
-"1:"	LARXL "	%0,0,%3	# set_bit\n"
+"1:"	PPC_LLARX "%0,0,%3	# set_bit\n"
 	"or	%0,%0,%2\n"
 	PPC405_ERR77(0,%3)
-	STCXL "	%0,0,%3\n"
+	PPC_STLCX "%0,0,%3\n"
 	"bne-	1b"
 	: "=&r"(old), "=m"(*p)
 	: "r"(mask), "r"(p), "m"(*p)
@@ -86,10 +77,10 @@
 	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
 
 	__asm__ __volatile__(
-"1:"	LARXL "	%0,0,%3	# set_bit\n"
+"1:"	PPC_LLARX "%0,0,%3	# clear_bit\n"
 	"andc	%0,%0,%2\n"
 	PPC405_ERR77(0,%3)
-	STCXL "	%0,0,%3\n"
+	PPC_STLCX "%0,0,%3\n"
 	"bne-	1b"
 	: "=&r"(old), "=m"(*p)
 	: "r"(mask), "r"(p), "m"(*p)
@@ -103,10 +94,10 @@
 	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
 
 	__asm__ __volatile__(
-"1:"	LARXL "	%0,0,%3	# set_bit\n"
+"1:"	PPC_LLARX "%0,0,%3	# change_bit\n"
 	"xor	%0,%0,%2\n"
 	PPC405_ERR77(0,%3)
-	STCXL "	%0,0,%3\n"
+	PPC_STLCX "%0,0,%3\n"
 	"bne-	1b"
 	: "=&r"(old), "=m"(*p)
 	: "r"(mask), "r"(p), "m"(*p)
@@ -122,10 +113,10 @@
 
 	__asm__ __volatile__(
 	EIEIO_ON_SMP
-"1:"	LARXL "	%0,0,%3		# test_and_set_bit\n"
+"1:"	PPC_LLARX "%0,0,%3		# test_and_set_bit\n"
 	"or	%1,%0,%2 \n"
 	PPC405_ERR77(0,%3)
-	STCXL "	%1,0,%3 \n"
+	PPC_STLCX "%1,0,%3 \n"
 	"bne-	1b"
 	ISYNC_ON_SMP
 	: "=&r" (old), "=&r" (t)
@@ -144,10 +135,10 @@
 
 	__asm__ __volatile__(
 	EIEIO_ON_SMP
-"1:"	LARXL "	%0,0,%3		# test_and_clear_bit\n"
+"1:"	PPC_LLARX "%0,0,%3		# test_and_clear_bit\n"
 	"andc	%1,%0,%2 \n"
 	PPC405_ERR77(0,%3)
-	STCXL "	%1,0,%3 \n"
+	PPC_STLCX "%1,0,%3 \n"
 	"bne-	1b"
 	ISYNC_ON_SMP
 	: "=&r" (old), "=&r" (t)
@@ -166,10 +157,10 @@
 
 	__asm__ __volatile__(
 	EIEIO_ON_SMP
-"1:"	LARXL "	%0,0,%3		# test_and_change_bit\n"
+"1:"	PPC_LLARX "%0,0,%3		# test_and_change_bit\n"
 	"xor	%1,%0,%2 \n"
 	PPC405_ERR77(0,%3)
-	STCXL "	%1,0,%3 \n"
+	PPC_STLCX "%1,0,%3 \n"
 	"bne-	1b"
 	ISYNC_ON_SMP
 	: "=&r" (old), "=&r" (t)
@@ -184,9 +175,9 @@
         unsigned long old;
 
 	__asm__ __volatile__(
-"1:"	LARXL "	%0,0,%3         # set_bit\n"
+"1:"	PPC_LLARX "%0,0,%3         # set_bits\n"
 	"or	%0,%0,%2\n"
-	STCXL "	%0,0,%3\n"
+	PPC_STLCX "%0,0,%3\n"
 	"bne-	1b"
 	: "=&r" (old), "=m" (*addr)
 	: "r" (mask), "r" (addr), "m" (*addr)
@@ -268,7 +259,7 @@
 {
 	int lz;
 
-	asm (CNTLZL " %0,%1" : "=r" (lz) : "r" (x));
+	asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x));
 	return BITS_PER_LONG - 1 - lz;
 }
 
diff --git a/include/asm-powerpc/bug.h b/include/asm-powerpc/bug.h
index d625ee5..b001ecb 100644
--- a/include/asm-powerpc/bug.h
+++ b/include/asm-powerpc/bug.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_POWERPC_BUG_H
 #define _ASM_POWERPC_BUG_H
 
+#include <asm/asm-compat.h>
 /*
  * Define an illegal instr to trap on the bug.
  * We don't use 0 because that marks the end of a function
@@ -11,14 +12,6 @@
 
 #ifndef __ASSEMBLY__
 
-#ifdef __powerpc64__
-#define BUG_TABLE_ENTRY		".llong"
-#define BUG_TRAP_OP		"tdnei"
-#else 
-#define BUG_TABLE_ENTRY		".long"
-#define BUG_TRAP_OP		"twnei"
-#endif /* __powerpc64__ */
-
 struct bug_entry {
 	unsigned long	bug_addr;
 	long		line;
@@ -40,16 +33,16 @@
 	__asm__ __volatile__(						 \
 		"1:	twi 31,0,0\n"					 \
 		".section __bug_table,\"a\"\n"				 \
-		"\t"BUG_TABLE_ENTRY"	1b,%0,%1,%2\n"			 \
+		"\t"PPC_LONG"	1b,%0,%1,%2\n"			 \
 		".previous"						 \
 		: : "i" (__LINE__), "i" (__FILE__), "i" (__FUNCTION__)); \
 } while (0)
 
 #define BUG_ON(x) do {						\
 	__asm__ __volatile__(					\
-		"1:	"BUG_TRAP_OP"	%0,0\n"			\
+		"1:	"PPC_TLNEI"	%0,0\n"			\
 		".section __bug_table,\"a\"\n"			\
-		"\t"BUG_TABLE_ENTRY"	1b,%1,%2,%3\n"		\
+		"\t"PPC_LONG"	1b,%1,%2,%3\n"		\
 		".previous"					\
 		: : "r" ((long)(x)), "i" (__LINE__),		\
 		    "i" (__FILE__), "i" (__FUNCTION__));	\
@@ -57,9 +50,9 @@
 
 #define WARN_ON(x) do {						\
 	__asm__ __volatile__(					\
-		"1:	"BUG_TRAP_OP"	%0,0\n"			\
+		"1:	"PPC_TLNEI"	%0,0\n"			\
 		".section __bug_table,\"a\"\n"			\
-		"\t"BUG_TABLE_ENTRY"	1b,%1,%2,%3\n"		\
+		"\t"PPC_LONG"	1b,%1,%2,%3\n"		\
 		".previous"					\
 		: : "r" ((long)(x)),				\
 		    "i" (__LINE__ + BUG_WARNING_TRAP),		\
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index 79a0556..f89fd88 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -2,7 +2,7 @@
 #define __ASM_POWERPC_CPUTABLE_H
 
 #include <linux/config.h>
-#include <asm/ppc_asm.h> /* for ASM_CONST */
+#include <asm/asm-compat.h>
 
 #define PPC_FEATURE_32			0x80000000
 #define PPC_FEATURE_64			0x40000000
diff --git a/include/asm-powerpc/futex.h b/include/asm-powerpc/futex.h
index 37c94e5..f0319d5 100644
--- a/include/asm-powerpc/futex.h
+++ b/include/asm-powerpc/futex.h
@@ -7,13 +7,14 @@
 #include <asm/errno.h>
 #include <asm/synch.h>
 #include <asm/uaccess.h>
-#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
 
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
   __asm__ __volatile ( \
 	SYNC_ON_SMP \
 "1:	lwarx	%0,0,%2\n" \
 	insn \
+	PPC405_ERR77(0, %2) \
 "2:	stwcx.	%1,0,%2\n" \
 	"bne-	1b\n" \
 	"li	%1,0\n" \
@@ -23,7 +24,7 @@
 	".previous\n" \
 	".section __ex_table,\"a\"\n" \
 	".align 3\n" \
-	DATAL " 1b,4b,2b,4b\n" \
+	PPC_LONG "1b,4b,2b,4b\n" \
 	".previous" \
 	: "=&r" (oldval), "=&r" (ret) \
 	: "b" (uaddr), "i" (-EFAULT), "1" (oparg) \
diff --git a/include/asm-powerpc/ppc_asm.h b/include/asm-powerpc/ppc_asm.h
index c534ca4..c27baa0 100644
--- a/include/asm-powerpc/ppc_asm.h
+++ b/include/asm-powerpc/ppc_asm.h
@@ -6,8 +6,13 @@
 
 #include <linux/stringify.h>
 #include <linux/config.h>
+#include <asm/asm-compat.h>
 
-#ifdef __ASSEMBLY__
+#ifndef __ASSEMBLY__
+#error __FILE__ should only be used in assembler files
+#else
+
+#define SZL			(BITS_PER_LONG/8)
 
 /*
  * Macros for storing registers into and loading registers from
@@ -184,12 +189,6 @@
 	oris    reg,reg,(label)@h;                      \
 	ori     reg,reg,(label)@l;
 
-/* operations for longs and pointers */
-#define LDL	ld
-#define STL	std
-#define CMPI	cmpdi
-#define SZL	8
-
 /* offsets for stack frame layout */
 #define LRSAVE	16
 
@@ -203,12 +202,6 @@
 
 #define OFF(name)	name@l
 
-/* operations for longs and pointers */
-#define LDL	lwz
-#define STL	stw
-#define CMPI	cmpwi
-#define SZL	4
-
 /* offsets for stack frame layout */
 #define LRSAVE	4
 
@@ -266,15 +259,6 @@
 #endif
 
 
-#ifdef CONFIG_IBM405_ERR77
-#define PPC405_ERR77(ra,rb)	dcbt	ra, rb;
-#define	PPC405_ERR77_SYNC	sync;
-#else
-#define PPC405_ERR77(ra,rb)
-#define PPC405_ERR77_SYNC
-#endif
-
-
 #ifdef CONFIG_IBM440EP_ERR42
 #define PPC440EP_ERR42 isync
 #else
@@ -502,17 +486,6 @@
 #define N_SLINE	68
 #define N_SO	100
 
-#define ASM_CONST(x) x
-#else
-  #define __ASM_CONST(x) x##UL
-  #define ASM_CONST(x) __ASM_CONST(x)
-
-#ifdef CONFIG_PPC64
-#define DATAL	".llong"
-#else
-#define DATAL	".long"
-#endif
-
 #endif /*  __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PPC_ASM_H */
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index 3536a5c..f0cce5a 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -8,7 +8,6 @@
 #include <linux/kernel.h>
 
 #include <asm/hw_irq.h>
-#include <asm/ppc_asm.h>
 #include <asm/atomic.h>
 
 /*
diff --git a/include/asm-powerpc/uaccess.h b/include/asm-powerpc/uaccess.h
index 33af730..3872e92 100644
--- a/include/asm-powerpc/uaccess.h
+++ b/include/asm-powerpc/uaccess.h
@@ -120,14 +120,6 @@
 
 extern long __put_user_bad(void);
 
-#ifdef __powerpc64__
-#define __EX_TABLE_ALIGN	"3"
-#define __EX_TABLE_TYPE		"llong"
-#else
-#define __EX_TABLE_ALIGN	"2"
-#define __EX_TABLE_TYPE		"long"
-#endif
-
 /*
  * We don't tell gcc that we are accessing memory, but this is OK
  * because we do not write to any memory gcc knows about, so there
@@ -142,11 +134,12 @@
 		"	b 2b\n"					\
 		".previous\n"					\
 		".section __ex_table,\"a\"\n"			\
-		"	.align " __EX_TABLE_ALIGN "\n"		\
-		"	."__EX_TABLE_TYPE" 1b,3b\n"		\
+		"	.balign %5\n"				\
+			PPC_LONG "1b,3b\n"			\
 		".previous"					\
 		: "=r" (err)					\
-		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
+		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err),\
+		  "i"(sizeof(unsigned long)))
 
 #ifdef __powerpc64__
 #define __put_user_asm2(x, ptr, retval)				\
@@ -162,12 +155,13 @@
 		"	b 3b\n"					\
 		".previous\n"					\
 		".section __ex_table,\"a\"\n"			\
-		"	.align " __EX_TABLE_ALIGN "\n"		\
-		"	." __EX_TABLE_TYPE " 1b,4b\n"		\
-		"	." __EX_TABLE_TYPE " 2b,4b\n"		\
+		"	.balign %5\n"				\
+			PPC_LONG "1b,4b\n"			\
+			PPC_LONG "2b,4b\n"			\
 		".previous"					\
 		: "=r" (err)					\
-		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
+		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err),\
+		  "i"(sizeof(unsigned long)))
 #endif /* __powerpc64__ */
 
 #define __put_user_size(x, ptr, size, retval)			\
@@ -213,11 +207,12 @@
 		"	b 2b\n"				\
 		".previous\n"				\
 		".section __ex_table,\"a\"\n"		\
-		"	.align "__EX_TABLE_ALIGN "\n"	\
-		"	." __EX_TABLE_TYPE " 1b,3b\n"	\
+		"	.balign %5\n"			\
+			PPC_LONG "1b,3b\n"		\
 		".previous"				\
 		: "=r" (err), "=r" (x)			\
-		: "b" (addr), "i" (-EFAULT), "0" (err))
+		: "b" (addr), "i" (-EFAULT), "0" (err),	\
+		  "i"(sizeof(unsigned long)))
 
 #ifdef __powerpc64__
 #define __get_user_asm2(x, addr, err)			\
@@ -235,12 +230,13 @@
 		"	b 3b\n"				\
 		".previous\n"				\
 		".section __ex_table,\"a\"\n"		\
-		"	.align " __EX_TABLE_ALIGN "\n"	\
-		"	." __EX_TABLE_TYPE " 1b,4b\n"	\
-		"	." __EX_TABLE_TYPE " 2b,4b\n"	\
+		"	.balign %5\n"			\
+			PPC_LONG "1b,4b\n"		\
+			PPC_LONG "2b,4b\n"		\
 		".previous"				\
 		: "=r" (err), "=&r" (x)			\
-		: "b" (addr), "i" (-EFAULT), "0" (err))
+		: "b" (addr), "i" (-EFAULT), "0" (err),	\
+		  "i"(sizeof(unsigned long)))
 #endif /* __powerpc64__ */
 
 #define __get_user_size(x, ptr, size, retval)			\
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index 4c18a5c..d50997b 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -14,7 +14,7 @@
 #define _PPC64_MMU_H_
 
 #include <linux/config.h>
-#include <asm/ppc_asm.h> /* for ASM_CONST */
+#include <asm/asm-compat.h>
 #include <asm/page.h>
 
 /*
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h
index 82ce187..e32f118 100644
--- a/include/asm-ppc64/page.h
+++ b/include/asm-ppc64/page.h
@@ -11,7 +11,7 @@
  */
 
 #include <linux/config.h>
-#include <asm/ppc_asm.h> /* for ASM_CONST */
+#include <asm/asm-compat.h>
 
 /*
  * We support either 4k or 64k software page size. When using 64k pages