[PARISC] Ensure all ldcw uses are ldcw,co on pa2.0

ldcw,co should always be used on pa2.0, otherwise the strict cache
width alignment requirement is not relaxed.

Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index d9e53cf..630730c 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -1638,7 +1638,7 @@
 	load32		PA(pa_dbit_lock),t0
 
 dbit_spin_20w:
-	ldcw            0(t0),t1
+	LDCW		0(t0),t1
 	cmpib,=         0,t1,dbit_spin_20w
 	nop
 
@@ -1674,7 +1674,7 @@
 	load32		PA(pa_dbit_lock),t0
 
 dbit_spin_11:
-	ldcw            0(t0),t1
+	LDCW		0(t0),t1
 	cmpib,=         0,t1,dbit_spin_11
 	nop
 
@@ -1714,7 +1714,7 @@
 	load32		PA(pa_dbit_lock),t0
 
 dbit_spin_20:
-	ldcw            0(t0),t1
+	LDCW		0(t0),t1
 	cmpib,=         0,t1,dbit_spin_20
 	nop
 
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 479d9a0..a028c99 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -541,7 +541,7 @@
 # endif
 /* ENABLE_LWS_DEBUG */
 
-	ldcw	0(%sr2,%r20), %r28			/* Try to acquire the lock */
+	LDCW	0(%sr2,%r20), %r28			/* Try to acquire the lock */
 	cmpb,<>,n	%r0, %r28, cas_action		/* Did we get it? */
 cas_wouldblock:
 	ldo	2(%r0), %r28				/* 2nd case */
diff --git a/include/asm-parisc/assembly.h b/include/asm-parisc/assembly.h
index 3ce3440..1a7bfe6 100644
--- a/include/asm-parisc/assembly.h
+++ b/include/asm-parisc/assembly.h
@@ -48,6 +48,7 @@
 #define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE)
 
 #ifdef CONFIG_PA20
+#define LDCW		ldcw,co
 #define BL		b,l
 # ifdef CONFIG_64BIT
 #  define LEVEL		2.0w
@@ -55,6 +56,7 @@
 #  define LEVEL		2.0
 # endif
 #else
+#define LDCW		ldcw
 #define BL		bl
 #define LEVEL		1.1
 #endif
diff --git a/include/asm-parisc/system.h b/include/asm-parisc/system.h
index 8638761..5fe2d23 100644
--- a/include/asm-parisc/system.h
+++ b/include/asm-parisc/system.h
@@ -155,13 +155,14 @@
    type and dynamically select the 16-byte aligned int from the array
    for the semaphore.  */
 
-#define __PA_LDCW_ALIGNMENT 16
-#define __ldcw_align(a) ({ \
-  unsigned long __ret = (unsigned long) &(a)->lock[0];        		\
-  __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) & ~(__PA_LDCW_ALIGNMENT - 1); \
-  (volatile unsigned int *) __ret;                                      \
+#define __PA_LDCW_ALIGNMENT	16
+#define __ldcw_align(a) ({					\
+	unsigned long __ret = (unsigned long) &(a)->lock[0];	\
+	__ret = (__ret + __PA_LDCW_ALIGNMENT - 1)		\
+		& ~(__PA_LDCW_ALIGNMENT - 1);			\
+	(volatile unsigned int *) __ret;			\
 })
-#define LDCW	"ldcw"
+#define __LDCW	"ldcw"
 
 #else /*CONFIG_PA20*/
 /* From: "Jim Hull" <jim.hull of hp.com>
@@ -171,17 +172,18 @@
    they only require "natural" alignment (4-byte for ldcw, 8-byte for
    ldcd). */
 
-#define __PA_LDCW_ALIGNMENT 4
+#define __PA_LDCW_ALIGNMENT	4
 #define __ldcw_align(a) ((volatile unsigned int *)a)
-#define LDCW	"ldcw,co"
+#define __LDCW	"ldcw,co"
 
 #endif /*!CONFIG_PA20*/
 
 /* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.  */
-#define __ldcw(a) ({ \
-	unsigned __ret; \
-	__asm__ __volatile__(LDCW " 0(%1),%0" : "=r" (__ret) : "r" (a)); \
-	__ret; \
+#define __ldcw(a) ({						\
+	unsigned __ret;						\
+	__asm__ __volatile__(__LDCW " 0(%1),%0"			\
+		: "=r" (__ret) : "r" (a));			\
+	__ret;							\
 })
 
 #ifdef CONFIG_SMP