ARM: Add barriers to io{read,write}{8,16,32} accessors as well

The ioread/iowrite accessors also need barriers as they're used in
place of readl/writel et.al. in portable drivers.  Create __iormb()
and __iowmb() which are conditionally defined to be barriers dependent
on ARM_DMA_MEM_BUFFERABLE, and always use these macros in the accessors.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 3c91e7c..1261b1f9 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -193,23 +193,21 @@
 					cpu_to_le32(v),__mem_pci(c)))
 
 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
-#define readb(c)		({ u8  __v = readb_relaxed(c); rmb(); __v; })
-#define readw(c)		({ u16 __v = readw_relaxed(c); rmb(); __v; })
-#define readl(c)		({ u32 __v = readl_relaxed(c); rmb(); __v; })
-
-#define writeb(v,c)		({ wmb(); writeb_relaxed(v,c); })
-#define writew(v,c)		({ wmb(); writew_relaxed(v,c); })
-#define writel(v,c)		({ wmb(); writel_relaxed(v,c); })
+#define __iormb()		rmb()
+#define __iowmb()		wmb()
 #else
-#define readb(c)		readb_relaxed(c)
-#define readw(c)		readw_relaxed(c)
-#define readl(c)		readl_relaxed(c)
-
-#define writeb(v,c)		writeb_relaxed(v,c)
-#define writew(v,c)		writew_relaxed(v,c)
-#define writel(v,c)		writel_relaxed(v,c)
+#define __iormb()		do { } while (0)
+#define __iowmb()		do { } while (0)
 #endif
 
+#define readb(c)		({ u8  __v = readb_relaxed(c); __iormb(); __v; })
+#define readw(c)		({ u16 __v = readw_relaxed(c); __iormb(); __v; })
+#define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+
+#define writeb(v,c)		({ __iowmb(); writeb_relaxed(v,c); })
+#define writew(v,c)		({ __iowmb(); writew_relaxed(v,c); })
+#define writel(v,c)		({ __iowmb(); writel_relaxed(v,c); })
+
 #define readsb(p,d,l)		__raw_readsb(__mem_pci(p),d,l)
 #define readsw(p,d,l)		__raw_readsw(__mem_pci(p),d,l)
 #define readsl(p,d,l)		__raw_readsl(__mem_pci(p),d,l)
@@ -260,13 +258,13 @@
  * io{read,write}{8,16,32} macros
  */
 #ifndef ioread8
-#define ioread8(p)	({ unsigned int __v = __raw_readb(p); __v; })
-#define ioread16(p)	({ unsigned int __v = le16_to_cpu((__force __le16)__raw_readw(p)); __v; })
-#define ioread32(p)	({ unsigned int __v = le32_to_cpu((__force __le32)__raw_readl(p)); __v; })
+#define ioread8(p)	({ unsigned int __v = __raw_readb(p); __iormb(); __v; })
+#define ioread16(p)	({ unsigned int __v = le16_to_cpu((__force __le16)__raw_readw(p)); __iormb(); __v; })
+#define ioread32(p)	({ unsigned int __v = le32_to_cpu((__force __le32)__raw_readl(p)); __iormb(); __v; })
 
-#define iowrite8(v,p)	__raw_writeb(v, p)
-#define iowrite16(v,p)	__raw_writew((__force __u16)cpu_to_le16(v), p)
-#define iowrite32(v,p)	__raw_writel((__force __u32)cpu_to_le32(v), p)
+#define iowrite8(v,p)	({ __iowmb(); (void)__raw_writeb(v, p); })
+#define iowrite16(v,p)	({ __iowmb(); (void)__raw_writew((__force __u16)cpu_to_le16(v), p); })
+#define iowrite32(v,p)	({ __iowmb(); (void)__raw_writel((__force __u32)cpu_to_le32(v), p); })
 
 #define ioread8_rep(p,d,c)	__raw_readsb(p,d,c)
 #define ioread16_rep(p,d,c)	__raw_readsw(p,d,c)