ARM: move heavy barrier support out of line
The existing memory barrier macro causes a significant amount of code
to be inserted inline at every call site. For example, in
gpio_set_irq_type(), we have this for mb():
c0344c08: f57ff04e dsb st
c0344c0c: e59f8190 ldr r8, [pc, #400] ; c0344da4 <gpio_set_irq_type+0x230>
c0344c10: e3590004 cmp r9, #4
c0344c14: e5983014 ldr r3, [r8, #20]
c0344c18: 0a000054 beq c0344d70 <gpio_set_irq_type+0x1fc>
c0344c1c: e3530000 cmp r3, #0
c0344c20: 0a000004 beq c0344c38 <gpio_set_irq_type+0xc4>
c0344c24: e50b2030 str r2, [fp, #-48] ; 0xffffffd0
c0344c28: e50bc034 str ip, [fp, #-52] ; 0xffffffcc
c0344c2c: e12fff33 blx r3
c0344c30: e51bc034 ldr ip, [fp, #-52] ; 0xffffffcc
c0344c34: e51b2030 ldr r2, [fp, #-48] ; 0xffffffd0
c0344c38: e5963004 ldr r3, [r6, #4]
Moving the outer_cache_sync() call out of line reduces the impact of
the barrier:
c0344968: f57ff04e dsb st
c034496c: e35a0004 cmp sl, #4
c0344970: e50b2030 str r2, [fp, #-48] ; 0xffffffd0
c0344974: 0a000044 beq c0344a8c <gpio_set_irq_type+0x1b8>
c0344978: ebf363dd bl c001d8f4 <arm_heavy_mb>
c034497c: e5953004 ldr r3, [r5, #4]
This should reduce the cache footprint of this code. Overall, this
results in a reduction of around 20K in the kernel size:
text data bss dec hex filename
10773970 667392 10369656 21811018 14ccf4a ../build/imx6/vmlinux-old
10754219 667392 10369656 21791267 14c8223 ../build/imx6/vmlinux-new
Another advantage to this approach is that we can finally resolve the
issue of SoCs which have their own memory barrier requirements within
multiplatform kernels (such as OMAP.) Here, the bus interconnects
need additional handling to ensure that writes become visible in the
correct order (eg, between dma_map() operations, writes to DMA
coherent memory, and MMIO accesses.)
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 6c2327e..fea99b0 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -2,7 +2,6 @@
#define __ASM_BARRIER_H
#ifndef __ASSEMBLY__
-#include <asm/outercache.h>
#define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");
@@ -37,12 +36,19 @@
#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
#endif
+#ifdef CONFIG_ARM_HEAVY_MB
+extern void arm_heavy_mb(void);
+#define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0)
+#else
+#define __arm_heavy_mb(x...) dsb(x)
+#endif
+
#ifdef CONFIG_ARCH_HAS_BARRIERS
#include <mach/barriers.h>
#elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
-#define mb() do { dsb(); outer_sync(); } while (0)
+#define mb() __arm_heavy_mb()
#define rmb() dsb()
-#define wmb() do { dsb(st); outer_sync(); } while (0)
+#define wmb() __arm_heavy_mb(st)
#define dma_rmb() dmb(osh)
#define dma_wmb() dmb(oshst)
#else
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index 563b92f..c2bf24f 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -129,21 +129,4 @@
#endif
-#ifdef CONFIG_OUTER_CACHE_SYNC
-/**
- * outer_sync - perform a sync point for outer cache
- *
- * Ensure that all outer cache operations are complete and any store
- * buffers are drained.
- */
-static inline void outer_sync(void)
-{
- if (outer_cache.sync)
- outer_cache.sync();
-}
-#else
-static inline void outer_sync(void)
-{ }
-#endif
-
#endif /* __ASM_OUTERCACHE_H */
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 350f188..b96c8ed 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -39,6 +39,7 @@
#include <linux/export.h>
#include <asm/hardware/cache-l2x0.h>
+#include <asm/outercache.h>
#include <asm/exception.h>
#include <asm/mach/arch.h>
#include <asm/mach/irq.h>
diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c
index 04c9daf..7db5870 100644
--- a/arch/arm/mach-mmp/pm-pxa910.c
+++ b/arch/arm/mach-mmp/pm-pxa910.c
@@ -18,6 +18,7 @@
#include <linux/io.h>
#include <linux/irq.h>
#include <asm/mach-types.h>
+#include <asm/outercache.h>
#include <mach/hardware.h>
#include <mach/cputype.h>
#include <mach/addr-map.h>
diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c
index d99d08e..83e94c9 100644
--- a/arch/arm/mach-prima2/pm.c
+++ b/arch/arm/mach-prima2/pm.c
@@ -16,6 +16,7 @@
#include <linux/of_platform.h>
#include <linux/io.h>
#include <linux/rtc/sirfsoc_rtciobrg.h>
+#include <asm/outercache.h>
#include <asm/suspend.h>
#include <asm/hardware/cache-l2x0.h>
diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c
index 7557bed..780bd13 100644
--- a/arch/arm/mach-ux500/cache-l2x0.c
+++ b/arch/arm/mach-ux500/cache-l2x0.c
@@ -8,6 +8,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
+#include <asm/outercache.h>
#include <asm/hardware/cache-l2x0.h>
#include "db8500-regs.h"
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 7c6b976..df7537f 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -883,6 +883,7 @@
config OUTER_CACHE_SYNC
bool
+ select ARM_HEAVY_MB
help
The outer cache has a outer_cache_fns.sync function pointer
that can be used to drain the write buffer of the outer cache.
@@ -1031,6 +1032,9 @@
This option allows the use of custom mandatory barriers
included via the mach/barriers.h file.
+config ARM_HEAVY_MB
+ bool
+
config ARCH_SUPPORTS_BIG_ENDIAN
bool
help
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 34b66af..ce6c296 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -21,6 +21,17 @@
#include "mm.h"
+#ifdef CONFIG_ARM_HEAVY_MB
+void arm_heavy_mb(void)
+{
+#ifdef CONFIG_OUTER_CACHE_SYNC
+ if (outer_cache.sync)
+ outer_cache.sync();
+#endif
+}
+EXPORT_SYMBOL(arm_heavy_mb);
+#endif
+
#ifdef CONFIG_CPU_CACHE_VIPT
static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)