[ARM] add ARMv5TEJ aware cache flush method to compressed/head.S

The default ARMv4 method consisting of reading through some memory
area isn't compatible with the cache replacement policy of some
ARMv5TEJ compatible cache implementations.  It is also a bit wasteful
when a dedicated instruction can do the needed work optimally.

It is hard to tell if all ARMv5TEJ cores will support the used CP15
instruction, but at least all those implementations Linux currently
knows about (ARM926 and ARM1026) do support it.

Tested on an OMAP1610 H2 target.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Tested-by: George G. Davis <gdavis@mvista.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 5cac46a..2073bf0 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -641,7 +641,7 @@
 		.word	0x000f0000
 		b	__armv4_mmu_cache_on
 		b	__armv4_mmu_cache_off
-		b	__armv4_mmu_cache_flush
+		b	__armv5tej_mmu_cache_flush
 
 		.word	0x0007b000		@ ARMv6
 		.word	0x000ff000
@@ -821,6 +821,13 @@
 		mcr	p15, 0, r10, c7, c10, 4	@ drain WB
 		mov	pc, lr
 
+__armv5tej_mmu_cache_flush:
+1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
+		bne	1b
+		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
+		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
+		mov	pc, lr
+
 __armv4_mmu_cache_flush:
 		mov	r2, #64*1024		@ default: 32K dcache size (*2)
 		mov	r11, #32		@ default: 32 byte line size