[ARM] 4582/2: Add support for the common VFP subarchitecture

This patch allows the VFP support code to run correctly on CPUs
compatible with the common VFP subarchitecture specification (Appendix
B in the ARM ARM v7-A and v7-R edition). It implements support for VFP
subarchitecture 2 while being backwards compatible with
subarchitecture 1.

On VFP subarchitecture 1, the arithmetic exceptions are asynchronous
(or imprecise as described in the old ARM ARM) unless the FPSCR.IXE
bit is 1. The exceptional instructions can be read from FPINST and
FPINST2 registers. With VFP subarchitecture 2, the arithmetic
exceptions can also be synchronous and marked by the FPEXC.DEX bit
(the FPEXC.EX bit is cleared). CPUs implementing the synchronous
arithmetic exceptions don't have the FPINST and FPINST2 registers and
accessing them would trigger an undefined exception.

Note that the FPEXC.EX bit has an additional meaning on subarchitecture 1
- if it isn't set, there is no additional information in FPINST and
FPINST2 that needs to be saved at context switch or when lazy-loading
the VFP state of a different thread.

The patch also removes the clearing of the cumulative exception flags in
FPSCR when additional exceptions were raised. It is up to the user
application to clear these bits.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 0ac022f..53d9f8e 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -100,10 +100,10 @@
 	cmp	r4, #0
 	beq	no_old_VFP_process
 	VFPFMRX	r5, FPSCR		@ current status
-	VFPFMRX	r6, FPINST		@ FPINST (always there, rev0 onwards)
-	tst	r1, #FPEXC_FPV2		@ is there an FPINST2 to read?
-	VFPFMRX	r8, FPINST2, NE		@ FPINST2 if needed - avoids reading
-					@ nonexistant reg on rev0
+	tst	r1, #FPEXC_EX		@ is there additional state to save?
+	VFPFMRX	r6, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
+	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
+	VFPFMRX	r8, FPINST2, NE		@ FPINST2 if needed (and present)
 	VFPFSTMIA r4 			@ save the working registers
 	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
 					@ and point r4 at the word at the
@@ -117,10 +117,10 @@
 	VFPFLDMIA r10	 		@ reload the working registers while
 					@ FPEXC is in a safe state
 	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
-	tst	r1, #FPEXC_FPV2		@ is there an FPINST2 to write?
-	VFPFMXR	FPINST2, r8, NE		@ FPINST2 if needed - avoids writing
-					@ nonexistant reg on rev0
-	VFPFMXR	FPINST, r6
+	tst	r1, #FPEXC_EX		@ is there additional state to restore?
+	VFPFMXR	FPINST, r6, NE		@ restore FPINST (only if FPEXC.EX is set)
+	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to write?
+	VFPFMXR	FPINST2, r8, NE		@ FPINST2 if needed (and present)
 	VFPFMXR	FPSCR, r5		@ restore status
 
 check_for_exception:
@@ -136,10 +136,14 @@
 
 
 look_for_VFP_exceptions:
-	tst	r1, #FPEXC_EX
+	@ Check for synchronous or asynchronous exception
+	tst	r1, #FPEXC_EX | FPEXC_DEX
 	bne	process_exception
+	@ On some implementations of the VFP subarch 1, setting FPSCR.IXE
+	@ causes all the CDP instructions to be bounced synchronously without
+	@ setting the FPEXC.EX bit
 	VFPFMRX	r5, FPSCR
-	tst	r5, #FPSCR_IXE		@ IXE doesn't set FPEXC_EX !
+	tst	r5, #FPSCR_IXE
 	bne	process_exception
 
 	@ Fall into hand on to next handler - appropriate coproc instr
@@ -150,10 +154,6 @@
 
 process_exception:
 	DBGSTR	"bounce"
-	sub	r2, r2, #4
-	str	r2, [sp, #S_PC]		@ retry the instruction on exit from
-					@ the imprecise exception handling in
-					@ the support code
 	mov	r2, sp			@ nothing stacked - regdump is at TOS
 	mov	lr, r9			@ setup for a return to the user code.
 
@@ -161,7 +161,7 @@
 	@   r0 holds the trigger instruction
 	@   r1 holds the FPEXC value
 	@   r2 pointer to register dump
-	b	VFP9_bounce		@ we have handled this - the support
+	b	VFP_bounce		@ we have handled this - the support
 					@ code will raise an exception if
 					@ required. If not, the user code will
 					@ retry the faulted instruction
@@ -175,10 +175,10 @@
 	@ r1 - FPEXC
 	DBGSTR1	"save VFP state %p", r0
 	VFPFMRX	r2, FPSCR		@ current status
-	VFPFMRX	r3, FPINST		@ FPINST (always there, rev0 onwards)
-	tst	r1, #FPEXC_FPV2		@ is there an FPINST2 to read?
-	VFPFMRX	r12, FPINST2, NE	@ FPINST2 if needed - avoids reading
-					@ nonexistant reg on rev0
+	tst	r1, #FPEXC_EX		@ is there additional state to save?
+	VFPFMRX	r3, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
+	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
+	VFPFMRX	r12, FPINST2, NE	@ FPINST2 if needed (and present)
 	VFPFSTMIA r0 			@ save the working registers
 	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
 	mov	pc, lr