[ARM] 4583/1: ARMv7: Add VFPv3 support

This patch adds the support for VFPv3 (the kernel currently supports
VFPv2). The main difference is 32 double registers (compared to 16).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/include/asm-arm/fpstate.h b/include/asm-arm/fpstate.h
index f31cda5..392eb53 100644
--- a/include/asm-arm/fpstate.h
+++ b/include/asm-arm/fpstate.h
@@ -17,14 +17,18 @@
 /*
  * VFP storage area has:
  *  - FPEXC, FPSCR, FPINST and FPINST2.
- *  - 16 double precision data registers
- *  - an implementation-dependant word of state for FLDMX/FSTMX
+ *  - 16 or 32 double precision data registers
+ *  - an implementation-dependant word of state for FLDMX/FSTMX (pre-ARMv6)
  * 
  *  FPEXC will always be non-zero once the VFP has been used in this process.
  */
 
 struct vfp_hard_struct {
+#ifdef CONFIG_VFPv3
+	__u64 fpregs[32];
+#else
 	__u64 fpregs[16];
+#endif
 #if __LINUX_ARM_ARCH__ < 6
 	__u32 fpmx_state;
 #endif
@@ -35,6 +39,7 @@
 	 */
 	__u32 fpinst;
 	__u32 fpinst2;
+
 #ifdef CONFIG_SMP
 	__u32 cpu;
 #endif
diff --git a/include/asm-arm/vfp.h b/include/asm-arm/vfp.h
index 9d474d4..5f9a2cb 100644
--- a/include/asm-arm/vfp.h
+++ b/include/asm-arm/vfp.h
@@ -7,6 +7,8 @@
 
 #define FPSID			cr0
 #define FPSCR			cr1
+#define MVFR1			cr6
+#define MVFR0			cr7
 #define FPEXC			cr8
 #define FPINST			cr9
 #define FPINST2			cr10
@@ -70,6 +72,10 @@
 #define FPSCR_IXC		(1<<4)
 #define FPSCR_IDC		(1<<7)
 
+/* MVFR0 bits */
+#define MVFR0_A_SIMD_BIT	(0)
+#define MVFR0_A_SIMD_MASK	(0xf << MVFR0_A_SIMD_BIT)
+
 /* Bit patterns for decoding the packaged operation descriptors */
 #define VFPOPDESC_LENGTH_BIT	(9)
 #define VFPOPDESC_LENGTH_MASK	(0x07 << VFPOPDESC_LENGTH_BIT)
diff --git a/include/asm-arm/vfpmacros.h b/include/asm-arm/vfpmacros.h
index 27fe028..cccb389 100644
--- a/include/asm-arm/vfpmacros.h
+++ b/include/asm-arm/vfpmacros.h
@@ -15,19 +15,33 @@
 	.endm
 
 	@ read all the working registers back into the VFP
-	.macro	VFPFLDMIA, base
+	.macro	VFPFLDMIA, base, tmp
 #if __LINUX_ARM_ARCH__ < 6
 	LDC	p11, cr0, [\base],#33*4		    @ FLDMIAX \base!, {d0-d15}
 #else
 	LDC	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d0-d15}
 #endif
+#ifdef CONFIG_VFPv3
+	VFPFMRX	\tmp, MVFR0			    @ Media and VFP Feature Register 0
+	and	\tmp, \tmp, #MVFR0_A_SIMD_MASK	    @ A_SIMD field
+	cmp	\tmp, #2			    @ 32 x 64bit registers?
+	ldceql	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d16-d31}
+	addne	\base, \base, #32*4		    @ step over unused register space
+#endif
 	.endm
 
 	@ write all the working registers out of the VFP
-	.macro	VFPFSTMIA, base
+	.macro	VFPFSTMIA, base, tmp
 #if __LINUX_ARM_ARCH__ < 6
 	STC	p11, cr0, [\base],#33*4		    @ FSTMIAX \base!, {d0-d15}
 #else
 	STC	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d0-d15}
 #endif
+#ifdef CONFIG_VFPv3
+	VFPFMRX	\tmp, MVFR0			    @ Media and VFP Feature Register 0
+	and	\tmp, \tmp, #MVFR0_A_SIMD_MASK	    @ A_SIMD field
+	cmp	\tmp, #2			    @ 32 x 64bit registers?
+	stceql	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d16-d31}
+	addne	\base, \base, #32*4		    @ step over unused register space
+#endif
 	.endm