powerpc: Add VSX context save/restore, ptrace and signal support

This patch extends the floating point save and restore code to use
VSX loads/stores when VSX is available.  This makes FP context
save/restore marginally slower for FP-only code, as it now has to
load/store 128 bits rather than just 64 bits.
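
This works by widening the thread_struct FP register save area so
that each slot holds a full 128-bit VSR: doubleword 0 is the classic
FPR and doubleword 1 is the VSX low half.  A minimal sketch of the
layout (the names below match the TS_* macros used in this series;
the sketch is illustrative, not the exact header):

	/* illustrative sketch of the widened FP save area */
	#define TS_FPRWIDTH	2	/* doublewords per slot with VSX */
	#define TS_FPROFFSET	0	/* FP value in doubleword 0 */
	#define TS_VSRLOWOFFSET	1	/* VSR doubleword 1 */
	#define TS_FPR(i)	fpr[i][TS_FPROFFSET]

	double	fpr[32][TS_FPRWIDTH];	/* in struct thread_struct */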

Code that mixes FP, VMX and VSX will see consistent architected state.

The signals interface is extended to enable access to doubleword 1 of
VSR 0-31, following discussions with the toolchain maintainers.
Backward compatibility is maintained.
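
The kernel-side copy-out this implies looks roughly like the sketch
below (hypothetical helper; the real signal frame layout and helpers
live in arch/powerpc/kernel/signal_32.c and signal_64.c):

	/* hypothetical sketch: copy doubleword 1 of VSR 0-31 out to
	 * the signal frame; buffer locally, then do one copy */
	static unsigned long copy_vsx_to_user(u64 __user *to,
					      struct task_struct *task)
	{
		u64 buf[32];
		int i;

		for (i = 0; i < 32; i++)
			buf[i] = task->thread.fpr[i][TS_VSRLOWOFFSET];
		return __copy_to_user(to, buf, sizeof(buf));
	}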

The ptrace interface is also extended to allow access to the full
VSR 0-31 registers.
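
For example, a debugger could fetch the new state along these lines
(user-space sketch; PTRACE_GETVSRREGS is the request added by this
series, but the fallback request number below is an assumption - take
it from the kernel headers.  Combine the result with the existing FP
and VMX requests to reconstruct the full VSRs):

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	#ifndef PTRACE_GETVSRREGS
	#define PTRACE_GETVSRREGS 27		/* assumed value */
	#endif

	int main(int argc, char **argv)
	{
		unsigned long long vsrlow[32];	/* VSR 0-31, doubleword 1 */
		pid_t pid;
		int i;

		if (argc != 2)
			return 1;
		pid = (pid_t)atoi(argv[1]);

		if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1)
			return 1;
		waitpid(pid, NULL, 0);

		if (ptrace(PTRACE_GETVSRREGS, pid, NULL, vsrlow) != -1)
			for (i = 0; i < 32; i++)
				printf("vsr%d dw1 = 0x%016llx\n",
				       i, vsrlow[i]);

		ptrace(PTRACE_DETACH, pid, NULL, NULL);
		return 0;
	}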

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 582df704..d52ded36 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -53,6 +53,7 @@
 #ifndef CONFIG_SMP
 struct task_struct *last_task_used_math = NULL;
 struct task_struct *last_task_used_altivec = NULL;
+struct task_struct *last_task_used_vsx = NULL;
 struct task_struct *last_task_used_spe = NULL;
 #endif
 
@@ -106,11 +107,23 @@
 
 int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
 {
+#ifdef CONFIG_VSX
+	int i;
+	elf_fpreg_t *reg;
+#endif
+
 	if (!tsk->thread.regs)
 		return 0;
 	flush_fp_to_thread(current);
 
+#ifdef CONFIG_VSX
+	reg = (elf_fpreg_t *)fpregs;
+	for (i = 0; i < ELF_NFPREG - 1; i++, reg++)
+		*reg = tsk->thread.TS_FPR(i);
+	memcpy(reg, &tsk->thread.fpscr, sizeof(elf_fpreg_t));
+#else
 	memcpy(fpregs, &tsk->thread.TS_FPR(0), sizeof(*fpregs));
+#endif
 
 	return 1;
 }
@@ -149,7 +162,7 @@
 	}
 }
 
-int dump_task_altivec(struct task_struct *tsk, elf_vrregset_t *vrregs)
+int dump_task_altivec(struct task_struct *tsk, elf_vrreg_t *vrregs)
 {
 	/* ELF_NVRREG includes the VSCR and VRSAVE which we need to save
 	 * separately, see below */
@@ -179,6 +192,80 @@
 }
 #endif /* CONFIG_ALTIVEC */
 
+#ifdef CONFIG_VSX
+#if 0
+/* not currently used, but some crazy RAID module might want to later */
+void enable_kernel_vsx(void)
+{
+	WARN_ON(preemptible());
+
+#ifdef CONFIG_SMP
+	if (current->thread.regs && (current->thread.regs->msr & MSR_VSX))
+		giveup_vsx(current);
+	else
+		giveup_vsx(NULL);	/* just enable vsx for kernel - force */
+#else
+	giveup_vsx(last_task_used_vsx);
+#endif /* CONFIG_SMP */
+}
+EXPORT_SYMBOL(enable_kernel_vsx);
+#endif
+
+void flush_vsx_to_thread(struct task_struct *tsk)
+{
+	if (tsk->thread.regs) {
+		preempt_disable();
+		if (tsk->thread.regs->msr & MSR_VSX) {
+#ifdef CONFIG_SMP
+			BUG_ON(tsk != current);
+#endif
+			giveup_vsx(tsk);
+		}
+		preempt_enable();
+	}
+}
+
+/*
+ * This dumps the low 64 bits (doubleword 1) of the first 32 VSX
+ * registers.  It needs to be called along with dump_task_fpu and
+ * dump_task_altivec to get all of the VSX state.
+ */
+int dump_task_vsx(struct task_struct *tsk, elf_vrreg_t *vrregs)
+{
+	elf_vrreg_t *reg;
+	double buf[32];
+	int i;
+
+	if (tsk == current)
+		flush_vsx_to_thread(tsk);
+
+	reg = (elf_vrreg_t *)vrregs;
+
+	for (i = 0; i < 32; i++)
+		buf[i] = tsk->thread.fpr[i][TS_VSRLOWOFFSET];
+	memcpy(reg, buf, sizeof(buf));
+
+	return 1;
+}
+#endif /* CONFIG_VSX */
+
+int dump_task_vector(struct task_struct *tsk, elf_vrregset_t *vrregs)
+{
+	int rc = 0;
+	elf_vrreg_t *regs = (elf_vrreg_t *)vrregs;
+#ifdef CONFIG_ALTIVEC
+	rc = dump_task_altivec(tsk, regs);
+	if (!rc)
+		return rc;
+	regs += ELF_NVRREG;
+#endif
+
+#ifdef CONFIG_VSX
+	rc = dump_task_vsx(tsk, regs);
+#endif
+	return rc;
+}
+
 #ifdef CONFIG_SPE
 
 void enable_kernel_spe(void)
@@ -233,6 +320,10 @@
 	if (last_task_used_altivec == current)
 		last_task_used_altivec = NULL;
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+	if (last_task_used_vsx == current)
+		last_task_used_vsx = NULL;
+#endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
 	if (last_task_used_spe == current)
 		last_task_used_spe = NULL;
@@ -297,6 +388,10 @@
 	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
 		giveup_altivec(prev);
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX))
+		giveup_vsx(prev);
+#endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
 	/*
 	 * If the previous thread used spe in the last quantum
@@ -317,6 +412,10 @@
 	if (new->thread.regs && last_task_used_altivec == new)
 		new->thread.regs->msr |= MSR_VEC;
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+	if (new->thread.regs && last_task_used_vsx == new)
+		new->thread.regs->msr |= MSR_VSX;
+#endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
 	/* Avoid the trap.  On smp this never happens since
 	 * we don't set last_task_used_spe
@@ -417,6 +516,8 @@
 	{MSR_EE,	"EE"},
 	{MSR_PR,	"PR"},
 	{MSR_FP,	"FP"},
+	{MSR_VEC,	"VEC"},
+	{MSR_VSX,	"VSX"},
 	{MSR_ME,	"ME"},
 	{MSR_IR,	"IR"},
 	{MSR_DR,	"DR"},
@@ -534,6 +635,7 @@
 {
 	flush_fp_to_thread(current);
 	flush_altivec_to_thread(current);
+	flush_vsx_to_thread(current);
 	flush_spe_to_thread(current);
 }
 
@@ -689,6 +791,9 @@
 #endif
 
 	discard_lazy_cpu_state();
+#ifdef CONFIG_VSX
+	current->thread.used_vsr = 0;
+#endif
 	memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
 	current->thread.fpscr.val = 0;
 #ifdef CONFIG_ALTIVEC