sh: Move over to dynamically allocated FPU context.
This follows the x86 xstate changes and implements a task_xstate slab
cache that is dynamically sized to match one of hard FP/soft FP/FPU-less.
This also tidies up and consolidates some of the SH-2A/SH-4 FPU
fragmentation. Now fpu state restorers are commonly defined, with the
init_fpu()/fpu_init() mess reworked to follow the x86 convention.
The fpu_init() register initialization has been replaced by xstate setup
followed by writing out to hardware via the standard restore path.
As init_fpu() now performs a slab allocation a secondary lighterweight
restorer is also introduced for the context switch.
In the future the DSP state will be rolled in here, too.
More work remains for math emulation and the SH-5 FPU, which presently
uses its own special (UP-only) interfaces.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h
index fb6bbb9..06c4281 100644
--- a/arch/sh/include/asm/fpu.h
+++ b/arch/sh/include/asm/fpu.h
@@ -2,8 +2,8 @@
#define __ASM_SH_FPU_H
#ifndef __ASSEMBLY__
-#include <linux/preempt.h>
-#include <asm/ptrace.h>
+
+struct task_struct;
#ifdef CONFIG_SH_FPU
static inline void release_fpu(struct pt_regs *regs)
@@ -16,22 +16,23 @@
regs->sr &= ~SR_FD;
}
-struct task_struct;
-
extern void save_fpu(struct task_struct *__tsk);
-void fpu_state_restore(struct pt_regs *regs);
+extern void restore_fpu(struct task_struct *__tsk);
+extern void fpu_state_restore(struct pt_regs *regs);
+extern void __fpu_state_restore(void);
#else
-
-#define save_fpu(tsk) do { } while (0)
-#define release_fpu(regs) do { } while (0)
-#define grab_fpu(regs) do { } while (0)
-#define fpu_state_restore(regs) do { } while (0)
-
+#define save_fpu(tsk) do { } while (0)
+#define restore_fpu(tsk) do { } while (0)
+#define release_fpu(regs) do { } while (0)
+#define grab_fpu(regs) do { } while (0)
+#define fpu_state_restore(regs) do { } while (0)
+#define __fpu_state_restore(regs) do { } while (0)
#endif
struct user_regset;
extern int do_fpu_inst(unsigned short, struct pt_regs *);
+extern int init_fpu(struct task_struct *);
extern int fpregs_get(struct task_struct *target,
const struct user_regset *regset,
@@ -65,18 +66,6 @@
preempt_enable();
}
-static inline int init_fpu(struct task_struct *tsk)
-{
- if (tsk_used_math(tsk)) {
- if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current)
- unlazy_fpu(tsk, task_pt_regs(tsk));
- return 0;
- }
-
- set_stopped_child_used_math(tsk);
- return 0;
-}
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SH_FPU_H */
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 50b8c9c..a359898 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -90,11 +90,15 @@
unsigned long entry_pc;
};
-union sh_fpu_union {
- struct sh_fpu_hard_struct hard;
- struct sh_fpu_soft_struct soft;
+union thread_xstate {
+ struct sh_fpu_hard_struct hardfpu;
+ struct sh_fpu_soft_struct softfpu;
};
+extern unsigned int xstate_size;
+extern void free_thread_xstate(struct task_struct *);
+extern struct kmem_cache *task_xstate_cachep;
+
struct thread_struct {
/* Saved registers when thread is descheduled */
unsigned long sp;
@@ -103,13 +107,13 @@
/* Hardware debugging registers */
unsigned long ubc_pc;
- /* floating point info */
- union sh_fpu_union fpu;
-
#ifdef CONFIG_SH_DSP
/* Dsp status information */
struct sh_dsp_struct dsp_status;
#endif
+
+ /* Extended processor state */
+ union thread_xstate *xstate;
};
/* Count of active tasks with UBC settings */
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 2c5b48e..55a36fe 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -97,6 +97,10 @@
extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
extern void free_thread_info(struct thread_info *ti);
+extern void arch_task_cache_init(void);
+#define arch_task_cache_init arch_task_cache_init
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+extern void init_thread_xstate(void);
#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile
index d97c803..0e48bc6 100644
--- a/arch/sh/kernel/cpu/Makefile
+++ b/arch/sh/kernel/cpu/Makefile
@@ -17,5 +17,7 @@
obj-$(CONFIG_SH_ADC) += adc.o
obj-$(CONFIG_SH_CLK_CPG) += clock-cpg.o
+obj-$(CONFIG_SH_FPU) += fpu.o
+obj-$(CONFIG_SH_FPU_EMU) += fpu.o
obj-y += irq/ init.o clock.o hwblk.o
diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c
new file mode 100644
index 0000000..c23e672
--- /dev/null
+++ b/arch/sh/kernel/cpu/fpu.c
@@ -0,0 +1,82 @@
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/fpu.h>
+
+int init_fpu(struct task_struct *tsk)
+{
+ if (tsk_used_math(tsk)) {
+ if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current)
+ unlazy_fpu(tsk, task_pt_regs(tsk));
+ return 0;
+ }
+
+ /*
+ * Memory allocation at the first usage of the FPU and other state.
+ */
+ if (!tsk->thread.xstate) {
+ tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
+ GFP_KERNEL);
+ if (!tsk->thread.xstate)
+ return -ENOMEM;
+ }
+
+ if (boot_cpu_data.flags & CPU_HAS_FPU) {
+ struct sh_fpu_hard_struct *fp = &tsk->thread.xstate->hardfpu;
+ memset(fp, 0, xstate_size);
+ fp->fpscr = FPSCR_INIT;
+ } else {
+ struct sh_fpu_soft_struct *fp = &tsk->thread.xstate->softfpu;
+ memset(fp, 0, xstate_size);
+ fp->fpscr = FPSCR_INIT;
+ }
+
+ set_stopped_child_used_math(tsk);
+ return 0;
+}
+
+#ifdef CONFIG_SH_FPU
+void __fpu_state_restore(void)
+{
+ struct task_struct *tsk = current;
+
+ restore_fpu(tsk);
+
+ task_thread_info(tsk)->status |= TS_USEDFPU;
+ tsk->fpu_counter++;
+}
+
+void fpu_state_restore(struct pt_regs *regs)
+{
+ struct task_struct *tsk = current;
+
+ if (unlikely(!user_mode(regs))) {
+ printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
+ BUG();
+ return;
+ }
+
+ if (!tsk_used_math(tsk)) {
+ /*
+ * does a slab alloc which can sleep
+ */
+ if (init_fpu(tsk)) {
+ /*
+ * ran out of memory!
+ */
+ do_group_exit(SIGKILL);
+ return;
+ }
+ }
+
+ grab_fpu(regs);
+
+ __fpu_state_restore();
+}
+
+BUILD_TRAP_HANDLER(fpu_state_restore)
+{
+ TRAP_HANDLER_DECL;
+
+ fpu_state_restore(regs);
+}
+#endif /* CONFIG_SH_FPU */
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index 89b4b76..2e23422 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -28,18 +28,30 @@
#include <asm/ubc.h>
#endif
+#ifdef CONFIG_SH_FPU
+#define cpu_has_fpu 1
+#else
+#define cpu_has_fpu 0
+#endif
+
+#ifdef CONFIG_SH_DSP
+#define cpu_has_dsp 1
+#else
+#define cpu_has_dsp 0
+#endif
+
/*
* Generic wrapper for command line arguments to disable on-chip
* peripherals (nofpu, nodsp, and so forth).
*/
-#define onchip_setup(x) \
-static int x##_disabled __initdata = 0; \
- \
-static int __init x##_setup(char *opts) \
-{ \
- x##_disabled = 1; \
- return 1; \
-} \
+#define onchip_setup(x) \
+static int x##_disabled __initdata = !cpu_has_##x; \
+ \
+static int __init x##_setup(char *opts) \
+{ \
+ x##_disabled = 1; \
+ return 1; \
+} \
__setup("no" __stringify(x), x##_setup);
onchip_setup(fpu);
@@ -207,6 +219,18 @@
l2_cache_shape = -1; /* No S-cache */
}
+static void __init fpu_init(void)
+{
+ /* Disable the FPU */
+ if (fpu_disabled && (current_cpu_data.flags & CPU_HAS_FPU)) {
+ printk("FPU Disabled\n");
+ current_cpu_data.flags &= ~CPU_HAS_FPU;
+ }
+
+ disable_fpu();
+ clear_used_math();
+}
+
#ifdef CONFIG_SH_DSP
static void __init release_dsp(void)
{
@@ -244,9 +268,17 @@
if (sr & SR_DSP)
current_cpu_data.flags |= CPU_HAS_DSP;
+ /* Disable the DSP */
+ if (dsp_disabled && (current_cpu_data.flags & CPU_HAS_DSP)) {
+ printk("DSP Disabled\n");
+ current_cpu_data.flags &= ~CPU_HAS_DSP;
+ }
+
/* Now that we've determined the DSP status, clear the DSP bit. */
release_dsp();
}
+#else
+static inline void __init dsp_init(void) { }
#endif /* CONFIG_SH_DSP */
/**
@@ -302,18 +334,8 @@
detect_cache_shape();
}
- /* Disable the FPU */
- if (fpu_disabled) {
- printk("FPU Disabled\n");
- current_cpu_data.flags &= ~CPU_HAS_FPU;
- }
-
- /* FPU initialization */
- disable_fpu();
- if ((current_cpu_data.flags & CPU_HAS_FPU)) {
- current_thread_info()->status &= ~TS_USEDFPU;
- clear_used_math();
- }
+ fpu_init();
+ dsp_init();
/*
* Initialize the per-CPU ASID cache very early, since the
@@ -321,18 +343,12 @@
*/
current_cpu_data.asid_cache = NO_CONTEXT;
-#ifdef CONFIG_SH_DSP
- /* Probe for DSP */
- dsp_init();
-
- /* Disable the DSP */
- if (dsp_disabled) {
- printk("DSP Disabled\n");
- current_cpu_data.flags &= ~CPU_HAS_DSP;
- release_dsp();
- }
-#endif
-
speculative_execution_init();
expmask_init();
+
+ /*
+ * Boot processor to setup the FP and extended state context info.
+ */
+ if (raw_smp_processor_id() == 0)
+ init_thread_xstate();
}
diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c
index d395ce5..488d24e 100644
--- a/arch/sh/kernel/cpu/sh2a/fpu.c
+++ b/arch/sh/kernel/cpu/sh2a/fpu.c
@@ -26,8 +26,7 @@
/*
* Save FPU registers onto task structure.
*/
-void
-save_fpu(struct task_struct *tsk)
+void save_fpu(struct task_struct *tsk)
{
unsigned long dummy;
@@ -52,7 +51,7 @@
"fmov.s fr0, @-%0\n\t"
"lds %3, fpscr\n\t"
: "=r" (dummy)
- : "0" ((char *)(&tsk->thread.fpu.hard.status)),
+ : "0" ((char *)(&tsk->thread.xstate->hardfpu.status)),
"r" (FPSCR_RCHG),
"r" (FPSCR_INIT)
: "memory");
@@ -60,8 +59,7 @@
disable_fpu();
}
-static void
-restore_fpu(struct task_struct *tsk)
+void restore_fpu(struct task_struct *tsk)
{
unsigned long dummy;
@@ -85,45 +83,12 @@
"lds.l @%0+, fpscr\n\t"
"lds.l @%0+, fpul\n\t"
: "=r" (dummy)
- : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
+ : "0" (tsk->thread.xstate), "r" (FPSCR_RCHG)
: "memory");
disable_fpu();
}
/*
- * Load the FPU with signalling NANS. This bit pattern we're using
- * has the property that no matter wether considered as single or as
- * double precission represents signaling NANS.
- */
-
-static void
-fpu_init(void)
-{
- enable_fpu();
- asm volatile("lds %0, fpul\n\t"
- "fsts fpul, fr0\n\t"
- "fsts fpul, fr1\n\t"
- "fsts fpul, fr2\n\t"
- "fsts fpul, fr3\n\t"
- "fsts fpul, fr4\n\t"
- "fsts fpul, fr5\n\t"
- "fsts fpul, fr6\n\t"
- "fsts fpul, fr7\n\t"
- "fsts fpul, fr8\n\t"
- "fsts fpul, fr9\n\t"
- "fsts fpul, fr10\n\t"
- "fsts fpul, fr11\n\t"
- "fsts fpul, fr12\n\t"
- "fsts fpul, fr13\n\t"
- "fsts fpul, fr14\n\t"
- "fsts fpul, fr15\n\t"
- "lds %2, fpscr\n\t"
- : /* no output */
- : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
- disable_fpu();
-}
-
-/*
* Emulate arithmetic ops on denormalized number for some FPU insns.
*/
@@ -490,9 +455,9 @@
if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
struct task_struct *tsk = current;
- if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
+ if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_FPU_ERROR)) {
/* FPU error */
- denormal_to_double (&tsk->thread.fpu.hard,
+ denormal_to_double (&tsk->thread.xstate->hardfpu,
(finsn >> 8) & 0xf);
} else
return 0;
@@ -507,9 +472,9 @@
n = (finsn >> 8) & 0xf;
m = (finsn >> 4) & 0xf;
- hx = tsk->thread.fpu.hard.fp_regs[n];
- hy = tsk->thread.fpu.hard.fp_regs[m];
- fpscr = tsk->thread.fpu.hard.fpscr;
+ hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+ hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+ fpscr = tsk->thread.xstate->hardfpu.fpscr;
prec = fpscr & (1 << 19);
if ((fpscr & FPSCR_FPU_ERROR)
@@ -519,15 +484,15 @@
/* FPU error because of denormal */
llx = ((long long) hx << 32)
- | tsk->thread.fpu.hard.fp_regs[n+1];
+ | tsk->thread.xstate->hardfpu.fp_regs[n+1];
lly = ((long long) hy << 32)
- | tsk->thread.fpu.hard.fp_regs[m+1];
+ | tsk->thread.xstate->hardfpu.fp_regs[m+1];
if ((hx & 0x7fffffff) >= 0x00100000)
llx = denormal_muld(lly, llx);
else
llx = denormal_muld(llx, lly);
- tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
- tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+ tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff;
} else if ((fpscr & FPSCR_FPU_ERROR)
&& (!prec && ((hx & 0x7fffffff) < 0x00800000
|| (hy & 0x7fffffff) < 0x00800000))) {
@@ -536,7 +501,7 @@
hx = denormal_mulf(hy, hx);
else
hx = denormal_mulf(hx, hy);
- tsk->thread.fpu.hard.fp_regs[n] = hx;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
} else
return 0;
@@ -550,9 +515,9 @@
n = (finsn >> 8) & 0xf;
m = (finsn >> 4) & 0xf;
- hx = tsk->thread.fpu.hard.fp_regs[n];
- hy = tsk->thread.fpu.hard.fp_regs[m];
- fpscr = tsk->thread.fpu.hard.fpscr;
+ hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+ hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+ fpscr = tsk->thread.xstate->hardfpu.fpscr;
prec = fpscr & (1 << 19);
if ((fpscr & FPSCR_FPU_ERROR)
@@ -562,15 +527,15 @@
/* FPU error because of denormal */
llx = ((long long) hx << 32)
- | tsk->thread.fpu.hard.fp_regs[n+1];
+ | tsk->thread.xstate->hardfpu.fp_regs[n+1];
lly = ((long long) hy << 32)
- | tsk->thread.fpu.hard.fp_regs[m+1];
+ | tsk->thread.xstate->hardfpu.fp_regs[m+1];
if ((finsn & 0xf00f) == 0xf000)
llx = denormal_addd(llx, lly);
else
llx = denormal_addd(llx, lly ^ (1LL << 63));
- tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
- tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+ tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff;
} else if ((fpscr & FPSCR_FPU_ERROR)
&& (!prec && ((hx & 0x7fffffff) < 0x00800000
|| (hy & 0x7fffffff) < 0x00800000))) {
@@ -579,7 +544,7 @@
hx = denormal_addf(hx, hy);
else
hx = denormal_addf(hx, hy ^ 0x80000000);
- tsk->thread.fpu.hard.fp_regs[n] = hx;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
} else
return 0;
@@ -597,7 +562,7 @@
__unlazy_fpu(tsk, regs);
if (ieee_fpe_handler(regs)) {
- tsk->thread.fpu.hard.fpscr &=
+ tsk->thread.xstate->hardfpu.fpscr &=
~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
grab_fpu(regs);
restore_fpu(tsk);
@@ -607,33 +572,3 @@
force_sig(SIGFPE, tsk);
}
-
-void fpu_state_restore(struct pt_regs *regs)
-{
- struct task_struct *tsk = current;
-
- grab_fpu(regs);
- if (unlikely(!user_mode(regs))) {
- printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
- BUG();
- return;
- }
-
- if (likely(used_math())) {
- /* Using the FPU again. */
- restore_fpu(tsk);
- } else {
- /* First time FPU user. */
- fpu_init();
- set_used_math();
- }
- task_thread_info(tsk)->status |= TS_USEDFPU;
- tsk->fpu_counter++;
-}
-
-BUILD_TRAP_HANDLER(fpu_state_restore)
-{
- TRAP_HANDLER_DECL;
-
- fpu_state_restore(regs);
-}
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index e97857a..447482d 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -85,14 +85,14 @@
"fmov.s fr1, @-%0\n\t"
"fmov.s fr0, @-%0\n\t"
"lds %3, fpscr\n\t":"=r" (dummy)
- :"0"((char *)(&tsk->thread.fpu.hard.status)),
+ :"0"((char *)(&tsk->thread.xstate->hardfpu.status)),
"r"(FPSCR_RCHG), "r"(FPSCR_INIT)
:"memory");
disable_fpu();
}
-static void restore_fpu(struct task_struct *tsk)
+void restore_fpu(struct task_struct *tsk)
{
unsigned long dummy;
@@ -135,62 +135,11 @@
"lds.l @%0+, fpscr\n\t"
"lds.l @%0+, fpul\n\t"
:"=r" (dummy)
- :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
+ :"0" (tsk->thread.xstate), "r" (FPSCR_RCHG)
:"memory");
disable_fpu();
}
-/*
- * Load the FPU with signalling NANS. This bit pattern we're using
- * has the property that no matter wether considered as single or as
- * double precision represents signaling NANS.
- */
-
-static void fpu_init(void)
-{
- enable_fpu();
- asm volatile ( "lds %0, fpul\n\t"
- "lds %1, fpscr\n\t"
- "fsts fpul, fr0\n\t"
- "fsts fpul, fr1\n\t"
- "fsts fpul, fr2\n\t"
- "fsts fpul, fr3\n\t"
- "fsts fpul, fr4\n\t"
- "fsts fpul, fr5\n\t"
- "fsts fpul, fr6\n\t"
- "fsts fpul, fr7\n\t"
- "fsts fpul, fr8\n\t"
- "fsts fpul, fr9\n\t"
- "fsts fpul, fr10\n\t"
- "fsts fpul, fr11\n\t"
- "fsts fpul, fr12\n\t"
- "fsts fpul, fr13\n\t"
- "fsts fpul, fr14\n\t"
- "fsts fpul, fr15\n\t"
- "frchg\n\t"
- "fsts fpul, fr0\n\t"
- "fsts fpul, fr1\n\t"
- "fsts fpul, fr2\n\t"
- "fsts fpul, fr3\n\t"
- "fsts fpul, fr4\n\t"
- "fsts fpul, fr5\n\t"
- "fsts fpul, fr6\n\t"
- "fsts fpul, fr7\n\t"
- "fsts fpul, fr8\n\t"
- "fsts fpul, fr9\n\t"
- "fsts fpul, fr10\n\t"
- "fsts fpul, fr11\n\t"
- "fsts fpul, fr12\n\t"
- "fsts fpul, fr13\n\t"
- "fsts fpul, fr14\n\t"
- "fsts fpul, fr15\n\t"
- "frchg\n\t"
- "lds %2, fpscr\n\t"
- : /* no output */
- :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
- disable_fpu();
-}
-
/**
* denormal_to_double - Given denormalized float number,
* store double float
@@ -282,9 +231,9 @@
/* fcnvsd */
struct task_struct *tsk = current;
- if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
+ if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR))
/* FPU error */
- denormal_to_double(&tsk->thread.fpu.hard,
+ denormal_to_double(&tsk->thread.xstate->hardfpu,
(finsn >> 8) & 0xf);
else
return 0;
@@ -300,9 +249,9 @@
n = (finsn >> 8) & 0xf;
m = (finsn >> 4) & 0xf;
- hx = tsk->thread.fpu.hard.fp_regs[n];
- hy = tsk->thread.fpu.hard.fp_regs[m];
- fpscr = tsk->thread.fpu.hard.fpscr;
+ hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+ hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+ fpscr = tsk->thread.xstate->hardfpu.fpscr;
prec = fpscr & FPSCR_DBL_PRECISION;
if ((fpscr & FPSCR_CAUSE_ERROR)
@@ -312,18 +261,18 @@
/* FPU error because of denormal (doubles) */
llx = ((long long)hx << 32)
- | tsk->thread.fpu.hard.fp_regs[n + 1];
+ | tsk->thread.xstate->hardfpu.fp_regs[n + 1];
lly = ((long long)hy << 32)
- | tsk->thread.fpu.hard.fp_regs[m + 1];
+ | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
llx = float64_mul(llx, lly);
- tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
- tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+ tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff;
} else if ((fpscr & FPSCR_CAUSE_ERROR)
&& (!prec && ((hx & 0x7fffffff) < 0x00800000
|| (hy & 0x7fffffff) < 0x00800000))) {
/* FPU error because of denormal (floats) */
hx = float32_mul(hx, hy);
- tsk->thread.fpu.hard.fp_regs[n] = hx;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
} else
return 0;
@@ -338,9 +287,9 @@
n = (finsn >> 8) & 0xf;
m = (finsn >> 4) & 0xf;
- hx = tsk->thread.fpu.hard.fp_regs[n];
- hy = tsk->thread.fpu.hard.fp_regs[m];
- fpscr = tsk->thread.fpu.hard.fpscr;
+ hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+ hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+ fpscr = tsk->thread.xstate->hardfpu.fpscr;
prec = fpscr & FPSCR_DBL_PRECISION;
if ((fpscr & FPSCR_CAUSE_ERROR)
@@ -350,15 +299,15 @@
/* FPU error because of denormal (doubles) */
llx = ((long long)hx << 32)
- | tsk->thread.fpu.hard.fp_regs[n + 1];
+ | tsk->thread.xstate->hardfpu.fp_regs[n + 1];
lly = ((long long)hy << 32)
- | tsk->thread.fpu.hard.fp_regs[m + 1];
+ | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
if ((finsn & 0xf00f) == 0xf000)
llx = float64_add(llx, lly);
else
llx = float64_sub(llx, lly);
- tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
- tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+ tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff;
} else if ((fpscr & FPSCR_CAUSE_ERROR)
&& (!prec && ((hx & 0x7fffffff) < 0x00800000
|| (hy & 0x7fffffff) < 0x00800000))) {
@@ -367,7 +316,7 @@
hx = float32_add(hx, hy);
else
hx = float32_sub(hx, hy);
- tsk->thread.fpu.hard.fp_regs[n] = hx;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
} else
return 0;
@@ -382,9 +331,9 @@
n = (finsn >> 8) & 0xf;
m = (finsn >> 4) & 0xf;
- hx = tsk->thread.fpu.hard.fp_regs[n];
- hy = tsk->thread.fpu.hard.fp_regs[m];
- fpscr = tsk->thread.fpu.hard.fpscr;
+ hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+ hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+ fpscr = tsk->thread.xstate->hardfpu.fpscr;
prec = fpscr & FPSCR_DBL_PRECISION;
if ((fpscr & FPSCR_CAUSE_ERROR)
@@ -394,20 +343,20 @@
/* FPU error because of denormal (doubles) */
llx = ((long long)hx << 32)
- | tsk->thread.fpu.hard.fp_regs[n + 1];
+ | tsk->thread.xstate->hardfpu.fp_regs[n + 1];
lly = ((long long)hy << 32)
- | tsk->thread.fpu.hard.fp_regs[m + 1];
+ | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
llx = float64_div(llx, lly);
- tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
- tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+ tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff;
} else if ((fpscr & FPSCR_CAUSE_ERROR)
&& (!prec && ((hx & 0x7fffffff) < 0x00800000
|| (hy & 0x7fffffff) < 0x00800000))) {
/* FPU error because of denormal (floats) */
hx = float32_div(hx, hy);
- tsk->thread.fpu.hard.fp_regs[n] = hx;
+ tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
} else
return 0;
@@ -420,17 +369,17 @@
unsigned int hx;
m = (finsn >> 8) & 0x7;
- hx = tsk->thread.fpu.hard.fp_regs[m];
+ hx = tsk->thread.xstate->hardfpu.fp_regs[m];
- if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
+ if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR)
&& ((hx & 0x7fffffff) < 0x00100000)) {
/* subnormal double to float conversion */
long long llx;
- llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
- | tsk->thread.fpu.hard.fp_regs[m + 1];
+ llx = ((long long)tsk->thread.xstate->hardfpu.fp_regs[m] << 32)
+ | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
- tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
+ tsk->thread.xstate->hardfpu.fpul = float64_to_float32(llx);
} else
return 0;
@@ -449,7 +398,7 @@
int float_rounding_mode(void)
{
struct task_struct *tsk = current;
- int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
+ int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.xstate->hardfpu.fpscr);
return roundingMode;
}
@@ -461,16 +410,16 @@
__unlazy_fpu(tsk, regs);
fpu_exception_flags = 0;
if (ieee_fpe_handler(regs)) {
- tsk->thread.fpu.hard.fpscr &=
+ tsk->thread.xstate->hardfpu.fpscr &=
~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
- tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
+ tsk->thread.xstate->hardfpu.fpscr |= fpu_exception_flags;
/* Set the FPSCR flag as well as cause bits - simply
* replicate the cause */
- tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
+ tsk->thread.xstate->hardfpu.fpscr |= (fpu_exception_flags >> 10);
grab_fpu(regs);
restore_fpu(tsk);
task_thread_info(tsk)->status |= TS_USEDFPU;
- if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
+ if ((((tsk->thread.xstate->hardfpu.fpscr & FPSCR_ENABLE_MASK) >> 7) &
(fpu_exception_flags >> 2)) == 0) {
return;
}
@@ -478,33 +427,3 @@
force_sig(SIGFPE, tsk);
}
-
-void fpu_state_restore(struct pt_regs *regs)
-{
- struct task_struct *tsk = current;
-
- grab_fpu(regs);
- if (unlikely(!user_mode(regs))) {
- printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
- BUG();
- return;
- }
-
- if (likely(used_math())) {
- /* Using the FPU again. */
- restore_fpu(tsk);
- } else {
- /* First time FPU user. */
- fpu_init();
- set_used_math();
- }
- task_thread_info(tsk)->status |= TS_USEDFPU;
- tsk->fpu_counter++;
-}
-
-BUILD_TRAP_HANDLER(fpu_state_restore)
-{
- TRAP_HANDLER_DECL;
-
- fpu_state_restore(regs);
-}
diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c
index 077e06e..81add9b 100644
--- a/arch/sh/kernel/process.c
+++ b/arch/sh/kernel/process.c
@@ -2,6 +2,32 @@
#include <linux/kernel.h>
#include <linux/sched.h>
+struct kmem_cache *task_xstate_cachep = NULL;
+unsigned int xstate_size;
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+ *dst = *src;
+
+ if (src->thread.xstate) {
+ dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
+ GFP_KERNEL);
+ if (!dst->thread.xstate)
+ return -ENOMEM;
+ memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
+ }
+
+ return 0;
+}
+
+void free_thread_xstate(struct task_struct *tsk)
+{
+ if (tsk->thread.xstate) {
+ kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
+ tsk->thread.xstate = NULL;
+ }
+}
+
#if THREAD_SHIFT < PAGE_SHIFT
static struct kmem_cache *thread_info_cache;
@@ -20,6 +46,7 @@
void free_thread_info(struct thread_info *ti)
{
+ free_thread_xstate(ti->task);
kmem_cache_free(thread_info_cache, ti);
}
@@ -41,6 +68,33 @@
void free_thread_info(struct thread_info *ti)
{
+ free_thread_xstate(ti->task);
free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
#endif /* THREAD_SHIFT < PAGE_SHIFT */
+
+void arch_task_cache_init(void)
+{
+ if (!xstate_size)
+ return;
+
+ task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size,
+ __alignof__(union thread_xstate),
+ SLAB_PANIC | SLAB_NOTRACK, NULL);
+}
+
+#ifdef CONFIG_SH_FPU_EMU
+# define HAVE_SOFTFP 1
+#else
+# define HAVE_SOFTFP 0
+#endif
+
+void init_thread_xstate(void)
+{
+ if (boot_cpu_data.flags & CPU_HAS_FPU)
+ xstate_size = sizeof(struct sh_fpu_hard_struct);
+ else if (HAVE_SOFTFP)
+ xstate_size = sizeof(struct sh_fpu_soft_struct);
+ else
+ xstate_size = 0;
+}
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index c436140..03de657 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -156,6 +156,8 @@
regs->sr = SR_FD;
regs->pc = new_pc;
regs->regs[15] = new_sp;
+
+ free_thread_xstate(current);
}
EXPORT_SYMBOL(start_thread);
@@ -316,7 +318,7 @@
/* we're going to use this soon, after a few expensive things */
if (next->fpu_counter > 5)
- prefetch(&next_t->fpu.hard);
+ prefetch(next_t->xstate);
#ifdef CONFIG_MMU
/*
@@ -353,7 +355,7 @@
* chances of needing FPU soon are obviously high now
*/
if (next->fpu_counter > 5)
- fpu_state_restore(task_pt_regs(next));
+ __fpu_state_restore();
return prev;
}
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 9be35f3..be9b5dc 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -163,10 +163,10 @@
if ((boot_cpu_data.flags & CPU_HAS_FPU))
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.hard, 0, -1);
+ &target->thread.xstate->hardfpu, 0, -1);
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.soft, 0, -1);
+ &target->thread.xstate->softfpu, 0, -1);
}
static int fpregs_set(struct task_struct *target,
@@ -184,10 +184,10 @@
if ((boot_cpu_data.flags & CPU_HAS_FPU))
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.hard, 0, -1);
+ &target->thread.xstate->hardfpu, 0, -1);
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.soft, 0, -1);
+ &target->thread.xstate->softfpu, 0, -1);
}
static int fpregs_active(struct task_struct *target,
@@ -333,7 +333,7 @@
else
tmp = 0;
} else
- tmp = ((long *)&child->thread.fpu)
+ tmp = ((long *)child->thread.xstate)
[(addr - (long)&dummy->fpu) >> 2];
} else if (addr == (long) &dummy->u_fpvalid)
tmp = !!tsk_used_math(child);
@@ -362,7 +362,7 @@
else if (addr >= (long) &dummy->fpu &&
addr < (long) &dummy->u_fpvalid) {
set_stopped_child_used_math(child);
- ((long *)&child->thread.fpu)
+ ((long *)child->thread.xstate)
[(addr - (long)&dummy->fpu) >> 2] = data;
ret = 0;
} else if (addr == (long) &dummy->u_fpvalid) {
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 12815ce..6a7cce7 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -150,7 +150,7 @@
return 0;
set_used_math();
- return __copy_from_user(&tsk->thread.fpu.hard, &sc->sc_fpregs[0],
+ return __copy_from_user(&tsk->thread.xstate->hardfpu, &sc->sc_fpregs[0],
sizeof(long)*(16*2+2));
}
@@ -175,7 +175,7 @@
clear_used_math();
unlazy_fpu(tsk, regs);
- return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.fpu.hard,
+ return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.xstate->hardfpu,
sizeof(long)*(16*2+2));
}
#endif /* CONFIG_SH_FPU */
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index d6c15ca..1fcdb12 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -471,10 +471,10 @@
* denormal_to_double - Given denormalized float number,
* store double float
*
- * @fpu: Pointer to sh_fpu_hard structure
+ * @fpu: Pointer to sh_fpu_soft structure
* @n: Index to FP register
*/
-static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
+static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n)
{
unsigned long du, dl;
unsigned long x = fpu->fpul;
@@ -552,11 +552,11 @@
if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
struct task_struct *tsk = current;
- if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) {
+ if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) {
/* FPU error */
- denormal_to_double (&tsk->thread.fpu.hard,
+ denormal_to_double (&tsk->thread.xstate->softfpu,
(finsn >> 8) & 0xf);
- tsk->thread.fpu.hard.fpscr &=
+ tsk->thread.xstate->softfpu.fpscr &=
~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
task_thread_info(tsk)->status |= TS_USEDFPU;
} else {
@@ -617,7 +617,7 @@
int do_fpu_inst(unsigned short inst, struct pt_regs *regs)
{
struct task_struct *tsk = current;
- struct sh_fpu_soft_struct *fpu = &(tsk->thread.fpu.soft);
+ struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu);
if (!(task_thread_info(tsk)->status & TS_USEDFPU)) {
/* initialize once. */