Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu changes from Ingo Molnar:
"Various x86 FPU handling cleanups, refactorings and fixes (Borislav
Petkov, Oleg Nesterov, Rik van Riel)"
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
x86/fpu: Kill eager_fpu_init_bp()
x86/fpu: Don't allocate fpu->state for swapper/0
x86/fpu: Rename drop_init_fpu() to fpu_reset_state()
x86/fpu: Fold __drop_fpu() into its sole user
x86/fpu: Don't abuse drop_init_fpu() in flush_thread()
x86/fpu: Use restore_init_xstate() instead of math_state_restore() on kthread exec
x86/fpu: Introduce restore_init_xstate()
x86/fpu: Document user_fpu_begin()
x86/fpu: Factor out memset(xstate, 0) in fpu_finit() paths
x86/fpu: Change xstateregs_get()/set() to use ->xsave.i387 rather than ->fxsave
x86/fpu: Don't abuse FPU in kernel threads if use_eager_fpu()
x86/fpu: Always allow FPU in interrupt if use_eager_fpu()
x86/fpu: __kernel_fpu_begin() should clear fpu_owner_task even if use_eager_fpu()
x86/fpu: Also check fpu_lazy_restore() when use_eager_fpu()
x86/fpu: Use task_disable_lazy_fpu_restore() helper
x86/fpu: Use an explicit if/else in switch_fpu_prepare()
x86/fpu: Introduce task_disable_lazy_fpu_restore() helper
x86/fpu: Move lazy restore functions up a few lines
x86/fpu: Change math_error() to use unlazy_fpu(), kill (now) unused save_init_fpu()
x86/fpu: Don't do __thread_fpu_end() if use_eager_fpu()
...
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 72ba21a..da5e967 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,34 @@
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+ per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+ tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+ return new == this_cpu_read_stable(fpu_owner_task) &&
+ cpu == new->thread.fpu.last_cpu;
+}
+
static inline int is_ia32_compat_frame(void)
{
return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@
static inline void fx_finit(struct i387_fxsave_struct *fx)
{
- memset(fx, 0, xstate_size);
fx->cwd = 0x37f;
fx->mxcsr = MXCSR_DEFAULT;
}
@@ -351,17 +378,6 @@
__thread_set_has_fpu(tsk);
}
-static inline void __drop_fpu(struct task_struct *tsk)
-{
- if (__thread_has_fpu(tsk)) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- __thread_fpu_end(tsk);
- }
-}
-
static inline void drop_fpu(struct task_struct *tsk)
{
/*
@@ -369,21 +385,37 @@
*/
preempt_disable();
tsk->thread.fpu_counter = 0;
- __drop_fpu(tsk);
+
+ if (__thread_has_fpu(tsk)) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ __thread_fpu_end(tsk);
+ }
+
clear_stopped_child_used_math(tsk);
preempt_enable();
}
-static inline void drop_init_fpu(struct task_struct *tsk)
+static inline void restore_init_xstate(void)
+{
+ if (use_xsave())
+ xrstor_state(init_xstate_buf, -1);
+ else
+ fxrstor_checking(&init_xstate_buf->i387);
+}
+
+/*
+ * Reset the FPU state in the eager case and drop it in the lazy case (later use
+ * will reinit it).
+ */
+static inline void fpu_reset_state(struct task_struct *tsk)
{
if (!use_eager_fpu())
drop_fpu(tsk);
- else {
- if (use_xsave())
- xrstor_state(init_xstate_buf, -1);
- else
- fxrstor_checking(&init_xstate_buf->i387);
- }
+ else
+ restore_init_xstate();
}
/*
@@ -400,24 +432,6 @@
*/
typedef struct { int preload; } fpu_switch_t;
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
- per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
- return new == this_cpu_read_stable(fpu_owner_task) &&
- cpu == new->thread.fpu.last_cpu;
-}
-
static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
{
fpu_switch_t fpu;
@@ -426,13 +440,17 @@
* If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math.
*/
- fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
- new->thread.fpu_counter > 5);
+ fpu.preload = tsk_used_math(new) &&
+ (use_eager_fpu() || new->thread.fpu_counter > 5);
+
if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
- cpu = ~0;
- old->thread.fpu.last_cpu = cpu;
- old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
+ task_disable_lazy_fpu_restore(old);
+ else
+ old->thread.fpu.last_cpu = cpu;
+
+ /* But leave fpu_owner_task! */
+ old->thread.fpu.has_fpu = 0;
/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
@@ -443,10 +461,10 @@
stts();
} else {
old->thread.fpu_counter = 0;
- old->thread.fpu.last_cpu = ~0;
+ task_disable_lazy_fpu_restore(old);
if (fpu.preload) {
new->thread.fpu_counter++;
- if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+ if (fpu_lazy_restore(new, cpu))
fpu.preload = 0;
else
prefetch(new->thread.fpu.state);
@@ -466,7 +484,7 @@
{
if (fpu.preload) {
if (unlikely(restore_fpu_checking(new)))
- drop_init_fpu(new);
+ fpu_reset_state(new);
}
}
@@ -495,10 +513,12 @@
}
/*
- * Need to be preemption-safe.
+ * Needs to be preemption-safe.
*
* NOTE! user_fpu_begin() must be used only immediately before restoring
- * it. This function does not do any save/restore on their own.
+ * the saved state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception,
+ * the task can lose the FPU right after preempt_enable().
*/
static inline void user_fpu_begin(void)
{
@@ -520,24 +540,6 @@
}
/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
- WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
- if (use_eager_fpu()) {
- __save_fpu(tsk);
- return;
- }
-
- preempt_disable();
- __save_init_fpu(tsk);
- __thread_fpu_end(tsk);
- preempt_enable();
-}
-
-/*
* i387 state interaction
*/
static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 29c740d..367f39d 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -42,8 +42,8 @@
* be set (so that the clts/stts pair does nothing that is
* visible in the interrupted kernel thread).
*
- * Except for the eagerfpu case when we return 1 unless we've already
- * been eager and saved the state in kernel_fpu_begin().
+ * Except for the eagerfpu case when we return true; in the likely case
+ * the thread has FPU but we are not going to set/clear TS.
*/
static inline bool interrupted_kernel_fpu_idle(void)
{
@@ -51,7 +51,7 @@
return false;
if (use_eager_fpu())
- return __thread_has_fpu(current);
+ return true;
return !__thread_has_fpu(current) &&
(read_cr0() & X86_CR0_TS);
@@ -94,9 +94,10 @@
if (__thread_has_fpu(me)) {
__save_init_fpu(me);
- } else if (!use_eager_fpu()) {
+ } else {
this_cpu_write(fpu_owner_task, NULL);
- clts();
+ if (!use_eager_fpu())
+ clts();
}
}
EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -107,7 +108,7 @@
if (__thread_has_fpu(me)) {
if (WARN_ON(restore_fpu_checking(me)))
- drop_init_fpu(me);
+ fpu_reset_state(me);
} else if (!use_eager_fpu()) {
stts();
}
@@ -120,10 +121,13 @@
{
preempt_disable();
if (__thread_has_fpu(tsk)) {
- __save_init_fpu(tsk);
- __thread_fpu_end(tsk);
- } else
- tsk->thread.fpu_counter = 0;
+ if (use_eager_fpu()) {
+ __save_fpu(tsk);
+ } else {
+ __save_init_fpu(tsk);
+ __thread_fpu_end(tsk);
+ }
+ }
preempt_enable();
}
EXPORT_SYMBOL(unlazy_fpu);
@@ -221,11 +225,12 @@
return;
}
+ memset(fpu->state, 0, xstate_size);
+
if (cpu_has_fxsr) {
fx_finit(&fpu->state->fxsave);
} else {
struct i387_fsave_struct *fp = &fpu->state->fsave;
- memset(fp, 0, xstate_size);
fp->cwd = 0xffff037fu;
fp->swd = 0xffff0000u;
fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@
if (tsk_used_math(tsk)) {
if (cpu_has_fpu && tsk == current)
unlazy_fpu(tsk);
- tsk->thread.fpu.last_cpu = ~0;
+ task_disable_lazy_fpu_restore(tsk);
return 0;
}
@@ -336,6 +341,7 @@
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
+ struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
int ret;
if (!cpu_has_xsave)
@@ -350,14 +356,12 @@
* memory layout in the thread struct, so that we can copy the entire
* xstateregs to the user using one user_regset_copyout().
*/
- memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
- xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-
+ memcpy(&xsave->i387.sw_reserved,
+ xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
/*
* Copy the xstate memory layout.
*/
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->xsave, 0, -1);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
return ret;
}
@@ -365,8 +369,8 @@
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
+ struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
int ret;
- struct xsave_hdr_struct *xsave_hdr;
if (!cpu_has_xsave)
return -ENODEV;
@@ -375,22 +379,16 @@
if (ret)
return ret;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->xsave, 0, -1);
-
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
/*
* mxcsr reserved bits must be masked to zero for security reasons.
*/
- target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
- xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
-
- xsave_hdr->xstate_bv &= pcntxt_mask;
+ xsave->i387.mxcsr &= mxcsr_feature_mask;
+ xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
/*
* These bits must be zero.
*/
- memset(xsave_hdr->reserved, 0, 48);
-
+ memset(&xsave->xsave_hdr.reserved, 0, 48);
return ret;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0c8992d..8213da6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -89,8 +89,8 @@
dst->thread.fpu_counter = 0;
dst->thread.fpu.has_fpu = 0;
- dst->thread.fpu.last_cpu = ~0;
dst->thread.fpu.state = NULL;
+ task_disable_lazy_fpu_restore(dst);
if (tsk_used_math(src)) {
int err = fpu_alloc(&dst->thread.fpu);
if (err)
@@ -151,13 +151,18 @@
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- drop_init_fpu(tsk);
- /*
- * Free the FPU state for non xsave platforms. They get reallocated
- * lazily at the first use.
- */
- if (!use_eager_fpu())
+
+ if (!use_eager_fpu()) {
+ /* FPU state will be reallocated lazily at the first use. */
+ drop_fpu(tsk);
free_thread_xstate(tsk);
+ } else if (!used_math()) {
+ /* kthread execs. TODO: cleanup this horror. */
+ if (WARN_ON(init_fpu(tsk)))
+ force_sig(SIGKILL, tsk);
+ user_fpu_begin();
+ restore_init_xstate();
+ }
}
static void hard_disable_TSC(void)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 53cc408..3e58186 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -680,7 +680,7 @@
* Ensure the signal handler starts with the new fpu state.
*/
if (used_math())
- drop_init_fpu(current);
+ fpu_reset_state(current);
}
signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6751c5c..f4fa991 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -731,7 +731,7 @@
/*
* Save the info for the exception handler and clear the error.
*/
- save_init_fpu(task);
+ unlazy_fpu(task);
task->thread.trap_nr = trapnr;
task->thread.error_code = error_code;
info.si_signo = SIGFPE;
@@ -860,7 +860,7 @@
kernel_fpu_disable();
__thread_fpu_begin(tsk);
if (unlikely(restore_fpu_checking(tsk))) {
- drop_init_fpu(tsk);
+ fpu_reset_state(tsk);
force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
} else {
tsk->thread.fpu_counter++;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index cdc6cf9..87a815b 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -342,7 +342,7 @@
config_enabled(CONFIG_IA32_EMULATION));
if (!buf) {
- drop_init_fpu(tsk);
+ fpu_reset_state(tsk);
return 0;
}
@@ -416,7 +416,7 @@
*/
user_fpu_begin();
if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
- drop_init_fpu(tsk);
+ fpu_reset_state(tsk);
return -1;
}
}
@@ -678,19 +678,13 @@
this_func();
}
-static inline void __init eager_fpu_init_bp(void)
+/*
+ * setup_init_fpu_buf() is __init, and it is OK to call it here because
+ * init_xstate_buf is unset only once, during boot.
+ */
+void __init_refok eager_fpu_init(void)
{
- current->thread.fpu.state =
- alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
- if (!init_xstate_buf)
- setup_init_fpu_buf();
-}
-
-void eager_fpu_init(void)
-{
- static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
-
- clear_used_math();
+ WARN_ON(used_math());
current_thread_info()->status = 0;
if (eagerfpu == ENABLE)
@@ -701,21 +695,8 @@
return;
}
- if (boot_func) {
- boot_func();
- boot_func = NULL;
- }
-
- /*
- * This is same as math_state_restore(). But use_xsave() is
- * not yet patched to use math_state_restore().
- */
- init_fpu(current);
- __thread_fpu_begin(current);
- if (cpu_has_xsave)
- xrstor_state(init_xstate_buf, -1);
- else
- fxrstor_checking(&init_xstate_buf->i387);
+ if (!init_xstate_buf)
+ setup_init_fpu_buf();
}
/*