[PATCH] i386: Convert i386 PDA code to use %fs
Convert the PDA code to use %fs rather than %gs as the segment for
per-processor data. This is because some processors show a small but
measurable performance gain for reloading a NULL segment selector (as %fs
generally is in user-space) versus a non-NULL one (as %gs generally is).
On modern processors the difference is very small, perhaps undetectable.
Some old AMD "K6 3D+" processors are noticeably slower when %fs is used
rather than %gs; I have no idea why this might be, but I think they're
sufficiently rare that it doesn't matter much.
This patch also fixes the math emulator, which had not been adjusted to
match the changed struct pt_regs.
[frederik.deweerdt@gmail.com: fixit with gdb]
[mingo@elte.hu: Fix KVM too]
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Ian Campbell <Ian.Campbell@XenSource.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Zachary Amsden <zach@vmware.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Frederik Deweerdt <frederik.deweerdt@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index c641056..23ae198 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -308,8 +308,8 @@
regs->eax,regs->ebx,regs->ecx,regs->edx);
printk("ESI: %08lx EDI: %08lx EBP: %08lx",
regs->esi, regs->edi, regs->ebp);
- printk(" DS: %04x ES: %04x GS: %04x\n",
- 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
+ printk(" DS: %04x ES: %04x FS: %04x\n",
+ 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
cr0 = read_cr0();
cr2 = read_cr2();
@@ -340,7 +340,7 @@
regs.xds = __USER_DS;
regs.xes = __USER_DS;
- regs.xgs = __KERNEL_PDA;
+ regs.xfs = __KERNEL_PDA;
regs.orig_eax = -1;
regs.eip = (unsigned long) kernel_thread_helper;
regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -425,7 +425,7 @@
p->thread.eip = (unsigned long) ret_from_fork;
- savesegment(fs,p->thread.fs);
+ savesegment(gs,p->thread.gs);
tsk = current;
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -501,8 +501,8 @@
dump->regs.eax = regs->eax;
dump->regs.ds = regs->xds;
dump->regs.es = regs->xes;
- savesegment(fs,dump->regs.fs);
- dump->regs.gs = regs->xgs;
+ dump->regs.fs = regs->xfs;
+ savesegment(gs,dump->regs.gs);
dump->regs.orig_eax = regs->orig_eax;
dump->regs.eip = regs->eip;
dump->regs.cs = regs->xcs;
@@ -653,7 +653,7 @@
load_esp0(tss, next);
/*
- * Save away %fs. No need to save %gs, as it was saved on the
+ * Save away %gs. No need to save %fs, as it was saved on the
* stack on entry. No need to save %es and %ds, as those are
* always kernel segments while inside the kernel. Doing this
* before setting the new TLS descriptors avoids the situation
@@ -662,7 +662,7 @@
* used %fs or %gs (it does not today), or if the kernel is
* running inside of a hypervisor layer.
*/
- savesegment(fs, prev->fs);
+ savesegment(gs, prev->gs);
/*
* Load the per-thread Thread-Local Storage descriptor.
@@ -670,12 +670,10 @@
load_TLS(next, cpu);
/*
- * Restore %fs if needed.
- *
- * Glibc normally makes %fs be zero.
+ * Restore %gs if needed (which is common)
*/
- if (unlikely(prev->fs | next->fs))
- loadsegment(fs, next->fs);
+ if (prev->gs | next->gs)
+ loadsegment(gs, next->gs);
write_pda(pcurrent, next_p);