x86: make percpu symbols zerobased on SMP
[ Based on original patch from Christoph Lameter and Mike Travis. ]
This patch makes percpu symbols zerobased on x86_64 SMP by adding
PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on
the percpu output section and using it in vmlinux_64.lds.S. A new
PHDR is added as existing ones cannot contain sections near address
zero. PERCPU_VADDR() also adds a new symbol __per_cpu_load which
always points to the vaddr of the loaded percpu data.init region.
The following adjustments have been made to accommodate the address
change.
* code to locate percpu gdt_page in head_64.S is updated to add the
load address to the gdt_page offset.
* __per_cpu_load is used in places where access to the init data area
is necessary.
* pda->data_offset is initialized soon after C code is entered as zero
value doesn't work anymore.
This patch is mostly taken from Mike Travis' "x86_64: Base percpu
variables at zero" patch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d4..7ee0363 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -204,6 +204,23 @@
pushq $0
popfq
+#ifdef CONFIG_SMP
+ /*
+ * early_gdt_descr_base should point to the gdt_page in the static
+ * percpu init data area. Computing this requires the sum of two
+ * symbols - __per_cpu_load and per_cpu__gdt_page. As the linker
+ * can't do such a relocation, do it by hand. As early_gdt_descr is
+ * manipulated by C code for secondary CPUs, this should be done only
+ * once for the boot CPU when early_gdt_descr_base contains zero.
+ */
+ movq early_gdt_descr_base(%rip), %rax
+ testq %rax, %rax
+ jnz 1f /* non-zero: base already set, leave it alone */
+ movq $__per_cpu_load, %rax
+ addq $per_cpu__gdt_page, %rax /* rax = __per_cpu_load + per_cpu__gdt_page */
+ movq %rax, early_gdt_descr_base(%rip)
+1:
+#endif
/*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
@@ -401,7 +418,12 @@
.globl early_gdt_descr
early_gdt_descr:
.word GDT_ENTRIES*8-1
- .quad per_cpu__gdt_page
+#ifdef CONFIG_SMP
+early_gdt_descr_base:
+ .quad 0x0000000000000000 /* zero until startup code stores __per_cpu_load + per_cpu__gdt_page */
+#else
+ .quad per_cpu__gdt_page
+#endif
ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */