[PATCH] vdso: randomize the i386 vDSO by moving it into a vma
Move the i386 VDSO down into a vma and thus randomize it.
Besides the security implications, this feature also helps debuggers, which
can COW a vma-backed VDSO just like a normal DSO and can thus do
single-stepping and other debugging features.
It's good for hypervisors (Xen, VMWare) too, which typically live in the same
high-mapped address space as the VDSO, hence whenever the VDSO is used, they
get lots of guest pagefaults and have to fix such guest accesses up - which
slows things down instead of speeding things up (the primary purpose of the
VDSO).
There's a new CONFIG_COMPAT_VDSO (default=y) option, which provides support
for older glibcs that still rely on a prelinked high-mapped VDSO. Newer
distributions (using glibc 2.3.3 or later) can turn this option off. Turning
it off is also recommended for security reasons: attackers cannot use the
predictable high-mapped VDSO page as syscall trampoline anymore.
There is a new vdso=[0|1] boot option as well, and a runtime
/proc/sys/vm/vdso_enabled sysctl switch, that allows the VDSO to be turned
on/off.
(This version of the VDSO-randomization patch also has working ELF
coredumping, the previous patch crashed in the coredumping code.)
This code is a combined work of the exec-shield VDSO randomization
code and Gerd Hoffmann's hypervisor-centric VDSO patch. Rusty Russell
started this patch and i completed it.
[akpm@osdl.org: cleanups]
[akpm@osdl.org: compile fix]
[akpm@osdl.org: compile fix 2]
[akpm@osdl.org: compile fix 3]
[akpm@osdl.org: revernt MAXMEM change]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Cc: Gerd Hoffmann <kraxel@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 4153d80..1eac92c 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -10,6 +10,7 @@
#include <asm/processor.h>
#include <asm/system.h> /* for savesegment */
#include <asm/auxvec.h>
+#include <asm/desc.h>
#include <linux/utsname.h>
@@ -129,15 +130,41 @@
#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
-#define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL))
-#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE)
-#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall)
+#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO))
+#define VDSO_BASE ((unsigned long)current->mm->context.vdso)
+
+#ifdef CONFIG_COMPAT_VDSO
+# define VDSO_COMPAT_BASE VDSO_HIGH_BASE
+# define VDSO_PRELINK VDSO_HIGH_BASE
+#else
+# define VDSO_COMPAT_BASE VDSO_BASE
+# define VDSO_PRELINK 0
+#endif
+
+#define VDSO_COMPAT_SYM(x) \
+ (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK)
+
+#define VDSO_SYM(x) \
+ (VDSO_BASE + (unsigned long)(x) - VDSO_PRELINK)
+
+#define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE)
+#define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE)
+
extern void __kernel_vsyscall;
+#define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall)
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack);
+
+extern unsigned int vdso_enabled;
+
#define ARCH_DLINFO \
-do { \
- NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \
- NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \
+do if (vdso_enabled) { \
+ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \
} while (0)
/*
@@ -148,15 +175,15 @@
* Dumping its extra ELF program headers includes all the other information
* a debugger needs to easily find how the vsyscall DSO was being used.
*/
-#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum)
+#define ELF_CORE_EXTRA_PHDRS (VDSO_HIGH_EHDR->e_phnum)
#define ELF_CORE_WRITE_EXTRA_PHDRS \
do { \
const struct elf_phdr *const vsyscall_phdrs = \
- (const struct elf_phdr *) (VSYSCALL_BASE \
- + VSYSCALL_EHDR->e_phoff); \
+ (const struct elf_phdr *) (VDSO_HIGH_BASE \
+ + VDSO_HIGH_EHDR->e_phoff); \
int i; \
Elf32_Off ofs = 0; \
- for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
+ for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) { \
struct elf_phdr phdr = vsyscall_phdrs[i]; \
if (phdr.p_type == PT_LOAD) { \
BUG_ON(ofs != 0); \
@@ -174,10 +201,10 @@
#define ELF_CORE_WRITE_EXTRA_DATA \
do { \
const struct elf_phdr *const vsyscall_phdrs = \
- (const struct elf_phdr *) (VSYSCALL_BASE \
- + VSYSCALL_EHDR->e_phoff); \
+ (const struct elf_phdr *) (VDSO_HIGH_BASE \
+ + VDSO_HIGH_EHDR->e_phoff); \
int i; \
- for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
+ for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) { \
if (vsyscall_phdrs[i].p_type == PT_LOAD) \
DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \
PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \
diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h
index f7e068f..a48cc3f 100644
--- a/include/asm-i386/fixmap.h
+++ b/include/asm-i386/fixmap.h
@@ -51,7 +51,7 @@
*/
enum fixed_addresses {
FIX_HOLE,
- FIX_VSYSCALL,
+ FIX_VDSO,
#ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
#endif
@@ -115,14 +115,6 @@
#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-/*
- * This is the range that is readable by user mode, and things
- * acting like user mode such as get_user_pages.
- */
-#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL))
-#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
-
-
extern void __this_fixmap_does_not_exist(void);
/*
diff --git a/include/asm-i386/mmu.h b/include/asm-i386/mmu.h
index f431a0b..8358dd3 100644
--- a/include/asm-i386/mmu.h
+++ b/include/asm-i386/mmu.h
@@ -12,6 +12,7 @@
int size;
struct semaphore sem;
void *ldt;
+ void *vdso;
} mm_context_t;
#endif
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index e3a552f..f5bf544 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -96,6 +96,8 @@
#ifndef __ASSEMBLY__
+struct vm_area_struct;
+
/*
* This much address space is reserved for vmalloc() and iomap()
* as well as fixmap mappings.
@@ -139,6 +141,7 @@
#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
+#define __HAVE_ARCH_GATE_AREA 1
#endif /* __KERNEL__ */
#endif /* _I386_PAGE_H */
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index ff1e2b1..2833fa2 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -37,6 +37,7 @@
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
*/
+ void *sysenter_return;
struct restart_block restart_block;
unsigned long previous_esp; /* ESP of the previous stack in case
diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h
index d480f2e..69f0f1d 100644
--- a/include/asm-i386/unwind.h
+++ b/include/asm-i386/unwind.h
@@ -78,8 +78,8 @@
return user_mode_vm(&info->regs);
#else
return info->regs.eip < PAGE_OFFSET
- || (info->regs.eip >= __fix_to_virt(FIX_VSYSCALL)
- && info->regs.eip < __fix_to_virt(FIX_VSYSCALL) + PAGE_SIZE)
+ || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
+ && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
|| info->regs.esp < PAGE_OFFSET;
#endif
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a929ea1..ff1fa87 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1065,5 +1065,7 @@
extern int randomize_va_space;
#endif
+const char *arch_vma_name(struct vm_area_struct *vma);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 349ef90..bee12a7 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -189,6 +189,7 @@
VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
+ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
};