Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 1 | /* |
Dave Jones | 835c34a | 2007-10-12 21:10:53 -0400 | [diff] [blame] | 2 | * handle transition of Linux booting another kernel |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 3 | * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> |
| 4 | * |
| 5 | * This source code is licensed under the GNU General Public License, |
| 6 | * Version 2. See the file COPYING for more details. |
| 7 | */ |
| 8 | |
| 9 | #include <linux/mm.h> |
| 10 | #include <linux/kexec.h> |
| 11 | #include <linux/delay.h> |
Rusty Russell | 1a3f239 | 2006-09-26 10:52:32 +0200 | [diff] [blame] | 12 | #include <linux/init.h> |
Ken'ichi Ohmichi | fd59d23 | 2007-10-16 23:27:27 -0700 | [diff] [blame] | 13 | #include <linux/numa.h> |
Ingo Molnar | f43fdad | 2008-05-12 21:20:43 +0200 | [diff] [blame] | 14 | #include <linux/ftrace.h> |
Huang Ying | 3122c33 | 2008-08-15 00:40:26 -0700 | [diff] [blame] | 15 | #include <linux/suspend.h> |
Ingo Molnar | f43fdad | 2008-05-12 21:20:43 +0200 | [diff] [blame] | 16 | |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 17 | #include <asm/pgtable.h> |
| 18 | #include <asm/pgalloc.h> |
| 19 | #include <asm/tlbflush.h> |
| 20 | #include <asm/mmu_context.h> |
| 21 | #include <asm/io.h> |
| 22 | #include <asm/apic.h> |
| 23 | #include <asm/cpufeature.h> |
Eric W. Biederman | e7b47cc | 2005-07-29 13:01:18 -0600 | [diff] [blame] | 24 | #include <asm/desc.h> |
Zachary Amsden | 4bb0d3e | 2005-09-03 15:56:36 -0700 | [diff] [blame] | 25 | #include <asm/system.h> |
Huang Ying | 3ab8352 | 2008-07-25 19:45:07 -0700 | [diff] [blame] | 26 | #include <asm/cacheflush.h> |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 27 | |
| 28 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) |
Magnus Damm | 3566561 | 2006-09-26 10:52:38 +0200 | [diff] [blame] | 29 | static u32 kexec_pgd[1024] PAGE_ALIGNED; |
| 30 | #ifdef CONFIG_X86_PAE |
| 31 | static u32 kexec_pmd0[1024] PAGE_ALIGNED; |
| 32 | static u32 kexec_pmd1[1024] PAGE_ALIGNED; |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 33 | #endif |
Magnus Damm | 3566561 | 2006-09-26 10:52:38 +0200 | [diff] [blame] | 34 | static u32 kexec_pte0[1024] PAGE_ALIGNED; |
| 35 | static u32 kexec_pte1[1024] PAGE_ALIGNED; |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 36 | |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 37 | static void set_idt(void *newidt, __u16 limit) |
| 38 | { |
Glauber de Oliveira Costa | 6b68f01 | 2008-01-30 13:31:12 +0100 | [diff] [blame] | 39 | struct desc_ptr curidt; |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 40 | |
| 41 | /* ia32 supports unaliged loads & stores */ |
Eric W. Biederman | e7b47cc | 2005-07-29 13:01:18 -0600 | [diff] [blame] | 42 | curidt.size = limit; |
| 43 | curidt.address = (unsigned long)newidt; |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 44 | |
Zachary Amsden | f2ab446 | 2005-09-03 15:56:42 -0700 | [diff] [blame] | 45 | load_idt(&curidt); |
WANG Cong | 378fc6e | 2008-06-24 16:21:18 +0100 | [diff] [blame] | 46 | } |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 47 | |
| 48 | |
| 49 | static void set_gdt(void *newgdt, __u16 limit) |
| 50 | { |
Glauber de Oliveira Costa | 6b68f01 | 2008-01-30 13:31:12 +0100 | [diff] [blame] | 51 | struct desc_ptr curgdt; |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 52 | |
| 53 | /* ia32 supports unaligned loads & stores */ |
Eric W. Biederman | e7b47cc | 2005-07-29 13:01:18 -0600 | [diff] [blame] | 54 | curgdt.size = limit; |
| 55 | curgdt.address = (unsigned long)newgdt; |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 56 | |
Zachary Amsden | f2ab446 | 2005-09-03 15:56:42 -0700 | [diff] [blame] | 57 | load_gdt(&curgdt); |
WANG Cong | 378fc6e | 2008-06-24 16:21:18 +0100 | [diff] [blame] | 58 | } |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 59 | |
/*
 * Reload every segment register from the kernel selectors: a far jump
 * sets %cs to __KERNEL_CS, then %ds, %es, %fs, %gs and %ss are loaded
 * with __KERNEL_DS.
 */
static void load_segments(void)
{
/* Two-level stringification so the selector macros are expanded first. */
#define SEG_STR_1(X) #X
#define SEG_STR(X) SEG_STR_1(X)

	__asm__ __volatile__ (
		"\tljmp $" SEG_STR(__KERNEL_CS) ",$1f\n"
		"\t1:\n"
		"\tmovl $" SEG_STR(__KERNEL_DS) ",%%eax\n"
		"\tmovl %%eax,%%ds\n"
		"\tmovl %%eax,%%es\n"
		"\tmovl %%eax,%%fs\n"
		"\tmovl %%eax,%%gs\n"
		"\tmovl %%eax,%%ss\n"
		: /* no outputs */
		: /* no inputs */
		: "eax", "memory");

#undef SEG_STR
#undef SEG_STR_1
}
| 78 | |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 79 | /* |
| 80 | * A architecture hook called to validate the |
| 81 | * proposed image and prepare the control pages |
Huang Ying | 163f687 | 2008-08-15 00:40:22 -0700 | [diff] [blame] | 82 | * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 83 | * have been allocated, but the segments have yet |
| 84 | * been copied into the kernel. |
| 85 | * |
| 86 | * Do what every setup is needed on image and the |
| 87 | * reboot code buffer to allow us to avoid allocations |
| 88 | * later. |
| 89 | * |
Huang Ying | 3ab8352 | 2008-07-25 19:45:07 -0700 | [diff] [blame] | 90 | * Make control page executable. |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 91 | */ |
| 92 | int machine_kexec_prepare(struct kimage *image) |
| 93 | { |
Huang Ying | 3ab8352 | 2008-07-25 19:45:07 -0700 | [diff] [blame] | 94 | if (nx_enabled) |
| 95 | set_pages_x(image->control_code_page, 1); |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 96 | return 0; |
| 97 | } |
| 98 | |
| 99 | /* |
| 100 | * Undo anything leftover by machine_kexec_prepare |
| 101 | * when an image is freed. |
| 102 | */ |
| 103 | void machine_kexec_cleanup(struct kimage *image) |
| 104 | { |
Huang Ying | 3ab8352 | 2008-07-25 19:45:07 -0700 | [diff] [blame] | 105 | if (nx_enabled) |
| 106 | set_pages_nx(image->control_code_page, 1); |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 107 | } |
| 108 | |
| 109 | /* |
| 110 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
| 111 | * We are past the point of no return, committed to rebooting now. |
| 112 | */ |
Huang Ying | 3ab8352 | 2008-07-25 19:45:07 -0700 | [diff] [blame] | 113 | void machine_kexec(struct kimage *image) |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 114 | { |
Magnus Damm | 3566561 | 2006-09-26 10:52:38 +0200 | [diff] [blame] | 115 | unsigned long page_list[PAGES_NR]; |
| 116 | void *control_page; |
Huang Ying | 3122c33 | 2008-08-15 00:40:26 -0700 | [diff] [blame] | 117 | int save_ftrace_enabled; |
Huang Ying | 3ab8352 | 2008-07-25 19:45:07 -0700 | [diff] [blame] | 118 | asmlinkage unsigned long |
| 119 | (*relocate_kernel_ptr)(unsigned long indirection_page, |
| 120 | unsigned long control_page, |
| 121 | unsigned long start_address, |
| 122 | unsigned int has_pae, |
| 123 | unsigned int preserve_context); |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 124 | |
Huang Ying | 3122c33 | 2008-08-15 00:40:26 -0700 | [diff] [blame] | 125 | #ifdef CONFIG_KEXEC_JUMP |
| 126 | if (kexec_image->preserve_context) |
| 127 | save_processor_state(); |
| 128 | #endif |
| 129 | |
| 130 | save_ftrace_enabled = __ftrace_enabled_save(); |
Ingo Molnar | f43fdad | 2008-05-12 21:20:43 +0200 | [diff] [blame] | 131 | |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 132 | /* Interrupts aren't acceptable while we reboot */ |
| 133 | local_irq_disable(); |
| 134 | |
Huang Ying | 89081d1 | 2008-07-25 19:45:10 -0700 | [diff] [blame] | 135 | if (image->preserve_context) { |
| 136 | #ifdef CONFIG_X86_IO_APIC |
| 137 | /* We need to put APICs in legacy mode so that we can |
| 138 | * get timer interrupts in second kernel. kexec/kdump |
| 139 | * paths already have calls to disable_IO_APIC() in |
| 140 | * one form or other. kexec jump path also need |
| 141 | * one. |
| 142 | */ |
| 143 | disable_IO_APIC(); |
| 144 | #endif |
| 145 | } |
| 146 | |
Magnus Damm | 3566561 | 2006-09-26 10:52:38 +0200 | [diff] [blame] | 147 | control_page = page_address(image->control_code_page); |
Huang Ying | fb45daa | 2008-08-15 00:40:23 -0700 | [diff] [blame] | 148 | memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 149 | |
Huang Ying | 3ab8352 | 2008-07-25 19:45:07 -0700 | [diff] [blame] | 150 | relocate_kernel_ptr = control_page; |
Magnus Damm | 3566561 | 2006-09-26 10:52:38 +0200 | [diff] [blame] | 151 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
Huang Ying | 3ab8352 | 2008-07-25 19:45:07 -0700 | [diff] [blame] | 152 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
Magnus Damm | 3566561 | 2006-09-26 10:52:38 +0200 | [diff] [blame] | 153 | page_list[PA_PGD] = __pa(kexec_pgd); |
| 154 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
| 155 | #ifdef CONFIG_X86_PAE |
| 156 | page_list[PA_PMD_0] = __pa(kexec_pmd0); |
| 157 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; |
| 158 | page_list[PA_PMD_1] = __pa(kexec_pmd1); |
| 159 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; |
| 160 | #endif |
| 161 | page_list[PA_PTE_0] = __pa(kexec_pte0); |
| 162 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
| 163 | page_list[PA_PTE_1] = __pa(kexec_pte1); |
| 164 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; |
Ken'ichi Ohmichi | e7706fc | 2008-10-20 13:51:52 +0900 | [diff] [blame] | 165 | |
| 166 | if (image->type == KEXEC_TYPE_DEFAULT) |
| 167 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) |
| 168 | << PAGE_SHIFT); |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 169 | |
Eric W. Biederman | 2a8a3d5 | 2006-07-30 03:03:20 -0700 | [diff] [blame] | 170 | /* The segment registers are funny things, they have both a |
| 171 | * visible and an invisible part. Whenever the visible part is |
| 172 | * set to a specific selector, the invisible part is loaded |
| 173 | * with from a table in memory. At no other time is the |
| 174 | * descriptor table in memory accessed. |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 175 | * |
| 176 | * I take advantage of this here by force loading the |
| 177 | * segments, before I zap the gdt with an invalid value. |
| 178 | */ |
| 179 | load_segments(); |
| 180 | /* The gdt & idt are now invalid. |
| 181 | * If you want to load them you must set up your own idt & gdt. |
| 182 | */ |
| 183 | set_gdt(phys_to_virt(0),0); |
| 184 | set_idt(phys_to_virt(0),0); |
| 185 | |
| 186 | /* now call it */ |
Huang Ying | 3ab8352 | 2008-07-25 19:45:07 -0700 | [diff] [blame] | 187 | image->start = relocate_kernel_ptr((unsigned long)image->head, |
| 188 | (unsigned long)page_list, |
| 189 | image->start, cpu_has_pae, |
| 190 | image->preserve_context); |
Huang Ying | 3122c33 | 2008-08-15 00:40:26 -0700 | [diff] [blame] | 191 | |
| 192 | #ifdef CONFIG_KEXEC_JUMP |
| 193 | if (kexec_image->preserve_context) |
| 194 | restore_processor_state(); |
| 195 | #endif |
| 196 | |
| 197 | __ftrace_enabled_restore(save_ftrace_enabled); |
Eric W. Biederman | 5033cba | 2005-06-25 14:57:56 -0700 | [diff] [blame] | 198 | } |
Rusty Russell | 1a3f239 | 2006-09-26 10:52:32 +0200 | [diff] [blame] | 199 | |
/*
 * Architecture-specific additions to the vmcoreinfo data: the node_data
 * symbol and its length (MAX_NUMNODES) on NUMA builds, and the X86_PAE
 * config marker on PAE builds.
 */
void arch_crash_save_vmcoreinfo(void)
{
#ifdef CONFIG_NUMA
	VMCOREINFO_SYMBOL(node_data);
	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
#endif /* CONFIG_NUMA */

#ifdef CONFIG_X86_PAE
	VMCOREINFO_CONFIG(X86_PAE);
#endif /* CONFIG_X86_PAE */
}
| 210 | |