blob: d3943e5edc8f51e23e81dbefa00dd933c0531ebd [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
21
22/*
23 * This file handles the architecture-dependent parts of initialization
24 */
25
Alexey Dobriyan129f6942005-06-23 00:08:33 -070026#include <linux/config.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include <linux/sched.h>
28#include <linux/mm.h>
Andy Whitcroft05b79bd2005-06-23 00:07:57 -070029#include <linux/mmzone.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/tty.h>
31#include <linux/ioport.h>
32#include <linux/acpi.h>
33#include <linux/apm_bios.h>
34#include <linux/initrd.h>
35#include <linux/bootmem.h>
36#include <linux/seq_file.h>
37#include <linux/console.h>
38#include <linux/mca.h>
39#include <linux/root_dev.h>
40#include <linux/highmem.h>
41#include <linux/module.h>
42#include <linux/efi.h>
43#include <linux/init.h>
44#include <linux/edd.h>
45#include <linux/nodemask.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070046#include <linux/kexec.h>
Vivek Goyal2030eae2005-06-25 14:58:20 -070047#include <linux/crash_dump.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070048
Linus Torvalds1da177e2005-04-16 15:20:36 -070049#include <video/edid.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070050
Eric W. Biederman9635b472005-06-25 14:57:41 -070051#include <asm/apic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <asm/e820.h>
53#include <asm/mpspec.h>
54#include <asm/setup.h>
55#include <asm/arch_hooks.h>
56#include <asm/sections.h>
57#include <asm/io_apic.h>
58#include <asm/ist.h>
59#include <asm/io.h>
60#include "setup_arch_pre.h"
61#include <bios_ebda.h>
62
Vivek Goyal92aa63a2005-06-25 14:58:18 -070063/* Forward Declaration. */
64void __init find_max_pfn(void);
65
Linus Torvalds1da177e2005-04-16 15:20:36 -070066/* This value is set up by the early boot code to point to the value
67 immediately after the boot time page tables. It contains a *physical*
68 address, and must not be in the .bss segment! */
69unsigned long init_pg_tables_end __initdata = ~0UL;
70
Li Shaohua0bb31842005-06-25 14:54:55 -070071int disable_pse __devinitdata = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070072
73/*
74 * Machine setup..
75 */
76
77#ifdef CONFIG_EFI
78int efi_enabled = 0;
79EXPORT_SYMBOL(efi_enabled);
80#endif
81
82/* cpu data as detected by the assembly code in head.S */
83struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
84/* common cpu data for all cpus */
85struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
Alexey Dobriyan129f6942005-06-23 00:08:33 -070086EXPORT_SYMBOL(boot_cpu_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -070087
88unsigned long mmu_cr4_features;
89
90#ifdef CONFIG_ACPI_INTERPRETER
91 int acpi_disabled = 0;
92#else
93 int acpi_disabled = 1;
94#endif
95EXPORT_SYMBOL(acpi_disabled);
96
Len Brown888ba6c2005-08-24 12:07:20 -040097#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -070098int __initdata acpi_force = 0;
99extern acpi_interrupt_flags acpi_sci_flags;
100#endif
101
102/* for MCA, but anyone else can use it if they want */
103unsigned int machine_id;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700104#ifdef CONFIG_MCA
105EXPORT_SYMBOL(machine_id);
106#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107unsigned int machine_submodel_id;
108unsigned int BIOS_revision;
109unsigned int mca_pentium_flag;
110
111/* For PCI or other memory-mapped resources */
112unsigned long pci_mem_start = 0x10000000;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700113#ifdef CONFIG_PCI
114EXPORT_SYMBOL(pci_mem_start);
115#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
117/* Boot loader ID as an integer, for the benefit of proc_dointvec */
118int bootloader_type;
119
120/* user-defined highmem size */
121static unsigned int highmem_pages = -1;
122
123/*
124 * Setup options
125 */
126struct drive_info_struct { char dummy[32]; } drive_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700127#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
128 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
129EXPORT_SYMBOL(drive_info);
130#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131struct screen_info screen_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700132#ifdef CONFIG_VT
133EXPORT_SYMBOL(screen_info);
134#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135struct apm_info apm_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700136EXPORT_SYMBOL(apm_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137struct sys_desc_table_struct {
138 unsigned short length;
139 unsigned char table[0];
140};
141struct edid_info edid_info;
142struct ist_info ist_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700143#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
144 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
145EXPORT_SYMBOL(ist_info);
146#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147struct e820map e820;
148
149extern void early_cpu_init(void);
150extern void dmi_scan_machine(void);
151extern void generic_apic_probe(char *);
152extern int root_mountflags;
153
154unsigned long saved_videomode;
155
156#define RAMDISK_IMAGE_START_MASK 0x07FF
157#define RAMDISK_PROMPT_FLAG 0x8000
158#define RAMDISK_LOAD_FLAG 0x4000
159
160static char command_line[COMMAND_LINE_SIZE];
161
162unsigned char __initdata boot_params[PARAM_SIZE];
163
164static struct resource data_resource = {
165 .name = "Kernel data",
166 .start = 0,
167 .end = 0,
168 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
169};
170
171static struct resource code_resource = {
172 .name = "Kernel code",
173 .start = 0,
174 .end = 0,
175 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
176};
177
178static struct resource system_rom_resource = {
179 .name = "System ROM",
180 .start = 0xf0000,
181 .end = 0xfffff,
182 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
183};
184
185static struct resource extension_rom_resource = {
186 .name = "Extension ROM",
187 .start = 0xe0000,
188 .end = 0xeffff,
189 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
190};
191
192static struct resource adapter_rom_resources[] = { {
193 .name = "Adapter ROM",
194 .start = 0xc8000,
195 .end = 0,
196 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
197}, {
198 .name = "Adapter ROM",
199 .start = 0,
200 .end = 0,
201 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
202}, {
203 .name = "Adapter ROM",
204 .start = 0,
205 .end = 0,
206 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
207}, {
208 .name = "Adapter ROM",
209 .start = 0,
210 .end = 0,
211 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
212}, {
213 .name = "Adapter ROM",
214 .start = 0,
215 .end = 0,
216 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
217}, {
218 .name = "Adapter ROM",
219 .start = 0,
220 .end = 0,
221 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
222} };
223
224#define ADAPTER_ROM_RESOURCES \
225 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
226
227static struct resource video_rom_resource = {
228 .name = "Video ROM",
229 .start = 0xc0000,
230 .end = 0xc7fff,
231 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
232};
233
234static struct resource video_ram_resource = {
235 .name = "Video RAM area",
236 .start = 0xa0000,
237 .end = 0xbffff,
238 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
239};
240
241static struct resource standard_io_resources[] = { {
242 .name = "dma1",
243 .start = 0x0000,
244 .end = 0x001f,
245 .flags = IORESOURCE_BUSY | IORESOURCE_IO
246}, {
247 .name = "pic1",
248 .start = 0x0020,
249 .end = 0x0021,
250 .flags = IORESOURCE_BUSY | IORESOURCE_IO
251}, {
252 .name = "timer0",
253 .start = 0x0040,
254 .end = 0x0043,
255 .flags = IORESOURCE_BUSY | IORESOURCE_IO
256}, {
257 .name = "timer1",
258 .start = 0x0050,
259 .end = 0x0053,
260 .flags = IORESOURCE_BUSY | IORESOURCE_IO
261}, {
262 .name = "keyboard",
263 .start = 0x0060,
264 .end = 0x006f,
265 .flags = IORESOURCE_BUSY | IORESOURCE_IO
266}, {
267 .name = "dma page reg",
268 .start = 0x0080,
269 .end = 0x008f,
270 .flags = IORESOURCE_BUSY | IORESOURCE_IO
271}, {
272 .name = "pic2",
273 .start = 0x00a0,
274 .end = 0x00a1,
275 .flags = IORESOURCE_BUSY | IORESOURCE_IO
276}, {
277 .name = "dma2",
278 .start = 0x00c0,
279 .end = 0x00df,
280 .flags = IORESOURCE_BUSY | IORESOURCE_IO
281}, {
282 .name = "fpu",
283 .start = 0x00f0,
284 .end = 0x00ff,
285 .flags = IORESOURCE_BUSY | IORESOURCE_IO
286} };
287
288#define STANDARD_IO_RESOURCES \
289 (sizeof standard_io_resources / sizeof standard_io_resources[0])
290
291#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
292
/*
 * Verify the checksum of a ROM image.
 *
 * @rom:    first byte of the image (only read, hence const)
 * @length: number of bytes to add up
 *
 * All bytes are summed in 8-bit wrapping arithmetic.  Returns 1 when the
 * sum is zero (checksum okay) and 0 otherwise.  A zero @length sums
 * nothing and therefore returns 1; callers check the length themselves.
 */
static int __init romchecksum(const unsigned char *rom, unsigned long length)
{
	const unsigned char *p;
	unsigned char sum = 0;

	for (p = rom; p < rom + length; p++)
		sum += *p;
	return sum == 0;
}
301
302static void __init probe_roms(void)
303{
304 unsigned long start, length, upper;
305 unsigned char *rom;
306 int i;
307
308 /* video rom */
309 upper = adapter_rom_resources[0].start;
310 for (start = video_rom_resource.start; start < upper; start += 2048) {
311 rom = isa_bus_to_virt(start);
312 if (!romsignature(rom))
313 continue;
314
315 video_rom_resource.start = start;
316
317 /* 0 < length <= 0x7f * 512, historically */
318 length = rom[2] * 512;
319
320 /* if checksum okay, trust length byte */
321 if (length && romchecksum(rom, length))
322 video_rom_resource.end = start + length - 1;
323
324 request_resource(&iomem_resource, &video_rom_resource);
325 break;
326 }
327
328 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
329 if (start < upper)
330 start = upper;
331
332 /* system rom */
333 request_resource(&iomem_resource, &system_rom_resource);
334 upper = system_rom_resource.start;
335
336 /* check for extension rom (ignore length byte!) */
337 rom = isa_bus_to_virt(extension_rom_resource.start);
338 if (romsignature(rom)) {
339 length = extension_rom_resource.end - extension_rom_resource.start + 1;
340 if (romchecksum(rom, length)) {
341 request_resource(&iomem_resource, &extension_rom_resource);
342 upper = extension_rom_resource.start;
343 }
344 }
345
346 /* check for adapter roms on 2k boundaries */
347 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
348 rom = isa_bus_to_virt(start);
349 if (!romsignature(rom))
350 continue;
351
352 /* 0 < length <= 0x7f * 512, historically */
353 length = rom[2] * 512;
354
355 /* but accept any length that fits if checksum okay */
356 if (!length || start + length > upper || !romchecksum(rom, length))
357 continue;
358
359 adapter_rom_resources[i].start = start;
360 adapter_rom_resources[i].end = start + length - 1;
361 request_resource(&iomem_resource, &adapter_rom_resources[i]);
362
363 start = adapter_rom_resources[i++].end & ~2047UL;
364 }
365}
366
367static void __init limit_regions(unsigned long long size)
368{
369 unsigned long long current_addr = 0;
370 int i;
371
372 if (efi_enabled) {
373 for (i = 0; i < memmap.nr_map; i++) {
374 current_addr = memmap.map[i].phys_addr +
375 (memmap.map[i].num_pages << 12);
376 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
377 if (current_addr >= size) {
378 memmap.map[i].num_pages -=
379 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
380 memmap.nr_map = i + 1;
381 return;
382 }
383 }
384 }
385 }
386 for (i = 0; i < e820.nr_map; i++) {
387 if (e820.map[i].type == E820_RAM) {
388 current_addr = e820.map[i].addr + e820.map[i].size;
389 if (current_addr >= size) {
390 e820.map[i].size -= current_addr-size;
391 e820.nr_map = i + 1;
392 return;
393 }
394 }
395 }
396}
397
398static void __init add_memory_region(unsigned long long start,
399 unsigned long long size, int type)
400{
401 int x;
402
403 if (!efi_enabled) {
404 x = e820.nr_map;
405
406 if (x == E820MAX) {
407 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
408 return;
409 }
410
411 e820.map[x].addr = start;
412 e820.map[x].size = size;
413 e820.map[x].type = type;
414 e820.nr_map++;
415 }
416} /* add_memory_region */
417
418#define E820_DEBUG 1
419
420static void __init print_memory_map(char *who)
421{
422 int i;
423
424 for (i = 0; i < e820.nr_map; i++) {
425 printk(" %s: %016Lx - %016Lx ", who,
426 e820.map[i].addr,
427 e820.map[i].addr + e820.map[i].size);
428 switch (e820.map[i].type) {
429 case E820_RAM: printk("(usable)\n");
430 break;
431 case E820_RESERVED:
432 printk("(reserved)\n");
433 break;
434 case E820_ACPI:
435 printk("(ACPI data)\n");
436 break;
437 case E820_NVS:
438 printk("(ACPI NVS)\n");
439 break;
440 default: printk("type %lu\n", e820.map[i].type);
441 break;
442 }
443 }
444}
445
446/*
447 * Sanitize the BIOS e820 map.
448 *
449 * Some e820 responses include overlapping entries. The following
450 * replaces the original e820 map with a new one, removing overlaps.
451 *
452 */
453struct change_member {
454 struct e820entry *pbios; /* pointer to original bios entry */
455 unsigned long long addr; /* address for this change point */
456};
457static struct change_member change_point_list[2*E820MAX] __initdata;
458static struct change_member *change_point[2*E820MAX] __initdata;
459static struct e820entry *overlap_list[E820MAX] __initdata;
460static struct e820entry new_bios[E820MAX] __initdata;
461
462static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
463{
464 struct change_member *change_tmp;
465 unsigned long current_type, last_type;
466 unsigned long long last_addr;
467 int chgidx, still_changing;
468 int overlap_entries;
469 int new_bios_entry;
470 int old_nr, new_nr, chg_nr;
471 int i;
472
473 /*
474 Visually we're performing the following (1,2,3,4 = memory types)...
475
476 Sample memory map (w/overlaps):
477 ____22__________________
478 ______________________4_
479 ____1111________________
480 _44_____________________
481 11111111________________
482 ____________________33__
483 ___________44___________
484 __________33333_________
485 ______________22________
486 ___________________2222_
487 _________111111111______
488 _____________________11_
489 _________________4______
490
491 Sanitized equivalent (no overlap):
492 1_______________________
493 _44_____________________
494 ___1____________________
495 ____22__________________
496 ______11________________
497 _________1______________
498 __________3_____________
499 ___________44___________
500 _____________33_________
501 _______________2________
502 ________________1_______
503 _________________4______
504 ___________________2____
505 ____________________33__
506 ______________________4_
507 */
508
509 /* if there's only one memory region, don't bother */
510 if (*pnr_map < 2)
511 return -1;
512
513 old_nr = *pnr_map;
514
515 /* bail out if we find any unreasonable addresses in bios map */
516 for (i=0; i<old_nr; i++)
517 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
518 return -1;
519
520 /* create pointers for initial change-point information (for sorting) */
521 for (i=0; i < 2*old_nr; i++)
522 change_point[i] = &change_point_list[i];
523
524 /* record all known change-points (starting and ending addresses),
525 omitting those that are for empty memory regions */
526 chgidx = 0;
527 for (i=0; i < old_nr; i++) {
528 if (biosmap[i].size != 0) {
529 change_point[chgidx]->addr = biosmap[i].addr;
530 change_point[chgidx++]->pbios = &biosmap[i];
531 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
532 change_point[chgidx++]->pbios = &biosmap[i];
533 }
534 }
535 chg_nr = chgidx; /* true number of change-points */
536
537 /* sort change-point list by memory addresses (low -> high) */
538 still_changing = 1;
539 while (still_changing) {
540 still_changing = 0;
541 for (i=1; i < chg_nr; i++) {
542 /* if <current_addr> > <last_addr>, swap */
543 /* or, if current=<start_addr> & last=<end_addr>, swap */
544 if ((change_point[i]->addr < change_point[i-1]->addr) ||
545 ((change_point[i]->addr == change_point[i-1]->addr) &&
546 (change_point[i]->addr == change_point[i]->pbios->addr) &&
547 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
548 )
549 {
550 change_tmp = change_point[i];
551 change_point[i] = change_point[i-1];
552 change_point[i-1] = change_tmp;
553 still_changing=1;
554 }
555 }
556 }
557
558 /* create a new bios memory map, removing overlaps */
559 overlap_entries=0; /* number of entries in the overlap table */
560 new_bios_entry=0; /* index for creating new bios map entries */
561 last_type = 0; /* start with undefined memory type */
562 last_addr = 0; /* start with 0 as last starting address */
563 /* loop through change-points, determining affect on the new bios map */
564 for (chgidx=0; chgidx < chg_nr; chgidx++)
565 {
566 /* keep track of all overlapping bios entries */
567 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
568 {
569 /* add map entry to overlap list (> 1 entry implies an overlap) */
570 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
571 }
572 else
573 {
574 /* remove entry from list (order independent, so swap with last) */
575 for (i=0; i<overlap_entries; i++)
576 {
577 if (overlap_list[i] == change_point[chgidx]->pbios)
578 overlap_list[i] = overlap_list[overlap_entries-1];
579 }
580 overlap_entries--;
581 }
582 /* if there are overlapping entries, decide which "type" to use */
583 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
584 current_type = 0;
585 for (i=0; i<overlap_entries; i++)
586 if (overlap_list[i]->type > current_type)
587 current_type = overlap_list[i]->type;
588 /* continue building up new bios map based on this information */
589 if (current_type != last_type) {
590 if (last_type != 0) {
591 new_bios[new_bios_entry].size =
592 change_point[chgidx]->addr - last_addr;
593 /* move forward only if the new size was non-zero */
594 if (new_bios[new_bios_entry].size != 0)
595 if (++new_bios_entry >= E820MAX)
596 break; /* no more space left for new bios entries */
597 }
598 if (current_type != 0) {
599 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
600 new_bios[new_bios_entry].type = current_type;
601 last_addr=change_point[chgidx]->addr;
602 }
603 last_type = current_type;
604 }
605 }
606 new_nr = new_bios_entry; /* retain count for new bios entries */
607
608 /* copy new bios mapping into original location */
609 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
610 *pnr_map = new_nr;
611
612 return 0;
613}
614
615/*
616 * Copy the BIOS e820 map into a safe place.
617 *
618 * Sanity-check it while we're at it..
619 *
620 * If we're lucky and live on a modern system, the setup code
621 * will have given us a memory map that we can use to properly
622 * set up memory. If we aren't, we'll fake a memory map.
623 *
624 * We check to see that the memory map contains at least 2 elements
625 * before we'll use it, because the detection code in setup.S may
626 * not be perfect and most every PC known to man has two memory
627 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
628 * thinkpad 560x, for example, does not cooperate with the memory
629 * detection code.)
630 */
631static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
632{
633 /* Only one memory region (or negative)? Ignore it */
634 if (nr_map < 2)
635 return -1;
636
637 do {
638 unsigned long long start = biosmap->addr;
639 unsigned long long size = biosmap->size;
640 unsigned long long end = start + size;
641 unsigned long type = biosmap->type;
642
643 /* Overflow in 64 bits? Ignore the memory map. */
644 if (start > end)
645 return -1;
646
647 /*
648 * Some BIOSes claim RAM in the 640k - 1M region.
649 * Not right. Fix it up.
650 */
651 if (type == E820_RAM) {
652 if (start < 0x100000ULL && end > 0xA0000ULL) {
653 if (start < 0xA0000ULL)
654 add_memory_region(start, 0xA0000ULL-start, type);
655 if (end <= 0x100000ULL)
656 continue;
657 start = 0x100000ULL;
658 size = end - start;
659 }
660 }
661 add_memory_region(start, size, type);
662 } while (biosmap++,--nr_map);
663 return 0;
664}
665
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 * Fills the entry counts and the two data arrays of the global edd
 * structure.
 */
static inline void copy_edd(void)
{
	edd.mbr_signature_nr = EDD_MBR_SIG_NR;
	edd.edd_info_nr = EDD_NR;
	memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
	memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
}
#else
/* EDD support not configured: nothing to copy. */
static inline void copy_edd(void)
{
}
#endif
688
689/*
690 * Do NOT EVER look at the BIOS memory size location.
691 * It does not work on many machines.
692 */
693#define LOWMEMSIZE() (0x9f000)
694
695static void __init parse_cmdline_early (char ** cmdline_p)
696{
697 char c = ' ', *to = command_line, *from = saved_command_line;
698 int len = 0;
699 int userdef = 0;
700
701 /* Save unparsed command line copy for /proc/cmdline */
702 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
703
704 for (;;) {
705 if (c != ' ')
706 goto next_char;
707 /*
708 * "mem=nopentium" disables the 4MB page tables.
709 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
710 * to <mem>, overriding the bios size.
711 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
712 * <start> to <start>+<mem>, overriding the bios size.
713 *
714 * HPA tells me bootloaders need to parse mem=, so no new
715 * option should be mem= [also see Documentation/i386/boot.txt]
716 */
717 if (!memcmp(from, "mem=", 4)) {
718 if (to != command_line)
719 to--;
720 if (!memcmp(from+4, "nopentium", 9)) {
721 from += 9+4;
722 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
723 disable_pse = 1;
724 } else {
725 /* If the user specifies memory size, we
726 * limit the BIOS-provided memory map to
727 * that size. exactmap can be used to specify
728 * the exact map. mem=number can be used to
729 * trim the existing memory map.
730 */
731 unsigned long long mem_size;
732
733 mem_size = memparse(from+4, &from);
734 limit_regions(mem_size);
735 userdef=1;
736 }
737 }
738
739 else if (!memcmp(from, "memmap=", 7)) {
740 if (to != command_line)
741 to--;
742 if (!memcmp(from+7, "exactmap", 8)) {
Vivek Goyal92aa63a2005-06-25 14:58:18 -0700743#ifdef CONFIG_CRASH_DUMP
744 /* If we are doing a crash dump, we
745 * still need to know the real mem
746 * size before original memory map is
747 * reset.
748 */
749 find_max_pfn();
750 saved_max_pfn = max_pfn;
751#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 from += 8+7;
753 e820.nr_map = 0;
754 userdef = 1;
755 } else {
756 /* If the user specifies memory size, we
757 * limit the BIOS-provided memory map to
758 * that size. exactmap can be used to specify
759 * the exact map. mem=number can be used to
760 * trim the existing memory map.
761 */
762 unsigned long long start_at, mem_size;
763
764 mem_size = memparse(from+7, &from);
765 if (*from == '@') {
766 start_at = memparse(from+1, &from);
767 add_memory_region(start_at, mem_size, E820_RAM);
768 } else if (*from == '#') {
769 start_at = memparse(from+1, &from);
770 add_memory_region(start_at, mem_size, E820_ACPI);
771 } else if (*from == '$') {
772 start_at = memparse(from+1, &from);
773 add_memory_region(start_at, mem_size, E820_RESERVED);
774 } else {
775 limit_regions(mem_size);
776 userdef=1;
777 }
778 }
779 }
780
781 else if (!memcmp(from, "noexec=", 7))
782 noexec_setup(from + 7);
783
784
785#ifdef CONFIG_X86_SMP
786 /*
787 * If the BIOS enumerates physical processors before logical,
788 * maxcpus=N at enumeration-time can be used to disable HT.
789 */
790 else if (!memcmp(from, "maxcpus=", 8)) {
791 extern unsigned int maxcpus;
792
793 maxcpus = simple_strtoul(from + 8, NULL, 0);
794 }
795#endif
796
Len Brown888ba6c2005-08-24 12:07:20 -0400797#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 /* "acpi=off" disables both ACPI table parsing and interpreter */
799 else if (!memcmp(from, "acpi=off", 8)) {
800 disable_acpi();
801 }
802
803 /* acpi=force to over-ride black-list */
804 else if (!memcmp(from, "acpi=force", 10)) {
805 acpi_force = 1;
806 acpi_ht = 1;
807 acpi_disabled = 0;
808 }
809
810 /* acpi=strict disables out-of-spec workarounds */
811 else if (!memcmp(from, "acpi=strict", 11)) {
812 acpi_strict = 1;
813 }
814
815 /* Limit ACPI just to boot-time to enable HT */
816 else if (!memcmp(from, "acpi=ht", 7)) {
817 if (!acpi_force)
818 disable_acpi();
819 acpi_ht = 1;
820 }
821
822 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
823 else if (!memcmp(from, "pci=noacpi", 10)) {
824 acpi_disable_pci();
825 }
826 /* "acpi=noirq" disables ACPI interrupt routing */
827 else if (!memcmp(from, "acpi=noirq", 10)) {
828 acpi_noirq_set();
829 }
830
831 else if (!memcmp(from, "acpi_sci=edge", 13))
832 acpi_sci_flags.trigger = 1;
833
834 else if (!memcmp(from, "acpi_sci=level", 14))
835 acpi_sci_flags.trigger = 3;
836
837 else if (!memcmp(from, "acpi_sci=high", 13))
838 acpi_sci_flags.polarity = 1;
839
840 else if (!memcmp(from, "acpi_sci=low", 12))
841 acpi_sci_flags.polarity = 3;
842
843#ifdef CONFIG_X86_IO_APIC
844 else if (!memcmp(from, "acpi_skip_timer_override", 24))
845 acpi_skip_timer_override = 1;
846#endif
847
848#ifdef CONFIG_X86_LOCAL_APIC
849 /* disable IO-APIC */
850 else if (!memcmp(from, "noapic", 6))
851 disable_ioapic_setup();
852#endif /* CONFIG_X86_LOCAL_APIC */
Len Brown888ba6c2005-08-24 12:07:20 -0400853#endif /* CONFIG_ACPI */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854
Eric W. Biederman9635b472005-06-25 14:57:41 -0700855#ifdef CONFIG_X86_LOCAL_APIC
856 /* enable local APIC */
857 else if (!memcmp(from, "lapic", 5))
858 lapic_enable();
859
860 /* disable local APIC */
861 else if (!memcmp(from, "nolapic", 6))
862 lapic_disable();
863#endif /* CONFIG_X86_LOCAL_APIC */
864
Eric W. Biederman1bc3b912005-06-25 14:58:01 -0700865#ifdef CONFIG_KEXEC
866 /* crashkernel=size@addr specifies the location to reserve for
867 * a crash kernel. By reserving this memory we guarantee
868 * that linux never set's it up as a DMA target.
869 * Useful for holding code to do something appropriate
870 * after a kernel panic.
871 */
872 else if (!memcmp(from, "crashkernel=", 12)) {
873 unsigned long size, base;
874 size = memparse(from+12, &from);
875 if (*from == '@') {
876 base = memparse(from+1, &from);
877 /* FIXME: Do I want a sanity check
878 * to validate the memory range?
879 */
880 crashk_res.start = base;
881 crashk_res.end = base + size - 1;
882 }
883 }
884#endif
Vivek Goyal2030eae2005-06-25 14:58:20 -0700885#ifdef CONFIG_CRASH_DUMP
886 /* elfcorehdr= specifies the location of elf core header
887 * stored by the crashed kernel.
888 */
889 else if (!memcmp(from, "elfcorehdr=", 11))
890 elfcorehdr_addr = memparse(from+11, &from);
891#endif
Eric W. Biederman1bc3b912005-06-25 14:58:01 -0700892
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 /*
894 * highmem=size forces highmem to be exactly 'size' bytes.
895 * This works even on boxes that have no highmem otherwise.
896 * This also works to reduce highmem size on bigger boxes.
897 */
898 else if (!memcmp(from, "highmem=", 8))
899 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
900
901 /*
902 * vmalloc=size forces the vmalloc area to be exactly 'size'
903 * bytes. This can be used to increase (or decrease) the
904 * vmalloc area - the default is 128m.
905 */
906 else if (!memcmp(from, "vmalloc=", 8))
907 __VMALLOC_RESERVE = memparse(from+8, &from);
908
909 next_char:
910 c = *(from++);
911 if (!c)
912 break;
913 if (COMMAND_LINE_SIZE <= ++len)
914 break;
915 *(to++) = c;
916 }
917 *to = '\0';
918 *cmdline_p = command_line;
919 if (userdef) {
920 printk(KERN_INFO "user-defined physical RAM map:\n");
921 print_memory_map("user");
922 }
923}
924
/*
 * Callback for efi_memmap_walk.
 */
928static int __init
929efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
930{
931 unsigned long *max_pfn = arg, pfn;
932
933 if (start < end) {
934 pfn = PFN_UP(end -1);
935 if (pfn > *max_pfn)
936 *max_pfn = pfn;
937 }
938 return 0;
939}
940
941
942/*
943 * Find the highest page frame number we have available
944 */
945void __init find_max_pfn(void)
946{
947 int i;
948
949 max_pfn = 0;
950 if (efi_enabled) {
951 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
952 return;
953 }
954
955 for (i = 0; i < e820.nr_map; i++) {
956 unsigned long start, end;
957 /* RAM? */
958 if (e820.map[i].type != E820_RAM)
959 continue;
960 start = PFN_UP(e820.map[i].addr);
961 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
962 if (start >= end)
963 continue;
964 if (end > max_pfn)
965 max_pfn = end;
966 }
967}
968
969/*
970 * Determine low and high memory ranges:
971 */
972unsigned long __init find_max_low_pfn(void)
973{
974 unsigned long max_low_pfn;
975
976 max_low_pfn = max_pfn;
977 if (max_low_pfn > MAXMEM_PFN) {
978 if (highmem_pages == -1)
979 highmem_pages = max_pfn - MAXMEM_PFN;
980 if (highmem_pages + MAXMEM_PFN < max_pfn)
981 max_pfn = MAXMEM_PFN + highmem_pages;
982 if (highmem_pages + MAXMEM_PFN > max_pfn) {
983 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
984 highmem_pages = 0;
985 }
986 max_low_pfn = MAXMEM_PFN;
987#ifndef CONFIG_HIGHMEM
988 /* Maximum memory usable is what is directly addressable */
989 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
990 MAXMEM>>20);
991 if (max_pfn > MAX_NONPAE_PFN)
992 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
993 else
994 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
995 max_pfn = MAXMEM_PFN;
996#else /* !CONFIG_HIGHMEM */
997#ifndef CONFIG_X86_PAE
998 if (max_pfn > MAX_NONPAE_PFN) {
999 max_pfn = MAX_NONPAE_PFN;
1000 printk(KERN_WARNING "Warning only 4GB will be used.\n");
1001 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1002 }
1003#endif /* !CONFIG_X86_PAE */
1004#endif /* !CONFIG_HIGHMEM */
1005 } else {
1006 if (highmem_pages == -1)
1007 highmem_pages = 0;
1008#ifdef CONFIG_HIGHMEM
1009 if (highmem_pages >= max_pfn) {
1010 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1011 highmem_pages = 0;
1012 }
1013 if (highmem_pages) {
1014 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1015 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1016 highmem_pages = 0;
1017 }
1018 max_low_pfn -= highmem_pages;
1019 }
1020#else
1021 if (highmem_pages)
1022 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1023#endif
1024 }
1025 return max_low_pfn;
1026}
1027
/*
 * Free all available memory for boot time allocation.  Used
 * as a callback function by efi_memmap_walk()
 */
1031 */
1032
1033static int __init
1034free_available_memory(unsigned long start, unsigned long end, void *arg)
1035{
1036 /* check max_low_pfn */
1037 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1038 return 0;
1039 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1040 end = (max_low_pfn + 1) << PAGE_SHIFT;
1041 if (start < end)
1042 free_bootmem(start, end - start);
1043
1044 return 0;
1045}
1046/*
1047 * Register fully available low RAM pages with the bootmem allocator.
1048 */
1049static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1050{
1051 int i;
1052
1053 if (efi_enabled) {
1054 efi_memmap_walk(free_available_memory, NULL);
1055 return;
1056 }
1057 for (i = 0; i < e820.nr_map; i++) {
1058 unsigned long curr_pfn, last_pfn, size;
1059 /*
1060 * Reserve usable low memory
1061 */
1062 if (e820.map[i].type != E820_RAM)
1063 continue;
1064 /*
1065 * We are rounding up the start address of usable memory:
1066 */
1067 curr_pfn = PFN_UP(e820.map[i].addr);
1068 if (curr_pfn >= max_low_pfn)
1069 continue;
1070 /*
1071 * ... and at the end of the usable range downwards:
1072 */
1073 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1074
1075 if (last_pfn > max_low_pfn)
1076 last_pfn = max_low_pfn;
1077
1078 /*
1079 * .. finally, did all the rounding and playing
1080 * around just make the area go away?
1081 */
1082 if (last_pfn <= curr_pfn)
1083 continue;
1084
1085 size = last_pfn - curr_pfn;
1086 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1087 }
1088}
1089
1090/*
1091 * workaround for Dell systems that neglect to reserve EBDA
1092 */
1093static void __init reserve_ebda_region(void)
1094{
1095 unsigned int addr;
1096 addr = get_bios_ebda();
1097 if (addr)
1098 reserve_bootmem(addr, PAGE_SIZE);
1099}
1100
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001101#ifndef CONFIG_NEED_MULTIPLE_NODES
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102void __init setup_bootmem_allocator(void);
1103static unsigned long __init setup_memory(void)
1104{
1105 /*
1106 * partially used pages are not usable - thus
1107 * we are rounding upwards:
1108 */
1109 min_low_pfn = PFN_UP(init_pg_tables_end);
1110
1111 find_max_pfn();
1112
1113 max_low_pfn = find_max_low_pfn();
1114
1115#ifdef CONFIG_HIGHMEM
1116 highstart_pfn = highend_pfn = max_pfn;
1117 if (max_pfn > max_low_pfn) {
1118 highstart_pfn = max_low_pfn;
1119 }
1120 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1121 pages_to_mb(highend_pfn - highstart_pfn));
1122#endif
1123 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1124 pages_to_mb(max_low_pfn));
1125
1126 setup_bootmem_allocator();
1127
1128 return max_low_pfn;
1129}
1130
1131void __init zone_sizes_init(void)
1132{
1133 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1134 unsigned int max_dma, low;
1135
1136 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1137 low = max_low_pfn;
1138
1139 if (low < max_dma)
1140 zones_size[ZONE_DMA] = low;
1141 else {
1142 zones_size[ZONE_DMA] = max_dma;
1143 zones_size[ZONE_NORMAL] = low - max_dma;
1144#ifdef CONFIG_HIGHMEM
1145 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1146#endif
1147 }
1148 free_area_init(zones_size);
1149}
1150#else
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001151extern unsigned long __init setup_memory(void);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152extern void zone_sizes_init(void);
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001153#endif /* !CONFIG_NEED_MULTIPLE_NODES */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154
1155void __init setup_bootmem_allocator(void)
1156{
1157 unsigned long bootmap_size;
1158 /*
1159 * Initialize the boot-time allocator (with low memory only):
1160 */
1161 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1162
1163 register_bootmem_low_pages(max_low_pfn);
1164
1165 /*
1166 * Reserve the bootmem bitmap itself as well. We do this in two
1167 * steps (first step was init_bootmem()) because this catches
1168 * the (very unlikely) case of us accidentally initializing the
1169 * bootmem allocator with an invalid RAM area.
1170 */
Vivek Goyal8a919082005-06-25 14:57:51 -07001171 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1172 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173
1174 /*
1175 * reserve physical page 0 - it's a special BIOS page on many boxes,
1176 * enabling clean reboots, SMP operation, laptop functions.
1177 */
1178 reserve_bootmem(0, PAGE_SIZE);
1179
1180 /* reserve EBDA region, it's a 4K region */
1181 reserve_ebda_region();
1182
1183 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1184 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1185 unless you have no PS/2 mouse plugged in. */
1186 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1187 boot_cpu_data.x86 == 6)
1188 reserve_bootmem(0xa0000 - 4096, 4096);
1189
1190#ifdef CONFIG_SMP
1191 /*
1192 * But first pinch a few for the stack/trampoline stuff
1193 * FIXME: Don't need the extra page at 4K, but need to fix
1194 * trampoline before removing it. (see the GDT stuff)
1195 */
1196 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1197#endif
1198#ifdef CONFIG_ACPI_SLEEP
1199 /*
1200 * Reserve low memory region for sleep support.
1201 */
1202 acpi_reserve_bootmem();
1203#endif
1204#ifdef CONFIG_X86_FIND_SMP_CONFIG
1205 /*
1206 * Find and reserve possible boot-time SMP configuration:
1207 */
1208 find_smp_config();
1209#endif
1210
1211#ifdef CONFIG_BLK_DEV_INITRD
1212 if (LOADER_TYPE && INITRD_START) {
1213 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1214 reserve_bootmem(INITRD_START, INITRD_SIZE);
1215 initrd_start =
1216 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1217 initrd_end = initrd_start+INITRD_SIZE;
1218 }
1219 else {
1220 printk(KERN_ERR "initrd extends beyond end of memory "
1221 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1222 INITRD_START + INITRD_SIZE,
1223 max_low_pfn << PAGE_SHIFT);
1224 initrd_start = 0;
1225 }
1226 }
1227#endif
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001228#ifdef CONFIG_KEXEC
1229 if (crashk_res.start != crashk_res.end)
1230 reserve_bootmem(crashk_res.start,
1231 crashk_res.end - crashk_res.start + 1);
1232#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233}
1234
1235/*
1236 * The node 0 pgdat is initialized before all of these because
1237 * it's needed for bootmem. node>0 pgdats have their virtual
1238 * space allocated before the pagetables are in place to access
1239 * them, so they can't be cleared then.
1240 *
1241 * This should all compile down to nothing when NUMA is off.
1242 */
1243void __init remapped_pgdat_init(void)
1244{
1245 int nid;
1246
1247 for_each_online_node(nid) {
1248 if (nid != 0)
1249 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1250 }
1251}
1252
1253/*
1254 * Request address space for all standard RAM and ROM resources
1255 * and also for regions reported as reserved by the e820.
1256 */
1257static void __init
1258legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1259{
1260 int i;
1261
1262 probe_roms();
1263 for (i = 0; i < e820.nr_map; i++) {
1264 struct resource *res;
1265 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1266 continue;
1267 res = alloc_bootmem_low(sizeof(struct resource));
1268 switch (e820.map[i].type) {
1269 case E820_RAM: res->name = "System RAM"; break;
1270 case E820_ACPI: res->name = "ACPI Tables"; break;
1271 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1272 default: res->name = "reserved";
1273 }
1274 res->start = e820.map[i].addr;
1275 res->end = res->start + e820.map[i].size - 1;
1276 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1277 request_resource(&iomem_resource, res);
1278 if (e820.map[i].type == E820_RAM) {
1279 /*
1280 * We don't know which RAM region contains kernel data,
1281 * so we try it repeatedly and let the resource manager
1282 * test it.
1283 */
1284 request_resource(res, code_resource);
1285 request_resource(res, data_resource);
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001286#ifdef CONFIG_KEXEC
1287 request_resource(res, &crashk_res);
1288#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289 }
1290 }
1291}
1292
1293/*
1294 * Request address space for all standard resources
1295 */
1296static void __init register_memory(void)
1297{
1298 unsigned long gapstart, gapsize;
1299 unsigned long long last;
1300 int i;
1301
1302 if (efi_enabled)
1303 efi_initialize_iomem_resources(&code_resource, &data_resource);
1304 else
1305 legacy_init_iomem_resources(&code_resource, &data_resource);
1306
1307 /* EFI systems may still have VGA */
1308 request_resource(&iomem_resource, &video_ram_resource);
1309
1310 /* request I/O space for devices used on all i[345]86 PCs */
1311 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1312 request_resource(&ioport_resource, &standard_io_resources[i]);
1313
1314 /*
1315 * Search for the bigest gap in the low 32 bits of the e820
1316 * memory space.
1317 */
1318 last = 0x100000000ull;
1319 gapstart = 0x10000000;
1320 gapsize = 0x400000;
1321 i = e820.nr_map;
1322 while (--i >= 0) {
1323 unsigned long long start = e820.map[i].addr;
1324 unsigned long long end = start + e820.map[i].size;
1325
1326 /*
1327 * Since "last" is at most 4GB, we know we'll
1328 * fit in 32 bits if this condition is true
1329 */
1330 if (last > end) {
1331 unsigned long gap = last - end;
1332
1333 if (gap > gapsize) {
1334 gapsize = gap;
1335 gapstart = end;
1336 }
1337 }
1338 if (start < last)
1339 last = start;
1340 }
1341
1342 /*
1343 * Start allocating dynamic PCI memory a bit into the gap,
1344 * aligned up to the nearest megabyte.
1345 *
1346 * Question: should we try to pad it up a bit (do something
1347 * like " + (gapsize >> 3)" in there too?). We now have the
1348 * technology.
1349 */
1350 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1351
1352 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1353 pci_mem_start, gapstart, gapsize);
1354}
1355
1356/* Use inline assembly to define this because the nops are defined
1357 as inline assembly strings in the include files and we cannot
1358 get them easily into strings. */
1359asm("\t.data\nintelnops: "
1360 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1361 GENERIC_NOP7 GENERIC_NOP8);
1362asm("\t.data\nk8nops: "
1363 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1364 K8_NOP7 K8_NOP8);
1365asm("\t.data\nk7nops: "
1366 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1367 K7_NOP7 K7_NOP8);
1368
1369extern unsigned char intelnops[], k8nops[], k7nops[];
1370static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1371 NULL,
1372 intelnops,
1373 intelnops + 1,
1374 intelnops + 1 + 2,
1375 intelnops + 1 + 2 + 3,
1376 intelnops + 1 + 2 + 3 + 4,
1377 intelnops + 1 + 2 + 3 + 4 + 5,
1378 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1379 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1380};
1381static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1382 NULL,
1383 k8nops,
1384 k8nops + 1,
1385 k8nops + 1 + 2,
1386 k8nops + 1 + 2 + 3,
1387 k8nops + 1 + 2 + 3 + 4,
1388 k8nops + 1 + 2 + 3 + 4 + 5,
1389 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1390 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1391};
1392static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1393 NULL,
1394 k7nops,
1395 k7nops + 1,
1396 k7nops + 1 + 2,
1397 k7nops + 1 + 2 + 3,
1398 k7nops + 1 + 2 + 3 + 4,
1399 k7nops + 1 + 2 + 3 + 4 + 5,
1400 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1401 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1402};
1403static struct nop {
1404 int cpuid;
1405 unsigned char **noptable;
1406} noptypes[] = {
1407 { X86_FEATURE_K8, k8_nops },
1408 { X86_FEATURE_K7, k7_nops },
1409 { -1, NULL }
1410};
1411
1412/* Replace instructions with better alternatives for this CPU type.
1413
1414 This runs before SMP is initialized to avoid SMP problems with
1415 self modifying code. This implies that assymetric systems where
1416 APs have less capabilities than the boot processor are not handled.
Linus Torvalds72538d82005-07-22 18:29:40 -04001417 Tough. Make sure you disable such features by hand. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418void apply_alternatives(void *start, void *end)
1419{
1420 struct alt_instr *a;
1421 int diff, i, k;
1422 unsigned char **noptable = intel_nops;
1423 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1424 if (boot_cpu_has(noptypes[i].cpuid)) {
1425 noptable = noptypes[i].noptable;
1426 break;
1427 }
1428 }
1429 for (a = start; (void *)a < end; a++) {
1430 if (!boot_cpu_has(a->cpuid))
1431 continue;
1432 BUG_ON(a->replacementlen > a->instrlen);
1433 memcpy(a->instr, a->replacement, a->replacementlen);
1434 diff = a->instrlen - a->replacementlen;
1435 /* Pad the rest with nops */
1436 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1437 k = diff;
1438 if (k > ASM_NOP_MAX)
1439 k = ASM_NOP_MAX;
1440 memcpy(a->instr + i, noptable[k], k);
1441 }
1442 }
1443}
1444
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445void __init alternative_instructions(void)
1446{
1447 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448 apply_alternatives(__alt_instructions, __alt_instructions_end);
1449}
1450
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451static char * __init machine_specific_memory_setup(void);
1452
/*
 * Record whether an MCA bus is present.  Does nothing when the kernel
 * is built without CONFIG_MCA.
 */
static void set_mca_bus(int x)
{
#ifdef CONFIG_MCA
	MCA_bus = x;
#endif
}
1461
1462/*
1463 * Determine if we were loaded by an EFI loader. If so, then we have also been
1464 * passed the efi memmap, systab, etc., so we should use these data structures
1465 * for initialization. Note, the efi init code path is determined by the
1466 * global efi_enabled. This allows the same kernel image to be used on existing
1467 * systems (with a traditional BIOS) as well as on EFI systems.
1468 */
1469void __init setup_arch(char **cmdline_p)
1470{
1471 unsigned long max_low_pfn;
1472
1473 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1474 pre_setup_arch_hook();
1475 early_cpu_init();
1476
1477 /*
1478 * FIXME: This isn't an official loader_type right
1479 * now but does currently work with elilo.
1480 * If we were configured as an EFI kernel, check to make
1481 * sure that we were loaded correctly from elilo and that
1482 * the system table is valid. If not, then initialize normally.
1483 */
1484#ifdef CONFIG_EFI
1485 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1486 efi_enabled = 1;
1487#endif
1488
1489 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1490 drive_info = DRIVE_INFO;
1491 screen_info = SCREEN_INFO;
1492 edid_info = EDID_INFO;
1493 apm_info.bios = APM_BIOS_INFO;
1494 ist_info = IST_INFO;
1495 saved_videomode = VIDEO_MODE;
1496 if( SYS_DESC_TABLE.length != 0 ) {
1497 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1498 machine_id = SYS_DESC_TABLE.table[0];
1499 machine_submodel_id = SYS_DESC_TABLE.table[1];
1500 BIOS_revision = SYS_DESC_TABLE.table[2];
1501 }
1502 bootloader_type = LOADER_TYPE;
1503
1504#ifdef CONFIG_BLK_DEV_RAM
1505 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1506 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1507 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1508#endif
1509 ARCH_SETUP
1510 if (efi_enabled)
1511 efi_init();
1512 else {
1513 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1514 print_memory_map(machine_specific_memory_setup());
1515 }
1516
1517 copy_edd();
1518
1519 if (!MOUNT_ROOT_RDONLY)
1520 root_mountflags &= ~MS_RDONLY;
1521 init_mm.start_code = (unsigned long) _text;
1522 init_mm.end_code = (unsigned long) _etext;
1523 init_mm.end_data = (unsigned long) _edata;
1524 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1525
1526 code_resource.start = virt_to_phys(_text);
1527 code_resource.end = virt_to_phys(_etext)-1;
1528 data_resource.start = virt_to_phys(_etext);
1529 data_resource.end = virt_to_phys(_edata)-1;
1530
1531 parse_cmdline_early(cmdline_p);
1532
1533 max_low_pfn = setup_memory();
1534
1535 /*
1536 * NOTE: before this point _nobody_ is allowed to allocate
1537 * any memory using the bootmem allocator. Although the
1538 * alloctor is now initialised only the first 8Mb of the kernel
1539 * virtual address space has been mapped. All allocations before
1540 * paging_init() has completed must use the alloc_bootmem_low_pages()
1541 * variant (which allocates DMA'able memory) and care must be taken
1542 * not to exceed the 8Mb limit.
1543 */
1544
1545#ifdef CONFIG_SMP
1546 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1547#endif
1548 paging_init();
1549 remapped_pgdat_init();
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001550 sparse_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 zone_sizes_init();
1552
1553 /*
1554 * NOTE: at this point the bootmem allocator is fully available.
1555 */
1556
1557#ifdef CONFIG_EARLY_PRINTK
1558 {
1559 char *s = strstr(*cmdline_p, "earlyprintk=");
1560 if (s) {
1561 extern void setup_early_printk(char *);
1562
1563 setup_early_printk(s);
1564 printk("early console enabled\n");
1565 }
1566 }
1567#endif
1568
1569
1570 dmi_scan_machine();
1571
1572#ifdef CONFIG_X86_GENERICARCH
1573 generic_apic_probe(*cmdline_p);
1574#endif
1575 if (efi_enabled)
1576 efi_map_memmap();
1577
Len Brown888ba6c2005-08-24 12:07:20 -04001578#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579 /*
1580 * Parse the ACPI tables for possible boot-time SMP configuration.
1581 */
1582 acpi_boot_table_init();
1583 acpi_boot_init();
Alexander Nybergadaa7652005-05-31 14:39:27 -07001584#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585
1586#ifdef CONFIG_X86_LOCAL_APIC
1587 if (smp_found_config)
1588 get_smp_config();
1589#endif
1590
1591 register_memory();
1592
1593#ifdef CONFIG_VT
1594#if defined(CONFIG_VGA_CONSOLE)
1595 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1596 conswitchp = &vga_con;
1597#elif defined(CONFIG_DUMMY_CONSOLE)
1598 conswitchp = &dummy_con;
1599#endif
1600#endif
1601}
1602
1603#include "setup_arch_post.h"
1604/*
1605 * Local Variables:
1606 * mode:c
1607 * c-file-style:"k&r"
1608 * c-basic-offset:8
1609 * End:
1610 */