mm: allow reserve_bootmem() to cross nodes
Split reserve_bootmem_core() into two functions: one that checks for
conflicts, and one that sets the bits.
Also make reserve_bootmem() loop over bdata_list so that a reservation
can span multiple nodes.
Users could be crashkernel, ramdisk, etc., in case the range provided
by those external users crosses node boundaries.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 0f30bc8..b679164 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -111,44 +111,74 @@
* might be used for boot-time allocations - or it might get added
* to the free page pool later on.
*/
-static int __init reserve_bootmem_core(bootmem_data_t *bdata,
+static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
unsigned long addr, unsigned long size, int flags)
{
unsigned long sidx, eidx;
unsigned long i;
- int ret;
+
+ BUG_ON(!size);
+
+ /* out of range, don't hold other */
+ if (addr + size < bdata->node_boot_start ||
+ PFN_DOWN(addr) > bdata->node_low_pfn)
+ return 0;
/*
- * round up, partially reserved pages are considered
- * fully reserved.
+ * Round up to index to the range.
*/
- BUG_ON(!size);
- BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
- BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
- BUG_ON(addr < bdata->node_boot_start);
+ if (addr > bdata->node_boot_start)
+ sidx= PFN_DOWN(addr - bdata->node_boot_start);
+ else
+ sidx = 0;
- sidx = PFN_DOWN(addr - bdata->node_boot_start);
eidx = PFN_UP(addr + size - bdata->node_boot_start);
+ if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+ eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
- for (i = sidx; i < eidx; i++)
+ for (i = sidx; i < eidx; i++) {
+ if (test_bit(i, bdata->node_bootmem_map)) {
+ if (flags & BOOTMEM_EXCLUSIVE)
+ return -EBUSY;
+ }
+ }
+
+ return 0;
+
+}
+
+static void __init reserve_bootmem_core(bootmem_data_t *bdata,
+ unsigned long addr, unsigned long size, int flags)
+{
+ unsigned long sidx, eidx;
+ unsigned long i;
+
+ BUG_ON(!size);
+
+ /* out of range */
+ if (addr + size < bdata->node_boot_start ||
+ PFN_DOWN(addr) > bdata->node_low_pfn)
+ return;
+
+ /*
+ * Round up to index to the range.
+ */
+ if (addr > bdata->node_boot_start)
+ sidx= PFN_DOWN(addr - bdata->node_boot_start);
+ else
+ sidx = 0;
+
+ eidx = PFN_UP(addr + size - bdata->node_boot_start);
+ if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+ eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+ for (i = sidx; i < eidx; i++) {
if (test_and_set_bit(i, bdata->node_bootmem_map)) {
#ifdef CONFIG_DEBUG_BOOTMEM
printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
#endif
- if (flags & BOOTMEM_EXCLUSIVE) {
- ret = -EBUSY;
- goto err;
- }
}
-
- return 0;
-
-err:
- /* unreserve memory we accidentally reserved */
- for (i--; i >= sidx; i--)
- clear_bit(i, bdata->node_bootmem_map);
-
- return ret;
+ }
}
static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
@@ -415,6 +445,11 @@
void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size, int flags)
{
+ int ret;
+
+ ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+ if (ret < 0)
+ return;
reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
}
@@ -440,7 +475,18 @@
int __init reserve_bootmem(unsigned long addr, unsigned long size,
int flags)
{
- return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
+ bootmem_data_t *bdata;
+ int ret;
+
+ list_for_each_entry(bdata, &bdata_list, list) {
+ ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+ if (ret < 0)
+ return ret;
+ }
+ list_for_each_entry(bdata, &bdata_list, list)
+ reserve_bootmem_core(bdata, addr, size, flags);
+
+ return 0;
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */