mm: Fix boot crash in mm_alloc()
Thomas Gleixner reports that we now have a boot crash triggered by
CONFIG_CPUMASK_OFFSTACK=y:
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<c11ae035>] find_next_bit+0x55/0xb0
Call Trace:
[<c11addda>] cpumask_any_but+0x2a/0x70
[<c102396b>] flush_tlb_mm+0x2b/0x80
[<c1022705>] pud_populate+0x35/0x50
[<c10227ba>] pgd_alloc+0x9a/0xf0
[<c103a3fc>] mm_init+0xec/0x120
[<c103a7a3>] mm_alloc+0x53/0xd0
which was introduced by commit de03c72cfce5 ("mm: convert
mm->cpu_vm_cpumask into cpumask_var_t"), and is due to wrong ordering of
mm_init() vs mm_init_cpumask
Thomas wrote a patch to just fix the ordering of initialization, but I
hate the new double allocation in the fork path, so I ended up instead
doing some more radical surgery to clean it all up.
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Ingo Molnar <mingo@elte.hu>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/kernel/fork.c b/kernel/fork.c
index ca406d9..0276c30 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -484,20 +484,6 @@
#endif
}
-int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
-{
-#ifdef CONFIG_CPUMASK_OFFSTACK
- if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
- return -ENOMEM;
-
- if (oldmm)
- cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
- else
- memset(mm_cpumask(mm), 0, cpumask_size());
-#endif
- return 0;
-}
-
static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
{
atomic_set(&mm->mm_users, 1);
@@ -538,17 +524,8 @@
return NULL;
memset(mm, 0, sizeof(*mm));
- mm = mm_init(mm, current);
- if (!mm)
- return NULL;
-
- if (mm_init_cpumask(mm, NULL)) {
- mm_free_pgd(mm);
- free_mm(mm);
- return NULL;
- }
-
- return mm;
+ mm_init_cpumask(mm);
+ return mm_init(mm, current);
}
/*
@@ -559,7 +536,6 @@
void __mmdrop(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm);
- free_cpumask_var(mm->cpu_vm_mask_var);
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
@@ -753,6 +729,7 @@
goto fail_nomem;
memcpy(mm, oldmm, sizeof(*mm));
+ mm_init_cpumask(mm);
/* Initializing for Swap token stuff */
mm->token_priority = 0;
@@ -765,9 +742,6 @@
if (!mm_init(mm, tsk))
goto fail_nomem;
- if (mm_init_cpumask(mm, oldmm))
- goto fail_nocpumask;
-
if (init_new_context(tsk, mm))
goto fail_nocontext;
@@ -794,9 +768,6 @@
return NULL;
fail_nocontext:
- free_cpumask_var(mm->cpu_vm_mask_var);
-
-fail_nocpumask:
/*
* If init_new_context() failed, we cannot use mmput() to free the mm
* because it calls destroy_context()
@@ -1591,6 +1562,13 @@
fs_cachep = kmem_cache_create("fs_cache",
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+ /*
+ * FIXME! The "sizeof(struct mm_struct)" currently includes the
+ * whole struct cpumask for the OFFSTACK case. We could change
+ * this to *only* allocate as much of it as required by the
+ * maximum number of CPU's we can ever have. The cpumask_allocation
+ * is at the end of the structure, exactly for that reason.
+ */
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);