BACKPORT: FROMGIT: hugetlb/userfaultfd: forbid huge pmd sharing when uffd enabled
Huge pmd sharing can cause problems for userfaultfd. The thing is that
userfaultfd is running its logic based on the special bits on page table
entries, however the huge pmd sharing could potentially share page table
entries for different address ranges. That could cause issues on either:
- When sharing huge pmd page tables for an uffd write protected range, the
newly mapped huge pmd range will also be write protected unexpectedly, or,
- When we try to write protect a range of huge pmd shared range, we'll first
do huge_pmd_unshare() in hugetlb_change_protection(), however that also
means the UFFDIO_WRITEPROTECT could be silently skipped for the shared
region, which could lead to data loss.
While at it, a few other things are done together:
- Move want_pmd_share() from mm/hugetlb.c into linux/hugetlb.h, because
that's definitely something that arch code would like to use too
- ARM64 currently checks CONFIG_ARCH_WANT_HUGE_PMD_SHARE directly when
trying to share huge pmd. Switch to the want_pmd_share() helper.
While at it, move the vma_shareable() check from huge_pmd_share() into want_pmd_share().
Link: https://lkml.kernel.org/r/20210218231202.15426-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Adam Ruprecht <ruprecht@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Cannon Matthews <cannonmatthews@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Michal Koutný" <mkoutny@suse.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oliver Upton <oupton@google.com>
Cc: Shaohua Li <shli@fb.com>
Cc: Shawn Anastasio <shawn@anastas.io>
Cc: Steven Price <steven.price@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
(cherry picked from commit ab6a0d00a63f92f1f0d220274fa989eb75c09f2b
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git akpm)
Link: https://lore.kernel.org/patchwork/patch/1382207/
Conflicts:
include/linux/hugetlb.h
mm/hugetlb.c
(Manual rebase)
Signed-off-by: Lokesh Gidra <lokeshgidra@google.com>
Bug: 160737021
Bug: 169683130
Change-Id: Ie2dff7ab31600cae78914e3278be61516844394e
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 4d690cc..54fae25 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -231,9 +231,8 @@
*/
pte = pte_alloc_map(mm, pmd, addr);
} else if (sz == PMD_SIZE) {
- if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
- pud_none(*pud))
- pte = huge_pmd_share(mm, vma, addr, pud);
+ if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pud)))
+ pte = huge_pmd_share(mm, vma, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
} else if (sz == (PMD_SIZE * CONT_PMDS)) {
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6dbd2e9..5d4c633 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -633,4 +633,6 @@
}
#endif
+bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
+
#endif /* _LINUX_HUGETLB_H */
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f2f3b68..c92926c 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -43,6 +43,15 @@
return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
}
+/*
+ * uffd-wp keeps its write-protect information in each page table entry,
+ * so huge pmd sharing must never be enabled on a uffd-wp registered vma.
+ */
+static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
+{
+	return (vma->vm_flags & VM_UFFD_WP) != 0;
+}
+
static inline bool userfaultfd_missing(struct vm_area_struct *vma)
{
return vma->vm_flags & VM_UFFD_MISSING;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 22b9716..ed6dac0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4688,6 +4688,18 @@
return false;
}
+/*
+ * Decide whether a huge pmd page table may be shared for @addr in @vma.
+ *
+ * Returns false when CONFIG_ARCH_WANT_HUGE_PMD_SHARE is not set, or when
+ * CONFIG_USERFAULTFD is set and uffd_disable_huge_pmd_share() rejects the
+ * vma.  Otherwise the answer is whatever vma_shareable(vma, addr) reports.
+ */
+bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr)
+{
+#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+ return false;
+#endif
+#ifdef CONFIG_USERFAULTFD
+ if (uffd_disable_huge_pmd_share(vma))
+ return false;
+#endif
+ return vma_shareable(vma, addr);
+}
+
/*
* Determine if start,end range within vma could be mapped by shared pmd.
* If yes, adjust start and end to cover range associated with possible
@@ -4736,9 +4748,6 @@
pte_t *pte;
spinlock_t *ptl;
- if (!vma_shareable(vma, addr))
- return (pte_t *)pmd_alloc(mm, pud, addr);
-
i_mmap_lock_write(mapping);
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
@@ -4808,7 +4817,7 @@
*addr |= PUD_SIZE - PMD_SIZE;
return 1;
}
-#define want_pmd_share() (1)
+
#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pud_t *pud)
@@ -4825,7 +4834,6 @@
unsigned long *start, unsigned long *end)
{
}
-#define want_pmd_share() (0)
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
@@ -4847,7 +4855,7 @@
pte = (pte_t *)pud;
} else {
BUG_ON(sz != PMD_SIZE);
- if (want_pmd_share() && pud_none(*pud))
+ if (want_pmd_share(vma, addr) && pud_none(*pud))
pte = huge_pmd_share(mm, vma, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);