thp: madvise(MADV_NOHUGEPAGE)
Add madvise MADV_NOHUGEPAGE to mark regions that are not important to be
hugepage backed. Return -EINVAL if the vma is not of an anonymous type,
or the feature isn't built into the kernel. Never silently return
success.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 9b48c24d..a8b7e42 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -52,10 +52,12 @@
#define HPAGE_PMD_SIZE HPAGE_SIZE
#define transparent_hugepage_enabled(__vma) \
- (transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
- (transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \
- (__vma)->vm_flags & VM_HUGEPAGE))
+ ((transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \
+ ((__vma)->vm_flags & VM_HUGEPAGE))) && \
+ !((__vma)->vm_flags & VM_NOHUGEPAGE))
#define transparent_hugepage_defrag(__vma) \
((transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \
@@ -103,7 +105,7 @@
#if HPAGE_PMD_ORDER > MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
#endif
-extern int hugepage_madvise(unsigned long *vm_flags);
+extern int hugepage_madvise(unsigned long *vm_flags, int advice);
extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
@@ -141,7 +143,7 @@
do { } while (0)
#define wait_split_huge_page(__anon_vma, __pmd) \
do { } while (0)
-static inline int hugepage_madvise(unsigned long *vm_flags)
+static inline int hugepage_madvise(unsigned long *vm_flags, int advice)
{
BUG();
return 0;
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 552f318..6b394f0 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -38,9 +38,10 @@
static inline int khugepaged_enter(struct vm_area_struct *vma)
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
- if (khugepaged_always() ||
- (khugepaged_req_madv() &&
- vma->vm_flags & VM_HUGEPAGE))
+ if ((khugepaged_always() ||
+ (khugepaged_req_madv() &&
+ vma->vm_flags & VM_HUGEPAGE)) &&
+ !(vma->vm_flags & VM_NOHUGEPAGE))
if (__khugepaged_enter(vma->vm_mm))
return -ENOMEM;
return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ce97a2b..956a355 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -83,6 +83,7 @@
#define VM_GROWSUP 0x00000200
#else
#define VM_GROWSUP 0x00000000
+#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */
#endif
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f4f6041..fce667c0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -16,6 +16,7 @@
#include <linux/kthread.h>
#include <linux/khugepaged.h>
#include <linux/freezer.h>
+#include <linux/mman.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"
@@ -1388,18 +1389,36 @@
return ret;
}
-int hugepage_madvise(unsigned long *vm_flags)
+int hugepage_madvise(unsigned long *vm_flags, int advice)
{
- /*
- * Be somewhat over-protective like KSM for now!
- */
- if (*vm_flags & (VM_HUGEPAGE | VM_SHARED | VM_MAYSHARE |
- VM_PFNMAP | VM_IO | VM_DONTEXPAND |
- VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
- VM_MIXEDMAP | VM_SAO))
- return -EINVAL;
-
- *vm_flags |= VM_HUGEPAGE;
+ switch (advice) {
+ case MADV_HUGEPAGE:
+ /*
+ * Be somewhat over-protective like KSM for now!
+ */
+ if (*vm_flags & (VM_HUGEPAGE |
+ VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+ VM_MIXEDMAP | VM_SAO))
+ return -EINVAL;
+ *vm_flags &= ~VM_NOHUGEPAGE;
+ *vm_flags |= VM_HUGEPAGE;
+ break;
+ case MADV_NOHUGEPAGE:
+ /*
+ * Be somewhat over-protective like KSM for now!
+ */
+ if (*vm_flags & (VM_NOHUGEPAGE |
+ VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+ VM_MIXEDMAP | VM_SAO))
+ return -EINVAL;
+ *vm_flags &= ~VM_HUGEPAGE;
+ *vm_flags |= VM_NOHUGEPAGE;
+ break;
+ }
return 0;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index ecde40a..bbac126 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -72,7 +72,8 @@
goto out;
break;
case MADV_HUGEPAGE:
- error = hugepage_madvise(&new_flags);
+ case MADV_NOHUGEPAGE:
+ error = hugepage_madvise(&new_flags, behavior);
if (error)
goto out;
break;
@@ -290,6 +291,7 @@
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
case MADV_HUGEPAGE:
+ case MADV_NOHUGEPAGE:
#endif
return 1;