[PATCH] Add __GFP_THISNODE to avoid fallback to other nodes and ignore cpuset/memory policy restrictions
Add a new gfp flag __GFP_THISNODE to avoid fallback to other nodes. This
flag is essential if a kernel component requires memory to be located on a
certain node. It will be needed for alloc_pages_node() to force allocation
on the indicated node and for alloc_pages() to force allocation on the
current node.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 63ab88a..0eda5b6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -45,6 +45,7 @@
#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -53,7 +54,7 @@
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_HARDWALL)
+ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4ea6f0d..7694036 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2316,7 +2316,7 @@
const struct cpuset *cs; /* current cpuset ancestors */
int allowed; /* is allocation in zone z allowed? */
- if (in_interrupt())
+ if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
return 1;
node = z->zone_pgdat->node_id;
might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c3429a7..8002e1f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1290,7 +1290,7 @@
if ((gfp & __GFP_WAIT) && !in_interrupt())
cpuset_update_task_memory_state();
- if (!pol || in_interrupt())
+ if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
pol = &default_policy;
if (pol->policy == MPOL_INTERLEAVE)
return alloc_page_interleave(gfp, order, interleave_nodes(pol));
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 208a6b0..ea498788 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -893,6 +893,9 @@
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
do {
+ if (unlikely((gfp_mask & __GFP_THISNODE) &&
+ (*z)->zone_pgdat != zonelist->zones[0]->zone_pgdat))
+ break;
if ((alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed(*z, gfp_mask))
continue;