mempolicy: add MPOL_F_RELATIVE_NODES flag
Adds another optional mode flag, MPOL_F_RELATIVE_NODES, that specifies
nodemasks passed via set_mempolicy() or mbind() should be considered relative
to the current task's mems_allowed.
When the mempolicy is created, the passed nodemask is folded and mapped onto
the current task's mems_allowed. For example, consider a task using
set_mempolicy() to pass MPOL_INTERLEAVE | MPOL_F_RELATIVE_NODES with a
nodemask of 1-3. If current's mems_allowed is 4-7, the effected nodemask is
5-7 (the second, third, and fourth node of mems_allowed).
If the same task is attached to a cpuset, the mempolicy nodemask is rebound
each time the mems are changed. Some possible rebinds and results are:
mems result
1-3 1-3
1-7 2-4
1,5-6 1,5-6
1,5-7 5-7
Likewise, the zonelist built for MPOL_BIND acts on the set of zones assigned
to the resultant nodemask from the relative remap.
In the MPOL_PREFERRED case, the preferred node is remapped from the currently
effected nodemask to the relative nodemask.
This mempolicy mode flag was conceived of by Paul Jackson <pj@sgi.com>.
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d59b1e7..ffd3be6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -136,7 +136,15 @@
static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
{
- return pol->flags & MPOL_F_STATIC_NODES;
+ return pol->flags & (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES);
+}
+
+static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig,
+ const nodemask_t *rel)
+{
+ nodemask_t tmp;
+ nodes_fold(tmp, *orig, nodes_weight(*rel));
+ nodes_onto(*ret, tmp, *rel);
}
/* Create a new policy */
@@ -157,7 +165,12 @@
return ERR_PTR(-ENOMEM);
atomic_set(&policy->refcnt, 1);
cpuset_update_task_memory_state();
- nodes_and(cpuset_context_nmask, *nodes, cpuset_current_mems_allowed);
+ if (flags & MPOL_F_RELATIVE_NODES)
+ mpol_relative_nodemask(&cpuset_context_nmask, nodes,
+ &cpuset_current_mems_allowed);
+ else
+ nodes_and(cpuset_context_nmask, *nodes,
+ cpuset_current_mems_allowed);
switch (mode) {
case MPOL_INTERLEAVE:
if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask))
@@ -873,6 +886,9 @@
mode &= ~MPOL_MODE_FLAGS;
if (mode >= MPOL_MAX)
return -EINVAL;
+ if ((mode_flags & MPOL_F_STATIC_NODES) &&
+ (mode_flags & MPOL_F_RELATIVE_NODES))
+ return -EINVAL;
err = get_nodes(&nodes, nmask, maxnode);
if (err)
return err;
@@ -891,6 +907,8 @@
mode &= ~MPOL_MODE_FLAGS;
if ((unsigned int)mode >= MPOL_MAX)
return -EINVAL;
+ if ((flags & MPOL_F_STATIC_NODES) && (flags & MPOL_F_RELATIVE_NODES))
+ return -EINVAL;
err = get_nodes(&nodes, nmask, maxnode);
if (err)
return err;
@@ -1745,10 +1763,12 @@
{
nodemask_t tmp;
int static_nodes;
+ int relative_nodes;
if (!pol)
return;
static_nodes = pol->flags & MPOL_F_STATIC_NODES;
+ relative_nodes = pol->flags & MPOL_F_RELATIVE_NODES;
if (!mpol_store_user_nodemask(pol) &&
nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
return;
@@ -1761,6 +1781,9 @@
case MPOL_INTERLEAVE:
if (static_nodes)
nodes_and(tmp, pol->w.user_nodemask, *newmask);
+ else if (relative_nodes)
+ mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
+ newmask);
else {
nodes_remap(tmp, pol->v.nodes,
pol->w.cpuset_mems_allowed, *newmask);
@@ -1783,6 +1806,10 @@
pol->v.preferred_node = node;
else
pol->v.preferred_node = -1;
+ } else if (relative_nodes) {
+ mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
+ newmask);
+ pol->v.preferred_node = first_node(tmp);
} else {
pol->v.preferred_node = node_remap(pol->v.preferred_node,
pol->w.cpuset_mems_allowed, *newmask);
@@ -1878,6 +1905,8 @@
if (flags & MPOL_F_STATIC_NODES)
p += sprintf(p, "%sstatic", need_bar++ ? "|" : "");
+ if (flags & MPOL_F_RELATIVE_NODES)
+ p += sprintf(p, "%srelative", need_bar++ ? "|" : "");
}
if (!nodes_empty(nodes)) {