oom: add oom_kill_allocating_task sysctl
Adds a new sysctl, 'oom_kill_allocating_task', which will automatically kill
the OOM-triggering task instead of scanning through the tasklist to find a
memory-hogging target. This is helpful for systems with an insanely large
number of tasks where scanning the tasklist significantly degrades
performance.
Cc: Andrea Arcangeli <andrea@suse.de>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index a0ccc5b..17346da 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -31,6 +31,7 @@
- min_unmapped_ratio
- min_slab_ratio
- panic_on_oom
+- oom_kill_allocating_task
- mmap_min_address
- numa_zonelist_order
@@ -220,6 +221,27 @@
1 and 2 are for failover of clustering. Please select either
according to your policy of failover.
+=============================================================
+
+oom_kill_allocating_task
+
+This enables or disables killing the OOM-triggering task in
+out-of-memory situations.
+
+If this is set to zero, the OOM killer will scan through the entire
+tasklist and select a task based on heuristics to kill. This normally
+selects a rogue memory-hogging task that frees up a large amount of
+memory when killed.
+
+If this is set to non-zero, the OOM killer simply kills the task that
+triggered the out-of-memory condition. This avoids the expensive
+tasklist scan.
+
+If panic_on_oom is selected, it takes precedence over whatever value
+is used in oom_kill_allocating_task.
+
+The default value is 0.
+
==============================================================
mmap_min_addr
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c676b5e..5e63de0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -63,6 +63,7 @@
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int sysctl_panic_on_oom;
+extern int sysctl_oom_kill_allocating_task;
extern int max_threads;
extern int core_uses_pid;
extern int suid_dumpable;
@@ -781,6 +782,14 @@
.proc_handler = &proc_dointvec,
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "oom_kill_allocating_task",
+ .data = &sysctl_oom_kill_allocating_task,
+ .maxlen = sizeof(sysctl_oom_kill_allocating_task),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = VM_OVERCOMMIT_RATIO,
.procname = "overcommit_ratio",
.data = &sysctl_overcommit_ratio,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6e999c8..00d0bd7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -27,6 +27,7 @@
#include <linux/notifier.h>
int sysctl_panic_on_oom;
+int sysctl_oom_kill_allocating_task;
static DEFINE_MUTEX(zone_scan_mutex);
/* #define DEBUG */
@@ -471,14 +472,16 @@
"No available memory (MPOL_BIND)");
break;
- case CONSTRAINT_CPUSET:
- oom_kill_process(current, points,
- "No available memory in cpuset");
- break;
-
case CONSTRAINT_NONE:
if (sysctl_panic_on_oom)
panic("out of memory. panic_on_oom is selected\n");
+ /* Fall-through */
+ case CONSTRAINT_CPUSET:
+ if (sysctl_oom_kill_allocating_task) {
+ oom_kill_process(current, points,
+ "Out of memory (oom_kill_allocating_task)");
+ break;
+ }
retry:
/*
* Rambo mode: Shoot down a process and hope it solves whatever