Merge branch 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
softlockup: fix invalid proc_handler for softlockup_panic
softlockup: fix watchdog task wakeup frequency
softlockup: fix watchdog task wakeup frequency
softlockup: show irqtrace
softlockup: print a module list on being stuck
softlockup: fix NMI hangs due to lock race - 2.6.26-rc regression
softlockup: fix false positives on nohz if CPU is 100% idle for more than 60 seconds
softlockup: fix softlockup_thresh fix
softlockup: fix softlockup_thresh unaligned access and disable detection at runtime
softlockup: allow panic on lockup
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 30d44b7..47e7d87 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2034,6 +2034,9 @@
snd-ymfpci= [HW,ALSA]
+ softlockup_panic=
+ [KNL] Should the soft-lockup detector generate panics.
+
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/sonypi.txt
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1941d8b..af443a0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -295,10 +295,11 @@
extern void spawn_softlockup_task(void);
extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void);
-extern unsigned long softlockup_thresh;
+extern unsigned int softlockup_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
+extern int softlockup_thresh;
#else
static inline void softlockup_tick(void)
{
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index a272d78..7bd8d1a 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -13,6 +13,7 @@
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
+#include <linux/lockdep.h>
#include <linux/notifier.h>
#include <linux/module.h>
@@ -25,7 +26,22 @@
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
static int __read_mostly did_panic;
-unsigned long __read_mostly softlockup_thresh = 60;
+int __read_mostly softlockup_thresh = 60;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * soft-lockup occurs:
+ */
+unsigned int __read_mostly softlockup_panic =
+ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+
+static int __init softlockup_panic_setup(char *str)
+{
+ softlockup_panic = simple_strtoul(str, NULL, 0);
+
+ return 1;
+}
+__setup("softlockup_panic=", softlockup_panic_setup);
static int
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -84,6 +100,14 @@
struct pt_regs *regs = get_irq_regs();
unsigned long now;
+ /* Is detection switched off? */
+ if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
+ /* Be sure we don't false trigger if switched back on */
+ if (touch_timestamp)
+ per_cpu(touch_timestamp, this_cpu) = 0;
+ return;
+ }
+
if (touch_timestamp == 0) {
__touch_softlockup_watchdog();
return;
@@ -92,11 +116,8 @@
print_timestamp = per_cpu(print_timestamp, this_cpu);
/* report at most once a second */
- if ((print_timestamp >= touch_timestamp &&
- print_timestamp < (touch_timestamp + 1)) ||
- did_panic || !per_cpu(watchdog_task, this_cpu)) {
+ if (print_timestamp == touch_timestamp || did_panic)
return;
- }
/* do not print during early bootup: */
if (unlikely(system_state != SYSTEM_RUNNING)) {
@@ -106,8 +127,11 @@
now = get_timestamp(this_cpu);
- /* Wake up the high-prio watchdog task every second: */
- if (now > (touch_timestamp + 1))
+ /*
+ * Wake up the high-prio watchdog task twice per
+ * threshold timespan.
+ */
+ if (now > touch_timestamp + softlockup_thresh/2)
wake_up_process(per_cpu(watchdog_task, this_cpu));
/* Warn about unreasonable delays: */
@@ -121,11 +145,15 @@
this_cpu, now - touch_timestamp,
current->comm, task_pid_nr(current));
print_modules();
+ print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
spin_unlock(&print_lock);
+
+ if (softlockup_panic)
+ panic("softlockup: hung tasks");
}
/*
@@ -178,6 +206,9 @@
t->last_switch_timestamp = now;
touch_nmi_watchdog();
+
+ if (softlockup_panic)
+ panic("softlockup: blocked tasks");
}
/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b859e6b..2a7b9d8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -88,12 +88,13 @@
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
/* Constants used for minimum and maximum */
-#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
+#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
static int one = 1;
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
static int sixty = 60;
+static int neg_one = -1;
#endif
#ifdef CONFIG_MMU
@@ -739,13 +740,24 @@
#ifdef CONFIG_DETECT_SOFTLOCKUP
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "softlockup_panic",
+ .data = &softlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "softlockup_thresh",
.data = &softlockup_thresh,
- .maxlen = sizeof(unsigned long),
+ .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
- .extra1 = &one,
+ .extra1 = &neg_one,
.extra2 = &sixty,
},
{
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index beef7cc..942fc7c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -140,8 +140,6 @@
if (!ts->tick_stopped)
return;
- touch_softlockup_watchdog();
-
cpu_clear(cpu, nohz_cpu_mask);
now = ktime_get();
ts->idle_waketime = now;
@@ -149,6 +147,8 @@
local_irq_save(flags);
tick_do_update_jiffies64(now);
local_irq_restore(flags);
+
+ touch_softlockup_watchdog();
}
void tick_nohz_stop_idle(int cpu)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba106db..882c510 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -150,7 +150,7 @@
help
Say Y here to enable the kernel to detect "soft lockups",
which are bugs that cause the kernel to loop in kernel
- mode for more than 10 seconds, without giving other tasks a
+ mode for more than 60 seconds, without giving other tasks a
chance to run.
When a soft-lockup is detected, the kernel will print the
@@ -162,6 +162,30 @@
can be detected via the NMI-watchdog, on platforms that
support it.)
+config BOOTPARAM_SOFTLOCKUP_PANIC
+ bool "Panic (Reboot) On Soft Lockups"
+ depends on DETECT_SOFTLOCKUP
+ help
+ Say Y here to enable the kernel to panic on "soft lockups",
+ which are bugs that cause the kernel to loop in kernel
+ mode for more than 60 seconds, without giving other tasks a
+ chance to run.
+
+ The panic can be used in combination with panic_timeout,
+ to cause the system to reboot automatically after a
+ lockup has been detected. This feature is useful for
+ high-availability systems that have uptime guarantees and
+ where a lockup must be resolved ASAP.
+
+ Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+ int
+ depends on DETECT_SOFTLOCKUP
+ range 0 1
+ default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+ default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
config SCHED_DEBUG
bool "Collect scheduler debugging info"
depends on DEBUG_KERNEL && PROC_FS