[PATCH] Add debugging feature /proc/timer_stat

Add /proc/timer_stats support: debugging feature to profile timer expiration.
Both the starting site, process/PID and the expiration function is captured.
This allows the quick identification of timer event sources in a system.

Sample output:

# echo 1 > /proc/timer_stats
# cat /proc/timer_stats
Timer Stats Version: v0.1
Sample period: 4.010 s
  24,     0 swapper          hrtimer_stop_sched_tick (hrtimer_sched_tick)
  11,     0 swapper          sk_reset_timer (tcp_delack_timer)
   6,     0 swapper          hrtimer_stop_sched_tick (hrtimer_sched_tick)
   2,     1 swapper          queue_delayed_work_on (delayed_work_timer_fn)
  17,     0 swapper          hrtimer_restart_sched_tick (hrtimer_sched_tick)
   2,     1 swapper          queue_delayed_work_on (delayed_work_timer_fn)
   4,  2050 pcscd            do_nanosleep (hrtimer_wakeup)
   5,  4179 sshd             sk_reset_timer (tcp_write_timer)
   4,  2248 yum-updatesd     schedule_timeout (process_timeout)
  18,     0 swapper          hrtimer_restart_sched_tick (hrtimer_sched_tick)
   3,     0 swapper          sk_reset_timer (tcp_delack_timer)
   1,     1 swapper          neigh_table_init_no_netlink (neigh_periodic_timer)
   2,     1 swapper          e1000_up (e1000_watchdog)
   1,     1 init             schedule_timeout (process_timeout)
100 total events, 25.24 events/sec

[ cleanups and hrtimers support from Thomas Gleixner <tglx@linutronix.de> ]
[bunk@stusta.de: nr_entries can become static]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 62aad8e..476cb0c 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -585,6 +585,18 @@
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
+#ifdef CONFIG_TIMER_STATS
+void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+{
+	if (timer->start_site)
+		return;
+
+	timer->start_site = addr;
+	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+	timer->start_pid = current->pid;
+}
+#endif
+
 /*
  * Counterpart to lock_timer_base above:
  */
@@ -743,6 +755,7 @@
 		 * reprogramming happens in the interrupt handler. This is a
 		 * rare case and less expensive than a smp call.
 		 */
+		timer_stats_hrtimer_clear_start_info(timer);
 		reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
 		__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
 				 reprogram);
@@ -791,6 +804,8 @@
 	}
 	timer->expires = tim;
 
+	timer_stats_hrtimer_set_start_info(timer);
+
 	enqueue_hrtimer(timer, new_base, base == new_base);
 
 	unlock_hrtimer_base(timer, &flags);
@@ -925,6 +940,12 @@
 
 	timer->base = &cpu_base->clock_base[clock_id];
 	hrtimer_init_timer_hres(timer);
+
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+	timer->start_pid = -1;
+	memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
 }
 EXPORT_SYMBOL_GPL(hrtimer_init);
 
@@ -1006,6 +1027,7 @@
 
 			__remove_hrtimer(timer, base,
 					 HRTIMER_STATE_CALLBACK, 0);
+			timer_stats_account_hrtimer(timer);
 
 			/*
 			 * Note: We clear the CALLBACK bit after
@@ -1050,6 +1072,8 @@
 		timer = list_entry(cpu_base->cb_pending.next,
 				   struct hrtimer, cb_entry);
 
+		timer_stats_account_hrtimer(timer);
+
 		fn = timer->function;
 		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
 		spin_unlock_irq(&cpu_base->lock);
@@ -1106,6 +1130,8 @@
 		if (base->softirq_time.tv64 <= timer->expires.tv64)
 			break;
 
+		timer_stats_account_hrtimer(timer);
+
 		fn = timer->function;
 		__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
 		spin_unlock_irq(&cpu_base->lock);