tracing: Show sample std dev in function profiling

When combined with function graph tracing the ftrace function profiler
also prints the average run time of functions. While this gives us some
good information, it doesn't tell us anything about the variance of the
run times of the function. This change prints out the s^2 sample
standard deviation alongside the average.

This change adds one entry to the profile record structure. This
increases the memory footprint of the function profiler by 1/3 on a
32-bit system, and by 1/5 on a 64-bit system when function graphing is
enabled, though the memory is only allocated when the profiler is turned
on. During the profiling, one extra line of code adds the squared
calltime to the new record entry, so this should not adversly affect
performance.

Note that the square of the sample standard deviation is printed because
there is no sqrt implementation for unsigned long long in the kernel.

Signed-off-by: Chase Douglas <chase.douglas@canonical.com>
LKML-Reference: <1272304925-2436-1-git-send-email-chase.douglas@canonical.com>

[ fixed comment about ns^2 -> us^2 conversion ]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2404b59..3bcb340 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -264,6 +264,7 @@
 	unsigned long			counter;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	unsigned long long		time;
+	unsigned long long		time_squared;
 #endif
 };
 
@@ -366,9 +367,9 @@
 {
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	seq_printf(m, "  Function                               "
-		   "Hit    Time            Avg\n"
+		   "Hit    Time            Avg             s^2\n"
 		      "  --------                               "
-		   "---    ----            ---\n");
+		   "---    ----            ---             ---\n");
 #else
 	seq_printf(m, "  Function                               Hit\n"
 		      "  --------                               ---\n");
@@ -384,6 +385,7 @@
 	static DEFINE_MUTEX(mutex);
 	static struct trace_seq s;
 	unsigned long long avg;
+	unsigned long long stddev;
 #endif
 
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
@@ -394,11 +396,25 @@
 	avg = rec->time;
 	do_div(avg, rec->counter);
 
+	/* Sample standard deviation (s^2) */
+	if (rec->counter <= 1)
+		stddev = 0;
+	else {
+		stddev = rec->time_squared - rec->counter * avg * avg;
+		/*
+		 * Divide only 1000 for ns^2 -> us^2 conversion.
+		 * trace_print_graph_duration will divide 1000 again.
+		 */
+		do_div(stddev, (rec->counter - 1) * 1000);
+	}
+
 	mutex_lock(&mutex);
 	trace_seq_init(&s);
 	trace_print_graph_duration(rec->time, &s);
 	trace_seq_puts(&s, "    ");
 	trace_print_graph_duration(avg, &s);
+	trace_seq_puts(&s, "    ");
+	trace_print_graph_duration(stddev, &s);
 	trace_print_seq(m, &s);
 	mutex_unlock(&mutex);
 #endif
@@ -668,8 +684,10 @@
 	}
 
 	rec = ftrace_find_profiled_func(stat, trace->func);
-	if (rec)
+	if (rec) {
 		rec->time += calltime;
+		rec->time_squared += calltime * calltime;
+	}
 
  out:
 	local_irq_restore(flags);