perf report: Add support for callchain graph output
Currently, the printing of callchains is done in a single
vertical level, this is the "flat" mode:
8.25% [k] copy_user_generic_string
4.19%
copy_user_generic_string
generic_file_aio_read
do_sync_read
vfs_read
sys_pread64
system_call_fastpath
pread64
This patch introduces a new "graph" mode which provides a
hierarchical output of factorized paths recursively sorted:
8.25% [k] copy_user_generic_string
|
|--4.31%-- generic_file_aio_read
| do_sync_read
| vfs_read
| |
| |--4.19%-- sys_pread64
| | system_call_fastpath
| | pread64
| |
| --0.12%-- sys_read
| system_call_fastpath
| __read
|
|--3.24%-- generic_file_buffered_write
| __generic_file_aio_write_nolock
| generic_file_aio_write
| do_sync_write
| reiserfs_file_write
| vfs_write
| |
| |--3.14%-- sys_pwrite64
| | system_call_fastpath
| | __pwrite64
| |
| --0.10%-- sys_write
[...]
The command line has then changed.
By providing the -c option, the callchain will output in the
flat mode by default.
But you can override it:
perf report -c graph
or
perf report -c flat
You can also pass the abreviated mode:
perf report -c g
or
perf report -c gra
will both make use of the graph mode.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1246550301-8954-3-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b44476c..0ca4638 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -59,6 +59,7 @@
static int exclude_other = 1;
static int callchain;
+static enum chain_mode callchain_mode;
static u64 sample_type;
@@ -787,8 +788,103 @@
return cmp;
}
+static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask)
+{
+ int i;
+ size_t ret = 0;
+
+ ret += fprintf(fp, "%s", " ");
+
+ for (i = 0; i < depth; i++)
+ if (depth_mask & (1 << i))
+ ret += fprintf(fp, "| ");
+ else
+ ret += fprintf(fp, " ");
+
+ ret += fprintf(fp, "\n");
+
+ return ret;
+}
static size_t
-callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples)
+ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
+ int depth_mask, int count, u64 total_samples,
+ int hits)
+{
+ int i;
+ size_t ret = 0;
+
+ ret += fprintf(fp, "%s", " ");
+ for (i = 0; i < depth; i++) {
+ if (depth_mask & (1 << i))
+ ret += fprintf(fp, "|");
+ else
+ ret += fprintf(fp, " ");
+ if (!count && i == depth - 1) {
+ double percent;
+
+ percent = hits * 100.0 / total_samples;
+ ret += fprintf(fp, "--%2.2f%%-- ", percent);
+ } else
+ ret += fprintf(fp, "%s", " ");
+ }
+ if (chain->sym)
+ ret += fprintf(fp, "%s\n", chain->sym->name);
+ else
+ ret += fprintf(fp, "%p\n", (void *)(long)chain->ip);
+
+ return ret;
+}
+
+static size_t
+callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
+ u64 total_samples, int depth, int depth_mask)
+{
+ struct rb_node *node, *next;
+ struct callchain_node *child;
+ struct callchain_list *chain;
+ int new_depth_mask = depth_mask;
+ size_t ret = 0;
+ int i;
+
+ node = rb_first(&self->rb_root);
+ while (node) {
+ child = rb_entry(node, struct callchain_node, rb_node);
+
+ /*
+ * The depth mask manages the output of pipes that show
+ * the depth. We don't want to keep the pipes of the current
+ * level for the last child of this depth
+ */
+ next = rb_next(node);
+ if (!next)
+ new_depth_mask &= ~(1 << (depth - 1));
+
+ /*
+ * But we keep the older depth mask for the line seperator
+ * to keep the level link until we reach the last child
+ */
+ ret += ipchain__fprintf_graph_line(fp, depth, depth_mask);
+ i = 0;
+ list_for_each_entry(chain, &child->val, list) {
+ if (chain->ip >= PERF_CONTEXT_MAX)
+ continue;
+ ret += ipchain__fprintf_graph(fp, chain, depth,
+ new_depth_mask, i++,
+ total_samples,
+ child->cumul_hit);
+ }
+ ret += callchain__fprintf_graph(fp, child, total_samples,
+ depth + 1,
+ new_depth_mask | (1 << depth));
+ node = next;
+ }
+
+ return ret;
+}
+
+static size_t
+callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
+ u64 total_samples)
{
struct callchain_list *chain;
size_t ret = 0;
@@ -796,7 +892,7 @@
if (!self)
return 0;
- ret += callchain__fprintf(fp, self->parent, total_samples);
+ ret += callchain__fprintf_flat(fp, self->parent, total_samples);
list_for_each_entry(chain, &self->val, list) {
@@ -826,8 +922,13 @@
chain = rb_entry(rb_node, struct callchain_node, rb_node);
percent = chain->hit * 100.0 / total_samples;
- ret += fprintf(fp, " %6.2f%%\n", percent);
- ret += callchain__fprintf(fp, chain, total_samples);
+ if (callchain_mode == FLAT) {
+ ret += fprintf(fp, " %6.2f%%\n", percent);
+ ret += callchain__fprintf_flat(fp, chain, total_samples);
+ } else if (callchain_mode == GRAPH) {
+ ret += callchain__fprintf_graph(fp, chain,
+ total_samples, 1, 1);
+ }
ret += fprintf(fp, "\n");
rb_node = rb_next(rb_node);
}
@@ -1129,8 +1230,12 @@
struct rb_node *parent = NULL;
struct hist_entry *iter;
- if (callchain)
- sort_chain_to_rbtree(&he->sorted_chain, &he->callchain);
+ if (callchain) {
+ if (callchain_mode == FLAT)
+ sort_chain_flat(&he->sorted_chain, &he->callchain);
+ else if (callchain_mode == GRAPH)
+ sort_chain_graph(&he->sorted_chain, &he->callchain);
+ }
while (*p != NULL) {
parent = *p;
@@ -1702,6 +1807,26 @@
return rc;
}
+static int
+parse_callchain_opt(const struct option *opt __used, const char *arg,
+ int unset __used)
+{
+ callchain = 1;
+
+ if (!arg)
+ return 0;
+
+ if (!strncmp(arg, "graph", strlen(arg)))
+ callchain_mode = GRAPH;
+
+ else if (!strncmp(arg, "flat", strlen(arg)))
+ callchain_mode = FLAT;
+ else
+ return -1;
+
+ return 0;
+}
+
static const char * const report_usage[] = {
"perf report [<options>] <command>",
NULL
@@ -1725,7 +1850,9 @@
"regex filter to identify parent, see: '--sort parent'"),
OPT_BOOLEAN('x', "exclude-other", &exclude_other,
"Only display entries with parent-match"),
- OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"),
+ OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type",
+ "Display callchains with output_type: flat, graph. "
+ "Default to flat", &parse_callchain_opt, "flat"),
OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]",
"only consider symbols in these dsos"),
OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]",
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 3c4a91f..a9873aa 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -19,9 +19,9 @@
#define chain_for_each_child(child, parent) \
list_for_each_entry(child, &parent->children, brothers)
-
static void
-rb_insert_callchain(struct rb_root *root, struct callchain_node *chain)
+rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
+ enum chain_mode mode)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -31,10 +31,22 @@
parent = *p;
rnode = rb_entry(parent, struct callchain_node, rb_node);
- if (rnode->hit < chain->hit)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
+ switch (mode) {
+ case FLAT:
+ if (rnode->hit < chain->hit)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ break;
+ case GRAPH:
+ if (rnode->cumul_hit < chain->cumul_hit)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ break;
+ default:
+ break;
+ }
}
rb_link_node(&chain->rb_node, parent, p);
@@ -45,15 +57,36 @@
* Once we get every callchains from the stream, we can now
* sort them by hit
*/
-void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node)
+void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node)
{
struct callchain_node *child;
chain_for_each_child(child, node)
- sort_chain_to_rbtree(rb_root, child);
+ sort_chain_flat(rb_root, child);
if (node->hit)
- rb_insert_callchain(rb_root, node);
+ rb_insert_callchain(rb_root, node, FLAT);
+}
+
+static void __sort_chain_graph(struct callchain_node *node)
+{
+ struct callchain_node *child;
+
+ node->rb_root = RB_ROOT;
+ node->cumul_hit = node->hit;
+
+ chain_for_each_child(child, node) {
+ __sort_chain_graph(child);
+ rb_insert_callchain(&node->rb_root, child, GRAPH);
+ node->cumul_hit += child->cumul_hit;
+ }
+}
+
+void
+sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root)
+{
+ __sort_chain_graph(chain_root);
+ rb_root->rb_node = chain_root->rb_root.rb_node;
}
/*
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index e9bd5e8..dfa5600 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -6,15 +6,21 @@
#include <linux/rbtree.h>
#include "symbol.h"
+enum chain_mode {
+ FLAT,
+ GRAPH
+};
struct callchain_node {
struct callchain_node *parent;
struct list_head brothers;
struct list_head children;
struct list_head val;
- struct rb_node rb_node;
+ struct rb_node rb_node; /* to sort nodes in an rbtree */
+ struct rb_root rb_root; /* sorted tree of children */
unsigned int val_nr;
u64 hit;
+ u64 cumul_hit; /* hit + hits of children */
};
struct callchain_list {
@@ -32,5 +38,6 @@
void append_chain(struct callchain_node *root, struct ip_callchain *chain,
struct symbol **syms);
-void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node);
+void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node);
+void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node);
#endif