perf tool: Add cgroup support

This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroups in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma-separated list of
cgroup names.

The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.

Example:

$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
 Performance counter stats for 'sleep 1':

      2,368,667,414  cycles                   test1
      2,369,661,459  cycles
      <not counted>  cycles                   test2

        1.001856890  seconds time elapsed

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 211063e..c974e08 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -85,6 +85,7 @@
 void perf_evsel__delete(struct perf_evsel *evsel)
 {
 	perf_evsel__exit(evsel);
+	close_cgroup(evsel->cgrp);
 	free(evsel);
 }
 
@@ -163,21 +164,32 @@
 			      struct thread_map *threads, bool group, bool inherit)
 {
 	int cpu, thread;
+	unsigned long flags = 0;
+	int pid = -1;
 
 	if (evsel->fd == NULL &&
 	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
 		return -1;
 
+	if (evsel->cgrp) {
+		flags = PERF_FLAG_PID_CGROUP;
+		pid = evsel->cgrp->fd;
+	}
+
 	for (cpu = 0; cpu < cpus->nr; cpu++) {
 		int group_fd = -1;
 
 		evsel->attr.inherit = (cpus->map[cpu] < 0) && inherit;
 
 		for (thread = 0; thread < threads->nr; thread++) {
+
+			if (!evsel->cgrp)
+				pid = threads->map[thread];
+
 			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
-								     threads->map[thread],
+								     pid,
 								     cpus->map[cpu],
-								     group_fd, 0);
+								     group_fd, flags);
 			if (FD(evsel, cpu, thread) < 0)
 				goto out_close;