Merge branch 'perf/urgent' into perf/core
Merge reason: We want to queue up a dependent patch. Also update to
later -rc's.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 47aabee..a9100b2 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -24,6 +24,7 @@
-------------------------
p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+ -:[GRP/]EVENT : Clear a probe
GRP : Group name. If omitted, use "kprobes" for it.
EVENT : Event name. If omitted, the event name is generated
@@ -37,15 +38,12 @@
@SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
$stackN : Fetch Nth entry of stack (N >= 0)
$stack : Fetch stack address.
- $argN : Fetch function argument. (N >= 0)(*)
- $retval : Fetch return value.(**)
- +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
+ $retval : Fetch return value.(*)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
- (*) aN may not correct on asmlinkaged functions and at the middle of
- function body.
- (**) only for return probe.
- (***) this is useful for fetching a field of data structures.
+ (*) only for return probe.
+ (**) this is useful for fetching a field of data structures.
Per-Probe Event Filtering
@@ -82,13 +80,16 @@
To add a probe as a new event, write a new definition to kprobe_events
as below.
- echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events
+ echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events
This sets a kprobe on the top of do_sys_open() function with recording
-1st to 4th arguments as "myprobe" event. As this example shows, users can
-choose more familiar names for each arguments.
+1st to 4th arguments as "myprobe" event. Note, which register/stack entry is
+assigned to each function argument depends on arch-specific ABI. If you unsure
+the ABI, please try to use probe subcommand of perf-tools (you can find it
+under tools/perf/).
+As this example shows, users can choose more familiar names for each arguments.
- echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
+ echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events
This sets a kretprobe on the return point of do_sys_open() function with
recording return value as "myretprobe" event.
@@ -97,23 +98,24 @@
cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
name: myprobe
-ID: 75
+ID: 780
format:
- field:unsigned short common_type; offset:0; size:2;
- field:unsigned char common_flags; offset:2; size:1;
- field:unsigned char common_preempt_count; offset:3; size:1;
- field:int common_pid; offset:4; size:4;
- field:int common_tgid; offset:8; size:4;
+ field:unsigned short common_type; offset:0; size:2; signed:0;
+ field:unsigned char common_flags; offset:2; size:1; signed:0;
+ field:unsigned char common_preempt_count; offset:3; size:1;signed:0;
+ field:int common_pid; offset:4; size:4; signed:1;
+ field:int common_lock_depth; offset:8; size:4; signed:1;
- field: unsigned long ip; offset:16;tsize:8;
- field: int nargs; offset:24;tsize:4;
- field: unsigned long dfd; offset:32;tsize:8;
- field: unsigned long filename; offset:40;tsize:8;
- field: unsigned long flags; offset:48;tsize:8;
- field: unsigned long mode; offset:56;tsize:8;
+ field:unsigned long __probe_ip; offset:12; size:4; signed:0;
+ field:int __probe_nargs; offset:16; size:4; signed:1;
+ field:unsigned long dfd; offset:20; size:4; signed:0;
+ field:unsigned long filename; offset:24; size:4; signed:0;
+ field:unsigned long flags; offset:28; size:4; signed:0;
+ field:unsigned long mode; offset:32; size:4; signed:0;
-print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip,
+REC->dfd, REC->filename, REC->flags, REC->mode
You can see that the event has 4 arguments as in the expressions you specified.
@@ -121,6 +123,12 @@
This clears all probe points.
+ Or,
+
+ echo -:myprobe >> kprobe_events
+
+ This clears probe points selectively.
+
Right after definition, each event is disabled by default. For tracing these
events, you need to enable it.
@@ -146,4 +154,3 @@
returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
returns from do_sys_open to sys_open+0x1b).
-
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index a3c11ca..95ad9da 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -495,9 +495,6 @@
entry->nr = 0;
- if (current->pid == 0) /* idle task? */
- return entry;
-
if (!user_mode(regs)) {
perf_callchain_kernel(regs, entry);
if (current->mm)
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index 24ea837..a9dd3ab 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -68,9 +68,6 @@
is_user = user_mode(regs);
- if (!current || current->pid == 0)
- return;
-
if (is_user && current->state != TASK_RUNNING)
return;
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 8240f76..b81002f 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -14,6 +14,9 @@
which debugging register was responsible for the trap. The other bits
are either reserved or not of interest to us. */
+/* Define reserved bits in DR6 which are always set to 1 */
+#define DR6_RESERVED (0xFFFF0FF0)
+
#define DR_TRAP0 (0x1) /* db0 */
#define DR_TRAP1 (0x2) /* db1 */
#define DR_TRAP2 (0x4) /* db2 */
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 139d4c1..93da9c3 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -19,7 +19,6 @@
extern int check_nmi_watchdog(void);
extern int nmi_watchdog_enabled;
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
-extern int avail_to_resrv_perfctr_nmi(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int);
extern int reserve_evntsel_nmi(unsigned int);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 1380367..befd172 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -27,7 +27,14 @@
/*
* Includes eventsel and unit mask as well:
*/
-#define ARCH_PERFMON_EVENT_MASK 0xffff
+
+
+#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL
+#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL
+#define INTEL_ARCH_EDGE_MASK 0x00040000ULL
+#define INTEL_ARCH_INV_MASK 0x00800000ULL
+#define INTEL_ARCH_CNT_MASK 0xFF000000ULL
+#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
/*
* filter mask to validate fixed counter events.
@@ -38,7 +45,12 @@
* The other filters are supported by fixed counters.
* The any-thread option is supported starting with v3.
*/
-#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000
+#define INTEL_ARCH_FIXED_MASK \
+ (INTEL_ARCH_CNT_MASK| \
+ INTEL_ARCH_INV_MASK| \
+ INTEL_ARCH_EDGE_MASK|\
+ INTEL_ARCH_UNIT_MASK|\
+ INTEL_ARCH_EVTSEL_MASK)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 9d369f6..2010280 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,10 +274,6 @@
return 0;
}
-/* Get Nth argument at function call */
-extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
- unsigned int n);
-
/*
* These are defined as per linux/ptrace.h, which see.
*/
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 35e8912..4dab78e 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -3,8 +3,6 @@
extern int kstack_depth_to_print;
-int x86_is_stack_id(int id, char *name);
-
struct thread_info;
struct stacktrace_ops;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8c1c070..1846ead 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ * Copyright (C) 2009 Google, Inc., Stephane Eranian
*
* For licencing details see kernel-base/COPYING
*/
@@ -22,6 +23,7 @@
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/cpu.h>
+#include <linux/bitops.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -68,26 +70,47 @@
u64 pebs_event_reset[MAX_PEBS_EVENTS];
};
+struct event_constraint {
+ union {
+ unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ u64 idxmsk64[1];
+ };
+ int code;
+ int cmask;
+ int weight;
+};
+
struct cpu_hw_events {
- struct perf_event *events[X86_PMC_IDX_MAX];
- unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
int enabled;
struct debug_store *ds;
+
+ int n_events;
+ int n_added;
+ int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+ struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
};
-struct event_constraint {
- unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- int code;
-};
+#define EVENT_CONSTRAINT(c, n, m) { \
+ { .idxmsk64[0] = (n) }, \
+ .code = (c), \
+ .cmask = (m), \
+ .weight = HWEIGHT64((u64)(n)), \
+}
-#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
-#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 }
+#define INTEL_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
-#define for_each_event_constraint(e, c) \
- for ((e) = (c); (e)->idxmsk[0]; (e)++)
+#define FIXED_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+#define EVENT_CONSTRAINT_END \
+ EVENT_CONSTRAINT(0, 0, 0)
+
+#define for_each_event_constraint(e, c) \
+ for ((e) = (c); (e)->cmask; (e)++)
/*
* struct x86_pmu - generic x86 pmu
@@ -114,8 +137,14 @@
u64 intel_ctrl;
void (*enable_bts)(u64 config);
void (*disable_bts)(void);
- int (*get_event_idx)(struct cpu_hw_events *cpuc,
- struct hw_perf_event *hwc);
+
+ struct event_constraint *
+ (*get_event_constraints)(struct cpu_hw_events *cpuc,
+ struct perf_event *event);
+
+ void (*put_event_constraints)(struct cpu_hw_events *cpuc,
+ struct perf_event *event);
+ struct event_constraint *event_constraints;
};
static struct x86_pmu x86_pmu __read_mostly;
@@ -124,7 +153,8 @@
.enabled = 1,
};
-static const struct event_constraint *event_constraints;
+static int x86_perf_event_set_period(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx);
/*
* Not sure about some of these
@@ -171,14 +201,14 @@
return hw_event & P6_EVNTSEL_MASK;
}
-static const struct event_constraint intel_p6_event_constraints[] =
+static struct event_constraint intel_p6_event_constraints[] =
{
- EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
- EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
- EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
- EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
- EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
+ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
EVENT_CONSTRAINT_END
};
@@ -196,32 +226,51 @@
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
};
-static const struct event_constraint intel_core_event_constraints[] =
+static struct event_constraint intel_core_event_constraints[] =
{
- EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
- EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
- EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
- EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
- EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
- EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
- EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
- EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+ FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+ INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+ INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
EVENT_CONSTRAINT_END
};
-static const struct event_constraint intel_nehalem_event_constraints[] =
+static struct event_constraint intel_nehalem_event_constraints[] =
{
- EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
- EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
- EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
- EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
- EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
- EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
- EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
- EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
- EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
- EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
+ FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+ INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+ INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+ INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
+ INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] =
+{
+ FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] =
+{
+ FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
EVENT_CONSTRAINT_END
};
@@ -245,6 +294,97 @@
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+static __initconst u64 westmere_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
+ [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
+ [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
+ [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
+ [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
+ [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
+ [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
+ [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
static __initconst u64 nehalem_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -527,11 +667,11 @@
#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
#define CORE_EVNTSEL_MASK \
- (CORE_EVNTSEL_EVENT_MASK | \
- CORE_EVNTSEL_UNIT_MASK | \
- CORE_EVNTSEL_EDGE_MASK | \
- CORE_EVNTSEL_INV_MASK | \
- CORE_EVNTSEL_REG_MASK)
+ (INTEL_ARCH_EVTSEL_MASK | \
+ INTEL_ARCH_UNIT_MASK | \
+ INTEL_ARCH_EDGE_MASK | \
+ INTEL_ARCH_INV_MASK | \
+ INTEL_ARCH_CNT_MASK)
return hw_event & CORE_EVNTSEL_MASK;
}
@@ -1058,15 +1198,8 @@
static void p6_pmu_disable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
u64 val;
- if (!cpuc->enabled)
- return;
-
- cpuc->enabled = 0;
- barrier();
-
/* p6 only has one enable register */
rdmsrl(MSR_P6_EVNTSEL0, val);
val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1077,12 +1210,6 @@
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (!cpuc->enabled)
- return;
-
- cpuc->enabled = 0;
- barrier();
-
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
@@ -1094,17 +1221,6 @@
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
- if (!cpuc->enabled)
- return;
-
- cpuc->enabled = 0;
- /*
- * ensure we write the disable before we start disabling the
- * events proper, so that amd_pmu_enable_event() does the
- * right thing.
- */
- barrier();
-
for (idx = 0; idx < x86_pmu.num_events; idx++) {
u64 val;
@@ -1120,22 +1236,25 @@
void hw_perf_disable(void)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
if (!x86_pmu_initialized())
return;
- return x86_pmu.disable_all();
+
+ if (!cpuc->enabled)
+ return;
+
+ cpuc->n_added = 0;
+ cpuc->enabled = 0;
+ barrier();
+
+ x86_pmu.disable_all();
}
static void p6_pmu_enable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
unsigned long val;
- if (cpuc->enabled)
- return;
-
- cpuc->enabled = 1;
- barrier();
-
/* p6 only has one enable register */
rdmsrl(MSR_P6_EVNTSEL0, val);
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1146,12 +1265,6 @@
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (cpuc->enabled)
- return;
-
- cpuc->enabled = 1;
- barrier();
-
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -1170,12 +1283,6 @@
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
- if (cpuc->enabled)
- return;
-
- cpuc->enabled = 1;
- barrier();
-
for (idx = 0; idx < x86_pmu.num_events; idx++) {
struct perf_event *event = cpuc->events[idx];
u64 val;
@@ -1189,10 +1296,239 @@
}
}
+static const struct pmu pmu;
+
+static inline int is_x86_event(struct perf_event *event)
+{
+ return event->pmu == &pmu;
+}
+
+static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+ struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ int i, j, w, wmax, num = 0;
+ struct hw_perf_event *hwc;
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+ for (i = 0; i < n; i++) {
+ constraints[i] =
+ x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+ }
+
+ /*
+ * fastpath, try to reuse previous register
+ */
+ for (i = 0; i < n; i++) {
+ hwc = &cpuc->event_list[i]->hw;
+ c = constraints[i];
+
+ /* never assigned */
+ if (hwc->idx == -1)
+ break;
+
+ /* constraint still honored */
+ if (!test_bit(hwc->idx, c->idxmsk))
+ break;
+
+ /* not already used */
+ if (test_bit(hwc->idx, used_mask))
+ break;
+
+ set_bit(hwc->idx, used_mask);
+ if (assign)
+ assign[i] = hwc->idx;
+ }
+ if (i == n)
+ goto done;
+
+ /*
+ * begin slow path
+ */
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+ /*
+ * weight = number of possible counters
+ *
+ * 1 = most constrained, only works on one counter
+ * wmax = least constrained, works on any counter
+ *
+ * assign events to counters starting with most
+ * constrained events.
+ */
+ wmax = x86_pmu.num_events;
+
+ /*
+ * when fixed event counters are present,
+ * wmax is incremented by 1 to account
+ * for one more choice
+ */
+ if (x86_pmu.num_events_fixed)
+ wmax++;
+
+ for (w = 1, num = n; num && w <= wmax; w++) {
+ /* for each event */
+ for (i = 0; num && i < n; i++) {
+ c = constraints[i];
+ hwc = &cpuc->event_list[i]->hw;
+
+ if (c->weight != w)
+ continue;
+
+ for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
+ if (!test_bit(j, used_mask))
+ break;
+ }
+
+ if (j == X86_PMC_IDX_MAX)
+ break;
+
+ set_bit(j, used_mask);
+
+ if (assign)
+ assign[i] = j;
+ num--;
+ }
+ }
+done:
+ /*
+ * scheduling failed or is just a simulation,
+ * free resources if necessary
+ */
+ if (!assign || num) {
+ for (i = 0; i < n; i++) {
+ if (x86_pmu.put_event_constraints)
+ x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+ }
+ }
+ return num ? -ENOSPC : 0;
+}
+
+/*
+ * dogrp: true if must collect siblings events (group)
+ * returns total number of events and error code
+ */
+static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
+{
+ struct perf_event *event;
+ int n, max_count;
+
+ max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+
+ /* current number of events already accepted */
+ n = cpuc->n_events;
+
+ if (is_x86_event(leader)) {
+ if (n >= max_count)
+ return -ENOSPC;
+ cpuc->event_list[n] = leader;
+ n++;
+ }
+ if (!dogrp)
+ return n;
+
+ list_for_each_entry(event, &leader->sibling_list, group_entry) {
+ if (!is_x86_event(event) ||
+ event->state <= PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (n >= max_count)
+ return -ENOSPC;
+
+ cpuc->event_list[n] = event;
+ n++;
+ }
+ return n;
+}
+
+
+static inline void x86_assign_hw_event(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx)
+{
+ hwc->idx = idx;
+
+ if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
+ hwc->config_base = 0;
+ hwc->event_base = 0;
+ } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
+ hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+ /*
+ * We set it so that event_base + idx in wrmsr/rdmsr maps to
+ * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+ */
+ hwc->event_base =
+ MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ } else {
+ hwc->config_base = x86_pmu.eventsel;
+ hwc->event_base = x86_pmu.perfctr;
+ }
+}
+
+static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
+
void hw_perf_enable(void)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct perf_event *event;
+ struct hw_perf_event *hwc;
+ int i;
+
if (!x86_pmu_initialized())
return;
+
+ if (cpuc->enabled)
+ return;
+
+ if (cpuc->n_added) {
+ /*
+ * apply assignment obtained either from
+ * hw_perf_group_sched_in() or x86_pmu_enable()
+ *
+ * step1: save events moving to new counters
+ * step2: reprogram moved events into new counters
+ */
+ for (i = 0; i < cpuc->n_events; i++) {
+
+ event = cpuc->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
+ continue;
+
+ __x86_pmu_disable(event, cpuc);
+
+ hwc->idx = -1;
+ }
+
+ for (i = 0; i < cpuc->n_events; i++) {
+
+ event = cpuc->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx == -1) {
+ x86_assign_hw_event(event, hwc, cpuc->assign[i]);
+ x86_perf_event_set_period(event, hwc, hwc->idx);
+ }
+ /*
+ * need to mark as active because x86_pmu_disable()
+ * clear active_mask and eventsp[] yet it preserves
+ * idx
+ */
+ set_bit(hwc->idx, cpuc->active_mask);
+ cpuc->events[hwc->idx] = event;
+
+ x86_pmu.enable(hwc, hwc->idx);
+ perf_event_update_userpage(event);
+ }
+ cpuc->n_added = 0;
+ perf_events_lapic_init();
+ }
+
+ cpuc->enabled = 1;
+ barrier();
+
x86_pmu.enable_all();
}
@@ -1398,148 +1734,40 @@
x86_pmu_enable_event(hwc, idx);
}
-static int fixed_mode_idx(struct hw_perf_event *hwc)
-{
- unsigned int hw_event;
-
- hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
-
- if (unlikely((hw_event ==
- x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
- (hwc->sample_period == 1)))
- return X86_PMC_IDX_FIXED_BTS;
-
- if (!x86_pmu.num_events_fixed)
- return -1;
-
- /*
- * fixed counters do not take all possible filters
- */
- if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
- return -1;
-
- if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
- return X86_PMC_IDX_FIXED_INSTRUCTIONS;
- if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
- return X86_PMC_IDX_FIXED_CPU_CYCLES;
- if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
- return X86_PMC_IDX_FIXED_BUS_CYCLES;
-
- return -1;
-}
-
/*
- * generic counter allocator: get next free counter
- */
-static int
-gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
- int idx;
-
- idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
- return idx == x86_pmu.num_events ? -1 : idx;
-}
-
-/*
- * intel-specific counter allocator: check event constraints
- */
-static int
-intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
- const struct event_constraint *event_constraint;
- int i, code;
-
- if (!event_constraints)
- goto skip;
-
- code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
-
- for_each_event_constraint(event_constraint, event_constraints) {
- if (code == event_constraint->code) {
- for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
- if (!test_and_set_bit(i, cpuc->used_mask))
- return i;
- }
- return -1;
- }
- }
-skip:
- return gen_get_event_idx(cpuc, hwc);
-}
-
-static int
-x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
- int idx;
-
- idx = fixed_mode_idx(hwc);
- if (idx == X86_PMC_IDX_FIXED_BTS) {
- /* BTS is already occupied. */
- if (test_and_set_bit(idx, cpuc->used_mask))
- return -EAGAIN;
-
- hwc->config_base = 0;
- hwc->event_base = 0;
- hwc->idx = idx;
- } else if (idx >= 0) {
- /*
- * Try to get the fixed event, if that is already taken
- * then try to get a generic event:
- */
- if (test_and_set_bit(idx, cpuc->used_mask))
- goto try_generic;
-
- hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- /*
- * We set it so that event_base + idx in wrmsr/rdmsr maps to
- * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
- */
- hwc->event_base =
- MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
- hwc->idx = idx;
- } else {
- idx = hwc->idx;
- /* Try to get the previous generic event again */
- if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
-try_generic:
- idx = x86_pmu.get_event_idx(cpuc, hwc);
- if (idx == -1)
- return -EAGAIN;
-
- set_bit(idx, cpuc->used_mask);
- hwc->idx = idx;
- }
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
- }
-
- return idx;
-}
-
-/*
- * Find a PMC slot for the freshly enabled / scheduled in event:
+ * activate a single event
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scehduled with existing events.
+ *
+ * Called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
*/
static int x86_pmu_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- int idx;
+ struct hw_perf_event *hwc;
+ int assign[X86_PMC_IDX_MAX];
+ int n, n0, ret;
- idx = x86_schedule_event(cpuc, hwc);
- if (idx < 0)
- return idx;
+ hwc = &event->hw;
- perf_events_lapic_init();
+ n0 = cpuc->n_events;
+ n = collect_events(cpuc, event, false);
+ if (n < 0)
+ return n;
- x86_pmu.disable(hwc, idx);
+ ret = x86_schedule_events(cpuc, n, assign);
+ if (ret)
+ return ret;
+ /*
+ * copy new assignment, now we know it is possible
+ * will be used by hw_perf_enable()
+ */
+ memcpy(cpuc->assign, assign, n*sizeof(int));
- cpuc->events[idx] = event;
- set_bit(idx, cpuc->active_mask);
-
- x86_perf_event_set_period(event, hwc, idx);
- x86_pmu.enable(hwc, idx);
-
- perf_event_update_userpage(event);
+ cpuc->n_events = n;
+ cpuc->n_added = n - n0;
return 0;
}
@@ -1583,7 +1811,7 @@
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
}
- pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
+ pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
for (idx = 0; idx < x86_pmu.num_events; idx++) {
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1667,9 +1895,8 @@
event->pending_kill = POLL_IN;
}
-static void x86_pmu_disable(struct perf_event *event)
+static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -1681,12 +1908,6 @@
x86_pmu.disable(hwc, idx);
/*
- * Make sure the cleared pointer becomes visible before we
- * (potentially) free the event:
- */
- barrier();
-
- /*
* Drain the remaining delta count out of a event
* that we are disabling:
*/
@@ -1697,8 +1918,28 @@
intel_pmu_drain_bts_buffer(cpuc);
cpuc->events[idx] = NULL;
- clear_bit(idx, cpuc->used_mask);
+}
+static void x86_pmu_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ int i;
+
+ __x86_pmu_disable(event, cpuc);
+
+ for (i = 0; i < cpuc->n_events; i++) {
+ if (event == cpuc->event_list[i]) {
+
+ if (x86_pmu.put_event_constraints)
+ x86_pmu.put_event_constraints(cpuc, event);
+
+ while (++i < cpuc->n_events)
+ cpuc->event_list[i-1] = cpuc->event_list[i];
+
+ --cpuc->n_events;
+ break;
+ }
+ }
perf_event_update_userpage(event);
}
@@ -1969,6 +2210,162 @@
return NOTIFY_STOP;
}
+static struct event_constraint unconstrained;
+
+static struct event_constraint bts_constraint =
+ EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+
+static struct event_constraint *
+intel_special_constraints(struct perf_event *event)
+{
+ unsigned int hw_event;
+
+ hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+
+ if (unlikely((hw_event ==
+ x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+ (event->hw.sample_period == 1))) {
+
+ return &bts_constraint;
+ }
+ return NULL;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = intel_special_constraints(event);
+ if (c)
+ return c;
+
+ if (x86_pmu.event_constraints) {
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if ((event->hw.config & c->cmask) == c->code)
+ return c;
+ }
+ }
+
+ return &unconstrained;
+}
+
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ return &unconstrained;
+}
+
+static int x86_event_sched_in(struct perf_event *event,
+ struct perf_cpu_context *cpuctx, int cpu)
+{
+ int ret = 0;
+
+ event->state = PERF_EVENT_STATE_ACTIVE;
+ event->oncpu = cpu;
+ event->tstamp_running += event->ctx->time - event->tstamp_stopped;
+
+ if (!is_x86_event(event))
+ ret = event->pmu->enable(event);
+
+ if (!ret && !is_software_event(event))
+ cpuctx->active_oncpu++;
+
+ if (!ret && event->attr.exclusive)
+ cpuctx->exclusive = 1;
+
+ return ret;
+}
+
+static void x86_event_sched_out(struct perf_event *event,
+ struct perf_cpu_context *cpuctx, int cpu)
+{
+ event->state = PERF_EVENT_STATE_INACTIVE;
+ event->oncpu = -1;
+
+ if (!is_x86_event(event))
+ event->pmu->disable(event);
+
+ event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
+
+ if (!is_software_event(event))
+ cpuctx->active_oncpu--;
+
+ if (event->attr.exclusive || !cpuctx->active_oncpu)
+ cpuctx->exclusive = 0;
+}
+
+/*
+ * Called to enable a whole group of events.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ *
+ * called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
+ */
+int hw_perf_group_sched_in(struct perf_event *leader,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx, int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct perf_event *sub;
+ int assign[X86_PMC_IDX_MAX];
+ int n0, n1, ret;
+
+ /* n0 = total number of events */
+ n0 = collect_events(cpuc, leader, true);
+ if (n0 < 0)
+ return n0;
+
+ ret = x86_schedule_events(cpuc, n0, assign);
+ if (ret)
+ return ret;
+
+ ret = x86_event_sched_in(leader, cpuctx, cpu);
+ if (ret)
+ return ret;
+
+ n1 = 1;
+ list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+ if (sub->state > PERF_EVENT_STATE_OFF) {
+ ret = x86_event_sched_in(sub, cpuctx, cpu);
+ if (ret)
+ goto undo;
+ ++n1;
+ }
+ }
+ /*
+ * copy new assignment, now we know it is possible
+ * will be used by hw_perf_enable()
+ */
+ memcpy(cpuc->assign, assign, n0*sizeof(int));
+
+ cpuc->n_events = n0;
+ cpuc->n_added = n1;
+ ctx->nr_active += n1;
+
+ /*
+ * 1 means successful and events are active
+ * This is not quite true because we defer
+ * actual activation until hw_perf_enable() but
+ * this way we* ensure caller won't try to enable
+ * individual events
+ */
+ return 1;
+undo:
+ x86_event_sched_out(leader, cpuctx, cpu);
+ n0 = 1;
+ list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+ if (sub->state == PERF_EVENT_STATE_ACTIVE) {
+ x86_event_sched_out(sub, cpuctx, cpu);
+ if (++n0 == n1)
+ break;
+ }
+ }
+ return ret;
+}
+
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
.notifier_call = perf_event_nmi_handler,
.next = NULL,
@@ -2000,7 +2397,8 @@
*/
.event_bits = 32,
.event_mask = (1ULL << 32) - 1,
- .get_event_idx = intel_get_event_idx,
+ .get_event_constraints = intel_get_event_constraints,
+ .event_constraints = intel_p6_event_constraints
};
static __initconst struct x86_pmu intel_pmu = {
@@ -2024,7 +2422,7 @@
.max_period = (1ULL << 31) - 1,
.enable_bts = intel_pmu_enable_bts,
.disable_bts = intel_pmu_disable_bts,
- .get_event_idx = intel_get_event_idx,
+ .get_event_constraints = intel_get_event_constraints
};
static __initconst struct x86_pmu amd_pmu = {
@@ -2045,7 +2443,7 @@
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
- .get_event_idx = gen_get_event_idx,
+ .get_event_constraints = amd_get_event_constraints
};
static __init int p6_pmu_init(void)
@@ -2058,12 +2456,9 @@
case 7:
case 8:
case 11: /* Pentium III */
- event_constraints = intel_p6_event_constraints;
- break;
case 9:
case 13:
/* Pentium M */
- event_constraints = intel_p6_event_constraints;
break;
default:
pr_cont("unsupported p6 CPU model %d ",
@@ -2128,23 +2523,40 @@
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ x86_pmu.event_constraints = intel_core_event_constraints;
pr_cont("Core2 events, ");
- event_constraints = intel_core_event_constraints;
break;
- default:
- case 26:
+
+ case 26: /* 45 nm nehalem, "Bloomfield" */
+ case 30: /* 45 nm nehalem, "Lynnfield" */
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
- event_constraints = intel_nehalem_event_constraints;
+ x86_pmu.event_constraints = intel_nehalem_event_constraints;
pr_cont("Nehalem/Corei7 events, ");
break;
case 28:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ x86_pmu.event_constraints = intel_gen_event_constraints;
pr_cont("Atom events, ");
break;
+
+ case 37: /* 32 nm nehalem, "Clarkdale" */
+ case 44: /* 32 nm nehalem, "Gulftown" */
+ memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ x86_pmu.event_constraints = intel_westmere_event_constraints;
+ pr_cont("Westmere events, ");
+ break;
+ default:
+ /*
+ * default constraints for v2 and up
+ */
+ x86_pmu.event_constraints = intel_gen_event_constraints;
+ pr_cont("generic architected perfmon, ");
}
return 0;
}
@@ -2220,6 +2632,9 @@
perf_events_lapic_init();
register_die_notifier(&perf_event_nmi_notifier);
+ unconstrained = (struct event_constraint)
+ EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.event_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_events);
@@ -2241,46 +2656,73 @@
.unthrottle = x86_pmu_unthrottle,
};
-static int
-validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
- struct hw_perf_event fake_event = event->hw;
-
- if (event->pmu && event->pmu != &pmu)
- return 0;
-
- return x86_schedule_event(cpuc, &fake_event) >= 0;
-}
-
+/*
+ * validate a single event group
+ *
+ * validation include:
+ * - check events are compatible which each other
+ * - events do not compete for the same counter
+ * - number of events <= number of counters
+ *
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
static int validate_group(struct perf_event *event)
{
- struct perf_event *sibling, *leader = event->group_leader;
- struct cpu_hw_events fake_pmu;
+ struct perf_event *leader = event->group_leader;
+ struct cpu_hw_events *fake_cpuc;
+ int ret, n;
- memset(&fake_pmu, 0, sizeof(fake_pmu));
+ ret = -ENOMEM;
+ fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+ if (!fake_cpuc)
+ goto out;
- if (!validate_event(&fake_pmu, leader))
- return -ENOSPC;
+ /*
+ * the event is not yet connected with its
+ * siblings therefore we must first collect
+ * existing siblings, then add the new event
+ * before we can simulate the scheduling
+ */
+ ret = -ENOSPC;
+ n = collect_events(fake_cpuc, leader, true);
+ if (n < 0)
+ goto out_free;
- list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
- if (!validate_event(&fake_pmu, sibling))
- return -ENOSPC;
- }
+ fake_cpuc->n_events = n;
+ n = collect_events(fake_cpuc, event, false);
+ if (n < 0)
+ goto out_free;
- if (!validate_event(&fake_pmu, event))
- return -ENOSPC;
+ fake_cpuc->n_events = n;
- return 0;
+ ret = x86_schedule_events(fake_cpuc, n, NULL);
+
+out_free:
+ kfree(fake_cpuc);
+out:
+ return ret;
}
const struct pmu *hw_perf_event_init(struct perf_event *event)
{
+ const struct pmu *tmp;
int err;
err = __hw_perf_event_init(event);
if (!err) {
+ /*
+ * we temporarily connect event to its pmu
+ * such that validate_group() can classify
+ * it as an x86 event using is_x86_event()
+ */
+ tmp = event->pmu;
+ event->pmu = &pmu;
+
if (event->group_leader != event)
err = validate_group(event);
+
+ event->pmu = tmp;
}
if (err) {
if (event->destroy)
@@ -2304,7 +2746,6 @@
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-static DEFINE_PER_CPU(int, in_ignored_frame);
static void
@@ -2320,10 +2761,6 @@
static int backtrace_stack(void *data, char *name)
{
- per_cpu(in_ignored_frame, smp_processor_id()) =
- x86_is_stack_id(NMI_STACK, name) ||
- x86_is_stack_id(DEBUG_STACK, name);
-
return 0;
}
@@ -2331,9 +2768,6 @@
{
struct perf_callchain_entry *entry = data;
- if (per_cpu(in_ignored_frame, smp_processor_id()))
- return;
-
if (reliable)
callchain_store(entry, addr);
}
@@ -2440,9 +2874,6 @@
is_user = user_mode(regs);
- if (!current || current->pid == 0)
- return;
-
if (is_user && current->state != TASK_RUNNING)
return;
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 898df97..74f4e85 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -115,17 +115,6 @@
return !test_bit(counter, perfctr_nmi_owner);
}
-
-/* checks the an msr for availability */
-int avail_to_resrv_perfctr_nmi(unsigned int msr)
-{
- unsigned int counter;
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- return !test_bit(counter, perfctr_nmi_owner);
-}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
int reserve_perfctr_nmi(unsigned int msr)
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index ae775ca..11540a1 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -18,11 +18,6 @@
#include "dumpstack.h"
-/* Just a stub for now */
-int x86_is_stack_id(int id, char *name)
-{
- return 0;
-}
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 0ad9597..676bc05 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -33,11 +33,6 @@
#endif
};
-int x86_is_stack_id(int id, char *name)
-{
- return x86_stack_ids[id - 1] == name;
-}
-
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
{
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 05d5fec..ae90b47 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -502,8 +502,6 @@
rcu_read_lock();
bp = per_cpu(bp_per_reg[i], cpu);
- if (bp)
- rc = NOTIFY_DONE;
/*
* Reset the 'i'th TRAP bit in dr6 to denote completion of
* exception handling
@@ -522,7 +520,13 @@
rcu_read_unlock();
}
- if (dr6 & (~DR_TRAP_BITS))
+ /*
+ * Further processing in do_debug() is needed for a) user-space
+ * breakpoints (to generate signals) and b) when the system has
+ * taken exception due to multiple causes
+ */
+ if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
+ (dr6 & (~DR_TRAP_BITS)))
rc = NOTIFY_DONE;
set_debugreg(dr7, 7);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 017d937..73554a3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -140,30 +140,6 @@
#endif
};
-/**
- * regs_get_argument_nth() - get Nth argument at function call
- * @regs: pt_regs which contains registers at function entry.
- * @n: argument number.
- *
- * regs_get_argument_nth() returns @n th argument of a function call.
- * Since usually the kernel stack will be changed right after function entry,
- * you must use this at function entry. If the @n th entry is NOT in the
- * kernel stack or pt_regs, this returns 0.
- */
-unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
-{
- if (n < ARRAY_SIZE(arg_offs_table))
- return *(unsigned long *)((char *)regs + arg_offs_table[n]);
- else {
- /*
- * The typical case: arg n is on the stack.
- * (Note: stack[0] = return address, so skip it)
- */
- n -= ARRAY_SIZE(arg_offs_table);
- return regs_get_kernel_stack_nth(regs, 1 + n);
- }
-}
-
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3339917..1168e44 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -534,6 +534,9 @@
get_debugreg(dr6, 6);
+ /* Filter out all the reserved bits which are preset to 1 */
+ dr6 &= ~DR6_RESERVED;
+
/* Catch kmemcheck conditions first of all! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c05a29c..ba0fd1e 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -25,7 +25,7 @@
static __inline__ int get_bitmask_order(unsigned int count)
{
int order;
-
+
order = fls(count);
return order; /* We could be slightly more clever with -1 here... */
}
@@ -33,7 +33,7 @@
static __inline__ int get_count_order(unsigned int count)
{
int order;
-
+
order = fls(count) - 1;
if (count & (count - 1))
order++;
@@ -45,6 +45,20 @@
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}
+#define HWEIGHT8(w) \
+ ( (!!((w) & (1ULL << 0))) + \
+ (!!((w) & (1ULL << 1))) + \
+ (!!((w) & (1ULL << 2))) + \
+ (!!((w) & (1ULL << 3))) + \
+ (!!((w) & (1ULL << 4))) + \
+ (!!((w) & (1ULL << 5))) + \
+ (!!((w) & (1ULL << 6))) + \
+ (!!((w) & (1ULL << 7))) )
+
+#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8))
+#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
+#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
+
/**
* rol32 - rotate a 32-bit value left
* @word: value to rotate
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 2233c98..cd95919 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -5,6 +5,7 @@
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/perf_event.h>
struct trace_array;
struct tracer;
@@ -138,9 +139,6 @@
#define FTRACE_MAX_PROFILE_SIZE 2048
-extern char *perf_trace_buf;
-extern char *perf_trace_buf_nmi;
-
#define MAX_FILTER_PRED 32
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
@@ -188,13 +186,27 @@
__trace_printk(ip, fmt, ##args); \
} while (0)
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
struct perf_event;
extern int ftrace_profile_enable(int event_id);
extern void ftrace_profile_disable(int event_id);
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
+extern void *
+ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp,
+ unsigned long *irq_flags);
+
+static inline void
+ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr,
+ u64 count, unsigned long irq_flags)
+{
+ struct trace_entry *entry = raw_data;
+
+ perf_tp_event(entry->type, addr, count, raw_data, size);
+ perf_swevent_put_recursion_context(rctx);
+ local_irq_restore(irq_flags);
+}
#endif
#endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/linux/list.h b/include/linux/list.h
index 969f6e9..5d9c655 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -206,6 +206,20 @@
}
/**
+ * list_rotate_left - rotate the list to the left
+ * @head: the head of the list
+ */
+static inline void list_rotate_left(struct list_head *head)
+{
+ struct list_head *first;
+
+ if (!list_empty(head)) {
+ first = head->next;
+ list_move_tail(first, head);
+ }
+}
+
+/**
* list_is_singular - tests whether a list has just one entry.
* @head: the list to test.
*/
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8fa7187..556b0f4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -290,7 +290,7 @@
};
#define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0)
-#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
#define PERF_RECORD_MISC_KERNEL (1 << 0)
#define PERF_RECORD_MISC_USER (2 << 0)
#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
@@ -356,8 +356,8 @@
* u64 stream_id;
* };
*/
- PERF_RECORD_THROTTLE = 5,
- PERF_RECORD_UNTHROTTLE = 6,
+ PERF_RECORD_THROTTLE = 5,
+ PERF_RECORD_UNTHROTTLE = 6,
/*
* struct {
@@ -371,10 +371,10 @@
/*
* struct {
- * struct perf_event_header header;
- * u32 pid, tid;
+ * struct perf_event_header header;
+ * u32 pid, tid;
*
- * struct read_format values;
+ * struct read_format values;
* };
*/
PERF_RECORD_READ = 8,
@@ -412,7 +412,7 @@
* char data[size];}&& PERF_SAMPLE_RAW
* };
*/
- PERF_RECORD_SAMPLE = 9,
+ PERF_RECORD_SAMPLE = 9,
PERF_RECORD_MAX, /* non-ABI */
};
@@ -498,9 +498,8 @@
atomic64_t period_left;
u64 interrupts;
- u64 freq_count;
- u64 freq_interrupts;
- u64 freq_stamp;
+ u64 freq_time_stamp;
+ u64 freq_count_stamp;
#endif
};
@@ -565,6 +564,10 @@
struct perf_sample_data *,
struct pt_regs *regs);
+enum perf_group_flag {
+ PERF_GROUP_SOFTWARE = 0x1,
+};
+
/**
* struct perf_event - performance event kernel representation:
*/
@@ -574,6 +577,7 @@
struct list_head event_entry;
struct list_head sibling_list;
int nr_siblings;
+ int group_flags;
struct perf_event *group_leader;
struct perf_event *output;
const struct pmu *pmu;
@@ -658,7 +662,7 @@
perf_overflow_handler_t overflow_handler;
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
struct event_filter *filter;
#endif
@@ -683,7 +687,8 @@
*/
struct mutex mutex;
- struct list_head group_list;
+ struct list_head pinned_groups;
+ struct list_head flexible_groups;
struct list_head event_list;
int nr_events;
int nr_active;
@@ -746,10 +751,9 @@
extern const struct pmu *hw_perf_event_init(struct perf_event *event);
-extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
-extern void perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next, int cpu);
-extern void perf_event_task_tick(struct task_struct *task, int cpu);
+extern void perf_event_task_sched_in(struct task_struct *task);
+extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
+extern void perf_event_task_tick(struct task_struct *task);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
@@ -853,8 +857,7 @@
extern int sysctl_perf_event_sample_rate;
extern void perf_event_init(void);
-extern void perf_tp_event(int event_id, u64 addr, u64 count,
- void *record, int entry_size);
+extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
@@ -875,12 +878,12 @@
extern void perf_event_disable(struct perf_event *event);
#else
static inline void
-perf_event_task_sched_in(struct task_struct *task, int cpu) { }
+perf_event_task_sched_in(struct task_struct *task) { }
static inline void
perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next, int cpu) { }
+ struct task_struct *next) { }
static inline void
-perf_event_task_tick(struct task_struct *task, int cpu) { }
+perf_event_task_tick(struct task_struct *task) { }
static inline int perf_event_init_task(struct task_struct *child) { return 0; }
static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
@@ -895,13 +898,13 @@
perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr) { }
static inline void
-perf_bp_event(struct perf_event *event, void *data) { }
+perf_bp_event(struct perf_event *event, void *data) { }
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_comm(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
-static inline int perf_swevent_get_recursion_context(void) { return -1; }
+static inline int perf_swevent_get_recursion_context(void) { return -1; }
static inline void perf_swevent_put_recursion_context(int rctx) { }
static inline void perf_event_enable(struct perf_event *event) { }
static inline void perf_event_disable(struct perf_event *event) { }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 207466a..feee739 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -99,7 +99,7 @@
#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \
.profile_enable = prof_sysenter_enable, \
@@ -113,7 +113,7 @@
#define TRACE_SYS_ENTER_PROFILE_INIT(sname)
#define TRACE_SYS_EXIT_PROFILE(sname)
#define TRACE_SYS_EXIT_PROFILE_INIT(sname)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
#ifdef CONFIG_FTRACE_SYSCALLS
#define __SC_STR_ADECL1(t, a) #a
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c6fe03e..f2c09e4 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -499,7 +499,7 @@
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
/*
* Generate the functions needed for tracepoint perf_event support.
@@ -542,7 +542,7 @@
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
/*
* Stage 4 of the trace events.
@@ -627,7 +627,7 @@
*
*/
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
#define _TRACE_PROFILE_INIT(call) \
.profile_enable = ftrace_profile_enable_##call, \
@@ -635,7 +635,7 @@
#else
#define _TRACE_PROFILE_INIT(call)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
#undef __entry
#define __entry entry
@@ -835,7 +835,7 @@
* }
*/
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
#undef __perf_addr
#define __perf_addr(a) __addr = (a)
@@ -850,22 +850,12 @@
proto) \
{ \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
- extern int perf_swevent_get_recursion_context(void); \
- extern void perf_swevent_put_recursion_context(int rctx); \
- extern void perf_tp_event(int, u64, u64, void *, int); \
struct ftrace_raw_##call *entry; \
u64 __addr = 0, __count = 1; \
unsigned long irq_flags; \
- struct trace_entry *ent; \
int __entry_size; \
int __data_size; \
- char *trace_buf; \
- char *raw_data; \
- int __cpu; \
int rctx; \
- int pc; \
- \
- pc = preempt_count(); \
\
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
@@ -875,42 +865,16 @@
if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \
"profile buffer not large enough")) \
return; \
- \
- local_irq_save(irq_flags); \
- \
- rctx = perf_swevent_get_recursion_context(); \
- if (rctx < 0) \
- goto end_recursion; \
- \
- __cpu = smp_processor_id(); \
- \
- if (in_nmi()) \
- trace_buf = rcu_dereference(perf_trace_buf_nmi); \
- else \
- trace_buf = rcu_dereference(perf_trace_buf); \
- \
- if (!trace_buf) \
- goto end; \
- \
- raw_data = per_cpu_ptr(trace_buf, __cpu); \
- \
- *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
- entry = (struct ftrace_raw_##call *)raw_data; \
- ent = &entry->ent; \
- tracing_generic_entry_update(ent, irq_flags, pc); \
- ent->type = event_call->id; \
- \
+ entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare( \
+ __entry_size, event_call->id, &rctx, &irq_flags); \
+ if (!entry) \
+ return; \
tstruct \
\
{ assign; } \
\
- perf_tp_event(event_call->id, __addr, __count, entry, \
- __entry_size); \
- \
-end: \
- perf_swevent_put_recursion_context(rctx); \
-end_recursion: \
- local_irq_restore(irq_flags); \
+ ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \
+ __count, irq_flags); \
}
#undef DEFINE_EVENT
@@ -927,7 +891,7 @@
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
#undef _TRACE_PROFILE_INIT
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 961fda3..3d463dc 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -49,12 +49,12 @@
enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
#endif
-#ifdef CONFIG_EVENT_PROFILE
+
+#ifdef CONFIG_PERF_EVENTS
int prof_sysenter_enable(struct ftrace_event_call *call);
void prof_sysenter_disable(struct ftrace_event_call *call);
int prof_sysexit_enable(struct ftrace_event_call *call);
void prof_sysexit_disable(struct ftrace_event_call *call);
-
#endif
#endif /* _TRACE_SYSCALL_H */
diff --git a/init/Kconfig b/init/Kconfig
index d95ca7c..ada4844 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -976,19 +976,6 @@
Say Y if unsure.
-config EVENT_PROFILE
- bool "Tracepoint profiling sources"
- depends on PERF_EVENTS && EVENT_TRACING
- default y
- help
- Allow the use of tracepoints as software performance events.
-
- When this is enabled, you can create perf events based on
- tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
- found in debugfs://tracing/events/*/*/id. (The -e/--events
- option to the perf tool can parse and interpret symbolic
- tracepoints, in the subsystem:tracepoint_name format.)
-
config PERF_COUNTERS
bool "Kernel performance counters (old config option)"
depends on HAVE_PERF_EVENTS
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index d27746b..40f8b07 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -289,6 +289,15 @@
event->total_time_running = run_end - event->tstamp_running;
}
+static struct list_head *
+ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+{
+ if (event->attr.pinned)
+ return &ctx->pinned_groups;
+ else
+ return &ctx->flexible_groups;
+}
+
/*
* Add a event from the lists for its context.
* Must be called with ctx->mutex and ctx->lock held.
@@ -303,9 +312,19 @@
* add it straight to the context's event list, or to the group
* leader's sibling list:
*/
- if (group_leader == event)
- list_add_tail(&event->group_entry, &ctx->group_list);
- else {
+ if (group_leader == event) {
+ struct list_head *list;
+
+ if (is_software_event(event))
+ event->group_flags |= PERF_GROUP_SOFTWARE;
+
+ list = ctx_group_list(event, ctx);
+ list_add_tail(&event->group_entry, list);
+ } else {
+ if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+ !is_software_event(event))
+ group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+
list_add_tail(&event->group_entry, &group_leader->sibling_list);
group_leader->nr_siblings++;
}
@@ -355,9 +374,14 @@
* to the context list directly:
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+ struct list_head *list;
- list_move_tail(&sibling->group_entry, &ctx->group_list);
+ list = ctx_group_list(event, ctx);
+ list_move_tail(&sibling->group_entry, list);
sibling->group_leader = sibling;
+
+ /* Inherit group flags from the previous leader */
+ sibling->group_flags = event->group_flags;
}
}
@@ -686,24 +710,6 @@
}
/*
- * Return 1 for a group consisting entirely of software events,
- * 0 if the group contains any hardware events.
- */
-static int is_software_only_group(struct perf_event *leader)
-{
- struct perf_event *event;
-
- if (!is_software_event(leader))
- return 0;
-
- list_for_each_entry(event, &leader->sibling_list, group_entry)
- if (!is_software_event(event))
- return 0;
-
- return 1;
-}
-
-/*
* Work out whether we can put this event group on the CPU now.
*/
static int group_can_go_on(struct perf_event *event,
@@ -713,7 +719,7 @@
/*
* Groups consisting entirely of software events can always go on.
*/
- if (is_software_only_group(event))
+ if (event->group_flags & PERF_GROUP_SOFTWARE)
return 1;
/*
* If an exclusive group is already on, no other hardware
@@ -1043,8 +1049,15 @@
return 0;
}
-void __perf_event_sched_out(struct perf_event_context *ctx,
- struct perf_cpu_context *cpuctx)
+enum event_type_t {
+ EVENT_FLEXIBLE = 0x1,
+ EVENT_PINNED = 0x2,
+ EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
+static void ctx_sched_out(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
{
struct perf_event *event;
@@ -1055,10 +1068,18 @@
update_context_time(ctx);
perf_disable();
- if (ctx->nr_active) {
- list_for_each_entry(event, &ctx->group_list, group_entry)
+ if (!ctx->nr_active)
+ goto out_enable;
+
+ if (event_type & EVENT_PINNED)
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry)
group_sched_out(event, cpuctx, ctx);
- }
+
+ if (event_type & EVENT_FLEXIBLE)
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry)
+ group_sched_out(event, cpuctx, ctx);
+
+ out_enable:
perf_enable();
out:
raw_spin_unlock(&ctx->lock);
@@ -1170,9 +1191,9 @@
* not restart the event.
*/
void perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next, int cpu)
+ struct task_struct *next)
{
- struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event_context *ctx = task->perf_event_ctxp;
struct perf_event_context *next_ctx;
struct perf_event_context *parent;
@@ -1220,15 +1241,13 @@
rcu_read_unlock();
if (do_switch) {
- __perf_event_sched_out(ctx, cpuctx);
+ ctx_sched_out(ctx, cpuctx, EVENT_ALL);
cpuctx->task_ctx = NULL;
}
}
-/*
- * Called with IRQs disabled
- */
-static void __perf_event_task_sched_out(struct perf_event_context *ctx)
+static void task_ctx_sched_out(struct perf_event_context *ctx,
+ enum event_type_t event_type)
{
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
@@ -1238,41 +1257,36 @@
if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
return;
- __perf_event_sched_out(ctx, cpuctx);
+ ctx_sched_out(ctx, cpuctx, event_type);
cpuctx->task_ctx = NULL;
}
/*
* Called with IRQs disabled
*/
-static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
+static void __perf_event_task_sched_out(struct perf_event_context *ctx)
{
- __perf_event_sched_out(&cpuctx->ctx, cpuctx);
+ task_ctx_sched_out(ctx, EVENT_ALL);
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
+{
+ ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
}
static void
-__perf_event_sched_in(struct perf_event_context *ctx,
- struct perf_cpu_context *cpuctx, int cpu)
+ctx_pinned_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ int cpu)
{
struct perf_event *event;
- int can_add_hw = 1;
- raw_spin_lock(&ctx->lock);
- ctx->is_active = 1;
- if (likely(!ctx->nr_events))
- goto out;
-
- ctx->timestamp = perf_clock();
-
- perf_disable();
-
- /*
- * First go through the list and put on any pinned groups
- * in order to give them the best chance of going on.
- */
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- if (event->state <= PERF_EVENT_STATE_OFF ||
- !event->attr.pinned)
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+ if (event->state <= PERF_EVENT_STATE_OFF)
continue;
if (event->cpu != -1 && event->cpu != cpu)
continue;
@@ -1289,16 +1303,20 @@
event->state = PERF_EVENT_STATE_ERROR;
}
}
+}
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- /*
- * Ignore events in OFF or ERROR state, and
- * ignore pinned events since we did them already.
- */
- if (event->state <= PERF_EVENT_STATE_OFF ||
- event->attr.pinned)
+static void
+ctx_flexible_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ int cpu)
+{
+ struct perf_event *event;
+ int can_add_hw = 1;
+
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+ /* Ignore events in OFF or ERROR state */
+ if (event->state <= PERF_EVENT_STATE_OFF)
continue;
-
/*
* Listen to the 'cpu' scheduling filter constraint
* of events:
@@ -1310,11 +1328,61 @@
if (group_sched_in(event, cpuctx, ctx, cpu))
can_add_hw = 0;
}
+}
+
+static void
+ctx_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
+{
+ int cpu = smp_processor_id();
+
+ raw_spin_lock(&ctx->lock);
+ ctx->is_active = 1;
+ if (likely(!ctx->nr_events))
+ goto out;
+
+ ctx->timestamp = perf_clock();
+
+ perf_disable();
+
+ /*
+ * First go through the list and put on any pinned groups
+ * in order to give them the best chance of going on.
+ */
+ if (event_type & EVENT_PINNED)
+ ctx_pinned_sched_in(ctx, cpuctx, cpu);
+
+ /* Then walk through the lower prio flexible groups */
+ if (event_type & EVENT_FLEXIBLE)
+ ctx_flexible_sched_in(ctx, cpuctx, cpu);
+
perf_enable();
out:
raw_spin_unlock(&ctx->lock);
}
+static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
+{
+ struct perf_event_context *ctx = &cpuctx->ctx;
+
+ ctx_sched_in(ctx, cpuctx, event_type);
+}
+
+static void task_ctx_sched_in(struct task_struct *task,
+ enum event_type_t event_type)
+{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ struct perf_event_context *ctx = task->perf_event_ctxp;
+
+ if (likely(!ctx))
+ return;
+ if (cpuctx->task_ctx == ctx)
+ return;
+ ctx_sched_in(ctx, cpuctx, event_type);
+ cpuctx->task_ctx = ctx;
+}
/*
* Called from scheduler to add the events of the current task
* with interrupts disabled.
@@ -1326,38 +1394,112 @@
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void perf_event_task_sched_in(struct task_struct *task, int cpu)
+void perf_event_task_sched_in(struct task_struct *task)
{
- struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event_context *ctx = task->perf_event_ctxp;
if (likely(!ctx))
return;
+
if (cpuctx->task_ctx == ctx)
return;
- __perf_event_sched_in(ctx, cpuctx, cpu);
+
+ /*
+ * We want to keep the following priority order:
+ * cpu pinned (that don't need to move), task pinned,
+ * cpu flexible, task flexible.
+ */
+ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+
+ ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
+ cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
+ ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
+
cpuctx->task_ctx = ctx;
}
-static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
-{
- struct perf_event_context *ctx = &cpuctx->ctx;
-
- __perf_event_sched_in(ctx, cpuctx, cpu);
-}
-
#define MAX_INTERRUPTS (~0ULL)
static void perf_log_throttle(struct perf_event *event, int enable);
-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+ u64 frequency = event->attr.sample_freq;
+ u64 sec = NSEC_PER_SEC;
+ u64 divisor, dividend;
+
+ int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+ count_fls = fls64(count);
+ nsec_fls = fls64(nsec);
+ frequency_fls = fls64(frequency);
+ sec_fls = 30;
+
+ /*
+ * We got @count in @nsec, with a target of sample_freq HZ
+ * the target period becomes:
+ *
+ * @count * 10^9
+ * period = -------------------
+ * @nsec * sample_freq
+ *
+ */
+
+ /*
+ * Reduce accuracy by one bit such that @a and @b converge
+ * to a similar magnitude.
+ */
+#define REDUCE_FLS(a, b) \
+do { \
+ if (a##_fls > b##_fls) { \
+ a >>= 1; \
+ a##_fls--; \
+ } else { \
+ b >>= 1; \
+ b##_fls--; \
+ } \
+} while (0)
+
+ /*
+ * Reduce accuracy until either term fits in a u64, then proceed with
+ * the other, so that finally we can do a u64/u64 division.
+ */
+ while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+ REDUCE_FLS(nsec, frequency);
+ REDUCE_FLS(sec, count);
+ }
+
+ if (count_fls + sec_fls > 64) {
+ divisor = nsec * frequency;
+
+ while (count_fls + sec_fls > 64) {
+ REDUCE_FLS(count, sec);
+ divisor >>= 1;
+ }
+
+ dividend = count * sec;
+ } else {
+ dividend = count * sec;
+
+ while (nsec_fls + frequency_fls > 64) {
+ REDUCE_FLS(nsec, frequency);
+ dividend >>= 1;
+ }
+
+ divisor = nsec * frequency;
+ }
+
+ return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
u64 period, sample_period;
s64 delta;
- events *= hwc->sample_period;
- period = div64_u64(events, event->attr.sample_freq);
+ period = perf_calculate_period(event, nsec, count);
delta = (s64)(period - hwc->sample_period);
delta = (delta + 7) / 8; /* low pass filter */
@@ -1368,13 +1510,22 @@
sample_period = 1;
hwc->sample_period = sample_period;
+
+ if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+ perf_disable();
+ event->pmu->disable(event);
+ atomic64_set(&hwc->period_left, 0);
+ event->pmu->enable(event);
+ perf_enable();
+ }
}
static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
{
struct perf_event *event;
struct hw_perf_event *hwc;
- u64 interrupts, freq;
+ u64 interrupts, now;
+ s64 delta;
raw_spin_lock(&ctx->lock);
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1395,44 +1546,18 @@
if (interrupts == MAX_INTERRUPTS) {
perf_log_throttle(event, 1);
event->pmu->unthrottle(event);
- interrupts = 2*sysctl_perf_event_sample_rate/HZ;
}
if (!event->attr.freq || !event->attr.sample_freq)
continue;
- /*
- * if the specified freq < HZ then we need to skip ticks
- */
- if (event->attr.sample_freq < HZ) {
- freq = event->attr.sample_freq;
+ event->pmu->read(event);
+ now = atomic64_read(&event->count);
+ delta = now - hwc->freq_count_stamp;
+ hwc->freq_count_stamp = now;
- hwc->freq_count += freq;
- hwc->freq_interrupts += interrupts;
-
- if (hwc->freq_count < HZ)
- continue;
-
- interrupts = hwc->freq_interrupts;
- hwc->freq_interrupts = 0;
- hwc->freq_count -= HZ;
- } else
- freq = HZ;
-
- perf_adjust_period(event, freq * interrupts);
-
- /*
- * In order to avoid being stalled by an (accidental) huge
- * sample period, force reset the sample period if we didn't
- * get any events in this freq period.
- */
- if (!interrupts) {
- perf_disable();
- event->pmu->disable(event);
- atomic64_set(&hwc->period_left, 0);
- event->pmu->enable(event);
- perf_enable();
- }
+ if (delta > 0)
+ perf_adjust_period(event, TICK_NSEC, delta);
}
raw_spin_unlock(&ctx->lock);
}
@@ -1442,26 +1567,22 @@
*/
static void rotate_ctx(struct perf_event_context *ctx)
{
- struct perf_event *event;
-
if (!ctx->nr_events)
return;
raw_spin_lock(&ctx->lock);
- /*
- * Rotate the first entry last (works just fine for group events too):
- */
+
+ /* Rotate the first entry last of non-pinned groups */
perf_disable();
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- list_move_tail(&event->group_entry, &ctx->group_list);
- break;
- }
+
+ list_rotate_left(&ctx->flexible_groups);
+
perf_enable();
raw_spin_unlock(&ctx->lock);
}
-void perf_event_task_tick(struct task_struct *curr, int cpu)
+void perf_event_task_tick(struct task_struct *curr)
{
struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
@@ -1469,24 +1590,39 @@
if (!atomic_read(&nr_events))
return;
- cpuctx = &per_cpu(perf_cpu_context, cpu);
+ cpuctx = &__get_cpu_var(perf_cpu_context);
ctx = curr->perf_event_ctxp;
perf_ctx_adjust_freq(&cpuctx->ctx);
if (ctx)
perf_ctx_adjust_freq(ctx);
- perf_event_cpu_sched_out(cpuctx);
+ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
if (ctx)
- __perf_event_task_sched_out(ctx);
+ task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
rotate_ctx(&cpuctx->ctx);
if (ctx)
rotate_ctx(ctx);
- perf_event_cpu_sched_in(cpuctx, cpu);
+ cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
if (ctx)
- perf_event_task_sched_in(curr, cpu);
+ task_ctx_sched_in(curr, EVENT_FLEXIBLE);
+}
+
+static int event_enable_on_exec(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ if (!event->attr.enable_on_exec)
+ return 0;
+
+ event->attr.enable_on_exec = 0;
+ if (event->state >= PERF_EVENT_STATE_INACTIVE)
+ return 0;
+
+ __perf_event_mark_enabled(event, ctx);
+
+ return 1;
}
/*
@@ -1499,6 +1635,7 @@
struct perf_event *event;
unsigned long flags;
int enabled = 0;
+ int ret;
local_irq_save(flags);
ctx = task->perf_event_ctxp;
@@ -1509,14 +1646,16 @@
raw_spin_lock(&ctx->lock);
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- if (!event->attr.enable_on_exec)
- continue;
- event->attr.enable_on_exec = 0;
- if (event->state >= PERF_EVENT_STATE_INACTIVE)
- continue;
- __perf_event_mark_enabled(event, ctx);
- enabled = 1;
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+ ret = event_enable_on_exec(event, ctx);
+ if (ret)
+ enabled = 1;
+ }
+
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+ ret = event_enable_on_exec(event, ctx);
+ if (ret)
+ enabled = 1;
}
/*
@@ -1527,7 +1666,7 @@
raw_spin_unlock(&ctx->lock);
- perf_event_task_sched_in(task, smp_processor_id());
+ perf_event_task_sched_in(task);
out:
local_irq_restore(flags);
}
@@ -1590,7 +1729,8 @@
{
raw_spin_lock_init(&ctx->lock);
mutex_init(&ctx->mutex);
- INIT_LIST_HEAD(&ctx->group_list);
+ INIT_LIST_HEAD(&ctx->pinned_groups);
+ INIT_LIST_HEAD(&ctx->flexible_groups);
INIT_LIST_HEAD(&ctx->event_list);
atomic_set(&ctx->refcount, 1);
ctx->task = task;
@@ -3689,12 +3829,12 @@
if (event->attr.freq) {
u64 now = perf_clock();
- s64 delta = now - hwc->freq_stamp;
+ s64 delta = now - hwc->freq_time_stamp;
- hwc->freq_stamp = now;
+ hwc->freq_time_stamp = now;
- if (delta > 0 && delta < TICK_NSEC)
- perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+ if (delta > 0 && delta < 2*TICK_NSEC)
+ perf_adjust_period(event, delta, hwc->last_period);
}
/*
@@ -4185,7 +4325,7 @@
.read = task_clock_perf_event_read,
};
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
int entry_size)
@@ -4290,7 +4430,7 @@
{
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_EVENT_TRACING */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
static void bp_perf_event_destroy(struct perf_event *event)
@@ -4871,8 +5011,15 @@
else
child_event->state = PERF_EVENT_STATE_OFF;
- if (parent_event->attr.freq)
- child_event->hw.sample_period = parent_event->hw.sample_period;
+ if (parent_event->attr.freq) {
+ u64 sample_period = parent_event->hw.sample_period;
+ struct hw_perf_event *hwc = &child_event->hw;
+
+ hwc->sample_period = sample_period;
+ hwc->last_period = sample_period;
+
+ atomic64_set(&hwc->period_left, sample_period);
+ }
child_event->overflow_handler = parent_event->overflow_handler;
@@ -5040,7 +5187,11 @@
mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
again:
- list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+ list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+ group_entry)
+ __perf_event_exit_task(child_event, child_ctx, child);
+
+ list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
group_entry)
__perf_event_exit_task(child_event, child_ctx, child);
@@ -5049,7 +5200,8 @@
* its siblings to the list, but we obtained 'tmp' before that which
* will still point to the list head terminating the iteration.
*/
- if (!list_empty(&child_ctx->group_list))
+ if (!list_empty(&child_ctx->pinned_groups) ||
+ !list_empty(&child_ctx->flexible_groups))
goto again;
mutex_unlock(&child_ctx->mutex);
@@ -5057,6 +5209,24 @@
put_ctx(child_ctx);
}
+static void perf_free_event(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ struct perf_event *parent = event->parent;
+
+ if (WARN_ON_ONCE(!parent))
+ return;
+
+ mutex_lock(&parent->child_mutex);
+ list_del_init(&event->child_list);
+ mutex_unlock(&parent->child_mutex);
+
+ fput(parent->filp);
+
+ list_del_event(event, ctx);
+ free_event(event);
+}
+
/*
* free an unexposed, unused context as created by inheritance by
* init_task below, used by fork() in case of fail.
@@ -5071,23 +5241,15 @@
mutex_lock(&ctx->mutex);
again:
- list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
- struct perf_event *parent = event->parent;
+ list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+ perf_free_event(event, ctx);
- if (WARN_ON_ONCE(!parent))
- continue;
+ list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+ group_entry)
+ perf_free_event(event, ctx);
- mutex_lock(&parent->child_mutex);
- list_del_init(&event->child_list);
- mutex_unlock(&parent->child_mutex);
-
- fput(parent->filp);
-
- list_del_event(event, ctx);
- free_event(event);
- }
-
- if (!list_empty(&ctx->group_list))
+ if (!list_empty(&ctx->pinned_groups) ||
+ !list_empty(&ctx->flexible_groups))
goto again;
mutex_unlock(&ctx->mutex);
@@ -5095,12 +5257,54 @@
put_ctx(ctx);
}
+static int
+inherit_task_group(struct perf_event *event, struct task_struct *parent,
+ struct perf_event_context *parent_ctx,
+ struct task_struct *child,
+ int *inherited_all)
+{
+ int ret;
+ struct perf_event_context *child_ctx = child->perf_event_ctxp;
+
+ if (!event->attr.inherit) {
+ *inherited_all = 0;
+ return 0;
+ }
+
+ if (!child_ctx) {
+ /*
+ * This is executed from the parent task context, so
+ * inherit events that have been marked for cloning.
+ * First allocate and initialize a context for the
+ * child.
+ */
+
+ child_ctx = kzalloc(sizeof(struct perf_event_context),
+ GFP_KERNEL);
+ if (!child_ctx)
+ return -ENOMEM;
+
+ __perf_event_init_context(child_ctx, child);
+ child->perf_event_ctxp = child_ctx;
+ get_task_struct(child);
+ }
+
+ ret = inherit_group(event, parent, parent_ctx,
+ child, child_ctx);
+
+ if (ret)
+ *inherited_all = 0;
+
+ return ret;
+}
+
+
/*
* Initialize the perf_event context in task_struct
*/
int perf_event_init_task(struct task_struct *child)
{
- struct perf_event_context *child_ctx = NULL, *parent_ctx;
+ struct perf_event_context *child_ctx, *parent_ctx;
struct perf_event_context *cloned_ctx;
struct perf_event *event;
struct task_struct *parent = current;
@@ -5138,41 +5342,22 @@
* We dont have to disable NMIs - we are only looking at
* the list, not manipulating it:
*/
- list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
-
- if (!event->attr.inherit) {
- inherited_all = 0;
- continue;
- }
-
- if (!child->perf_event_ctxp) {
- /*
- * This is executed from the parent task context, so
- * inherit events that have been marked for cloning.
- * First allocate and initialize a context for the
- * child.
- */
-
- child_ctx = kzalloc(sizeof(struct perf_event_context),
- GFP_KERNEL);
- if (!child_ctx) {
- ret = -ENOMEM;
- break;
- }
-
- __perf_event_init_context(child_ctx, child);
- child->perf_event_ctxp = child_ctx;
- get_task_struct(child);
- }
-
- ret = inherit_group(event, parent, parent_ctx,
- child, child_ctx);
- if (ret) {
- inherited_all = 0;
+ list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+ ret = inherit_task_group(event, parent, parent_ctx, child,
+ &inherited_all);
+ if (ret)
break;
- }
}
+ list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+ ret = inherit_task_group(event, parent, parent_ctx, child,
+ &inherited_all);
+ if (ret)
+ break;
+ }
+
+ child_ctx = child->perf_event_ctxp;
+
if (child_ctx && inherited_all) {
/*
* Mark the child context as a clone of the parent
@@ -5221,7 +5406,9 @@
struct perf_event_context *ctx = &cpuctx->ctx;
struct perf_event *event, *tmp;
- list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+ list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+ __perf_event_remove_from_context(event);
+ list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
__perf_event_remove_from_context(event);
}
static void perf_event_exit_cpu(int cpu)
diff --git a/kernel/sched.c b/kernel/sched.c
index 4508fe7..7266b91 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2783,7 +2783,13 @@
*/
prev_state = prev->state;
finish_arch_switch(prev);
- perf_event_task_sched_in(current, cpu_of(rq));
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ local_irq_disable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+ perf_event_task_sched_in(current);
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ local_irq_enable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
fire_sched_in_preempt_notifiers(current);
@@ -5298,7 +5304,7 @@
curr->sched_class->task_tick(rq, curr, 0);
raw_spin_unlock(&rq->lock);
- perf_event_task_tick(curr, cpu);
+ perf_event_task_tick(curr);
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
@@ -5512,7 +5518,7 @@
if (likely(prev != next)) {
sched_info_switch(prev, next);
- perf_event_task_sched_out(prev, next, cpu);
+ perf_event_task_sched_out(prev, next);
rq->nr_switches++;
rq->curr = next;
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd8..d00c6fe 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@
obj-$(CONFIG_EVENT_TRACING) += trace_events.o
obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
-obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
+ifeq ($(CONFIG_PERF_EVENTS),y)
+obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
+endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 9e25573..f0d6930 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -6,14 +6,12 @@
*/
#include <linux/module.h>
+#include <linux/kprobes.h>
#include "trace.h"
-char *perf_trace_buf;
-EXPORT_SYMBOL_GPL(perf_trace_buf);
-
-char *perf_trace_buf_nmi;
-EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
+static char *perf_trace_buf;
+static char *perf_trace_buf_nmi;
typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
@@ -120,3 +118,47 @@
}
mutex_unlock(&event_mutex);
}
+
+__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
+ int *rctxp, unsigned long *irq_flags)
+{
+ struct trace_entry *entry;
+ char *trace_buf, *raw_data;
+ int pc, cpu;
+
+ pc = preempt_count();
+
+ /* Protect the per cpu buffer, begin the rcu read side */
+ local_irq_save(*irq_flags);
+
+ *rctxp = perf_swevent_get_recursion_context();
+ if (*rctxp < 0)
+ goto err_recursion;
+
+ cpu = smp_processor_id();
+
+ if (in_nmi())
+ trace_buf = rcu_dereference(perf_trace_buf_nmi);
+ else
+ trace_buf = rcu_dereference(perf_trace_buf);
+
+ if (!trace_buf)
+ goto err;
+
+ raw_data = per_cpu_ptr(trace_buf, cpu);
+
+ /* zero the dead bytes from align to not leak stack to user */
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+ entry = (struct trace_entry *)raw_data;
+ tracing_generic_entry_update(entry, *irq_flags, pc);
+ entry->type = type;
+
+ return raw_data;
+err:
+ perf_swevent_put_recursion_context(*rctxp);
+err_recursion:
+ local_irq_restore(*irq_flags);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e42af9a..4615f62 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1371,7 +1371,7 @@
return err;
}
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
void ftrace_profile_free_filter(struct perf_event *event)
{
@@ -1439,5 +1439,5 @@
return err;
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6ea90c0..6178abf 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -91,11 +91,6 @@
return retval;
}
-static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
-{
- return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
-}
-
static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
void *dummy)
{
@@ -231,9 +226,7 @@
{
int ret = -EINVAL;
- if (ff->func == fetch_argument)
- ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
- else if (ff->func == fetch_register) {
+ if (ff->func == fetch_register) {
const char *name;
name = regs_query_register_name((unsigned int)((long)ff->data));
ret = snprintf(buf, n, "%%%s", name);
@@ -489,14 +482,6 @@
}
} else
ret = -EINVAL;
- } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
- ret = strict_strtoul(arg + 3, 10, ¶m);
- if (ret || param > PARAM_MAX_ARGS)
- ret = -EINVAL;
- else {
- ff->func = fetch_argument;
- ff->data = (void *)param;
- }
} else
ret = -EINVAL;
return ret;
@@ -611,7 +596,6 @@
* - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
* - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
* Fetch args:
- * $argN : fetch Nth of function argument. (N:0-)
* $retval : fetch return value
* $stack : fetch stack address
* $stackN : fetch Nth of stack (N:0-)
@@ -958,7 +942,7 @@
};
/* Kprobe handler */
-static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
+static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct kprobe_trace_entry *entry;
@@ -978,7 +962,7 @@
event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
- return 0;
+ return;
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
@@ -988,11 +972,10 @@
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
- return 0;
}
/* Kretprobe handler */
-static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
+static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1011,7 +994,7 @@
event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
- return 0;
+ return;
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
@@ -1022,8 +1005,6 @@
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
-
- return 0;
}
/* Event entry printers */
@@ -1250,137 +1231,67 @@
", REC->" FIELD_STRING_RETIP);
}
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
-static __kprobes int kprobe_profile_func(struct kprobe *kp,
+static __kprobes void kprobe_profile_func(struct kprobe *kp,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry *entry;
- struct trace_entry *ent;
- int size, __size, i, pc, __cpu;
+ int size, __size, i;
unsigned long irq_flags;
- char *trace_buf;
- char *raw_data;
int rctx;
- pc = preempt_count();
__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"profile buffer not large enough"))
- return 0;
+ return;
- /*
- * Protect the non nmi buffer
- * This also protects the rcu read side
- */
- local_irq_save(irq_flags);
+ entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+ if (!entry)
+ return;
- rctx = perf_swevent_get_recursion_context();
- if (rctx < 0)
- goto end_recursion;
-
- __cpu = smp_processor_id();
-
- if (in_nmi())
- trace_buf = rcu_dereference(perf_trace_buf_nmi);
- else
- trace_buf = rcu_dereference(perf_trace_buf);
-
- if (!trace_buf)
- goto end;
-
- raw_data = per_cpu_ptr(trace_buf, __cpu);
-
- /* Zero dead bytes from alignment to avoid buffer leak to userspace */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
- entry = (struct kprobe_trace_entry *)raw_data;
- ent = &entry->ent;
-
- tracing_generic_entry_update(ent, irq_flags, pc);
- ent->type = call->id;
entry->nargs = tp->nr_args;
entry->ip = (unsigned long)kp->addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
- perf_tp_event(call->id, entry->ip, 1, entry, size);
-end:
- perf_swevent_put_recursion_context(rctx);
-end_recursion:
- local_irq_restore(irq_flags);
-
- return 0;
+ ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
}
/* Kretprobe profile handler */
-static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
+static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry *entry;
- struct trace_entry *ent;
- int size, __size, i, pc, __cpu;
+ int size, __size, i;
unsigned long irq_flags;
- char *trace_buf;
- char *raw_data;
int rctx;
- pc = preempt_count();
__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"profile buffer not large enough"))
- return 0;
+ return;
- /*
- * Protect the non nmi buffer
- * This also protects the rcu read side
- */
- local_irq_save(irq_flags);
+ entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+ if (!entry)
+ return;
- rctx = perf_swevent_get_recursion_context();
- if (rctx < 0)
- goto end_recursion;
-
- __cpu = smp_processor_id();
-
- if (in_nmi())
- trace_buf = rcu_dereference(perf_trace_buf_nmi);
- else
- trace_buf = rcu_dereference(perf_trace_buf);
-
- if (!trace_buf)
- goto end;
-
- raw_data = per_cpu_ptr(trace_buf, __cpu);
-
- /* Zero dead bytes from alignment to avoid buffer leak to userspace */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
- entry = (struct kretprobe_trace_entry *)raw_data;
- ent = &entry->ent;
-
- tracing_generic_entry_update(ent, irq_flags, pc);
- ent->type = call->id;
entry->nargs = tp->nr_args;
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
- perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
-end:
- perf_swevent_put_recursion_context(rctx);
-end_recursion:
- local_irq_restore(irq_flags);
-
- return 0;
+ ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
}
static int probe_profile_enable(struct ftrace_event_call *call)
@@ -1408,7 +1319,7 @@
disable_kprobe(&tp->rp.kp);
}
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
static __kprobes
@@ -1418,10 +1329,10 @@
if (tp->flags & TP_FLAG_TRACE)
kprobe_trace_func(kp, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kprobe_profile_func(kp, regs);
-#endif /* CONFIG_EVENT_PROFILE */
+#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@@ -1432,10 +1343,10 @@
if (tp->flags & TP_FLAG_TRACE)
kretprobe_trace_func(ri, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kretprobe_profile_func(ri, regs);
-#endif /* CONFIG_EVENT_PROFILE */
+#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@@ -1464,7 +1375,7 @@
call->regfunc = probe_event_enable;
call->unregfunc = probe_event_disable;
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
call->profile_enable = probe_profile_enable;
call->profile_disable = probe_profile_disable;
#endif
@@ -1523,28 +1434,67 @@
static __init int kprobe_trace_self_tests_init(void)
{
- int ret;
+ int ret, warn = 0;
int (*target)(int, int, int, int, int, int);
+ struct trace_probe *tp;
target = kprobe_trace_selftest_target;
pr_info("Testing kprobe tracing: ");
ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
- "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
- if (WARN_ON_ONCE(ret))
- pr_warning("error enabling function entry\n");
+ "$stack $stack0 +0($stack)");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on probing function entry.\n");
+ warn++;
+ } else {
+ /* Enable trace point */
+ tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting new probe.\n");
+ warn++;
+ } else
+ probe_event_enable(&tp->call);
+ }
ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
"$retval");
- if (WARN_ON_ONCE(ret))
- pr_warning("error enabling function return\n");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on probing function return.\n");
+ warn++;
+ } else {
+ /* Enable trace point */
+ tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting new probe.\n");
+ warn++;
+ } else
+ probe_event_enable(&tp->call);
+ }
+
+ if (warn)
+ goto end;
ret = target(1, 2, 3, 4, 5, 6);
- cleanup_all_probes();
+ ret = command_trace_probe("-:testprobe");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on deleting a probe.\n");
+ warn++;
+ }
- pr_cont("OK\n");
+ ret = command_trace_probe("-:testprobe2");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on deleting a probe.\n");
+ warn++;
+ }
+
+end:
+ cleanup_all_probes();
+ if (warn)
+ pr_cont("NG: Some tests are failed. Please check them.\n");
+ else
+ pr_cont("OK\n");
return 0;
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 75289f3..4e332b9 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -421,7 +421,7 @@
}
core_initcall(init_ftrace_syscalls);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -433,12 +433,9 @@
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
unsigned long flags;
- char *trace_buf;
- char *raw_data;
int syscall_nr;
int rctx;
int size;
- int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -457,37 +454,15 @@
"profile buffer not large enough"))
return;
- /* Protect the per cpu buffer, begin the rcu read side */
- local_irq_save(flags);
+ rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
+ sys_data->enter_event->id, &rctx, &flags);
+ if (!rec)
+ return;
- rctx = perf_swevent_get_recursion_context();
- if (rctx < 0)
- goto end_recursion;
-
- cpu = smp_processor_id();
-
- trace_buf = rcu_dereference(perf_trace_buf);
-
- if (!trace_buf)
- goto end;
-
- raw_data = per_cpu_ptr(trace_buf, cpu);
-
- /* zero the dead bytes from align to not leak stack to user */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-
- rec = (struct syscall_trace_enter *) raw_data;
- tracing_generic_entry_update(&rec->ent, 0, 0);
- rec->ent.type = sys_data->enter_event->id;
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
- perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
-
-end:
- perf_swevent_put_recursion_context(rctx);
-end_recursion:
- local_irq_restore(flags);
+ ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
}
int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -531,11 +506,8 @@
struct syscall_trace_exit *rec;
unsigned long flags;
int syscall_nr;
- char *trace_buf;
- char *raw_data;
int rctx;
int size;
- int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -557,38 +529,15 @@
"exit event has grown above profile buffer size"))
return;
- /* Protect the per cpu buffer, begin the rcu read side */
- local_irq_save(flags);
+ rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
+ sys_data->exit_event->id, &rctx, &flags);
+ if (!rec)
+ return;
- rctx = perf_swevent_get_recursion_context();
- if (rctx < 0)
- goto end_recursion;
-
- cpu = smp_processor_id();
-
- trace_buf = rcu_dereference(perf_trace_buf);
-
- if (!trace_buf)
- goto end;
-
- raw_data = per_cpu_ptr(trace_buf, cpu);
-
- /* zero the dead bytes from align to not leak stack to user */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-
- rec = (struct syscall_trace_exit *)raw_data;
-
- tracing_generic_entry_update(&rec->ent, 0, 0);
- rec->ent.type = sys_data->exit_event->id;
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
-
-end:
- perf_swevent_put_recursion_context(rctx);
-end_recursion:
- local_irq_restore(flags);
+ ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
}
int prof_sysexit_enable(struct ftrace_event_call *call)
@@ -626,6 +575,5 @@
mutex_unlock(&syscall_trace_lock);
}
-#endif
-
+#endif /* CONFIG_PERF_EVENTS */
diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt
new file mode 100644
index 0000000..fae174d
--- /dev/null
+++ b/tools/perf/Documentation/perf-archive.txt
@@ -0,0 +1,22 @@
+perf-archive(1)
+===============
+
+NAME
+----
+perf-archive - Create archive with object files with build-ids found in perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf archive' [file]
+
+DESCRIPTION
+-----------
+This command runs runs perf-buildid-list --with-hits, and collects the files
+with the buildids found so that analisys of perf.data contents can be possible
+on another machine.
+
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-buildid-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
new file mode 100644
index 0000000..88bc3b5
--- /dev/null
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -0,0 +1,33 @@
+perf-buildid-cache(1)
+=====================
+
+NAME
+----
+perf-buildid-cache - Manage build-id cache.
+
+SYNOPSIS
+--------
+[verse]
+'perf buildid-list <options>'
+
+DESCRIPTION
+-----------
+This command manages the build-id cache. It can add and remove files to the
+cache. In the future it should as well purge older entries, set upper limits
+for the space used by the cache, etc.
+
+OPTIONS
+-------
+-a::
+--add=::
+ Add specified file to the cache.
+-r::
+--remove=::
+ Remove specified file to the cache.
+-v::
+--verbose::
+ Be more verbose.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 250e391..2de3407 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -15,6 +15,8 @@
'perf probe' [options] --del='[GROUP:]EVENT' [...]
or
'perf probe' --list
+or
+'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
DESCRIPTION
-----------
@@ -45,6 +47,11 @@
--list::
List up current probe events.
+-L::
+--line=::
+ Show source code lines which can be probed. This needs an argument
+ which specifies a range of the source code.
+
PROBE SYNTAX
------------
Probe points are defined by following syntax.
@@ -56,6 +63,19 @@
It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc).
+LINE SYNTAX
+-----------
+Line range is descripted by following syntax.
+
+ "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]"
+
+FUNC specifies the function name of showing lines. 'RLN' is the start line
+number from function entry line, and 'RLN2' is the end line number. As same as
+probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
+and 'ALN2' is end line number in the file. It is also possible to specify how
+many lines to show by using 'NUM'.
+So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
+
SEE ALSO
--------
linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index 69c8325..0eeb247 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -12,7 +12,7 @@
DESCRIPTION
-----------
-Performance counters for Linux are are a new kernel-based subsystem
+Performance counters for Linux are a new kernel-based subsystem
that provide a framework for all things performance analysis. It
covers hardware level (CPU/PMU, Performance Monitoring Unit) features
and software features (software counters, tracepoints) as well.
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 2e7fa3a..9b173e6 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -286,11 +286,7 @@
SCRIPT_SH =
TEST_PROGRAMS =
-#
-# No scripts right now:
-#
-
-# SCRIPT_SH += perf-am.sh
+SCRIPT_SH += perf-archive.sh
#
# No Perl scripts right now:
@@ -315,9 +311,6 @@
# List built-in command $C whose implementation cmd_$C() is not in
# builtin-$C.o but is linked in as part of some other command.
#
-# None right now:
-#
-# BUILT_INS += perf-init $X
# what 'all' will build and 'install' will install, in perfexecdir
ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
@@ -369,6 +362,7 @@
LIB_H += util/exec_cmd.h
LIB_H += util/types.h
LIB_H += util/levenshtein.h
+LIB_H += util/map.h
LIB_H += util/parse-options.h
LIB_H += util/parse-events.h
LIB_H += util/quote.h
@@ -435,8 +429,8 @@
LIB_OBJS += util/svghelper.o
LIB_OBJS += util/sort.o
LIB_OBJS += util/hist.o
-LIB_OBJS += util/data_map.o
LIB_OBJS += util/probe-event.o
+LIB_OBJS += util/util.o
BUILTIN_OBJS += builtin-annotate.o
@@ -451,6 +445,7 @@
BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
BUILTIN_OBJS += builtin-buildid-list.o
+BUILTIN_OBJS += builtin-buildid-cache.o
BUILTIN_OBJS += builtin-list.o
BUILTIN_OBJS += builtin-record.o
BUILTIN_OBJS += builtin-report.o
@@ -1009,6 +1004,7 @@
$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+ $(INSTALL) perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 593ff25..73c202e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -132,8 +132,8 @@
{
struct addr_location al;
- dump_printf("(IP, %d): %d: %p\n", event->header.misc,
- event->ip.pid, (void *)(long)event->ip.ip);
+ dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
+ event->ip.pid, event->ip.ip);
if (event__preprocess_sample(event, session, &al, symbol_filter) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
@@ -451,10 +451,10 @@
}
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_mmap_event = event__process_mmap,
- .process_comm_event = event__process_comm,
- .process_fork_event = event__process_task,
+ .sample = process_sample_event,
+ .mmap = event__process_mmap,
+ .comm = event__process_comm,
+ .fork = event__process_task,
};
static int __cmd_annotate(void)
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
new file mode 100644
index 0000000..30a05f5
--- /dev/null
+++ b/tools/perf/builtin-buildid-cache.c
@@ -0,0 +1,133 @@
+/*
+ * builtin-buildid-cache.c
+ *
+ * Builtin buildid-cache command: Manages build-id cache
+ *
+ * Copyright (C) 2010, Red Hat Inc.
+ * Copyright (C) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "builtin.h"
+#include "perf.h"
+#include "util/cache.h"
+#include "util/debug.h"
+#include "util/header.h"
+#include "util/parse-options.h"
+#include "util/strlist.h"
+#include "util/symbol.h"
+
+static char const *add_name_list_str, *remove_name_list_str;
+
+static const char * const buildid_cache_usage[] = {
+ "perf buildid-cache [<options>]",
+ NULL
+};
+
+static const struct option buildid_cache_options[] = {
+ OPT_STRING('a', "add", &add_name_list_str,
+ "file list", "file(s) to add"),
+ OPT_STRING('r', "remove", &remove_name_list_str, "file list",
+ "file(s) to remove"),
+ OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose"),
+ OPT_END()
+};
+
+static int build_id_cache__add_file(const char *filename, const char *debugdir)
+{
+ char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ u8 build_id[BUILD_ID_SIZE];
+ int err;
+
+ if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
+ pr_debug("Couldn't read a build-id in %s\n", filename);
+ return -1;
+ }
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ err = build_id_cache__add_s(sbuild_id, debugdir, filename, false);
+ if (verbose)
+ pr_info("Adding %s %s: %s\n", sbuild_id, filename,
+ err ? "FAIL" : "Ok");
+ return err;
+}
+
+static int build_id_cache__remove_file(const char *filename __used,
+ const char *debugdir __used)
+{
+ u8 build_id[BUILD_ID_SIZE];
+ char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+ int err;
+
+ if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
+ pr_debug("Couldn't read a build-id in %s\n", filename);
+ return -1;
+ }
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ err = build_id_cache__remove_s(sbuild_id, debugdir);
+ if (verbose)
+ pr_info("Removing %s %s: %s\n", sbuild_id, filename,
+ err ? "FAIL" : "Ok");
+
+ return err;
+}
+
+static int __cmd_buildid_cache(void)
+{
+ struct strlist *list;
+ struct str_node *pos;
+ char debugdir[PATH_MAX];
+
+ snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"),
+ DEBUG_CACHE_DIR);
+
+ if (add_name_list_str) {
+ list = strlist__new(true, add_name_list_str);
+ if (list) {
+ strlist__for_each(pos, list)
+ if (build_id_cache__add_file(pos->s, debugdir)) {
+ if (errno == EEXIST) {
+ pr_debug("%s already in the cache\n",
+ pos->s);
+ continue;
+ }
+ pr_warning("Couldn't add %s: %s\n",
+ pos->s, strerror(errno));
+ }
+
+ strlist__delete(list);
+ }
+ }
+
+ if (remove_name_list_str) {
+ list = strlist__new(true, remove_name_list_str);
+ if (list) {
+ strlist__for_each(pos, list)
+ if (build_id_cache__remove_file(pos->s, debugdir)) {
+ if (errno == ENOENT) {
+ pr_debug("%s wasn't in the cache\n",
+ pos->s);
+ continue;
+ }
+ pr_warning("Couldn't remove %s: %s\n",
+ pos->s, strerror(errno));
+ }
+
+ strlist__delete(list);
+ }
+ }
+
+ return 0;
+}
+
+int cmd_buildid_cache(int argc, const char **argv, const char *prefix __used)
+{
+ argc = parse_options(argc, argv, buildid_cache_options,
+ buildid_cache_usage, 0);
+
+ if (symbol__init() < 0)
+ return -1;
+
+ setup_pager();
+ return __cmd_buildid_cache();
+}
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 1e99ac8..431f204 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -16,6 +16,7 @@
static char const *input_name = "perf.data";
static int force;
+static bool with_hits;
static const char * const buildid_list_usage[] = {
"perf buildid-list [<options>]",
@@ -23,6 +24,7 @@
};
static const struct option options[] = {
+ OPT_BOOLEAN('H', "with-hits", &with_hits, "Show only DSOs with hits"),
OPT_STRING('i', "input", &input_name, "file",
"input file name"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -31,26 +33,34 @@
OPT_END()
};
-static int perf_file_section__process_buildids(struct perf_file_section *self,
- int feat, int fd)
+static int build_id_list__process_event(event_t *event,
+ struct perf_session *session)
{
- if (feat != HEADER_BUILD_ID)
- return 0;
+ struct addr_location al;
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ struct thread *thread = perf_session__findnew(session, event->ip.pid);
- if (lseek(fd, self->offset, SEEK_SET) < 0) {
- pr_warning("Failed to lseek to %Ld offset for buildids!\n",
- self->offset);
+ if (thread == NULL) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
return -1;
}
- if (perf_header__read_build_ids(fd, self->offset, self->size)) {
- pr_warning("Failed to read buildids!\n");
- return -1;
- }
+ thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
+ event->ip.ip, &al);
+
+ if (al.map != NULL)
+ al.map->dso->hit = 1;
return 0;
}
+static struct perf_event_ops build_id_list__event_ops = {
+ .sample = build_id_list__process_event,
+ .mmap = event__process_mmap,
+ .fork = event__process_task,
+};
+
static int __cmd_buildid_list(void)
{
int err = -1;
@@ -60,10 +70,10 @@
if (session == NULL)
return -1;
- err = perf_header__process_sections(&session->header, session->fd,
- perf_file_section__process_buildids);
- if (err >= 0)
- dsos__fprintf_buildid(stdout);
+ if (with_hits)
+ perf_session__process_events(session, &build_id_list__event_ops);
+
+ dsos__fprintf_buildid(stdout, with_hits);
perf_session__delete(session);
return err;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index bd71b8c..18b3f50 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -42,8 +42,8 @@
struct addr_location al;
struct sample_data data = { .period = 1, };
- dump_printf("(IP, %d): %d: %p\n", event->header.misc,
- event->ip.pid, (void *)(long)event->ip.ip);
+ dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
+ event->ip.pid, event->ip.ip);
if (event__preprocess_sample(event, session, &al, NULL) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
@@ -51,12 +51,12 @@
return -1;
}
- if (al.filtered)
+ if (al.filtered || al.sym == NULL)
return 0;
event__parse_sample(event, session->sample_type, &data);
- if (al.sym && perf_session__add_hist_entry(session, &al, data.period)) {
+ if (perf_session__add_hist_entry(session, &al, data.period)) {
pr_warning("problem incrementing symbol count, skipping event\n");
return -1;
}
@@ -66,12 +66,12 @@
}
static struct perf_event_ops event_ops = {
- .process_sample_event = diff__process_sample_event,
- .process_mmap_event = event__process_mmap,
- .process_comm_event = event__process_comm,
- .process_exit_event = event__process_task,
- .process_fork_event = event__process_task,
- .process_lost_event = event__process_lost,
+ .sample = diff__process_sample_event,
+ .mmap = event__process_mmap,
+ .comm = event__process_comm,
+ .exit = event__process_task,
+ .fork = event__process_task,
+ .lost = event__process_lost,
};
static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -82,29 +82,19 @@
struct hist_entry *iter;
while (*p != NULL) {
- int cmp;
parent = *p;
iter = rb_entry(parent, struct hist_entry, rb_node);
-
- cmp = strcmp(he->map->dso->name, iter->map->dso->name);
- if (cmp > 0)
+ if (hist_entry__cmp(he, iter) < 0)
p = &(*p)->rb_left;
- else if (cmp < 0)
+ else
p = &(*p)->rb_right;
- else {
- cmp = strcmp(he->sym->name, iter->sym->name);
- if (cmp > 0)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
}
rb_link_node(&he->rb_node, parent, p);
rb_insert_color(&he->rb_node, root);
}
-static void perf_session__resort_by_name(struct perf_session *self)
+static void perf_session__resort_hist_entries(struct perf_session *self)
{
unsigned long position = 1;
struct rb_root tmp = RB_ROOT;
@@ -122,29 +112,28 @@
self->hists = tmp;
}
+static void perf_session__set_hist_entries_positions(struct perf_session *self)
+{
+ perf_session__output_resort(self, self->events_stats.total);
+ perf_session__resort_hist_entries(self);
+}
+
static struct hist_entry *
-perf_session__find_hist_entry_by_name(struct perf_session *self,
- struct hist_entry *he)
+perf_session__find_hist_entry(struct perf_session *self,
+ struct hist_entry *he)
{
struct rb_node *n = self->hists.rb_node;
while (n) {
struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
- int cmp = strcmp(he->map->dso->name, iter->map->dso->name);
+ int64_t cmp = hist_entry__cmp(he, iter);
- if (cmp > 0)
+ if (cmp < 0)
n = n->rb_left;
- else if (cmp < 0)
+ else if (cmp > 0)
n = n->rb_right;
- else {
- cmp = strcmp(he->sym->name, iter->sym->name);
- if (cmp > 0)
- n = n->rb_left;
- else if (cmp < 0)
- n = n->rb_right;
- else
- return iter;
- }
+ else
+ return iter;
}
return NULL;
@@ -155,11 +144,9 @@
{
struct rb_node *nd;
- perf_session__resort_by_name(old_session);
-
for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) {
struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
- pos->pair = perf_session__find_hist_entry_by_name(old_session, pos);
+ pos->pair = perf_session__find_hist_entry(old_session, pos);
}
}
@@ -177,9 +164,12 @@
ret = perf_session__process_events(session[i], &event_ops);
if (ret)
goto out_delete;
- perf_session__output_resort(session[i], session[i]->events_stats.total);
}
+ perf_session__output_resort(session[1], session[1]->events_stats.total);
+ if (show_displacement)
+ perf_session__set_hist_entries_positions(session[0]);
+
perf_session__match_hists(session[0], session[1]);
perf_session__fprintf_hists(session[1], session[0],
show_displacement, stdout);
@@ -204,7 +194,7 @@
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
- OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+ OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
"Don't shorten the pathnames taking into account the cwd"),
OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
"only consider symbols in these dsos"),
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 9f810b1..215b584 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -286,8 +286,7 @@
puts(" The most commonly used perf commands are:");
for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
- printf(" %s ", common_cmds[i].name);
- mput_char(' ', longest - strlen(common_cmds[i].name));
+ printf(" %-*s ", longest, common_cmds[i].name);
puts(common_cmds[i].help);
}
}
@@ -314,8 +313,6 @@
return "perf";
else if (!prefixcmp(perf_cmd, "perf"))
return perf_cmd;
- else if (is_perf_command(perf_cmd))
- return prepend("perf-", perf_cmd);
else
return prepend("perf-", perf_cmd);
}
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 93c67bf..5d5dc6b 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -92,23 +92,18 @@
if (!dir1)
return;
- while (true) {
- dent1 = readdir(dir1);
- if (!dent1)
- break;
-
- if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+ while ((dent1 = readdir(dir1)) != NULL) {
+ if (dent1->d_type != DT_DIR ||
+ sscanf(dent1->d_name, "node%u", &mem) < 1)
continue;
snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
dir2 = opendir(buf);
if (!dir2)
continue;
- while (true) {
- dent2 = readdir(dir2);
- if (!dent2)
- break;
- if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+ while ((dent2 = readdir(dir2)) != NULL) {
+ if (dent2->d_type != DT_LNK ||
+ sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
continue;
cpunode_map[cpu] = mem;
}
@@ -321,11 +316,8 @@
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
thread = perf_session__findnew(session, event->ip.pid);
if (thread == NULL) {
@@ -342,22 +334,9 @@
return 0;
}
-static int sample_type_check(struct perf_session *session)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr,
- "No trace sample to read. Did you call perf record "
- "without -R?");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_comm_event = event__process_comm,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .comm = event__process_comm,
};
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@ -504,11 +483,14 @@
static int __cmd_kmem(void)
{
- int err;
+ int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
if (session == NULL)
return -ENOMEM;
+ if (!perf_session__has_traces(session, "kmem record"))
+ goto out_delete;
+
setup_pager();
err = perf_session__process_events(session, &event_ops);
if (err != 0)
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index c1e6774..34f2acb 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -55,11 +55,13 @@
bool need_dwarf;
bool list_events;
bool force_add;
+ bool show_lines;
int nr_probe;
struct probe_point probes[MAX_PROBES];
struct strlist *dellist;
struct perf_session *psession;
struct map *kmap;
+ struct line_range line_range;
} session;
@@ -137,6 +139,16 @@
pr_debug("Try to open %s\n", session.kmap->dso->long_name);
return open(session.kmap->dso->long_name, O_RDONLY);
}
+
+static int opt_show_lines(const struct option *opt __used,
+ const char *str, int unset __used)
+{
+ if (str)
+ parse_line_range_desc(str, &session.line_range);
+ INIT_LIST_HEAD(&session.line_range.line_list);
+ session.show_lines = true;
+ return 0;
+}
#endif
static const char * const probe_usage[] = {
@@ -144,6 +156,7 @@
"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
"perf probe [<options>] --del '[GROUP:]EVENT' ...",
"perf probe --list",
+ "perf probe --line 'LINEDESC'",
NULL
};
@@ -182,9 +195,32 @@
opt_add_probe_event),
OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events"
" with existing name"),
+#ifndef NO_LIBDWARF
+ OPT_CALLBACK('L', "line", NULL,
+ "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]",
+ "Show source code lines.", opt_show_lines),
+#endif
OPT_END()
};
+/* Initialize symbol maps for vmlinux */
+static void init_vmlinux(void)
+{
+ symbol_conf.sort_by_name = true;
+ if (symbol_conf.vmlinux_name == NULL)
+ symbol_conf.try_vmlinux_path = true;
+ else
+ pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
+ if (symbol__init() < 0)
+ die("Failed to init symbol map.");
+ session.psession = perf_session__new(NULL, O_WRONLY, false);
+ if (session.psession == NULL)
+ die("Failed to init perf_session.");
+ session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION];
+ if (!session.kmap)
+ die("Could not find kernel map.\n");
+}
+
int cmd_probe(int argc, const char **argv, const char *prefix __used)
{
int i, ret;
@@ -203,7 +239,8 @@
parse_probe_event_argv(argc, argv);
}
- if ((!session.nr_probe && !session.dellist && !session.list_events))
+ if ((!session.nr_probe && !session.dellist && !session.list_events &&
+ !session.show_lines))
usage_with_options(probe_usage, options);
if (debugfs_valid_mountpoint(debugfs_path) < 0)
@@ -215,10 +252,34 @@
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
+ if (session.show_lines) {
+ pr_warning(" Error: Don't use --list with --line.\n");
+ usage_with_options(probe_usage, options);
+ }
show_perf_probe_events();
return 0;
}
+#ifndef NO_LIBDWARF
+ if (session.show_lines) {
+ if (session.nr_probe != 0 || session.dellist) {
+ pr_warning(" Error: Don't use --line with"
+ " --add/--del.\n");
+ usage_with_options(probe_usage, options);
+ }
+ init_vmlinux();
+ fd = open_vmlinux();
+ if (fd < 0)
+ die("Could not open debuginfo file.");
+ ret = find_line_range(fd, &session.line_range);
+ if (ret <= 0)
+ die("Source line is not found.\n");
+ close(fd);
+ show_line_range(&session.line_range);
+ return 0;
+ }
+#endif
+
if (session.dellist) {
del_trace_kprobe_events(session.dellist);
strlist__delete(session.dellist);
@@ -226,20 +287,8 @@
return 0;
}
- /* Initialize symbol maps for vmlinux */
- symbol_conf.sort_by_name = true;
- if (symbol_conf.vmlinux_name == NULL)
- symbol_conf.try_vmlinux_path = true;
- if (symbol__init() < 0)
- die("Failed to init symbol map.");
- session.psession = perf_session__new(NULL, O_WRONLY, false);
- if (session.psession == NULL)
- die("Failed to init perf_session.");
- session.kmap = map_groups__find_by_name(&session.psession->kmaps,
- MAP__FUNCTION,
- "[kernel.kallsyms]");
- if (!session.kmap)
- die("Could not find kernel map.\n");
+ /* Add probes */
+ init_vmlinux();
if (session.need_dwarf)
#ifdef NO_LIBDWARF
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2654253..90345223 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -113,12 +113,24 @@
static void write_event(event_t *buf, size_t size)
{
- /*
- * Add it to the list of DSOs, so that when we finish this
- * record session we can pick the available build-ids.
- */
- if (buf->header.type == PERF_RECORD_MMAP)
- dsos__findnew(buf->mmap.filename);
+ size_t processed_size = buf->header.size;
+ event_t *ev = buf;
+
+ do {
+ /*
+ * Add it to the list of DSOs, so that when we finish this
+ * record session we can pick the available build-ids.
+ */
+ if (ev->header.type == PERF_RECORD_MMAP) {
+ struct list_head *head = &dsos__user;
+ if (ev->header.misc == 1)
+ head = &dsos__kernel;
+ __dsos__findnew(head, ev->mmap.filename);
+ }
+
+ ev = ((void *)ev) + ev->header.size;
+ processed_size += ev->header.size;
+ } while (processed_size < size);
write_output(buf, size);
}
@@ -551,6 +563,19 @@
return err;
}
+ err = event__synthesize_kernel_mmap(process_synthesized_event,
+ session, "_text");
+ if (err < 0) {
+ pr_err("Couldn't record kernel reference relocation symbol.\n");
+ return err;
+ }
+
+ err = event__synthesize_modules(process_synthesized_event, session);
+ if (err < 0) {
+ pr_err("Couldn't record kernel reference relocation symbol.\n");
+ return err;
+ }
+
if (!system_wide && profile_cpu == -1)
event__synthesize_thread(pid, process_synthesized_event,
session);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 860f1ee..cfc655d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -34,6 +34,8 @@
static char const *input_name = "perf.data";
static int force;
+static bool hide_unresolved;
+static bool dont_use_callchains;
static int show_threads;
static struct perf_read_values show_threads_values;
@@ -91,11 +93,8 @@
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
unsigned int i;
@@ -121,7 +120,7 @@
return -1;
}
- if (al.filtered)
+ if (al.filtered || (hide_unresolved && al.sym == NULL))
return 0;
if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) {
@@ -156,14 +155,14 @@
return 0;
}
-static int sample_type_check(struct perf_session *session)
+static int perf_session__setup_sample_type(struct perf_session *self)
{
- if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN)) {
+ if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
if (sort__has_parent) {
fprintf(stderr, "selected --sort parent, but no"
" callchain data. Did you call"
" perf record without -g?\n");
- return -1;
+ return -EINVAL;
}
if (symbol_conf.use_callchain) {
fprintf(stderr, "selected -g but no callchain data."
@@ -171,12 +170,13 @@
" -g?\n");
return -1;
}
- } else if (callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) {
+ } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
+ !symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (register_callchain_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain"
" params\n");
- return -1;
+ return -EINVAL;
}
}
@@ -184,20 +184,18 @@
}
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_mmap_event = event__process_mmap,
- .process_comm_event = event__process_comm,
- .process_exit_event = event__process_task,
- .process_fork_event = event__process_task,
- .process_lost_event = event__process_lost,
- .process_read_event = process_read_event,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .mmap = event__process_mmap,
+ .comm = event__process_comm,
+ .exit = event__process_task,
+ .fork = event__process_task,
+ .lost = event__process_lost,
+ .read = process_read_event,
};
-
static int __cmd_report(void)
{
- int ret;
+ int ret = -EINVAL;
struct perf_session *session;
session = perf_session__new(input_name, O_RDONLY, force);
@@ -207,6 +205,10 @@
if (show_threads)
perf_read_values_init(&show_threads_values);
+ ret = perf_session__setup_sample_type(session);
+ if (ret)
+ goto out_delete;
+
ret = perf_session__process_events(session, &event_ops);
if (ret)
goto out_delete;
@@ -243,11 +245,19 @@
static int
parse_callchain_opt(const struct option *opt __used, const char *arg,
- int unset __used)
+ int unset)
{
char *tok;
char *endptr;
+ /*
+ * --no-call-graph
+ */
+ if (unset) {
+ dont_use_callchains = true;
+ return 0;
+ }
+
symbol_conf.use_callchain = true;
if (!arg)
@@ -319,7 +329,7 @@
"pretty printing style key: normal raw"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"),
- OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+ OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
"Don't shorten the pathnames taking into account the cwd"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
"regex filter to identify parent, see: '--sort parent'"),
@@ -340,6 +350,8 @@
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
+ OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
+ "Only display entries resolved to a symbol"),
OPT_END()
};
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 80209df..4f5a03e 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1621,11 +1621,8 @@
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
thread = perf_session__findnew(session, data.pid);
if (thread == NULL) {
@@ -1653,33 +1650,22 @@
return 0;
}
-static int sample_type_check(struct perf_session *session __used)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr,
- "No trace sample to read. Did you call perf record "
- "without -R?");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_comm_event = event__process_comm,
- .process_lost_event = process_lost_event,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .comm = event__process_comm,
+ .lost = process_lost_event,
};
static int read_events(void)
{
- int err;
+ int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
if (session == NULL)
return -ENOMEM;
- err = perf_session__process_events(session, &event_ops);
+ if (perf_session__has_traces(session, "record -R"))
+ err = perf_session__process_events(session, &event_ops);
+
perf_session__delete(session);
return err;
}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c70d720..e8c85d5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -44,6 +44,7 @@
#include "util/parse-events.h"
#include "util/event.h"
#include "util/debug.h"
+#include "util/header.h"
#include <sys/prctl.h>
#include <math.h>
@@ -79,6 +80,8 @@
static int event_scaled[MAX_COUNTERS];
+static volatile int done = 0;
+
struct stats
{
double n, mean, M2;
@@ -247,61 +250,64 @@
unsigned long long t0, t1;
int status = 0;
int counter;
- int pid;
+ int pid = target_pid;
int child_ready_pipe[2], go_pipe[2];
+ const bool forks = (target_pid == -1 && argc > 0);
char buf;
if (!system_wide)
nr_cpus = 1;
- if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) {
+ if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
perror("failed to create pipes");
exit(1);
}
- if ((pid = fork()) < 0)
- perror("failed to fork");
+ if (forks) {
+ if ((pid = fork()) < 0)
+ perror("failed to fork");
- if (!pid) {
- close(child_ready_pipe[0]);
- close(go_pipe[1]);
- fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+ if (!pid) {
+ close(child_ready_pipe[0]);
+ close(go_pipe[1]);
+ fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+ /*
+ * Do a dummy execvp to get the PLT entry resolved,
+ * so we avoid the resolver overhead on the real
+ * execvp call.
+ */
+ execvp("", (char **)argv);
+
+ /*
+ * Tell the parent we're ready to go
+ */
+ close(child_ready_pipe[1]);
+
+ /*
+ * Wait until the parent tells us to go.
+ */
+ if (read(go_pipe[0], &buf, 1) == -1)
+ perror("unable to read pipe");
+
+ execvp(argv[0], (char **)argv);
+
+ perror(argv[0]);
+ exit(-1);
+ }
+
+ child_pid = pid;
/*
- * Do a dummy execvp to get the PLT entry resolved,
- * so we avoid the resolver overhead on the real
- * execvp call.
- */
- execvp("", (char **)argv);
-
- /*
- * Tell the parent we're ready to go
+ * Wait for the child to be ready to exec.
*/
close(child_ready_pipe[1]);
-
- /*
- * Wait until the parent tells us to go.
- */
- if (read(go_pipe[0], &buf, 1) == -1)
+ close(go_pipe[0]);
+ if (read(child_ready_pipe[0], &buf, 1) == -1)
perror("unable to read pipe");
-
- execvp(argv[0], (char **)argv);
-
- perror(argv[0]);
- exit(-1);
+ close(child_ready_pipe[0]);
}
- child_pid = pid;
-
- /*
- * Wait for the child to be ready to exec.
- */
- close(child_ready_pipe[1]);
- close(go_pipe[0]);
- if (read(child_ready_pipe[0], &buf, 1) == -1)
- perror("unable to read pipe");
- close(child_ready_pipe[0]);
-
for (counter = 0; counter < nr_counters; counter++)
create_perf_stat_counter(counter, pid);
@@ -310,8 +316,12 @@
*/
t0 = rdclock();
- close(go_pipe[1]);
- wait(&status);
+ if (forks) {
+ close(go_pipe[1]);
+ wait(&status);
+ } else {
+ while(!done);
+ }
t1 = rdclock();
@@ -417,10 +427,13 @@
fflush(stdout);
fprintf(stderr, "\n");
- fprintf(stderr, " Performance counter stats for \'%s", argv[0]);
-
- for (i = 1; i < argc; i++)
- fprintf(stderr, " %s", argv[i]);
+ fprintf(stderr, " Performance counter stats for ");
+ if(target_pid == -1) {
+ fprintf(stderr, "\'%s", argv[0]);
+ for (i = 1; i < argc; i++)
+ fprintf(stderr, " %s", argv[i]);
+ }else
+ fprintf(stderr, "task pid \'%d", target_pid);
fprintf(stderr, "\'");
if (run_count > 1)
@@ -445,6 +458,9 @@
static void skip_signal(int signo)
{
+ if(target_pid != -1)
+ done = 1;
+
signr = signo;
}
@@ -461,7 +477,7 @@
}
static const char * const stat_usage[] = {
- "perf stat [<options>] <command>",
+ "perf stat [<options>] [<command>]",
NULL
};
@@ -492,7 +508,7 @@
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc)
+ if (!argc && target_pid == -1)
usage_with_options(stat_usage, options);
if (run_count <= 0)
usage_with_options(stat_usage, options);
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 3f8bbcf..0d4d8ff 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1029,33 +1029,24 @@
}
}
-static int sample_type_check(struct perf_session *session)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr, "No trace samples found in the file.\n"
- "Have you used 'perf timechart record' to record it?\n");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_comm_event = process_comm_event,
- .process_fork_event = process_fork_event,
- .process_exit_event = process_exit_event,
- .process_sample_event = queue_sample_event,
- .sample_type_check = sample_type_check,
+ .comm = process_comm_event,
+ .fork = process_fork_event,
+ .exit = process_exit_event,
+ .sample = queue_sample_event,
};
static int __cmd_timechart(void)
{
struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
- int ret;
+ int ret = -EINVAL;
if (session == NULL)
return -ENOMEM;
+ if (!perf_session__has_traces(session, "timechart record"))
+ goto out_delete;
+
ret = perf_session__process_events(session, &event_ops);
if (ret)
goto out_delete;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ddc584b..1fc018e 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -667,7 +667,7 @@
}
if (!found) {
- fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
+ fprintf(stderr, "Sorry, %s is not active.\n", buf);
sleep(1);
return;
} else
@@ -934,8 +934,11 @@
struct addr_location al;
u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ ++samples;
+
switch (origin) {
case PERF_RECORD_MISC_USER:
+ ++userspace_samples;
if (hide_user_symbols)
return;
break;
@@ -948,9 +951,31 @@
}
if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 ||
- al.sym == NULL || al.filtered)
+ al.filtered)
return;
+ if (al.sym == NULL) {
+ /*
+ * As we do lazy loading of symtabs we only will know if the
+ * specified vmlinux file is invalid when we actually have a
+ * hit in kernel space and then try to load it. So if we get
+ * here and there are _no_ symbols in the DSO backing the
+ * kernel map, bail out.
+ *
+ * We may never get here, for instance, if we use -K/
+ * --hide-kernel-symbols, even if the user specifies an
+ * invalid --vmlinux ;-)
+ */
+ if (al.map == session->vmlinux_maps[MAP__FUNCTION] &&
+ RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
+ pr_err("The %s file can't be used\n",
+ symbol_conf.vmlinux_name);
+ exit(1);
+ }
+
+ return;
+ }
+
syme = symbol__priv(al.sym);
if (!syme->skip) {
syme->count[counter]++;
@@ -960,9 +985,6 @@
if (list_empty(&syme->node) || !syme->node.next)
__list_insert_active_sym(syme);
pthread_mutex_unlock(&active_symbols_lock);
- if (origin == PERF_RECORD_MISC_USER)
- ++userspace_samples;
- ++samples;
}
}
@@ -975,6 +997,10 @@
case PERF_RECORD_MMAP:
event__process_mmap(event, session);
break;
+ case PERF_RECORD_FORK:
+ case PERF_RECORD_EXIT:
+ event__process_task(event, session);
+ break;
default:
break;
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 574a215..0b65779 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -75,11 +75,8 @@
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
thread = perf_session__findnew(session, event->ip.pid);
if (thread == NULL) {
@@ -103,22 +100,9 @@
return 0;
}
-static int sample_type_check(struct perf_session *session)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr,
- "No trace sample to read. Did you call perf record "
- "without -R?");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_comm_event = event__process_comm,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .comm = event__process_comm,
};
static int __cmd_trace(struct perf_session *session)
@@ -531,6 +515,8 @@
parse_scriptname),
OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
"generate perf-trace.xx script in specified language"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
OPT_END()
};
@@ -592,6 +578,9 @@
if (session == NULL)
return -ENOMEM;
+ if (!perf_session__has_traces(session, "record -R"))
+ return -EINVAL;
+
if (generate_script_lang) {
struct stat perf_stat;
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 18035b1..dee97cf 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -16,6 +16,7 @@
extern int cmd_annotate(int argc, const char **argv, const char *prefix);
extern int cmd_bench(int argc, const char **argv, const char *prefix);
+extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
extern int cmd_diff(int argc, const char **argv, const char *prefix);
extern int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 71dc7c3..9afcff2 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -3,7 +3,9 @@
# command name category [deprecated] [common]
#
perf-annotate mainporcelain common
+perf-archive mainporcelain common
perf-bench mainporcelain common
+perf-buildid-cache mainporcelain common
perf-buildid-list mainporcelain common
perf-diff mainporcelain common
perf-list mainporcelain common
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 8d0de51..bd0bb1b 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -101,10 +101,10 @@
*/
PERF_COUNT_HW_CPU_CYCLES = 0,
PERF_COUNT_HW_INSTRUCTIONS = 1,
- PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
PERF_COUNT_HW_CACHE_MISSES = 3,
PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
- PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
PERF_COUNT_HW_BUS_CYCLES = 6,
};
@@ -131,8 +131,8 @@
*/
enum sw_event_ids {
PERF_COUNT_SW_CPU_CLOCK = 0,
- PERF_COUNT_SW_TASK_CLOCK = 1,
- PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
new file mode 100644
index 0000000..45fbe2f
--- /dev/null
+++ b/tools/perf/perf-archive.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# perf archive
+# Arnaldo Carvalho de Melo <acme@redhat.com>
+
+PERF_DATA=perf.data
+if [ $# -ne 0 ] ; then
+ PERF_DATA=$1
+fi
+
+DEBUGDIR=~/.debug/
+BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
+
+perf buildid-list -i $PERF_DATA --with-hits > $BUILDIDS
+if [ ! -s $BUILDIDS ] ; then
+ echo "perf archive: no build-ids found"
+ rm -f $BUILDIDS
+ exit 1
+fi
+
+MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX)
+
+cut -d ' ' -f 1 $BUILDIDS | \
+while read build_id ; do
+ linkname=$DEBUGDIR.build-id/${build_id:0:2}/${build_id:2}
+ filename=$(readlink -f $linkname)
+ echo ${linkname#$DEBUGDIR} >> $MANIFEST
+ echo ${filename#$DEBUGDIR} >> $MANIFEST
+done
+
+tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST
+rm -f $MANIFEST $BUILDIDS
+exit 0
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 873e55f..05c861c 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -285,6 +285,7 @@
{
const char *cmd = argv[0];
static struct cmd_struct commands[] = {
+ { "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
{ "diff", cmd_diff, 0 },
{ "help", cmd_help, 0 },
@@ -388,7 +389,7 @@
/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */
static void get_debugfs_mntpt(void)
{
- const char *path = debugfs_find_mountpoint();
+ const char *path = debugfs_mount(NULL);
if (path)
strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt));
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c
deleted file mode 100644
index b557b83..0000000
--- a/tools/perf/util/data_map.c
+++ /dev/null
@@ -1,252 +0,0 @@
-#include "symbol.h"
-#include "util.h"
-#include "debug.h"
-#include "thread.h"
-#include "session.h"
-
-static int process_event_stub(event_t *event __used,
- struct perf_session *session __used)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
-{
- if (!handler->process_sample_event)
- handler->process_sample_event = process_event_stub;
- if (!handler->process_mmap_event)
- handler->process_mmap_event = process_event_stub;
- if (!handler->process_comm_event)
- handler->process_comm_event = process_event_stub;
- if (!handler->process_fork_event)
- handler->process_fork_event = process_event_stub;
- if (!handler->process_exit_event)
- handler->process_exit_event = process_event_stub;
- if (!handler->process_lost_event)
- handler->process_lost_event = process_event_stub;
- if (!handler->process_read_event)
- handler->process_read_event = process_event_stub;
- if (!handler->process_throttle_event)
- handler->process_throttle_event = process_event_stub;
- if (!handler->process_unthrottle_event)
- handler->process_unthrottle_event = process_event_stub;
-}
-
-static const char *event__name[] = {
- [0] = "TOTAL",
- [PERF_RECORD_MMAP] = "MMAP",
- [PERF_RECORD_LOST] = "LOST",
- [PERF_RECORD_COMM] = "COMM",
- [PERF_RECORD_EXIT] = "EXIT",
- [PERF_RECORD_THROTTLE] = "THROTTLE",
- [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
- [PERF_RECORD_FORK] = "FORK",
- [PERF_RECORD_READ] = "READ",
- [PERF_RECORD_SAMPLE] = "SAMPLE",
-};
-
-unsigned long event__total[PERF_RECORD_MAX];
-
-void event__print_totals(void)
-{
- int i;
- for (i = 0; i < PERF_RECORD_MAX; ++i)
- pr_info("%10s events: %10ld\n",
- event__name[i], event__total[i]);
-}
-
-static int process_event(event_t *event, struct perf_session *session,
- struct perf_event_ops *ops,
- unsigned long offset, unsigned long head)
-{
- trace_event(event);
-
- if (event->header.type < PERF_RECORD_MAX) {
- dump_printf("%p [%p]: PERF_RECORD_%s",
- (void *)(offset + head),
- (void *)(long)(event->header.size),
- event__name[event->header.type]);
- ++event__total[0];
- ++event__total[event->header.type];
- }
-
- switch (event->header.type) {
- case PERF_RECORD_SAMPLE:
- return ops->process_sample_event(event, session);
- case PERF_RECORD_MMAP:
- return ops->process_mmap_event(event, session);
- case PERF_RECORD_COMM:
- return ops->process_comm_event(event, session);
- case PERF_RECORD_FORK:
- return ops->process_fork_event(event, session);
- case PERF_RECORD_EXIT:
- return ops->process_exit_event(event, session);
- case PERF_RECORD_LOST:
- return ops->process_lost_event(event, session);
- case PERF_RECORD_READ:
- return ops->process_read_event(event, session);
- case PERF_RECORD_THROTTLE:
- return ops->process_throttle_event(event, session);
- case PERF_RECORD_UNTHROTTLE:
- return ops->process_unthrottle_event(event, session);
- default:
- ops->total_unknown++;
- return -1;
- }
-}
-
-int perf_header__read_build_ids(int input, u64 offset, u64 size)
-{
- struct build_id_event bev;
- char filename[PATH_MAX];
- u64 limit = offset + size;
- int err = -1;
-
- while (offset < limit) {
- struct dso *dso;
- ssize_t len;
-
- if (read(input, &bev, sizeof(bev)) != sizeof(bev))
- goto out;
-
- len = bev.header.size - sizeof(bev);
- if (read(input, filename, len) != len)
- goto out;
-
- dso = dsos__findnew(filename);
- if (dso != NULL)
- dso__set_build_id(dso, &bev.build_id);
-
- offset += bev.header.size;
- }
- err = 0;
-out:
- return err;
-}
-
-static struct thread *perf_session__register_idle_thread(struct perf_session *self)
-{
- struct thread *thread = perf_session__findnew(self, 0);
-
- if (!thread || thread__set_comm(thread, "swapper")) {
- pr_err("problem inserting idle task.\n");
- thread = NULL;
- }
-
- return thread;
-}
-
-int perf_session__process_events(struct perf_session *self,
- struct perf_event_ops *ops)
-{
- int err;
- unsigned long head, shift;
- unsigned long offset = 0;
- size_t page_size;
- event_t *event;
- uint32_t size;
- char *buf;
-
- if (perf_session__register_idle_thread(self) == NULL)
- return -ENOMEM;
-
- perf_event_ops__fill_defaults(ops);
-
- page_size = getpagesize();
-
- head = self->header.data_offset;
- self->sample_type = perf_header__sample_type(&self->header);
-
- err = -EINVAL;
- if (ops->sample_type_check && ops->sample_type_check(self) < 0)
- goto out_err;
-
- if (!ops->full_paths) {
- char bf[PATH_MAX];
-
- if (getcwd(bf, sizeof(bf)) == NULL) {
- err = -errno;
-out_getcwd_err:
- pr_err("failed to get the current directory\n");
- goto out_err;
- }
- self->cwd = strdup(bf);
- if (self->cwd == NULL) {
- err = -ENOMEM;
- goto out_getcwd_err;
- }
- self->cwdlen = strlen(self->cwd);
- }
-
- shift = page_size * (head / page_size);
- offset += shift;
- head -= shift;
-
-remap:
- buf = mmap(NULL, page_size * self->mmap_window, PROT_READ,
- MAP_SHARED, self->fd, offset);
- if (buf == MAP_FAILED) {
- pr_err("failed to mmap file\n");
- err = -errno;
- goto out_err;
- }
-
-more:
- event = (event_t *)(buf + head);
-
- size = event->header.size;
- if (!size)
- size = 8;
-
- if (head + event->header.size >= page_size * self->mmap_window) {
- int munmap_ret;
-
- shift = page_size * (head / page_size);
-
- munmap_ret = munmap(buf, page_size * self->mmap_window);
- assert(munmap_ret == 0);
-
- offset += shift;
- head -= shift;
- goto remap;
- }
-
- size = event->header.size;
-
- dump_printf("\n%p [%p]: event: %d\n",
- (void *)(offset + head),
- (void *)(long)event->header.size,
- event->header.type);
-
- if (!size || process_event(event, self, ops, offset, head) < 0) {
-
- dump_printf("%p [%p]: skipping unknown header type: %d\n",
- (void *)(offset + head),
- (void *)(long)(event->header.size),
- event->header.type);
-
- /*
- * assume we lost track of the stream, check alignment, and
- * increment a single u64 in the hope to catch on again 'soon'.
- */
-
- if (unlikely(head & 7))
- head &= ~7ULL;
-
- size = 8;
- }
-
- head += size;
-
- if (offset + head >= self->header.data_offset + self->header.data_size)
- goto done;
-
- if (offset + head < self->size)
- goto more;
-
-done:
- err = 0;
-out_err:
- return err;
-}
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 28d520d..0905600 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -9,6 +9,7 @@
#include "color.h"
#include "event.h"
#include "debug.h"
+#include "util.h"
int verbose = 0;
int dump_trace = 0;
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index 06b73ee..a88fefc 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -106,16 +106,14 @@
return 0;
}
-/* mount the debugfs somewhere */
+/* mount the debugfs somewhere if it's not mounted */
-int debugfs_mount(const char *mountpoint)
+char *debugfs_mount(const char *mountpoint)
{
- char mountcmd[128];
-
/* see if it's already mounted */
if (debugfs_find_mountpoint()) {
debugfs_premounted = 1;
- return 0;
+ return debugfs_mountpoint;
}
/* if not mounted and no argument */
@@ -127,13 +125,14 @@
mountpoint = "/sys/kernel/debug";
}
+ if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0)
+ return NULL;
+
/* save the mountpoint */
strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
+ debugfs_found = 1;
- /* mount it */
- snprintf(mountcmd, sizeof(mountcmd),
- "/bin/mount -t debugfs debugfs %s", mountpoint);
- return system(mountcmd);
+ return debugfs_mountpoint;
}
/* umount the debugfs */
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 3cd14f9..83a0287 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -15,7 +15,7 @@
extern const char *debugfs_find_mountpoint(void);
extern int debugfs_valid_mountpoint(const char *debugfs);
extern int debugfs_valid_entry(const char *path);
-extern int debugfs_mount(const char *mountpoint);
+extern char *debugfs_mount(const char *mountpoint);
extern int debugfs_umount(void);
extern int debugfs_write(const char *entry, const char *value);
extern int debugfs_read(const char *entry, char *buffer, size_t size);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bb0fd6d..bbaee61 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -8,8 +8,7 @@
#include "thread.h"
static pid_t event__synthesize_comm(pid_t pid, int full,
- int (*process)(event_t *event,
- struct perf_session *session),
+ event__handler_t process,
struct perf_session *session)
{
event_t ev;
@@ -91,8 +90,7 @@
}
static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
- int (*process)(event_t *event,
- struct perf_session *session),
+ event__handler_t process,
struct perf_session *session)
{
char filename[PATH_MAX];
@@ -112,7 +110,10 @@
while (1) {
char bf[BUFSIZ], *pbf = bf;
event_t ev = {
- .header = { .type = PERF_RECORD_MMAP },
+ .header = {
+ .type = PERF_RECORD_MMAP,
+ .misc = 0, /* Just like the kernel, see kernel/perf_event.c __perf_event_mmap */
+ },
};
int n;
size_t size;
@@ -156,9 +157,38 @@
return 0;
}
-int event__synthesize_thread(pid_t pid,
- int (*process)(event_t *event,
- struct perf_session *session),
+int event__synthesize_modules(event__handler_t process,
+ struct perf_session *session)
+{
+ struct rb_node *nd;
+
+ for (nd = rb_first(&session->kmaps.maps[MAP__FUNCTION]);
+ nd; nd = rb_next(nd)) {
+ event_t ev;
+ size_t size;
+ struct map *pos = rb_entry(nd, struct map, rb_node);
+
+ if (pos->dso->kernel)
+ continue;
+
+ size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+ memset(&ev, 0, sizeof(ev));
+ ev.mmap.header.misc = 1; /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */
+ ev.mmap.header.type = PERF_RECORD_MMAP;
+ ev.mmap.header.size = (sizeof(ev.mmap) -
+ (sizeof(ev.mmap.filename) - size));
+ ev.mmap.start = pos->start;
+ ev.mmap.len = pos->end - pos->start;
+
+ memcpy(ev.mmap.filename, pos->dso->long_name,
+ pos->dso->long_name_len + 1);
+ process(&ev, session);
+ }
+
+ return 0;
+}
+
+int event__synthesize_thread(pid_t pid, event__handler_t process,
struct perf_session *session)
{
pid_t tgid = event__synthesize_comm(pid, 1, process, session);
@@ -167,8 +197,7 @@
return event__synthesize_mmap_events(pid, tgid, process, session);
}
-void event__synthesize_threads(int (*process)(event_t *event,
- struct perf_session *session),
+void event__synthesize_threads(event__handler_t process,
struct perf_session *session)
{
DIR *proc;
@@ -189,6 +218,59 @@
closedir(proc);
}
+struct process_symbol_args {
+ const char *name;
+ u64 start;
+};
+
+static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
+{
+ struct process_symbol_args *args = arg;
+
+ /*
+ * Must be a function or at least an alias, as in PARISC64, where "_text" is
+ * an 'A' to the same address as "_stext".
+ */
+ if (!(symbol_type__is_a(type, MAP__FUNCTION) ||
+ type == 'A') || strcmp(name, args->name))
+ return 0;
+
+ args->start = start;
+ return 1;
+}
+
+int event__synthesize_kernel_mmap(event__handler_t process,
+ struct perf_session *session,
+ const char *symbol_name)
+{
+ size_t size;
+ event_t ev = {
+ .header = {
+ .type = PERF_RECORD_MMAP,
+ .misc = 1, /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */
+ },
+ };
+ /*
+ * We should get this from /sys/kernel/sections/.text, but till that is
+ * available use this, and after it is use this as a fallback for older
+ * kernels.
+ */
+ struct process_symbol_args args = { .name = symbol_name, };
+
+ if (kallsyms__parse("/proc/kallsyms", &args, find_symbol_cb) <= 0)
+ return -ENOENT;
+
+ size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
+ "[kernel.kallsyms.%s]", symbol_name) + 1;
+ size = ALIGN(size, sizeof(u64));
+ ev.mmap.header.size = (sizeof(ev.mmap) - (sizeof(ev.mmap.filename) - size));
+ ev.mmap.pgoff = args.start;
+ ev.mmap.start = session->vmlinux_maps[MAP__FUNCTION]->start;
+ ev.mmap.len = session->vmlinux_maps[MAP__FUNCTION]->end - ev.mmap.start ;
+
+ return process(&ev, session);
+}
+
static void thread__comm_adjust(struct thread *self)
{
char *comm = self->comm;
@@ -240,22 +322,84 @@
int event__process_mmap(event_t *self, struct perf_session *session)
{
- struct thread *thread = perf_session__findnew(session, self->mmap.pid);
- struct map *map = map__new(&self->mmap, MAP__FUNCTION,
- session->cwd, session->cwdlen);
+ struct thread *thread;
+ struct map *map;
- dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n",
- self->mmap.pid, self->mmap.tid,
- (void *)(long)self->mmap.start,
- (void *)(long)self->mmap.len,
- (void *)(long)self->mmap.pgoff,
- self->mmap.filename);
+ dump_printf(" %d/%d: [%#Lx(%#Lx) @ %#Lx]: %s\n",
+ self->mmap.pid, self->mmap.tid, self->mmap.start,
+ self->mmap.len, self->mmap.pgoff, self->mmap.filename);
+
+ if (self->mmap.pid == 0) {
+ static const char kmmap_prefix[] = "[kernel.kallsyms.";
+
+ if (self->mmap.filename[0] == '/') {
+ char short_module_name[1024];
+ char *name = strrchr(self->mmap.filename, '/'), *dot;
+
+ if (name == NULL)
+ goto out_problem;
+
+ ++name; /* skip / */
+ dot = strrchr(name, '.');
+ if (dot == NULL)
+ goto out_problem;
+
+ snprintf(short_module_name, sizeof(short_module_name),
+ "[%.*s]", (int)(dot - name), name);
+ strxfrchar(short_module_name, '-', '_');
+
+ map = perf_session__new_module_map(session,
+ self->mmap.start,
+ self->mmap.filename);
+ if (map == NULL)
+ goto out_problem;
+
+ name = strdup(short_module_name);
+ if (name == NULL)
+ goto out_problem;
+
+ map->dso->short_name = name;
+ map->end = map->start + self->mmap.len;
+ } else if (memcmp(self->mmap.filename, kmmap_prefix,
+ sizeof(kmmap_prefix) - 1) == 0) {
+ const char *symbol_name = (self->mmap.filename +
+ sizeof(kmmap_prefix) - 1);
+ /*
+ * Should be there already, from the build-id table in
+ * the header.
+ */
+ struct dso *kernel = __dsos__findnew(&dsos__kernel,
+ "[kernel.kallsyms]");
+ if (kernel == NULL)
+ goto out_problem;
+
+ kernel->kernel = 1;
+ if (__map_groups__create_kernel_maps(&session->kmaps,
+ session->vmlinux_maps,
+ kernel) < 0)
+ goto out_problem;
+
+ session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start;
+ session->vmlinux_maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len;
+
+ perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name,
+ self->mmap.pgoff);
+ }
+ return 0;
+ }
+
+ thread = perf_session__findnew(session, self->mmap.pid);
+ map = map__new(&self->mmap, MAP__FUNCTION,
+ session->cwd, session->cwdlen);
if (thread == NULL || map == NULL)
- dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
- else
- thread__insert_map(thread, map);
+ goto out_problem;
+ thread__insert_map(thread, map);
+ return 0;
+
+out_problem:
+ dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
return 0;
}
@@ -284,11 +428,10 @@
return 0;
}
-void thread__find_addr_location(struct thread *self,
- struct perf_session *session, u8 cpumode,
- enum map_type type, u64 addr,
- struct addr_location *al,
- symbol_filter_t filter)
+void thread__find_addr_map(struct thread *self,
+ struct perf_session *session, u8 cpumode,
+ enum map_type type, u64 addr,
+ struct addr_location *al)
{
struct map_groups *mg = &self->mg;
@@ -303,7 +446,6 @@
else {
al->level = 'H';
al->map = NULL;
- al->sym = NULL;
return;
}
try_again:
@@ -322,11 +464,21 @@
mg = &session->kmaps;
goto try_again;
}
- al->sym = NULL;
- } else {
+ } else
al->addr = al->map->map_ip(al->map, al->addr);
+}
+
+void thread__find_addr_location(struct thread *self,
+ struct perf_session *session, u8 cpumode,
+ enum map_type type, u64 addr,
+ struct addr_location *al,
+ symbol_filter_t filter)
+{
+ thread__find_addr_map(self, session, cpumode, type, addr, al);
+ if (al->map != NULL)
al->sym = map__find_symbol(al->map, session, al->addr, filter);
- }
+ else
+ al->sym = NULL;
}
static void dso__calc_col_width(struct dso *self)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 690a96d..50a7132 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -1,10 +1,10 @@
#ifndef __PERF_RECORD_H
#define __PERF_RECORD_H
+#include <limits.h>
+
#include "../perf.h"
-#include "util.h"
-#include <linux/list.h>
-#include <linux/rbtree.h>
+#include "map.h"
/*
* PERF_SAMPLE_IP | PERF_SAMPLE_TID | *
@@ -101,74 +101,19 @@
void event__print_totals(void);
-enum map_type {
- MAP__FUNCTION = 0,
- MAP__VARIABLE,
-};
-
-#define MAP__NR_TYPES (MAP__VARIABLE + 1)
-
-struct map {
- union {
- struct rb_node rb_node;
- struct list_head node;
- };
- u64 start;
- u64 end;
- enum map_type type;
- u64 pgoff;
- u64 (*map_ip)(struct map *, u64);
- u64 (*unmap_ip)(struct map *, u64);
- struct dso *dso;
-};
-
-static inline u64 map__map_ip(struct map *map, u64 ip)
-{
- return ip - map->start + map->pgoff;
-}
-
-static inline u64 map__unmap_ip(struct map *map, u64 ip)
-{
- return ip + map->start - map->pgoff;
-}
-
-static inline u64 identity__map_ip(struct map *map __used, u64 ip)
-{
- return ip;
-}
-
-struct symbol;
-
-typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
-
-void map__init(struct map *self, enum map_type type,
- u64 start, u64 end, u64 pgoff, struct dso *dso);
-struct map *map__new(struct mmap_event *event, enum map_type,
- char *cwd, int cwdlen);
-void map__delete(struct map *self);
-struct map *map__clone(struct map *self);
-int map__overlap(struct map *l, struct map *r);
-size_t map__fprintf(struct map *self, FILE *fp);
-
struct perf_session;
-int map__load(struct map *self, struct perf_session *session,
- symbol_filter_t filter);
-struct symbol *map__find_symbol(struct map *self, struct perf_session *session,
- u64 addr, symbol_filter_t filter);
-struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
- struct perf_session *session,
- symbol_filter_t filter);
-void map__fixup_start(struct map *self);
-void map__fixup_end(struct map *self);
+typedef int (*event__handler_t)(event_t *event, struct perf_session *session);
-int event__synthesize_thread(pid_t pid,
- int (*process)(event_t *event,
- struct perf_session *session),
+int event__synthesize_thread(pid_t pid, event__handler_t process,
struct perf_session *session);
-void event__synthesize_threads(int (*process)(event_t *event,
- struct perf_session *session),
+void event__synthesize_threads(event__handler_t process,
struct perf_session *session);
+int event__synthesize_kernel_mmap(event__handler_t process,
+ struct perf_session *session,
+ const char *symbol_name);
+int event__synthesize_modules(event__handler_t process,
+ struct perf_session *session);
int event__process_comm(event_t *self, struct perf_session *session);
int event__process_lost(event_t *self, struct perf_session *session);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 8a0bca5..2bb2bdb 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1,8 +1,10 @@
#include <sys/types.h>
+#include <byteswap.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <linux/list.h>
+#include <linux/kernel.h>
#include "util.h"
#include "header.h"
@@ -105,24 +107,28 @@
static int event_count;
static struct perf_trace_event_type *events;
-void perf_header__push_event(u64 id, const char *name)
+int perf_header__push_event(u64 id, const char *name)
{
if (strlen(name) > MAX_EVENT_NAME)
pr_warning("Event %s will be truncated\n", name);
if (!events) {
events = malloc(sizeof(struct perf_trace_event_type));
- if (!events)
- die("nomem");
+ if (events == NULL)
+ return -ENOMEM;
} else {
- events = realloc(events, (event_count + 1) * sizeof(struct perf_trace_event_type));
- if (!events)
- die("nomem");
+ struct perf_trace_event_type *nevents;
+
+ nevents = realloc(events, (event_count + 1) * sizeof(*events));
+ if (nevents == NULL)
+ return -ENOMEM;
+ events = nevents;
}
memset(&events[event_count], 0, sizeof(struct perf_trace_event_type));
events[event_count].event_id = id;
strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1);
event_count++;
+ return 0;
}
char *perf_header__find_event(u64 id)
@@ -169,31 +175,45 @@
return 0;
}
-static int __dsos__write_buildid_table(struct list_head *head, int fd)
-{
-#define NAME_ALIGN 64
- struct dso *pos;
- static const char zero_buf[NAME_ALIGN];
+#define NAME_ALIGN 64
- list_for_each_entry(pos, head, node) {
+static int write_padded(int fd, const void *bf, size_t count,
+ size_t count_aligned)
+{
+ static const char zero_buf[NAME_ALIGN];
+ int err = do_write(fd, bf, count);
+
+ if (!err)
+ err = do_write(fd, zero_buf, count_aligned - count);
+
+ return err;
+}
+
+#define dsos__for_each_with_build_id(pos, head) \
+ list_for_each_entry(pos, head, node) \
+ if (!pos->has_build_id) \
+ continue; \
+ else
+
+static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd)
+{
+ struct dso *pos;
+
+ dsos__for_each_with_build_id(pos, head) {
int err;
struct build_id_event b;
- size_t len;
+ size_t len = pos->long_name_len + 1;
- if (!pos->has_build_id)
- continue;
- len = pos->long_name_len + 1;
len = ALIGN(len, NAME_ALIGN);
memset(&b, 0, sizeof(b));
memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
+ b.header.misc = misc;
b.header.size = sizeof(b) + len;
err = do_write(fd, &b, sizeof(b));
if (err < 0)
return err;
- err = do_write(fd, pos->long_name, pos->long_name_len + 1);
- if (err < 0)
- return err;
- err = do_write(fd, zero_buf, len - pos->long_name_len - 1);
+ err = write_padded(fd, pos->long_name,
+ pos->long_name_len + 1, len);
if (err < 0)
return err;
}
@@ -203,12 +223,143 @@
static int dsos__write_buildid_table(int fd)
{
- int err = __dsos__write_buildid_table(&dsos__kernel, fd);
+ int err = __dsos__write_buildid_table(&dsos__kernel,
+ PERF_RECORD_MISC_KERNEL, fd);
if (err == 0)
- err = __dsos__write_buildid_table(&dsos__user, fd);
+ err = __dsos__write_buildid_table(&dsos__user,
+ PERF_RECORD_MISC_USER, fd);
return err;
}
+int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
+ const char *name, bool is_kallsyms)
+{
+ const size_t size = PATH_MAX;
+ char *filename = malloc(size),
+ *linkname = malloc(size), *targetname;
+ int len, err = -1;
+
+ if (filename == NULL || linkname == NULL)
+ goto out_free;
+
+ len = snprintf(filename, size, "%s%s%s",
+ debugdir, is_kallsyms ? "/" : "", name);
+ if (mkdir_p(filename, 0755))
+ goto out_free;
+
+ snprintf(filename + len, sizeof(filename) - len, "/%s", sbuild_id);
+
+ if (access(filename, F_OK)) {
+ if (is_kallsyms) {
+ if (copyfile("/proc/kallsyms", filename))
+ goto out_free;
+ } else if (link(name, filename) && copyfile(name, filename))
+ goto out_free;
+ }
+
+ len = snprintf(linkname, size, "%s/.build-id/%.2s",
+ debugdir, sbuild_id);
+
+ if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
+ goto out_free;
+
+ snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
+ targetname = filename + strlen(debugdir) - 5;
+ memcpy(targetname, "../..", 5);
+
+ if (symlink(targetname, linkname) == 0)
+ err = 0;
+out_free:
+ free(filename);
+ free(linkname);
+ return err;
+}
+
+static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
+ const char *name, const char *debugdir,
+ bool is_kallsyms)
+{
+ char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+ build_id__sprintf(build_id, build_id_size, sbuild_id);
+
+ return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms);
+}
+
+int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
+{
+ const size_t size = PATH_MAX;
+ char *filename = malloc(size),
+ *linkname = malloc(size);
+ int err = -1;
+
+ if (filename == NULL || linkname == NULL)
+ goto out_free;
+
+ snprintf(linkname, size, "%s/.build-id/%.2s/%s",
+ debugdir, sbuild_id, sbuild_id + 2);
+
+ if (access(linkname, F_OK))
+ goto out_free;
+
+ if (readlink(linkname, filename, size) < 0)
+ goto out_free;
+
+ if (unlink(linkname))
+ goto out_free;
+
+ /*
+ * Since the link is relative, we must make it absolute:
+ */
+ snprintf(linkname, size, "%s/.build-id/%.2s/%s",
+ debugdir, sbuild_id, filename);
+
+ if (unlink(linkname))
+ goto out_free;
+
+ err = 0;
+out_free:
+ free(filename);
+ free(linkname);
+ return err;
+}
+
+static int dso__cache_build_id(struct dso *self, const char *debugdir)
+{
+ bool is_kallsyms = self->kernel && self->long_name[0] != '/';
+
+ return build_id_cache__add_b(self->build_id, sizeof(self->build_id),
+ self->long_name, debugdir, is_kallsyms);
+}
+
+static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
+{
+ struct dso *pos;
+ int err = 0;
+
+ dsos__for_each_with_build_id(pos, head)
+ if (dso__cache_build_id(pos, debugdir))
+ err = -1;
+
+ return err;
+}
+
+static int dsos__cache_build_ids(void)
+{
+ int err_kernel, err_user;
+ char debugdir[PATH_MAX];
+
+ snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"),
+ DEBUG_CACHE_DIR);
+
+ if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
+ return -1;
+
+ err_kernel = __dsos__cache_build_ids(&dsos__kernel, debugdir);
+ err_user = __dsos__cache_build_ids(&dsos__user, debugdir);
+ return err_kernel || err_user ? -1 : 0;
+}
+
static int perf_header__adds_write(struct perf_header *self, int fd)
{
int nr_sections;
@@ -258,6 +409,7 @@
goto out_free;
}
buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset;
+ dsos__cache_build_ids();
}
lseek(fd, sec_start, SEEK_SET);
@@ -360,30 +512,43 @@
return 0;
}
-static void do_read(int fd, void *buf, size_t size)
+static int do_read(int fd, void *buf, size_t size)
{
while (size) {
int ret = read(fd, buf, size);
- if (ret < 0)
- die("failed to read");
- if (ret == 0)
- die("failed to read: missing data");
+ if (ret <= 0)
+ return -1;
size -= ret;
buf += ret;
}
+
+ return 0;
+}
+
+static int perf_header__getbuffer64(struct perf_header *self,
+ int fd, void *buf, size_t size)
+{
+ if (do_read(fd, buf, size))
+ return -1;
+
+ if (self->needs_swap)
+ mem_bswap_64(buf, size);
+
+ return 0;
}
int perf_header__process_sections(struct perf_header *self, int fd,
int (*process)(struct perf_file_section *self,
+ struct perf_header *ph,
int feat, int fd))
{
struct perf_file_section *feat_sec;
int nr_sections;
int sec_size;
int idx = 0;
- int err = 0, feat = 1;
+ int err = -1, feat = 1;
nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
if (!nr_sections)
@@ -397,33 +562,45 @@
lseek(fd, self->data_offset + self->data_size, SEEK_SET);
- do_read(fd, feat_sec, sec_size);
+ if (perf_header__getbuffer64(self, fd, feat_sec, sec_size))
+ goto out_free;
+ err = 0;
while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
if (perf_header__has_feat(self, feat)) {
struct perf_file_section *sec = &feat_sec[idx++];
- err = process(sec, feat, fd);
+ err = process(sec, self, feat, fd);
if (err < 0)
break;
}
++feat;
}
-
+out_free:
free(feat_sec);
return err;
-};
+}
int perf_file_header__read(struct perf_file_header *self,
struct perf_header *ph, int fd)
{
lseek(fd, 0, SEEK_SET);
- do_read(fd, self, sizeof(*self));
- if (self->magic != PERF_MAGIC ||
- self->attr_size != sizeof(struct perf_file_attr))
+ if (do_read(fd, self, sizeof(*self)) ||
+ memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
return -1;
+ if (self->attr_size != sizeof(struct perf_file_attr)) {
+ u64 attr_size = bswap_64(self->attr_size);
+
+ if (attr_size != sizeof(struct perf_file_attr))
+ return -1;
+
+ mem_bswap_64(self, offsetof(struct perf_file_header,
+ adds_features));
+ ph->needs_swap = true;
+ }
+
if (self->size != sizeof(*self)) {
/* Support the previous format */
if (self->size == offsetof(typeof(*self), adds_features))
@@ -433,16 +610,28 @@
}
memcpy(&ph->adds_features, &self->adds_features,
- sizeof(self->adds_features));
+ sizeof(ph->adds_features));
+ /*
+ * FIXME: hack that assumes that if we need swap the perf.data file
+ * may be coming from an arch with a different word-size, ergo different
+ * DEFINE_BITMAP format, investigate more later, but for now its mostly
+ * safe to assume that we have a build-id section. Trace files probably
+ * have several other issues in this realm anyway...
+ */
+ if (ph->needs_swap) {
+ memset(&ph->adds_features, 0, sizeof(ph->adds_features));
+ perf_header__set_feat(ph, HEADER_BUILD_ID);
+ }
ph->event_offset = self->event_types.offset;
- ph->event_size = self->event_types.size;
- ph->data_offset = self->data.offset;
+ ph->event_size = self->event_types.size;
+ ph->data_offset = self->data.offset;
ph->data_size = self->data.size;
return 0;
}
static int perf_file_section__process(struct perf_file_section *self,
+ struct perf_header *ph,
int feat, int fd)
{
if (lseek(fd, self->offset, SEEK_SET) < 0) {
@@ -457,7 +646,7 @@
break;
case HEADER_BUILD_ID:
- if (perf_header__read_build_ids(fd, self->offset, self->size))
+ if (perf_header__read_build_ids(ph, fd, self->offset, self->size))
pr_debug("Failed to read buildids, continuing...\n");
break;
default:
@@ -469,7 +658,7 @@
int perf_header__read(struct perf_header *self, int fd)
{
- struct perf_file_header f_header;
+ struct perf_file_header f_header;
struct perf_file_attr f_attr;
u64 f_id;
int nr_attrs, nr_ids, i, j;
@@ -486,7 +675,9 @@
struct perf_header_attr *attr;
off_t tmp;
- do_read(fd, &f_attr, sizeof(f_attr));
+ if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr)))
+ goto out_errno;
+
tmp = lseek(fd, 0, SEEK_CUR);
attr = perf_header_attr__new(&f_attr.attr);
@@ -497,7 +688,8 @@
lseek(fd, f_attr.ids.offset, SEEK_SET);
for (j = 0; j < nr_ids; j++) {
- do_read(fd, &f_id, sizeof(f_id));
+ if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id)))
+ goto out_errno;
if (perf_header_attr__add_id(attr, f_id) < 0) {
perf_header_attr__delete(attr);
@@ -517,7 +709,9 @@
events = malloc(f_header.event_types.size);
if (events == NULL)
return -ENOMEM;
- do_read(fd, events, f_header.event_types.size);
+ if (perf_header__getbuffer64(self, fd, events,
+ f_header.event_types.size))
+ goto out_errno;
event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type);
}
@@ -527,6 +721,8 @@
self->frozen = 1;
return 0;
+out_errno:
+ return -errno;
}
u64 perf_header__sample_type(struct perf_header *header)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d118d05..82a6af7 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -5,6 +5,7 @@
#include <sys/types.h>
#include <stdbool.h>
#include "types.h"
+#include "event.h"
#include <linux/bitmap.h>
@@ -52,6 +53,7 @@
u64 data_size;
u64 event_offset;
u64 event_size;
+ bool needs_swap;
DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
};
@@ -64,7 +66,7 @@
int perf_header__add_attr(struct perf_header *self,
struct perf_header_attr *attr);
-void perf_header__push_event(u64 id, const char *name);
+int perf_header__push_event(u64 id, const char *name);
char *perf_header__find_event(u64 id);
struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr);
@@ -80,6 +82,11 @@
int perf_header__process_sections(struct perf_header *self, int fd,
int (*process)(struct perf_file_section *self,
+ struct perf_header *ph,
int feat, int fd));
+int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
+ const char *name, bool is_kallsyms);
+int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
+
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
new file mode 100644
index 0000000..72f0b6a
--- /dev/null
+++ b/tools/perf/util/map.h
@@ -0,0 +1,73 @@
+#ifndef __PERF_MAP_H
+#define __PERF_MAP_H
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+enum map_type {
+ MAP__FUNCTION = 0,
+ MAP__VARIABLE,
+};
+
+#define MAP__NR_TYPES (MAP__VARIABLE + 1)
+
+struct dso;
+
+struct map {
+ union {
+ struct rb_node rb_node;
+ struct list_head node;
+ };
+ u64 start;
+ u64 end;
+ enum map_type type;
+ u64 pgoff;
+ u64 (*map_ip)(struct map *, u64);
+ u64 (*unmap_ip)(struct map *, u64);
+ struct dso *dso;
+};
+
+static inline u64 map__map_ip(struct map *map, u64 ip)
+{
+ return ip - map->start + map->pgoff;
+}
+
+static inline u64 map__unmap_ip(struct map *map, u64 ip)
+{
+ return ip + map->start - map->pgoff;
+}
+
+static inline u64 identity__map_ip(struct map *map __used, u64 ip)
+{
+ return ip;
+}
+
+struct symbol;
+struct mmap_event;
+
+typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
+
+void map__init(struct map *self, enum map_type type,
+ u64 start, u64 end, u64 pgoff, struct dso *dso);
+struct map *map__new(struct mmap_event *event, enum map_type,
+ char *cwd, int cwdlen);
+void map__delete(struct map *self);
+struct map *map__clone(struct map *self);
+int map__overlap(struct map *l, struct map *r);
+size_t map__fprintf(struct map *self, FILE *fp);
+
+struct perf_session;
+
+int map__load(struct map *self, struct perf_session *session,
+ symbol_filter_t filter);
+struct symbol *map__find_symbol(struct map *self, struct perf_session *session,
+ u64 addr, symbol_filter_t filter);
+struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
+ struct perf_session *session,
+ symbol_filter_t filter);
+void map__fixup_start(struct map *self);
+void map__fixup_end(struct map *self);
+
+#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e5bc0fb..05d0c5c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -450,7 +450,8 @@
/* sys + ':' + event + ':' + flags*/
#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
static enum event_result
-parse_subsystem_tracepoint_event(char *sys_name, char *flags)
+parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
+ char *flags)
{
char evt_path[MAXPATHLEN];
struct dirent *evt_ent;
@@ -474,6 +475,9 @@
|| !strcmp(evt_ent->d_name, "filter"))
continue;
+ if (!strglobmatch(evt_ent->d_name, evt_exp))
+ continue;
+
len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name,
evt_ent->d_name, flags ? ":" : "",
flags ?: "");
@@ -522,9 +526,10 @@
if (evt_length >= MAX_EVENT_LENGTH)
return EVT_FAILED;
- if (!strcmp(evt_name, "*")) {
+ if (strpbrk(evt_name, "*?")) {
*strp = evt_name + evt_length;
- return parse_subsystem_tracepoint_event(sys_name, flags);
+ return parse_multiple_tracepoint_event(sys_name, evt_name,
+ flags);
} else
return parse_single_tracepoint_event(sys_name, evt_name,
evt_length, flags,
@@ -753,11 +758,11 @@
return ret;
}
-static void store_event_type(const char *orgname)
+static int store_event_type(const char *orgname)
{
char filename[PATH_MAX], *c;
FILE *file;
- int id;
+ int id, n;
sprintf(filename, "%s/", debugfs_path);
strncat(filename, orgname, strlen(orgname));
@@ -769,11 +774,14 @@
file = fopen(filename, "r");
if (!file)
- return;
- if (fscanf(file, "%i", &id) < 1)
- die("cannot store event ID");
+ return 0;
+ n = fscanf(file, "%i", &id);
fclose(file);
- perf_header__push_event(id, orgname);
+ if (n < 1) {
+ pr_err("cannot store event ID\n");
+ return -EINVAL;
+ }
+ return perf_header__push_event(id, orgname);
}
int parse_events(const struct option *opt __used, const char *str, int unset __used)
@@ -782,7 +790,8 @@
enum event_result ret;
if (strchr(str, ':'))
- store_event_type(str);
+ if (store_event_type(str) < 0)
+ return -1;
for (;;) {
if (nr_counters == MAX_COUNTERS)
@@ -835,11 +844,12 @@
}
static const char * const event_type_descriptors[] = {
- "",
"Hardware event",
"Software event",
"Tracepoint event",
"Hardware cache event",
+ "Raw hardware event descriptor",
+ "Hardware breakpoint",
};
/*
@@ -872,7 +882,7 @@
snprintf(evt_path, MAXPATHLEN, "%s:%s",
sys_dirent.d_name, evt_dirent.d_name);
printf(" %-42s [%s]\n", evt_path,
- event_type_descriptors[PERF_TYPE_TRACEPOINT+1]);
+ event_type_descriptors[PERF_TYPE_TRACEPOINT]);
}
closedir(evt_dir);
}
@@ -892,9 +902,7 @@
printf("List of pre-defined events (to be used in -e):\n");
for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
- type = syms->type + 1;
- if (type >= ARRAY_SIZE(event_type_descriptors))
- type = 0;
+ type = syms->type;
if (type != prev_type)
printf("\n");
@@ -919,17 +927,19 @@
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
printf(" %-42s [%s]\n",
event_cache_name(type, op, i),
- event_type_descriptors[4]);
+ event_type_descriptors[PERF_TYPE_HW_CACHE]);
}
}
}
printf("\n");
- printf(" %-42s [raw hardware event descriptor]\n",
- "rNNN");
+ printf(" %-42s [%s]\n",
+ "rNNN", event_type_descriptors[PERF_TYPE_RAW]);
printf("\n");
- printf(" %-42s [hardware breakpoint]\n", "mem:<addr>[:access]");
+ printf(" %-42s [%s]\n",
+ "mem:<addr>[:access]",
+ event_type_descriptors[PERF_TYPE_BREAKPOINT]);
printf("\n");
print_tracepoint_events();
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 29465d4..71b0dd5 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -37,6 +37,8 @@
#include "string.h"
#include "strlist.h"
#include "debug.h"
+#include "cache.h"
+#include "color.h"
#include "parse-events.h" /* For debugfs_path */
#include "probe-event.h"
@@ -62,6 +64,42 @@
return ret;
}
+void parse_line_range_desc(const char *arg, struct line_range *lr)
+{
+ const char *ptr;
+ char *tmp;
+ /*
+ * <Syntax>
+ * SRC:SLN[+NUM|-ELN]
+ * FUNC[:SLN[+NUM|-ELN]]
+ */
+ ptr = strchr(arg, ':');
+ if (ptr) {
+ lr->start = (unsigned int)strtoul(ptr + 1, &tmp, 0);
+ if (*tmp == '+')
+ lr->end = lr->start + (unsigned int)strtoul(tmp + 1,
+ &tmp, 0);
+ else if (*tmp == '-')
+ lr->end = (unsigned int)strtoul(tmp + 1, &tmp, 0);
+ else
+ lr->end = 0;
+ pr_debug("Line range is %u to %u\n", lr->start, lr->end);
+ if (lr->end && lr->start > lr->end)
+ semantic_error("Start line must be smaller"
+ " than end line.");
+ if (*tmp != '\0')
+ semantic_error("Tailing with invalid character '%d'.",
+ *tmp);
+ tmp = strndup(arg, (ptr - arg));
+ } else
+ tmp = strdup(arg);
+
+ if (strchr(tmp, '.'))
+ lr->file = tmp;
+ else
+ lr->function = tmp;
+}
+
/* Check the name is good for event/group */
static bool check_event_name(const char *name)
{
@@ -368,7 +406,7 @@
if (ret < 0) {
if (errno == ENOENT)
die("kprobe_events file does not exist -"
- " please rebuild with CONFIG_KPROBE_TRACER.");
+ " please rebuild with CONFIG_KPROBE_EVENT.");
else
die("Could not open kprobe_events file: %s",
strerror(errno));
@@ -455,6 +493,8 @@
struct strlist *rawlist;
struct str_node *ent;
+ setup_pager();
+
fd = open_kprobe_events(O_RDONLY, 0);
rawlist = get_trace_kprobe_event_rawlist(fd);
close(fd);
@@ -675,3 +715,66 @@
close(fd);
}
+#define LINEBUF_SIZE 256
+
+static void show_one_line(FILE *fp, unsigned int l, bool skip, bool show_num)
+{
+ char buf[LINEBUF_SIZE];
+ const char *color = PERF_COLOR_BLUE;
+
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (!skip) {
+ if (show_num)
+ fprintf(stdout, "%7u %s", l, buf);
+ else
+ color_fprintf(stdout, color, " %s", buf);
+ }
+
+ while (strlen(buf) == LINEBUF_SIZE - 1 &&
+ buf[LINEBUF_SIZE - 2] != '\n') {
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (!skip) {
+ if (show_num)
+ fprintf(stdout, "%s", buf);
+ else
+ color_fprintf(stdout, color, "%s", buf);
+ }
+ }
+ return;
+error:
+ if (feof(fp))
+ die("Source file is shorter than expected.");
+ else
+ die("File read error: %s", strerror(errno));
+}
+
+void show_line_range(struct line_range *lr)
+{
+ unsigned int l = 1;
+ struct line_node *ln;
+ FILE *fp;
+
+ setup_pager();
+
+ if (lr->function)
+ fprintf(stdout, "<%s:%d>\n", lr->function,
+ lr->start - lr->offset);
+ else
+ fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
+
+ fp = fopen(lr->path, "r");
+ if (fp == NULL)
+ die("Failed to open %s: %s", lr->path, strerror(errno));
+ /* Skip to starting line number */
+ while (l < lr->start)
+ show_one_line(fp, l++, true, false);
+
+ list_for_each_entry(ln, &lr->line_list, list) {
+ while (ln->line > l)
+ show_one_line(fp, (l++) - lr->offset, false, false);
+ show_one_line(fp, (l++) - lr->offset, false, true);
+ }
+ fclose(fp);
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 7f1d499..711287d 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -5,6 +5,7 @@
#include "probe-finder.h"
#include "strlist.h"
+extern void parse_line_range_desc(const char *arg, struct line_range *lr);
extern void parse_perf_probe_event(const char *str, struct probe_point *pp,
bool *need_dwarf);
extern int synthesize_perf_probe_point(struct probe_point *pp);
@@ -15,6 +16,7 @@
bool force_add);
extern void del_trace_kprobe_events(struct strlist *dellist);
extern void show_perf_probe_events(void);
+extern void show_line_range(struct line_range *lr);
/* Maximum index number of event-name postfix */
#define MAX_EVENT_INDEX 1024
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 4b852c0..1b2124d 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -140,6 +140,31 @@
return found;
}
+static int cu_get_filename(Dwarf_Die cu_die, Dwarf_Unsigned fno, char **buf)
+{
+ Dwarf_Signed cnt, i;
+ char **srcs;
+ int ret = 0;
+
+ if (!buf || !fno)
+ return -EINVAL;
+
+ ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
+ if (ret == DW_DLV_OK) {
+ if ((Dwarf_Unsigned)cnt > fno - 1) {
+ *buf = strdup(srcs[fno - 1]);
+ ret = 0;
+ pr_debug("found filename: %s\n", *buf);
+ } else
+ ret = -ENOENT;
+ for (i = 0; i < cnt; i++)
+ dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
+ dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
+ } else
+ ret = -EINVAL;
+ return ret;
+}
+
/* Compare diename and tname */
static int die_compare_name(Dwarf_Die dw_die, const char *tname)
{
@@ -402,11 +427,11 @@
} else if (op == DW_OP_regx) {
regn = loc->lr_number;
} else
- die("Dwarf_OP %d is not supported.\n", op);
+ die("Dwarf_OP %d is not supported.", op);
regs = get_arch_regstr(regn);
if (!regs)
- die("%lld exceeds max register number.\n", regn);
+ die("%lld exceeds max register number.", regn);
if (deref)
ret = snprintf(pf->buf, pf->len,
@@ -438,7 +463,7 @@
return ;
error:
die("Failed to find the location of %s at this address.\n"
- " Perhaps, it has been optimized out.\n", pf->var);
+ " Perhaps, it has been optimized out.", pf->var);
}
static int variable_callback(struct die_link *dlink, void *data)
@@ -476,7 +501,7 @@
/* Search child die for local variables and parameters. */
ret = search_die_from_children(sp_die, variable_callback, pf);
if (!ret)
- die("Failed to find '%s' in this function.\n", pf->var);
+ die("Failed to find '%s' in this function.", pf->var);
}
/* Get a frame base on the address */
@@ -567,7 +592,7 @@
}
/* Find probe point from its line number */
-static void find_by_line(struct probe_finder *pf)
+static void find_probe_point_by_line(struct probe_finder *pf)
{
Dwarf_Signed cnt, i, clm;
Dwarf_Line *lines;
@@ -602,7 +627,7 @@
ret = search_die_from_children(pf->cu_die,
probeaddr_callback, pf);
if (ret == 0)
- die("Probe point is not found in subprograms.\n");
+ die("Probe point is not found in subprograms.");
/* Continuing, because target line might be inlined. */
}
dwarf_srclines_dealloc(__dw_debug, lines, cnt);
@@ -626,7 +651,7 @@
pf->fno = die_get_decl_file(dlink->die);
pf->lno = die_get_decl_line(dlink->die)
+ pp->line;
- find_by_line(pf);
+ find_probe_point_by_line(pf);
return 1;
}
if (die_inlined_subprogram(dlink->die)) {
@@ -661,7 +686,7 @@
!die_inlined_subprogram(lk->die))
goto found;
}
- die("Failed to find real subprogram.\n");
+ die("Failed to find real subprogram.");
found:
/* Get offset from subprogram */
ret = die_within_subprogram(lk->die, pf->addr, &offs);
@@ -673,7 +698,7 @@
return 0;
}
-static void find_by_func(struct probe_finder *pf)
+static void find_probe_point_by_func(struct probe_finder *pf)
{
search_die_from_children(pf->cu_die, probefunc_callback, pf);
}
@@ -714,10 +739,10 @@
if (ret == DW_DLV_NO_ENTRY)
pf.cu_base = 0;
if (pp->function)
- find_by_func(&pf);
+ find_probe_point_by_func(&pf);
else {
pf.lno = pp->line;
- find_by_line(&pf);
+ find_probe_point_by_line(&pf);
}
}
dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
@@ -728,3 +753,159 @@
return pp->found;
}
+
+static void line_range_add_line(struct line_range *lr, unsigned int line)
+{
+ struct line_node *ln;
+ struct list_head *p;
+
+ /* Reverse search, because new line will be the last one */
+ list_for_each_entry_reverse(ln, &lr->line_list, list) {
+ if (ln->line < line) {
+ p = &ln->list;
+ goto found;
+ } else if (ln->line == line) /* Already exist */
+ return ;
+ }
+ /* List is empty, or the smallest entry */
+ p = &lr->line_list;
+found:
+ pr_debug("Debug: add a line %u\n", line);
+ ln = zalloc(sizeof(struct line_node));
+ DIE_IF(ln == NULL);
+ ln->line = line;
+ INIT_LIST_HEAD(&ln->list);
+ list_add(&ln->list, p);
+}
+
+/* Find line range from its line number */
+static void find_line_range_by_line(struct line_finder *lf)
+{
+ Dwarf_Signed cnt, i;
+ Dwarf_Line *lines;
+ Dwarf_Unsigned lineno = 0;
+ Dwarf_Unsigned fno;
+ Dwarf_Addr addr;
+ int ret;
+
+ ret = dwarf_srclines(lf->cu_die, &lines, &cnt, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+
+ for (i = 0; i < cnt; i++) {
+ ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (fno != lf->fno)
+ continue;
+
+ ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (lf->lno_s > lineno || lf->lno_e < lineno)
+ continue;
+
+ /* Filter line in the function address range */
+ if (lf->addr_s && lf->addr_e) {
+ ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (lf->addr_s > addr || lf->addr_e <= addr)
+ continue;
+ }
+ line_range_add_line(lf->lr, (unsigned int)lineno);
+ }
+ dwarf_srclines_dealloc(__dw_debug, lines, cnt);
+ if (!list_empty(&lf->lr->line_list))
+ lf->found = 1;
+}
+
+/* Search function from function name */
+static int linefunc_callback(struct die_link *dlink, void *data)
+{
+ struct line_finder *lf = (struct line_finder *)data;
+ struct line_range *lr = lf->lr;
+ Dwarf_Half tag;
+ int ret;
+
+ ret = dwarf_tag(dlink->die, &tag, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (tag == DW_TAG_subprogram &&
+ die_compare_name(dlink->die, lr->function) == 0) {
+ /* Get the address range of this function */
+ ret = dwarf_highpc(dlink->die, &lf->addr_e, &__dw_error);
+ if (ret == DW_DLV_OK)
+ ret = dwarf_lowpc(dlink->die, &lf->addr_s, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (ret == DW_DLV_NO_ENTRY) {
+ lf->addr_s = 0;
+ lf->addr_e = 0;
+ }
+
+ lf->fno = die_get_decl_file(dlink->die);
+ lr->offset = die_get_decl_line(dlink->die);;
+ lf->lno_s = lr->offset + lr->start;
+ if (!lr->end)
+ lf->lno_e = (Dwarf_Unsigned)-1;
+ else
+ lf->lno_e = lr->offset + lr->end;
+ lr->start = lf->lno_s;
+ lr->end = lf->lno_e;
+ find_line_range_by_line(lf);
+ /* If we find a target function, this should be end. */
+ lf->found = 1;
+ return 1;
+ }
+ return 0;
+}
+
+static void find_line_range_by_func(struct line_finder *lf)
+{
+ search_die_from_children(lf->cu_die, linefunc_callback, lf);
+}
+
+int find_line_range(int fd, struct line_range *lr)
+{
+ Dwarf_Half addr_size = 0;
+ Dwarf_Unsigned next_cuh = 0;
+ int ret;
+ struct line_finder lf = {.lr = lr};
+
+ ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
+ if (ret != DW_DLV_OK)
+ return -ENOENT;
+
+ while (!lf.found) {
+ /* Search CU (Compilation Unit) */
+ ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
+ &addr_size, &next_cuh, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (ret == DW_DLV_NO_ENTRY)
+ break;
+
+ /* Get the DIE(Debugging Information Entry) of this CU */
+ ret = dwarf_siblingof(__dw_debug, 0, &lf.cu_die, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+
+ /* Check if target file is included. */
+ if (lr->file)
+ lf.fno = cu_find_fileno(lf.cu_die, lr->file);
+
+ if (!lr->file || lf.fno) {
+ if (lr->function)
+ find_line_range_by_func(&lf);
+ else {
+ lf.lno_s = lr->start;
+ if (!lr->end)
+ lf.lno_e = (Dwarf_Unsigned)-1;
+ else
+ lf.lno_e = lr->end;
+ find_line_range_by_line(&lf);
+ }
+ /* Get the real file path */
+ if (lf.found)
+ cu_get_filename(lf.cu_die, lf.fno, &lr->path);
+ }
+ dwarf_dealloc(__dw_debug, lf.cu_die, DW_DLA_DIE);
+ }
+ ret = dwarf_finish(__dw_debug, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ return lf.found;
+}
+
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index a4086aad..972b386 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -1,6 +1,8 @@
#ifndef _PROBE_FINDER_H
#define _PROBE_FINDER_H
+#include "util.h"
+
#define MAX_PATH_LEN 256
#define MAX_PROBE_BUFFER 1024
#define MAX_PROBES 128
@@ -32,8 +34,26 @@
char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/
};
+/* Line number container */
+struct line_node {
+ struct list_head list;
+ unsigned int line;
+};
+
+/* Line range */
+struct line_range {
+ char *file; /* File name */
+ char *function; /* Function name */
+ unsigned int start; /* Start line number */
+ unsigned int end; /* End line number */
+ unsigned int offset; /* Start line offset */
+ char *path; /* Real path name */
+ struct list_head line_list; /* Visible lines */
+};
+
#ifndef NO_LIBDWARF
extern int find_probepoint(int fd, struct probe_point *pp);
+extern int find_line_range(int fd, struct line_range *lr);
/* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */
#ifndef _MIPS_SZLONG
@@ -60,6 +80,19 @@
char *buf; /* Current output buffer */
int len; /* Length of output buffer */
};
+
+struct line_finder {
+ struct line_range *lr; /* Target line range */
+
+ Dwarf_Unsigned fno; /* File number */
+ Dwarf_Unsigned lno_s; /* Start line number */
+ Dwarf_Unsigned lno_e; /* End line number */
+ Dwarf_Addr addr_s; /* Start address */
+ Dwarf_Addr addr_e; /* End address */
+ Dwarf_Die cu_die; /* Current CU */
+ int found;
+};
+
#endif /* NO_LIBDWARF */
#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ce3a6c8..8e7c189 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,5 +1,6 @@
#include <linux/kernel.h>
+#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
@@ -66,13 +67,22 @@
self->mmap_window = 32;
self->cwd = NULL;
self->cwdlen = 0;
+ self->unknown_events = 0;
map_groups__init(&self->kmaps);
- if (perf_session__create_kernel_maps(self) < 0)
- goto out_delete;
+ if (mode == O_RDONLY) {
+ if (perf_session__open(self, force) < 0)
+ goto out_delete;
+ } else if (mode == O_WRONLY) {
+ /*
+ * In O_RDONLY mode this will be performed when reading the
+ * kernel MMAP event, in event__process_mmap().
+ */
+ if (perf_session__create_kernel_maps(self) < 0)
+ goto out_delete;
+ }
- if (mode == O_RDONLY && perf_session__open(self, force) < 0)
- goto out_delete;
+ self->sample_type = perf_header__sample_type(&self->header);
out:
return self;
out_free:
@@ -148,3 +158,389 @@
return syms;
}
+
+static int process_event_stub(event_t *event __used,
+ struct perf_session *session __used)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
+{
+ if (handler->sample == NULL)
+ handler->sample = process_event_stub;
+ if (handler->mmap == NULL)
+ handler->mmap = process_event_stub;
+ if (handler->comm == NULL)
+ handler->comm = process_event_stub;
+ if (handler->fork == NULL)
+ handler->fork = process_event_stub;
+ if (handler->exit == NULL)
+ handler->exit = process_event_stub;
+ if (handler->lost == NULL)
+ handler->lost = process_event_stub;
+ if (handler->read == NULL)
+ handler->read = process_event_stub;
+ if (handler->throttle == NULL)
+ handler->throttle = process_event_stub;
+ if (handler->unthrottle == NULL)
+ handler->unthrottle = process_event_stub;
+}
+
+static const char *event__name[] = {
+ [0] = "TOTAL",
+ [PERF_RECORD_MMAP] = "MMAP",
+ [PERF_RECORD_LOST] = "LOST",
+ [PERF_RECORD_COMM] = "COMM",
+ [PERF_RECORD_EXIT] = "EXIT",
+ [PERF_RECORD_THROTTLE] = "THROTTLE",
+ [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
+ [PERF_RECORD_FORK] = "FORK",
+ [PERF_RECORD_READ] = "READ",
+ [PERF_RECORD_SAMPLE] = "SAMPLE",
+};
+
+unsigned long event__total[PERF_RECORD_MAX];
+
+void event__print_totals(void)
+{
+ int i;
+ for (i = 0; i < PERF_RECORD_MAX; ++i)
+ pr_info("%10s events: %10ld\n",
+ event__name[i], event__total[i]);
+}
+
+void mem_bswap_64(void *src, int byte_size)
+{
+ u64 *m = src;
+
+ while (byte_size > 0) {
+ *m = bswap_64(*m);
+ byte_size -= sizeof(u64);
+ ++m;
+ }
+}
+
+static void event__all64_swap(event_t *self)
+{
+ struct perf_event_header *hdr = &self->header;
+ mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
+}
+
+static void event__comm_swap(event_t *self)
+{
+ self->comm.pid = bswap_32(self->comm.pid);
+ self->comm.tid = bswap_32(self->comm.tid);
+}
+
+static void event__mmap_swap(event_t *self)
+{
+ self->mmap.pid = bswap_32(self->mmap.pid);
+ self->mmap.tid = bswap_32(self->mmap.tid);
+ self->mmap.start = bswap_64(self->mmap.start);
+ self->mmap.len = bswap_64(self->mmap.len);
+ self->mmap.pgoff = bswap_64(self->mmap.pgoff);
+}
+
+static void event__task_swap(event_t *self)
+{
+ self->fork.pid = bswap_32(self->fork.pid);
+ self->fork.tid = bswap_32(self->fork.tid);
+ self->fork.ppid = bswap_32(self->fork.ppid);
+ self->fork.ptid = bswap_32(self->fork.ptid);
+ self->fork.time = bswap_64(self->fork.time);
+}
+
+static void event__read_swap(event_t *self)
+{
+ self->read.pid = bswap_32(self->read.pid);
+ self->read.tid = bswap_32(self->read.tid);
+ self->read.value = bswap_64(self->read.value);
+ self->read.time_enabled = bswap_64(self->read.time_enabled);
+ self->read.time_running = bswap_64(self->read.time_running);
+ self->read.id = bswap_64(self->read.id);
+}
+
+typedef void (*event__swap_op)(event_t *self);
+
+static event__swap_op event__swap_ops[] = {
+ [PERF_RECORD_MMAP] = event__mmap_swap,
+ [PERF_RECORD_COMM] = event__comm_swap,
+ [PERF_RECORD_FORK] = event__task_swap,
+ [PERF_RECORD_EXIT] = event__task_swap,
+ [PERF_RECORD_LOST] = event__all64_swap,
+ [PERF_RECORD_READ] = event__read_swap,
+ [PERF_RECORD_SAMPLE] = event__all64_swap,
+ [PERF_RECORD_MAX] = NULL,
+};
+
+static int perf_session__process_event(struct perf_session *self,
+ event_t *event,
+ struct perf_event_ops *ops,
+ u64 offset, u64 head)
+{
+ trace_event(event);
+
+ if (event->header.type < PERF_RECORD_MAX) {
+ dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
+ offset + head, event->header.size,
+ event__name[event->header.type]);
+ ++event__total[0];
+ ++event__total[event->header.type];
+ }
+
+ if (self->header.needs_swap && event__swap_ops[event->header.type])
+ event__swap_ops[event->header.type](event);
+
+ switch (event->header.type) {
+ case PERF_RECORD_SAMPLE:
+ return ops->sample(event, self);
+ case PERF_RECORD_MMAP:
+ return ops->mmap(event, self);
+ case PERF_RECORD_COMM:
+ return ops->comm(event, self);
+ case PERF_RECORD_FORK:
+ return ops->fork(event, self);
+ case PERF_RECORD_EXIT:
+ return ops->exit(event, self);
+ case PERF_RECORD_LOST:
+ return ops->lost(event, self);
+ case PERF_RECORD_READ:
+ return ops->read(event, self);
+ case PERF_RECORD_THROTTLE:
+ return ops->throttle(event, self);
+ case PERF_RECORD_UNTHROTTLE:
+ return ops->unthrottle(event, self);
+ default:
+ self->unknown_events++;
+ return -1;
+ }
+}
+
+void perf_event_header__bswap(struct perf_event_header *self)
+{
+ self->type = bswap_32(self->type);
+ self->misc = bswap_16(self->misc);
+ self->size = bswap_16(self->size);
+}
+
+int perf_header__read_build_ids(struct perf_header *self,
+ int input, u64 offset, u64 size)
+{
+ struct build_id_event bev;
+ char filename[PATH_MAX];
+ u64 limit = offset + size;
+ int err = -1;
+
+ while (offset < limit) {
+ struct dso *dso;
+ ssize_t len;
+ struct list_head *head = &dsos__user;
+
+ if (read(input, &bev, sizeof(bev)) != sizeof(bev))
+ goto out;
+
+ if (self->needs_swap)
+ perf_event_header__bswap(&bev.header);
+
+ len = bev.header.size - sizeof(bev);
+ if (read(input, filename, len) != len)
+ goto out;
+
+ if (bev.header.misc & PERF_RECORD_MISC_KERNEL)
+ head = &dsos__kernel;
+
+ dso = __dsos__findnew(head, filename);
+ if (dso != NULL) {
+ dso__set_build_id(dso, &bev.build_id);
+ if (head == &dsos__kernel && filename[0] == '[')
+ dso->kernel = 1;
+ }
+
+ offset += bev.header.size;
+ }
+ err = 0;
+out:
+ return err;
+}
+
+static struct thread *perf_session__register_idle_thread(struct perf_session *self)
+{
+ struct thread *thread = perf_session__findnew(self, 0);
+
+ if (thread == NULL || thread__set_comm(thread, "swapper")) {
+ pr_err("problem inserting idle task.\n");
+ thread = NULL;
+ }
+
+ return thread;
+}
+
+int perf_session__process_events(struct perf_session *self,
+ struct perf_event_ops *ops)
+{
+ int err, mmap_prot, mmap_flags;
+ u64 head, shift;
+ u64 offset = 0;
+ size_t page_size;
+ event_t *event;
+ uint32_t size;
+ char *buf;
+
+ if (perf_session__register_idle_thread(self) == NULL)
+ return -ENOMEM;
+
+ perf_event_ops__fill_defaults(ops);
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ head = self->header.data_offset;
+
+ if (!symbol_conf.full_paths) {
+ char bf[PATH_MAX];
+
+ if (getcwd(bf, sizeof(bf)) == NULL) {
+ err = -errno;
+out_getcwd_err:
+ pr_err("failed to get the current directory\n");
+ goto out_err;
+ }
+ self->cwd = strdup(bf);
+ if (self->cwd == NULL) {
+ err = -ENOMEM;
+ goto out_getcwd_err;
+ }
+ self->cwdlen = strlen(self->cwd);
+ }
+
+ shift = page_size * (head / page_size);
+ offset += shift;
+ head -= shift;
+
+ mmap_prot = PROT_READ;
+ mmap_flags = MAP_SHARED;
+
+ if (self->header.needs_swap) {
+ mmap_prot |= PROT_WRITE;
+ mmap_flags = MAP_PRIVATE;
+ }
+remap:
+ buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
+ mmap_flags, self->fd, offset);
+ if (buf == MAP_FAILED) {
+ pr_err("failed to mmap file\n");
+ err = -errno;
+ goto out_err;
+ }
+
+more:
+ event = (event_t *)(buf + head);
+
+ if (self->header.needs_swap)
+ perf_event_header__bswap(&event->header);
+ size = event->header.size;
+ if (size == 0)
+ size = 8;
+
+ if (head + event->header.size >= page_size * self->mmap_window) {
+ int munmap_ret;
+
+ shift = page_size * (head / page_size);
+
+ munmap_ret = munmap(buf, page_size * self->mmap_window);
+ assert(munmap_ret == 0);
+
+ offset += shift;
+ head -= shift;
+ goto remap;
+ }
+
+ size = event->header.size;
+
+ dump_printf("\n%#Lx [%#x]: event: %d\n",
+ offset + head, event->header.size, event->header.type);
+
+ if (size == 0 ||
+ perf_session__process_event(self, event, ops, offset, head) < 0) {
+ dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
+ offset + head, event->header.size,
+ event->header.type);
+ /*
+ * assume we lost track of the stream, check alignment, and
+ * increment a single u64 in the hope to catch on again 'soon'.
+ */
+ if (unlikely(head & 7))
+ head &= ~7ULL;
+
+ size = 8;
+ }
+
+ head += size;
+
+ if (offset + head >= self->header.data_offset + self->header.data_size)
+ goto done;
+
+ if (offset + head < self->size)
+ goto more;
+done:
+ err = 0;
+out_err:
+ return err;
+}
+
+bool perf_session__has_traces(struct perf_session *self, const char *msg)
+{
+ if (!(self->sample_type & PERF_SAMPLE_RAW)) {
+ pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
+ return false;
+ }
+
+ return true;
+}
+
+int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
+ const char *symbol_name,
+ u64 addr)
+{
+ char *bracket;
+
+ self->ref_reloc_sym.name = strdup(symbol_name);
+ if (self->ref_reloc_sym.name == NULL)
+ return -ENOMEM;
+
+ bracket = strchr(self->ref_reloc_sym.name, ']');
+ if (bracket)
+ *bracket = '\0';
+
+ self->ref_reloc_sym.addr = addr;
+ return 0;
+}
+
+static u64 map__reloc_map_ip(struct map *map, u64 ip)
+{
+ return ip + (s64)map->pgoff;
+}
+
+static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
+{
+ return ip - (s64)map->pgoff;
+}
+
+void perf_session__reloc_vmlinux_maps(struct perf_session *self,
+ u64 unrelocated_addr)
+{
+ enum map_type type;
+ s64 reloc = unrelocated_addr - self->ref_reloc_sym.addr;
+
+ if (!reloc)
+ return;
+
+ for (type = 0; type < MAP__NR_TYPES; ++type) {
+ struct map *map = self->vmlinux_maps[type];
+
+ map->map_ip = map__reloc_map_ip;
+ map->unmap_ip = map__reloc_unmap_ip;
+ map->pgoff = reloc;
+ }
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 32eaa1b..36d1a80 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -18,10 +18,16 @@
struct map_groups kmaps;
struct rb_root threads;
struct thread *last_match;
+ struct map *vmlinux_maps[MAP__NR_TYPES];
struct events_stats events_stats;
unsigned long event_total[PERF_RECORD_MAX];
+ unsigned long unknown_events;
struct rb_root hists;
u64 sample_type;
+ struct {
+ const char *name;
+ u64 addr;
+ } ref_reloc_sym;
int fd;
int cwdlen;
char *cwd;
@@ -31,23 +37,22 @@
typedef int (*event_op)(event_t *self, struct perf_session *session);
struct perf_event_ops {
- event_op process_sample_event;
- event_op process_mmap_event;
- event_op process_comm_event;
- event_op process_fork_event;
- event_op process_exit_event;
- event_op process_lost_event;
- event_op process_read_event;
- event_op process_throttle_event;
- event_op process_unthrottle_event;
- int (*sample_type_check)(struct perf_session *session);
- unsigned long total_unknown;
- bool full_paths;
+ event_op sample,
+ mmap,
+ comm,
+ fork,
+ exit,
+ lost,
+ read,
+ throttle,
+ unthrottle;
};
struct perf_session *perf_session__new(const char *filename, int mode, bool force);
void perf_session__delete(struct perf_session *self);
+void perf_event_header__bswap(struct perf_event_header *self);
+
int perf_session__process_events(struct perf_session *self,
struct perf_event_ops *event_ops);
@@ -56,6 +61,17 @@
struct ip_callchain *chain,
struct symbol **parent);
-int perf_header__read_build_ids(int input, u64 offset, u64 file_size);
+bool perf_session__has_traces(struct perf_session *self, const char *msg);
+
+int perf_header__read_build_ids(struct perf_header *self, int input,
+ u64 offset, u64 file_size);
+
+int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
+ const char *symbol_name,
+ u64 addr);
+void perf_session__reloc_vmlinux_maps(struct perf_session *self,
+ u64 unrelocated_addr);
+
+void mem_bswap_64(void *src, int byte_size);
#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 5352d7d..c397d4f 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -227,16 +227,73 @@
return NULL;
}
-/* Glob expression pattern matching */
+/* Character class matching */
+static bool __match_charclass(const char *pat, char c, const char **npat)
+{
+ bool complement = false, ret = true;
+
+ if (*pat == '!') {
+ complement = true;
+ pat++;
+ }
+ if (*pat++ == c) /* First character is special */
+ goto end;
+
+ while (*pat && *pat != ']') { /* Matching */
+ if (*pat == '-' && *(pat + 1) != ']') { /* Range */
+ if (*(pat - 1) <= c && c <= *(pat + 1))
+ goto end;
+ if (*(pat - 1) > *(pat + 1))
+ goto error;
+ pat += 2;
+ } else if (*pat++ == c)
+ goto end;
+ }
+ if (!*pat)
+ goto error;
+ ret = false;
+
+end:
+ while (*pat && *pat != ']') /* Searching closing */
+ pat++;
+ if (!*pat)
+ goto error;
+ *npat = pat + 1;
+ return complement ? !ret : ret;
+
+error:
+ return false;
+}
+
+/**
+ * strglobmatch - glob expression pattern matching
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This returns true if the @str matches @pat. @pat can includes wildcards
+ * ('*','?') and character classes ([CHARS], complementation and ranges are
+ * also supported). Also, this supports escape character ('\') to use special
+ * characters as normal character.
+ *
+ * Note: if @pat syntax is broken, this always returns false.
+ */
bool strglobmatch(const char *str, const char *pat)
{
while (*str && *pat && *pat != '*') {
- if (*pat == '?') {
+ if (*pat == '?') { /* Matches any single character */
str++;
pat++;
- } else
- if (*str++ != *pat++)
+ continue;
+ } else if (*pat == '[') /* Character classes/Ranges */
+ if (__match_charclass(pat + 1, *str, &pat)) {
+ str++;
+ continue;
+ } else
return false;
+ else if (*pat == '\\') /* Escaped char match as normal char */
+ pat++;
+ if (*str++ != *pat++)
+ return false;
}
/* Check wild card */
if (*pat == '*') {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ab92763..f9049d1 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -22,6 +22,7 @@
enum dso_origin {
DSO__ORIG_KERNEL = 0,
DSO__ORIG_JAVA_JIT,
+ DSO__ORIG_BUILD_ID_CACHE,
DSO__ORIG_FEDORA,
DSO__ORIG_UBUNTU,
DSO__ORIG_BUILDID,
@@ -63,7 +64,7 @@
self->sorted_by_name |= (1 << type);
}
-static bool symbol_type__is_a(char symbol_type, enum map_type map_type)
+bool symbol_type__is_a(char symbol_type, enum map_type map_type)
{
switch (map_type) {
case MAP__FUNCTION:
@@ -160,7 +161,7 @@
self->start, self->end, self->name);
}
-static void dso__set_long_name(struct dso *self, char *name)
+void dso__set_long_name(struct dso *self, char *name)
{
if (name == NULL)
return;
@@ -175,7 +176,7 @@
struct dso *dso__new(const char *name)
{
- struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
+ struct dso *self = zalloc(sizeof(*self) + strlen(name) + 1);
if (self != NULL) {
int i;
@@ -344,10 +345,10 @@
&self->symbols[type]);
}
-int build_id__sprintf(u8 *self, int len, char *bf)
+int build_id__sprintf(const u8 *self, int len, char *bf)
{
char *bid = bf;
- u8 *raw = self;
+ const u8 *raw = self;
int i;
for (i = 0; i < len; ++i) {
@@ -382,24 +383,20 @@
return ret;
}
-/*
- * Loads the function entries in /proc/kallsyms into kernel_map->dso,
- * so that we can in the next step set the symbol ->end address and then
- * call kernel_maps__split_kallsyms.
- */
-static int dso__load_all_kallsyms(struct dso *self, struct map *map)
+int kallsyms__parse(const char *filename, void *arg,
+ int (*process_symbol)(void *arg, const char *name,
+ char type, u64 start))
{
char *line = NULL;
size_t n;
- struct rb_root *root = &self->symbols[map->type];
- FILE *file = fopen("/proc/kallsyms", "r");
+ int err = 0;
+ FILE *file = fopen(filename, "r");
if (file == NULL)
goto out_failure;
while (!feof(file)) {
u64 start;
- struct symbol *sym;
int line_len, len;
char symbol_type;
char *symbol_name;
@@ -420,35 +417,63 @@
continue;
symbol_type = toupper(line[len]);
- if (!symbol_type__is_a(symbol_type, map->type))
- continue;
-
symbol_name = line + len + 2;
- /*
- * Will fix up the end later, when we have all symbols sorted.
- */
- sym = symbol__new(start, 0, symbol_name);
- if (sym == NULL)
- goto out_delete_line;
- /*
- * We will pass the symbols to the filter later, in
- * map__split_kallsyms, when we have split the maps per module
- */
- symbols__insert(root, sym);
+ err = process_symbol(arg, symbol_name, symbol_type, start);
+ if (err)
+ break;
}
free(line);
fclose(file);
+ return err;
- return 0;
-
-out_delete_line:
- free(line);
out_failure:
return -1;
}
+struct process_kallsyms_args {
+ struct map *map;
+ struct dso *dso;
+};
+
+static int map__process_kallsym_symbol(void *arg, const char *name,
+ char type, u64 start)
+{
+ struct symbol *sym;
+ struct process_kallsyms_args *a = arg;
+ struct rb_root *root = &a->dso->symbols[a->map->type];
+
+ if (!symbol_type__is_a(type, a->map->type))
+ return 0;
+
+ /*
+ * Will fix up the end later, when we have all symbols sorted.
+ */
+ sym = symbol__new(start, 0, name);
+
+ if (sym == NULL)
+ return -ENOMEM;
+ /*
+ * We will pass the symbols to the filter later, in
+ * map__split_kallsyms, when we have split the maps per module
+ */
+ symbols__insert(root, sym);
+ return 0;
+}
+
+/*
+ * Loads the function entries in /proc/kallsyms into kernel_map->dso,
+ * so that we can in the next step set the symbol ->end address and then
+ * call kernel_maps__split_kallsyms.
+ */
+static int dso__load_all_kallsyms(struct dso *self, const char *filename,
+ struct map *map)
+{
+ struct process_kallsyms_args args = { .map = map, .dso = self, };
+ return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
+}
+
/*
* Split the symbols into maps, making sure there are no overlaps, i.e. the
* kernel range is broken in several maps, named [kernel].N, as we don't have
@@ -477,13 +502,17 @@
*module++ = '\0';
- if (strcmp(self->name, module)) {
+ if (strcmp(curr_map->dso->short_name, module)) {
curr_map = map_groups__find_by_name(&session->kmaps, map->type, module);
if (curr_map == NULL) {
pr_debug("/proc/{kallsyms,modules} "
- "inconsistency!\n");
+ "inconsistency while looking "
+ "for \"%s\" module!\n", module);
return -1;
}
+
+ if (curr_map->dso->loaded)
+ goto discard_symbol;
}
/*
* So that we look just like we get from .ko files,
@@ -529,10 +558,10 @@
}
-static int dso__load_kallsyms(struct dso *self, struct map *map,
+static int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
struct perf_session *session, symbol_filter_t filter)
{
- if (dso__load_all_kallsyms(self, map) < 0)
+ if (dso__load_all_kallsyms(self, filename, map) < 0)
return -1;
symbols__fixup_end(&self->symbols[map->type]);
@@ -933,11 +962,15 @@
elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
struct symbol *f;
- const char *elf_name;
+ const char *elf_name = elf_sym__name(&sym, symstrs);
char *demangled = NULL;
int is_label = elf_sym__is_label(&sym);
const char *section_name;
+ if (kernel && session->ref_reloc_sym.name != NULL &&
+ strcmp(elf_name, session->ref_reloc_sym.name) == 0)
+ perf_session__reloc_vmlinux_maps(session, sym.st_value);
+
if (!is_label && !elf_sym__is_a(&sym, map->type))
continue;
@@ -950,7 +983,6 @@
if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type))
continue;
- elf_name = elf_sym__name(&sym, symstrs);
section_name = elf_sec__name(&shdr, secstrs);
if (kernel || kmodule) {
@@ -1191,6 +1223,7 @@
static const char origin[] = {
[DSO__ORIG_KERNEL] = 'k',
[DSO__ORIG_JAVA_JIT] = 'j',
+ [DSO__ORIG_BUILD_ID_CACHE] = 'B',
[DSO__ORIG_FEDORA] = 'f',
[DSO__ORIG_UBUNTU] = 'u',
[DSO__ORIG_BUILDID] = 'b',
@@ -1209,6 +1242,7 @@
int size = PATH_MAX;
char *name;
u8 build_id[BUILD_ID_SIZE];
+ char build_id_hex[BUILD_ID_SIZE * 2 + 1];
int ret = -1;
int fd;
@@ -1230,8 +1264,16 @@
return ret;
}
- self->origin = DSO__ORIG_FEDORA - 1;
+ self->origin = DSO__ORIG_BUILD_ID_CACHE;
+ if (self->has_build_id) {
+ build_id__sprintf(self->build_id, sizeof(self->build_id),
+ build_id_hex);
+ snprintf(name, size, "%s/%s/.build-id/%.2s/%s",
+ getenv("HOME"), DEBUG_CACHE_DIR,
+ build_id_hex, build_id_hex + 2);
+ goto open_file;
+ }
more:
do {
self->origin++;
@@ -1247,8 +1289,6 @@
case DSO__ORIG_BUILDID:
if (filename__read_build_id(self->long_name, build_id,
sizeof(build_id))) {
- char build_id_hex[BUILD_ID_SIZE * 2 + 1];
-
build_id__sprintf(build_id, sizeof(build_id),
build_id_hex);
snprintf(name, size,
@@ -1276,7 +1316,7 @@
if (!dso__build_id_equal(self, build_id))
goto more;
}
-
+open_file:
fd = open(name, O_RDONLY);
} while (fd < 0);
@@ -1309,13 +1349,33 @@
for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
struct map *map = rb_entry(nd, struct map, rb_node);
- if (map->dso && strcmp(map->dso->name, name) == 0)
+ if (map->dso && strcmp(map->dso->short_name, name) == 0)
return map;
}
return NULL;
}
+static int dso__kernel_module_get_build_id(struct dso *self)
+{
+ char filename[PATH_MAX];
+ /*
+ * kernel module short names are of the form "[module]" and
+ * we need just "module" here.
+ */
+ const char *name = self->short_name + 1;
+
+ snprintf(filename, sizeof(filename),
+ "/sys/module/%.*s/notes/.note.gnu.build-id",
+ (int)strlen(name - 1), name);
+
+ if (sysfs__read_build_id(filename, self->build_id,
+ sizeof(self->build_id)) == 0)
+ self->has_build_id = true;
+
+ return 0;
+}
+
static int perf_session__set_modules_path_dir(struct perf_session *self, char *dirname)
{
struct dirent *dent;
@@ -1361,6 +1421,7 @@
if (long_name == NULL)
goto failure;
dso__set_long_name(map->dso, long_name);
+ dso__kernel_module_get_build_id(map->dso);
}
}
@@ -1403,6 +1464,24 @@
return self;
}
+struct map *perf_session__new_module_map(struct perf_session *self, u64 start,
+ const char *filename)
+{
+ struct map *map;
+ struct dso *dso = __dsos__findnew(&dsos__kernel, filename);
+
+ if (dso == NULL)
+ return NULL;
+
+ map = map__new2(start, dso, MAP__FUNCTION);
+ if (map == NULL)
+ return NULL;
+
+ dso->origin = DSO__ORIG_KMODULE;
+ map_groups__insert(&self->kmaps, map);
+ return map;
+}
+
static int perf_session__create_module_maps(struct perf_session *self)
{
char *line = NULL;
@@ -1416,7 +1495,6 @@
while (!feof(file)) {
char name[PATH_MAX];
u64 start;
- struct dso *dso;
char *sep;
int line_len;
@@ -1442,26 +1520,10 @@
*sep = '\0';
snprintf(name, sizeof(name), "[%s]", line);
- dso = dso__new(name);
-
- if (dso == NULL)
+ map = perf_session__new_module_map(self, start, name);
+ if (map == NULL)
goto out_delete_line;
-
- map = map__new2(start, dso, MAP__FUNCTION);
- if (map == NULL) {
- dso__delete(dso);
- goto out_delete_line;
- }
-
- snprintf(name, sizeof(name),
- "/sys/module/%s/notes/.note.gnu.build-id", line);
- if (sysfs__read_build_id(name, dso->build_id,
- sizeof(dso->build_id)) == 0)
- dso->has_build_id = true;
-
- dso->origin = DSO__ORIG_KMODULE;
- map_groups__insert(&self->kmaps, map);
- dsos__add(&dsos__kernel, dso);
+ dso__kernel_module_get_build_id(map->dso);
}
free(line);
@@ -1510,51 +1572,117 @@
return -1;
dso__set_loaded(self, map->type);
- err = dso__load_sym(self, map, session, self->long_name, fd, filter, 1, 0);
+ err = dso__load_sym(self, map, session, vmlinux, fd, filter, 1, 0);
close(fd);
return err;
}
+int dso__load_vmlinux_path(struct dso *self, struct map *map,
+ struct perf_session *session, symbol_filter_t filter)
+{
+ int i, err = 0;
+
+ pr_debug("Looking at the vmlinux_path (%d entries long)\n",
+ vmlinux_path__nr_entries);
+
+ for (i = 0; i < vmlinux_path__nr_entries; ++i) {
+ err = dso__load_vmlinux(self, map, session, vmlinux_path[i],
+ filter);
+ if (err > 0) {
+ pr_debug("Using %s for symbols\n", vmlinux_path[i]);
+ dso__set_long_name(self, strdup(vmlinux_path[i]));
+ break;
+ }
+ }
+
+ return err;
+}
+
static int dso__load_kernel_sym(struct dso *self, struct map *map,
struct perf_session *session, symbol_filter_t filter)
{
int err;
- bool is_kallsyms;
+ const char *kallsyms_filename = NULL;
+ char *kallsyms_allocated_filename = NULL;
+ /*
+ * Step 1: if the user specified a vmlinux filename, use it and only
+ * it, reporting errors to the user if it cannot be used.
+ *
+ * For instance, try to analyse an ARM perf.data file _without_ a
+ * build-id, or if the user specifies the wrong path to the right
+ * vmlinux file, obviously we can't fallback to another vmlinux (a
+ * x86_86 one, on the machine where analysis is being performed, say),
+ * or worse, /proc/kallsyms.
+ *
+ * If the specified file _has_ a build-id and there is a build-id
+ * section in the perf.data file, we will still do the expected
+ * validation in dso__load_vmlinux and will bail out if they don't
+ * match.
+ */
+ if (symbol_conf.vmlinux_name != NULL) {
+ err = dso__load_vmlinux(self, map, session,
+ symbol_conf.vmlinux_name, filter);
+ goto out_try_fixup;
+ }
if (vmlinux_path != NULL) {
- int i;
- pr_debug("Looking at the vmlinux_path (%d entries long)\n",
- vmlinux_path__nr_entries);
- for (i = 0; i < vmlinux_path__nr_entries; ++i) {
- err = dso__load_vmlinux(self, map, session,
- vmlinux_path[i], filter);
- if (err > 0) {
- pr_debug("Using %s for symbols\n",
- vmlinux_path[i]);
- dso__set_long_name(self,
- strdup(vmlinux_path[i]));
- goto out_fixup;
+ err = dso__load_vmlinux_path(self, map, session, filter);
+ if (err > 0)
+ goto out_fixup;
+ }
+
+ /*
+ * Say the kernel DSO was created when processing the build-id header table,
+ * we have a build-id, so check if it is the same as the running kernel,
+ * using it if it is.
+ */
+ if (self->has_build_id) {
+ u8 kallsyms_build_id[BUILD_ID_SIZE];
+ char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+ if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
+ sizeof(kallsyms_build_id)) == 0) {
+ if (dso__build_id_equal(self, kallsyms_build_id)) {
+ kallsyms_filename = "/proc/kallsyms";
+ goto do_kallsyms;
}
}
+ /*
+ * Now look if we have it on the build-id cache in
+ * $HOME/.debug/[kernel.kallsyms].
+ */
+ build_id__sprintf(self->build_id, sizeof(self->build_id),
+ sbuild_id);
+
+ if (asprintf(&kallsyms_allocated_filename,
+ "%s/.debug/[kernel.kallsyms]/%s",
+ getenv("HOME"), sbuild_id) == -1)
+ return -1;
+
+ kallsyms_filename = kallsyms_allocated_filename;
+
+ if (access(kallsyms_filename, F_OK)) {
+ free(kallsyms_allocated_filename);
+ return -1;
+ }
+ } else {
+ /*
+ * Last resort, if we don't have a build-id and couldn't find
+ * any vmlinux file, try the running kernel kallsyms table.
+ */
+ kallsyms_filename = "/proc/kallsyms";
}
- is_kallsyms = self->long_name[0] == '[';
- if (is_kallsyms)
- goto do_kallsyms;
-
- err = dso__load_vmlinux(self, map, session, self->long_name, filter);
- if (err <= 0) {
- pr_info("The file %s cannot be used, "
- "trying to use /proc/kallsyms...", self->long_name);
do_kallsyms:
- err = dso__load_kallsyms(self, map, session, filter);
- if (err > 0 && !is_kallsyms)
- dso__set_long_name(self, strdup("[kernel.kallsyms]"));
- }
+ err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter);
+ free(kallsyms_allocated_filename);
+out_try_fixup:
if (err > 0) {
out_fixup:
+ if (kallsyms_filename != NULL)
+ dso__set_long_name(self, strdup("[kernel.kallsyms]"));
map__fixup_start(map);
map__fixup_end(map);
}
@@ -1576,19 +1704,19 @@
struct dso *pos;
list_for_each_entry(pos, head, node)
- if (strcmp(pos->name, name) == 0)
+ if (strcmp(pos->long_name, name) == 0)
return pos;
return NULL;
}
-struct dso *dsos__findnew(const char *name)
+struct dso *__dsos__findnew(struct list_head *head, const char *name)
{
- struct dso *dso = dsos__find(&dsos__user, name);
+ struct dso *dso = dsos__find(head, name);
if (!dso) {
dso = dso__new(name);
if (dso != NULL) {
- dsos__add(&dsos__user, dso);
+ dsos__add(head, dso);
dso__set_basename(dso);
}
}
@@ -1613,42 +1741,59 @@
__dsos__fprintf(&dsos__user, fp);
}
-static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp)
+static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+ bool with_hits)
{
struct dso *pos;
size_t ret = 0;
list_for_each_entry(pos, head, node) {
+ if (with_hits && !pos->hit)
+ continue;
ret += dso__fprintf_buildid(pos, fp);
ret += fprintf(fp, " %s\n", pos->long_name);
}
return ret;
}
-size_t dsos__fprintf_buildid(FILE *fp)
+size_t dsos__fprintf_buildid(FILE *fp, bool with_hits)
{
- return (__dsos__fprintf_buildid(&dsos__kernel, fp) +
- __dsos__fprintf_buildid(&dsos__user, fp));
+ return (__dsos__fprintf_buildid(&dsos__kernel, fp, with_hits) +
+ __dsos__fprintf_buildid(&dsos__user, fp, with_hits));
}
-static struct dso *dsos__create_kernel( const char *vmlinux)
+struct dso *dso__new_kernel(const char *name)
{
- struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]");
+ struct dso *self = dso__new(name ?: "[kernel.kallsyms]");
+
+ if (self != NULL) {
+ self->short_name = "[kernel]";
+ self->kernel = 1;
+ }
+
+ return self;
+}
+
+void dso__read_running_kernel_build_id(struct dso *self)
+{
+ if (sysfs__read_build_id("/sys/kernel/notes", self->build_id,
+ sizeof(self->build_id)) == 0)
+ self->has_build_id = true;
+}
+
+static struct dso *dsos__create_kernel(const char *vmlinux)
+{
+ struct dso *kernel = dso__new_kernel(vmlinux);
if (kernel == NULL)
return NULL;
- kernel->short_name = "[kernel]";
- kernel->kernel = 1;
-
vdso = dso__new("[vdso]");
if (vdso == NULL)
goto out_delete_kernel_dso;
dso__set_loaded(vdso, MAP__FUNCTION);
- if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id,
- sizeof(kernel->build_id)) == 0)
- kernel->has_build_id = true;
+ dso__read_running_kernel_build_id(kernel);
dsos__add(&dsos__kernel, kernel);
dsos__add(&dsos__user, vdso);
@@ -1660,30 +1805,35 @@
return NULL;
}
-static int map_groups__create_kernel_maps(struct map_groups *self, const char *vmlinux)
+int __map_groups__create_kernel_maps(struct map_groups *self,
+ struct map *vmlinux_maps[MAP__NR_TYPES],
+ struct dso *kernel)
{
- struct map *functions, *variables;
+ enum map_type type;
+
+ for (type = 0; type < MAP__NR_TYPES; ++type) {
+ vmlinux_maps[type] = map__new2(0, kernel, type);
+ if (vmlinux_maps[type] == NULL)
+ return -1;
+
+ vmlinux_maps[type]->map_ip =
+ vmlinux_maps[type]->unmap_ip = identity__map_ip;
+ map_groups__insert(self, vmlinux_maps[type]);
+ }
+
+ return 0;
+}
+
+static int map_groups__create_kernel_maps(struct map_groups *self,
+ struct map *vmlinux_maps[MAP__NR_TYPES],
+ const char *vmlinux)
+{
struct dso *kernel = dsos__create_kernel(vmlinux);
if (kernel == NULL)
return -1;
- functions = map__new2(0, kernel, MAP__FUNCTION);
- if (functions == NULL)
- return -1;
-
- variables = map__new2(0, kernel, MAP__VARIABLE);
- if (variables == NULL) {
- map__delete(functions);
- return -1;
- }
-
- functions->map_ip = functions->unmap_ip =
- variables->map_ip = variables->unmap_ip = identity__map_ip;
- map_groups__insert(self, functions);
- map_groups__insert(self, variables);
-
- return 0;
+ return __map_groups__create_kernel_maps(self, vmlinux_maps, kernel);
}
static void vmlinux_path__exit(void)
@@ -1793,7 +1943,7 @@
int perf_session__create_kernel_maps(struct perf_session *self)
{
- if (map_groups__create_kernel_maps(&self->kmaps,
+ if (map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps,
symbol_conf.vmlinux_name) < 0)
return -1;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 8aded23..1243027 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -8,6 +8,8 @@
#include <linux/rbtree.h>
#include "event.h"
+#define DEBUG_CACHE_DIR ".debug"
+
#ifdef HAVE_CPLUS_DEMANGLE
extern char *cplus_demangle(const char *, int);
@@ -58,7 +60,8 @@
sort_by_name,
show_nr_samples,
use_callchain,
- exclude_other;
+ exclude_other,
+ full_paths;
const char *vmlinux_name,
*field_sep;
char *dso_list_str,
@@ -94,6 +97,7 @@
u8 slen_calculated:1;
u8 has_build_id:1;
u8 kernel:1;
+ u8 hit:1;
unsigned char origin;
u8 sorted_by_name;
u8 loaded;
@@ -105,6 +109,7 @@
};
struct dso *dso__new(const char *name);
+struct dso *dso__new_kernel(const char *name);
void dso__delete(struct dso *self);
bool dso__loaded(const struct dso *self, enum map_type type);
@@ -112,18 +117,30 @@
void dso__sort_by_name(struct dso *self, enum map_type type);
+extern struct list_head dsos__user, dsos__kernel;
+
+struct dso *__dsos__findnew(struct list_head *head, const char *name);
+
+static inline struct dso *dsos__findnew(const char *name)
+{
+ return __dsos__findnew(&dsos__user, name);
+}
+
struct perf_session;
-struct dso *dsos__findnew(const char *name);
int dso__load(struct dso *self, struct map *map, struct perf_session *session,
symbol_filter_t filter);
+int dso__load_vmlinux_path(struct dso *self, struct map *map,
+ struct perf_session *session, symbol_filter_t filter);
void dsos__fprintf(FILE *fp);
-size_t dsos__fprintf_buildid(FILE *fp);
+size_t dsos__fprintf_buildid(FILE *fp, bool with_hits);
size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
char dso__symtab_origin(const struct dso *self);
+void dso__set_long_name(struct dso *self, char *name);
void dso__set_build_id(struct dso *self, void *build_id);
+void dso__read_running_kernel_build_id(struct dso *self);
struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
const char *name);
@@ -131,11 +148,17 @@
int filename__read_build_id(const char *filename, void *bf, size_t size);
int sysfs__read_build_id(const char *filename, void *bf, size_t size);
bool dsos__read_build_ids(void);
-int build_id__sprintf(u8 *self, int len, char *bf);
+int build_id__sprintf(const u8 *self, int len, char *bf);
+int kallsyms__parse(const char *filename, void *arg,
+ int (*process_symbol)(void *arg, const char *name,
+ char type, u64 start));
int symbol__init(void);
+bool symbol_type__is_a(char symbol_type, enum map_type map_type);
+
int perf_session__create_kernel_maps(struct perf_session *self);
-extern struct list_head dsos__user, dsos__kernel;
+struct map *perf_session__new_module_map(struct perf_session *self, u64 start,
+ const char *filename);
extern struct dso *vdso;
#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index c206f72..e35653c 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -48,6 +48,11 @@
return self ? map_groups__find(&self->mg, type, addr) : NULL;
}
+void thread__find_addr_map(struct thread *self,
+ struct perf_session *session, u8 cpumode,
+ enum map_type type, u64 addr,
+ struct addr_location *al);
+
void thread__find_addr_location(struct thread *self,
struct perf_session *session, u8 cpumode,
enum map_type type, u64 addr,
@@ -67,4 +72,8 @@
struct map *map_groups__find_by_name(struct map_groups *self,
enum map_type type, const char *name);
+
+int __map_groups__create_kernel_maps(struct map_groups *self,
+ struct map *vmlinux_maps[MAP__NR_TYPES],
+ struct dso *kernel);
#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index cace355..5ea8973 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -20,6 +20,7 @@
*/
#define _GNU_SOURCE
#include <dirent.h>
+#include <mntent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -37,6 +38,7 @@
#include "../perf.h"
#include "trace-event.h"
+#include "debugfs.h"
#define VERSION "0.5"
@@ -101,32 +103,12 @@
static const char *find_debugfs(void)
{
- static char debugfs[MAX_PATH+1];
- static int debugfs_found;
- char type[100];
- FILE *fp;
+ const char *path = debugfs_mount(NULL);
- if (debugfs_found)
- return debugfs;
+ if (!path)
+ die("Your kernel not support debugfs filesystem");
- if ((fp = fopen("/proc/mounts","r")) == NULL)
- die("Can't open /proc/mounts for read");
-
- while (fscanf(fp, "%*s %"
- STR(MAX_PATH)
- "s %99s %*s %*d %*d\n",
- debugfs, type) == 2) {
- if (strcmp(type, "debugfs") == 0)
- break;
- }
- fclose(fp);
-
- if (strcmp(type, "debugfs") != 0)
- die("debugfs not mounted, please mount");
-
- debugfs_found = 1;
-
- return debugfs;
+ return path;
}
/*
@@ -271,6 +253,8 @@
write_or_die("header_page", 12);
write_or_die(&size, 8);
check_size = copy_file_fd(fd);
+ close(fd);
+
if (size != check_size)
die("wrong size for '%s' size=%lld read=%lld",
path, size, check_size);
@@ -289,6 +273,7 @@
if (size != check_size)
die("wrong size for '%s'", path);
put_tracing_file(path);
+ close(fd);
}
static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
@@ -317,7 +302,8 @@
die("can't read directory '%s'", sys);
while ((dent = readdir(dir))) {
- if (strcmp(dent->d_name, ".") == 0 ||
+ if (dent->d_type != DT_DIR ||
+ strcmp(dent->d_name, ".") == 0 ||
strcmp(dent->d_name, "..") == 0 ||
!name_in_tp_list(dent->d_name, tps))
continue;
@@ -334,7 +320,8 @@
rewinddir(dir);
while ((dent = readdir(dir))) {
- if (strcmp(dent->d_name, ".") == 0 ||
+ if (dent->d_type != DT_DIR ||
+ strcmp(dent->d_name, ".") == 0 ||
strcmp(dent->d_name, "..") == 0 ||
!name_in_tp_list(dent->d_name, tps))
continue;
@@ -353,6 +340,7 @@
free(format);
}
+ closedir(dir);
}
static void read_ftrace_files(struct tracepoint_path *tps)
@@ -394,26 +382,21 @@
die("can't read directory '%s'", path);
while ((dent = readdir(dir))) {
- if (strcmp(dent->d_name, ".") == 0 ||
+ if (dent->d_type != DT_DIR ||
+ strcmp(dent->d_name, ".") == 0 ||
strcmp(dent->d_name, "..") == 0 ||
strcmp(dent->d_name, "ftrace") == 0 ||
!system_in_tp_list(dent->d_name, tps))
continue;
- sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2);
- sprintf(sys, "%s/%s", path, dent->d_name);
- ret = stat(sys, &st);
- free(sys);
- if (ret < 0)
- continue;
- if (S_ISDIR(st.st_mode))
- count++;
+ count++;
}
write_or_die(&count, 4);
rewinddir(dir);
while ((dent = readdir(dir))) {
- if (strcmp(dent->d_name, ".") == 0 ||
+ if (dent->d_type != DT_DIR ||
+ strcmp(dent->d_name, ".") == 0 ||
strcmp(dent->d_name, "..") == 0 ||
strcmp(dent->d_name, "ftrace") == 0 ||
!system_in_tp_list(dent->d_name, tps))
@@ -422,14 +405,13 @@
sprintf(sys, "%s/%s", path, dent->d_name);
ret = stat(sys, &st);
if (ret >= 0) {
- if (S_ISDIR(st.st_mode)) {
- write_or_die(dent->d_name, strlen(dent->d_name) + 1);
- copy_event_system(sys, tps);
- }
+ write_or_die(dent->d_name, strlen(dent->d_name) + 1);
+ copy_event_system(sys, tps);
}
free(sys);
}
+ closedir(dir);
put_tracing_file(path);
}
@@ -533,7 +515,7 @@
write_or_die(buf, 1);
/* save page_size */
- page_size = getpagesize();
+ page_size = sysconf(_SC_PAGESIZE);
write_or_die(&page_size, 4);
read_header_files();
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
new file mode 100644
index 0000000..f9b890f
--- /dev/null
+++ b/tools/perf/util/util.c
@@ -0,0 +1,94 @@
+#include "util.h"
+#include <sys/mman.h>
+
+int mkdir_p(char *path, mode_t mode)
+{
+ struct stat st;
+ int err;
+ char *d = path;
+
+ if (*d != '/')
+ return -1;
+
+ if (stat(path, &st) == 0)
+ return 0;
+
+ while (*++d == '/');
+
+ while ((d = strchr(d, '/'))) {
+ *d = '\0';
+ err = stat(path, &st) && mkdir(path, mode);
+ *d++ = '/';
+ if (err)
+ return -1;
+ while (*d == '/')
+ ++d;
+ }
+ return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
+}
+
+static int slow_copyfile(const char *from, const char *to)
+{
+ int err = 0;
+ char *line = NULL;
+ size_t n;
+ FILE *from_fp = fopen(from, "r"), *to_fp;
+
+ if (from_fp == NULL)
+ goto out;
+
+ to_fp = fopen(to, "w");
+ if (to_fp == NULL)
+ goto out_fclose_from;
+
+ while (getline(&line, &n, from_fp) > 0)
+ if (fputs(line, to_fp) == EOF)
+ goto out_fclose_to;
+ err = 0;
+out_fclose_to:
+ fclose(to_fp);
+ free(line);
+out_fclose_from:
+ fclose(from_fp);
+out:
+ return err;
+}
+
+int copyfile(const char *from, const char *to)
+{
+ int fromfd, tofd;
+ struct stat st;
+ void *addr;
+ int err = -1;
+
+ if (stat(from, &st))
+ goto out;
+
+ if (st.st_size == 0) /* /proc? do it slowly... */
+ return slow_copyfile(from, to);
+
+ fromfd = open(from, O_RDONLY);
+ if (fromfd < 0)
+ goto out;
+
+ tofd = creat(to, 0755);
+ if (tofd < 0)
+ goto out_close_from;
+
+ addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0);
+ if (addr == MAP_FAILED)
+ goto out_close_to;
+
+ if (write(tofd, addr, st.st_size) == st.st_size)
+ err = 0;
+
+ munmap(addr, st.st_size);
+out_close_to:
+ close(tofd);
+ if (err)
+ unlink(to);
+out_close_from:
+ close(fromfd);
+out:
+ return err;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c673d88..0f5b2a6 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -403,4 +403,7 @@
#endif
#endif
+int mkdir_p(char *path, mode_t mode);
+int copyfile(const char *from, const char *to);
+
#endif
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index 1c15e39..cfa55d6 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -169,6 +169,7 @@
counterwidth[j], values->value[i][j]);
fprintf(fp, "\n");
}
+ free(counterwidth);
}
static void perf_read_values__display_raw(FILE *fp,