Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
perf python scripting: Add futex-contention script
perf python scripting: Fixup cut'n'paste error in sctop script
perf scripting: Shut up 'perf record' final status
perf record: Remove newline character from perror() argument
perf python scripting: Support fedora 11 (audit 1.7.17)
perf python scripting: Improve the syscalls-by-pid script
perf python scripting: print the syscall name on sctop
perf python scripting: Improve the syscalls-counts script
perf python scripting: Improve the failed-syscalls-by-pid script
kprobes: Remove redundant text_mutex lock in optimize
x86/oprofile: Fix uninitialized variable use in debug printk
tracing: Fix 'faild' -> 'failed' typo
perf probe: Fix format specified for Dwarf_Off parameter
perf trace: Fix detection of script extension
perf trace: Use $PERF_EXEC_PATH in canned report scripts
perf tools: Document event modifiers
perf tools: Remove direct slang.h include
perf_events: Fix for transaction recovery in group_sched_in()
perf_events: Revert: Fix transaction recovery in group_sched_in()
perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
...
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83c4bb1..3ea3dc4 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,6 +121,7 @@
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSCTL 0xc001103a
+#define MSR_AMD64_IBSBRTARGET 0xc001103b
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 6e742cc..550e26b1 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -111,17 +111,18 @@
#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
/* IbsFetchCtl bits/masks */
-#define IBS_FETCH_RAND_EN (1ULL<<57)
-#define IBS_FETCH_VAL (1ULL<<49)
-#define IBS_FETCH_ENABLE (1ULL<<48)
-#define IBS_FETCH_CNT 0xFFFF0000ULL
-#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
+#define IBS_FETCH_RAND_EN (1ULL<<57)
+#define IBS_FETCH_VAL (1ULL<<49)
+#define IBS_FETCH_ENABLE (1ULL<<48)
+#define IBS_FETCH_CNT 0xFFFF0000ULL
+#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
/* IbsOpCtl bits */
-#define IBS_OP_CNT_CTL (1ULL<<19)
-#define IBS_OP_VAL (1ULL<<18)
-#define IBS_OP_ENABLE (1ULL<<17)
-#define IBS_OP_MAX_CNT 0x0000FFFFULL
+#define IBS_OP_CNT_CTL (1ULL<<19)
+#define IBS_OP_VAL (1ULL<<18)
+#define IBS_OP_ENABLE (1ULL<<17)
+#define IBS_OP_MAX_CNT 0x0000FFFFULL
+#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
#ifdef CONFIG_PERF_EVENTS
extern void init_hw_perf_events(void);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c1e8c7a..ed63101 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -237,6 +237,7 @@
* Intel DebugStore bits
*/
int bts, pebs;
+ int bts_active, pebs_active;
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
@@ -380,7 +381,7 @@
#endif
-static int reserve_ds_buffers(void);
+static void reserve_ds_buffers(void);
static void release_ds_buffers(void);
static void hw_perf_event_destroy(struct perf_event *event)
@@ -477,7 +478,7 @@
if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
(hwc->sample_period == 1)) {
/* BTS is not supported by this architecture. */
- if (!x86_pmu.bts)
+ if (!x86_pmu.bts_active)
return -EOPNOTSUPP;
/* BTS is currently only allowed for user-mode. */
@@ -496,12 +497,13 @@
int precise = 0;
/* Support for constant skid */
- if (x86_pmu.pebs)
+ if (x86_pmu.pebs_active) {
precise++;
- /* Support for IP fixup */
- if (x86_pmu.lbr_nr)
- precise++;
+ /* Support for IP fixup */
+ if (x86_pmu.lbr_nr)
+ precise++;
+ }
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
@@ -543,11 +545,8 @@
if (atomic_read(&active_events) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
- else {
- err = reserve_ds_buffers();
- if (err)
- release_pmc_hardware();
- }
+ else
+ reserve_ds_buffers();
}
if (!err)
atomic_inc(&active_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c..b7dcd9f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}
+static int alloc_pebs_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ int node = cpu_to_node(cpu);
+ int max, thresh = 1; /* always use a single PEBS record */
+ void *buffer;
+
+ if (!x86_pmu.pebs)
+ return 0;
+
+ buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+ if (unlikely(!buffer))
+ return -ENOMEM;
+
+ max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+
+ ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+ ds->pebs_index = ds->pebs_buffer_base;
+ ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+ max * x86_pmu.pebs_record_size;
+
+ ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
+ thresh * x86_pmu.pebs_record_size;
+
+ return 0;
+}
+
+static void release_pebs_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds || !x86_pmu.pebs)
+ return;
+
+ kfree((void *)(unsigned long)ds->pebs_buffer_base);
+ ds->pebs_buffer_base = 0;
+}
+
+static int alloc_bts_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ int node = cpu_to_node(cpu);
+ int max, thresh;
+ void *buffer;
+
+ if (!x86_pmu.bts)
+ return 0;
+
+ buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+ if (unlikely(!buffer))
+ return -ENOMEM;
+
+ max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+ thresh = max / 16;
+
+ ds->bts_buffer_base = (u64)(unsigned long)buffer;
+ ds->bts_index = ds->bts_buffer_base;
+ ds->bts_absolute_maximum = ds->bts_buffer_base +
+ max * BTS_RECORD_SIZE;
+ ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+ thresh * BTS_RECORD_SIZE;
+
+ return 0;
+}
+
+static void release_bts_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds || !x86_pmu.bts)
+ return;
+
+ kfree((void *)(unsigned long)ds->bts_buffer_base);
+ ds->bts_buffer_base = 0;
+}
+
+static int alloc_ds_buffer(int cpu)
+{
+ int node = cpu_to_node(cpu);
+ struct debug_store *ds;
+
+ ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
+ if (unlikely(!ds))
+ return -ENOMEM;
+
+ per_cpu(cpu_hw_events, cpu).ds = ds;
+
+ return 0;
+}
+
+static void release_ds_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds)
+ return;
+
+ per_cpu(cpu_hw_events, cpu).ds = NULL;
+ kfree(ds);
+}
+
static void release_ds_buffers(void)
{
int cpu;
@@ -82,93 +183,77 @@
return;
get_online_cpus();
-
for_each_online_cpu(cpu)
fini_debug_store_on_cpu(cpu);
for_each_possible_cpu(cpu) {
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
- if (!ds)
- continue;
-
- per_cpu(cpu_hw_events, cpu).ds = NULL;
-
- kfree((void *)(unsigned long)ds->pebs_buffer_base);
- kfree((void *)(unsigned long)ds->bts_buffer_base);
- kfree(ds);
+ release_pebs_buffer(cpu);
+ release_bts_buffer(cpu);
+ release_ds_buffer(cpu);
}
-
put_online_cpus();
}
-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
{
- int cpu, err = 0;
+ int bts_err = 0, pebs_err = 0;
+ int cpu;
+
+ x86_pmu.bts_active = 0;
+ x86_pmu.pebs_active = 0;
if (!x86_pmu.bts && !x86_pmu.pebs)
- return 0;
+ return;
+
+ if (!x86_pmu.bts)
+ bts_err = 1;
+
+ if (!x86_pmu.pebs)
+ pebs_err = 1;
get_online_cpus();
for_each_possible_cpu(cpu) {
- struct debug_store *ds;
- void *buffer;
- int max, thresh;
+ if (alloc_ds_buffer(cpu)) {
+ bts_err = 1;
+ pebs_err = 1;
+ }
- err = -ENOMEM;
- ds = kzalloc(sizeof(*ds), GFP_KERNEL);
- if (unlikely(!ds))
+ if (!bts_err && alloc_bts_buffer(cpu))
+ bts_err = 1;
+
+ if (!pebs_err && alloc_pebs_buffer(cpu))
+ pebs_err = 1;
+
+ if (bts_err && pebs_err)
break;
- per_cpu(cpu_hw_events, cpu).ds = ds;
-
- if (x86_pmu.bts) {
- buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
- if (unlikely(!buffer))
- break;
-
- max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
- thresh = max / 16;
-
- ds->bts_buffer_base = (u64)(unsigned long)buffer;
- ds->bts_index = ds->bts_buffer_base;
- ds->bts_absolute_maximum = ds->bts_buffer_base +
- max * BTS_RECORD_SIZE;
- ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
- thresh * BTS_RECORD_SIZE;
- }
-
- if (x86_pmu.pebs) {
- buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
- if (unlikely(!buffer))
- break;
-
- max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
-
- ds->pebs_buffer_base = (u64)(unsigned long)buffer;
- ds->pebs_index = ds->pebs_buffer_base;
- ds->pebs_absolute_maximum = ds->pebs_buffer_base +
- max * x86_pmu.pebs_record_size;
- /*
- * Always use single record PEBS
- */
- ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
- x86_pmu.pebs_record_size;
- }
-
- err = 0;
}
- if (err)
- release_ds_buffers();
- else {
+ if (bts_err) {
+ for_each_possible_cpu(cpu)
+ release_bts_buffer(cpu);
+ }
+
+ if (pebs_err) {
+ for_each_possible_cpu(cpu)
+ release_pebs_buffer(cpu);
+ }
+
+ if (bts_err && pebs_err) {
+ for_each_possible_cpu(cpu)
+ release_ds_buffer(cpu);
+ } else {
+ if (x86_pmu.bts && !bts_err)
+ x86_pmu.bts_active = 1;
+
+ if (x86_pmu.pebs && !pebs_err)
+ x86_pmu.pebs_active = 1;
+
for_each_online_cpu(cpu)
init_debug_store_on_cpu(cpu);
}
put_online_cpus();
-
- return err;
}
/*
@@ -233,7 +318,7 @@
if (!event)
return 0;
- if (!ds)
+ if (!x86_pmu.bts_active)
return 0;
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@
struct pebs_record_core *at, *top;
int n;
- if (!ds || !x86_pmu.pebs)
+ if (!x86_pmu.pebs_active)
return;
at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@
u64 status = 0;
int bit, n;
- if (!ds || !x86_pmu.pebs)
+ if (!x86_pmu.pebs_active)
return;
at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@
#else /* CONFIG_CPU_SUP_INTEL */
-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
{
- return 0;
}
static void release_ds_buffers(void)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index bd1489c..4e8baad 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -726,6 +726,12 @@
case 0x11:
cpu_type = "x86-64/family11h";
break;
+ case 0x12:
+ cpu_type = "x86-64/family12h";
+ break;
+ case 0x14:
+ cpu_type = "x86-64/family14h";
+ break;
default:
return -ENODEV;
}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 42fb46f..a011bcc 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -48,17 +48,24 @@
static u32 ibs_caps;
-struct op_ibs_config {
+struct ibs_config {
unsigned long op_enabled;
unsigned long fetch_enabled;
unsigned long max_cnt_fetch;
unsigned long max_cnt_op;
unsigned long rand_en;
unsigned long dispatched_ops;
+ unsigned long branch_target;
};
-static struct op_ibs_config ibs_config;
-static u64 ibs_op_ctl;
+struct ibs_state {
+ u64 ibs_op_ctl;
+ int branch_target;
+ unsigned long sample_size;
+};
+
+static struct ibs_config ibs_config;
+static struct ibs_state ibs_state;
/*
* IBS cpuid feature detection
@@ -71,8 +78,16 @@
* bit 0 is used to indicate the existence of IBS.
*/
#define IBS_CAPS_AVAIL (1U<<0)
+#define IBS_CAPS_FETCHSAM (1U<<1)
+#define IBS_CAPS_OPSAM (1U<<2)
#define IBS_CAPS_RDWROPCNT (1U<<3)
#define IBS_CAPS_OPCNT (1U<<4)
+#define IBS_CAPS_BRNTRGT (1U<<5)
+#define IBS_CAPS_OPCNTEXT (1U<<6)
+
+#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
+ | IBS_CAPS_FETCHSAM \
+ | IBS_CAPS_OPSAM)
/*
* IBS APIC setup
@@ -99,12 +114,12 @@
/* check IBS cpuid feature flags */
max_level = cpuid_eax(0x80000000);
if (max_level < IBS_CPUID_FEATURES)
- return IBS_CAPS_AVAIL;
+ return IBS_CAPS_DEFAULT;
ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
if (!(ibs_caps & IBS_CAPS_AVAIL))
/* cpuid flags not valid */
- return IBS_CAPS_AVAIL;
+ return IBS_CAPS_DEFAULT;
return ibs_caps;
}
@@ -197,8 +212,8 @@
rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
if (ctl & IBS_OP_VAL) {
rdmsrl(MSR_AMD64_IBSOPRIP, val);
- oprofile_write_reserve(&entry, regs, val,
- IBS_OP_CODE, IBS_OP_SIZE);
+ oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
+ ibs_state.sample_size);
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA, val);
oprofile_add_data64(&entry, val);
@@ -210,10 +225,14 @@
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
oprofile_add_data64(&entry, val);
+ if (ibs_state.branch_target) {
+ rdmsrl(MSR_AMD64_IBSBRTARGET, val);
+ oprofile_add_data(&entry, (unsigned long)val);
+ }
oprofile_write_commit(&entry);
/* reenable the IRQ */
- ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
+ ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
}
}
@@ -226,21 +245,32 @@
if (!ibs_caps)
return;
+ memset(&ibs_state, 0, sizeof(ibs_state));
+
+ /*
+ * Note: Since the max count settings may out of range we
+ * write back the actual used values so that userland can read
+ * it.
+ */
+
if (ibs_config.fetch_enabled) {
- val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
+ val = ibs_config.max_cnt_fetch >> 4;
+ val = min(val, IBS_FETCH_MAX_CNT);
+ ibs_config.max_cnt_fetch = val << 4;
val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
val |= IBS_FETCH_ENABLE;
wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
}
if (ibs_config.op_enabled) {
- ibs_op_ctl = ibs_config.max_cnt_op >> 4;
+ val = ibs_config.max_cnt_op >> 4;
if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
/*
* IbsOpCurCnt not supported. See
* op_amd_randomize_ibs_op() for details.
*/
- ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
+ val = clamp(val, 0x0081ULL, 0xFF80ULL);
+ ibs_config.max_cnt_op = val << 4;
} else {
/*
* The start value is randomized with a
@@ -248,13 +278,24 @@
* with the half of the randomized range. Also
* avoid underflows.
*/
- ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
- IBS_OP_MAX_CNT);
+ val += IBS_RANDOM_MAXCNT_OFFSET;
+ if (ibs_caps & IBS_CAPS_OPCNTEXT)
+ val = min(val, IBS_OP_MAX_CNT_EXT);
+ else
+ val = min(val, IBS_OP_MAX_CNT);
+ ibs_config.max_cnt_op =
+ (val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
}
- if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
- ibs_op_ctl |= IBS_OP_CNT_CTL;
- ibs_op_ctl |= IBS_OP_ENABLE;
- val = op_amd_randomize_ibs_op(ibs_op_ctl);
+ val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
+ val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+ val |= IBS_OP_ENABLE;
+ ibs_state.ibs_op_ctl = val;
+ ibs_state.sample_size = IBS_OP_SIZE;
+ if (ibs_config.branch_target) {
+ ibs_state.branch_target = 1;
+ ibs_state.sample_size++;
+ }
+ val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, val);
}
}
@@ -281,29 +322,25 @@
static inline int ibs_eilvt_valid(void)
{
- u64 val;
int offset;
+ u64 val;
rdmsrl(MSR_AMD64_IBSCTL, val);
+ offset = val & IBSCTL_LVT_OFFSET_MASK;
+
if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
- pr_err(FW_BUG "cpu %d, invalid IBS "
- "interrupt offset %d (MSR%08X=0x%016llx)",
- smp_processor_id(), offset,
- MSR_AMD64_IBSCTL, val);
+ pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+ smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
return 0;
}
- offset = val & IBSCTL_LVT_OFFSET_MASK;
+ if (!eilvt_is_available(offset)) {
+ pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+ smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+ return 0;
+ }
- if (eilvt_is_available(offset))
- return !0;
-
- pr_err(FW_BUG "cpu %d, IBS interrupt offset %d "
- "not available (MSR%08X=0x%016llx)",
- smp_processor_id(), offset,
- MSR_AMD64_IBSCTL, val);
-
- return 0;
+ return 1;
}
static inline int get_ibs_offset(void)
@@ -630,28 +667,33 @@
/* model specific files */
/* setup some reasonable defaults */
+ memset(&ibs_config, 0, sizeof(ibs_config));
ibs_config.max_cnt_fetch = 250000;
- ibs_config.fetch_enabled = 0;
ibs_config.max_cnt_op = 250000;
- ibs_config.op_enabled = 0;
- ibs_config.dispatched_ops = 0;
- dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
- oprofilefs_create_ulong(sb, dir, "enable",
- &ibs_config.fetch_enabled);
- oprofilefs_create_ulong(sb, dir, "max_count",
- &ibs_config.max_cnt_fetch);
- oprofilefs_create_ulong(sb, dir, "rand_enable",
- &ibs_config.rand_en);
+ if (ibs_caps & IBS_CAPS_FETCHSAM) {
+ dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
+ oprofilefs_create_ulong(sb, dir, "enable",
+ &ibs_config.fetch_enabled);
+ oprofilefs_create_ulong(sb, dir, "max_count",
+ &ibs_config.max_cnt_fetch);
+ oprofilefs_create_ulong(sb, dir, "rand_enable",
+ &ibs_config.rand_en);
+ }
- dir = oprofilefs_mkdir(sb, root, "ibs_op");
- oprofilefs_create_ulong(sb, dir, "enable",
- &ibs_config.op_enabled);
- oprofilefs_create_ulong(sb, dir, "max_count",
- &ibs_config.max_cnt_op);
- if (ibs_caps & IBS_CAPS_OPCNT)
- oprofilefs_create_ulong(sb, dir, "dispatched_ops",
- &ibs_config.dispatched_ops);
+ if (ibs_caps & IBS_CAPS_OPSAM) {
+ dir = oprofilefs_mkdir(sb, root, "ibs_op");
+ oprofilefs_create_ulong(sb, dir, "enable",
+ &ibs_config.op_enabled);
+ oprofilefs_create_ulong(sb, dir, "max_count",
+ &ibs_config.max_cnt_op);
+ if (ibs_caps & IBS_CAPS_OPCNT)
+ oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+ &ibs_config.dispatched_ops);
+ if (ibs_caps & IBS_CAPS_BRNTRGT)
+ oprofilefs_create_ulong(sb, dir, "branch_target",
+ &ibs_config.branch_target);
+ }
return 0;
}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 01b2816..79d0c4f 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -410,7 +410,7 @@
extern void softirq_init(void);
static inline void __raise_softirq_irqoff(unsigned int nr)
{
- trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
+ trace_softirq_raise(nr);
or_softirq_pending(1UL << nr);
}
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 25b4f68..8d3a248 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -62,18 +62,6 @@
unsigned ring_buffer_event_length(struct ring_buffer_event *event);
void *ring_buffer_event_data(struct ring_buffer_event *event);
-/**
- * ring_buffer_event_time_delta - return the delta timestamp of the event
- * @event: the event to get the delta timestamp of
- *
- * The delta timestamp is the 27 bit timestamp since the last event.
- */
-static inline unsigned
-ring_buffer_event_time_delta(struct ring_buffer_event *event)
-{
- return event->time_delta;
-}
-
/*
* ring_buffer_discard_commit will remove an event that has not
* ben committed yet. If this is used, then ring_buffer_unlock_commit
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 6fa7cbab7..1c09820 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -86,76 +86,62 @@
DECLARE_EVENT_CLASS(softirq,
- TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+ TP_PROTO(unsigned int vec_nr),
- TP_ARGS(h, vec),
+ TP_ARGS(vec_nr),
TP_STRUCT__entry(
- __field( int, vec )
+ __field( unsigned int, vec )
),
TP_fast_assign(
- if (vec)
- __entry->vec = (int)(h - vec);
- else
- __entry->vec = (int)(long)h;
+ __entry->vec = vec_nr;
),
- TP_printk("vec=%d [action=%s]", __entry->vec,
+ TP_printk("vec=%u [action=%s]", __entry->vec,
show_softirq_name(__entry->vec))
);
/**
* softirq_entry - called immediately before the softirq handler
- * @h: pointer to struct softirq_action
- * @vec: pointer to first struct softirq_action in softirq_vec array
+ * @vec_nr: softirq vector number
*
- * The @h parameter, contains a pointer to the struct softirq_action
- * which has a pointer to the action handler that is called. By subtracting
- * the @vec pointer from the @h pointer, we can determine the softirq
- * number. Also, when used in combination with the softirq_exit tracepoint
- * we can determine the softirq latency.
+ * When used in combination with the softirq_exit tracepoint
+ * we can determine the softirq handler runtine.
*/
DEFINE_EVENT(softirq, softirq_entry,
- TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+ TP_PROTO(unsigned int vec_nr),
- TP_ARGS(h, vec)
+ TP_ARGS(vec_nr)
);
/**
* softirq_exit - called immediately after the softirq handler returns
- * @h: pointer to struct softirq_action
- * @vec: pointer to first struct softirq_action in softirq_vec array
+ * @vec_nr: softirq vector number
*
- * The @h parameter contains a pointer to the struct softirq_action
- * that has handled the softirq. By subtracting the @vec pointer from
- * the @h pointer, we can determine the softirq number. Also, when used in
- * combination with the softirq_entry tracepoint we can determine the softirq
- * latency.
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq handler runtine.
*/
DEFINE_EVENT(softirq, softirq_exit,
- TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+ TP_PROTO(unsigned int vec_nr),
- TP_ARGS(h, vec)
+ TP_ARGS(vec_nr)
);
/**
* softirq_raise - called immediately when a softirq is raised
- * @h: pointer to struct softirq_action
- * @vec: pointer to first struct softirq_action in softirq_vec array
+ * @vec_nr: softirq vector number
*
- * The @h parameter contains a pointer to the softirq vector number which is
- * raised. @vec is NULL and it means @h includes vector number not
- * softirq_action. When used in combination with the softirq_entry tracepoint
- * we can determine the softirq raise latency.
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq raise to run latency.
*/
DEFINE_EVENT(softirq, softirq_raise,
- TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+ TP_PROTO(unsigned int vec_nr),
- TP_ARGS(h, vec)
+ TP_ARGS(vec_nr)
);
#endif /* _TRACE_IRQ_H */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 56a8919..99865c3 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -74,7 +74,8 @@
/* NOTE: change this value only with kprobe_mutex held */
static bool kprobes_all_disarmed;
-static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
+/* This protects kprobe_table and optimizing_list */
+static DEFINE_MUTEX(kprobe_mutex);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
spinlock_t lock ____cacheline_aligned_in_smp;
@@ -595,6 +596,7 @@
}
#ifdef CONFIG_SYSCTL
+/* This should be called with kprobe_mutex locked */
static void __kprobes optimize_all_kprobes(void)
{
struct hlist_head *head;
@@ -607,17 +609,16 @@
return;
kprobes_allow_optimization = true;
- mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
- mutex_unlock(&text_mutex);
printk(KERN_INFO "Kprobes globally optimized\n");
}
+/* This should be called with kprobe_mutex locked */
static void __kprobes unoptimize_all_kprobes(void)
{
struct hlist_head *head;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index f309e80..517d827 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -417,8 +417,8 @@
return event->cpu == -1 || event->cpu == smp_processor_id();
}
-static int
-__event_sched_out(struct perf_event *event,
+static void
+event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
@@ -437,13 +437,14 @@
}
if (event->state != PERF_EVENT_STATE_ACTIVE)
- return 0;
+ return;
event->state = PERF_EVENT_STATE_INACTIVE;
if (event->pending_disable) {
event->pending_disable = 0;
event->state = PERF_EVENT_STATE_OFF;
}
+ event->tstamp_stopped = ctx->time;
event->pmu->del(event, 0);
event->oncpu = -1;
@@ -452,19 +453,6 @@
ctx->nr_active--;
if (event->attr.exclusive || !cpuctx->active_oncpu)
cpuctx->exclusive = 0;
- return 1;
-}
-
-static void
-event_sched_out(struct perf_event *event,
- struct perf_cpu_context *cpuctx,
- struct perf_event_context *ctx)
-{
- int ret;
-
- ret = __event_sched_out(event, cpuctx, ctx);
- if (ret)
- event->tstamp_stopped = ctx->time;
}
static void
@@ -664,7 +652,7 @@
}
static int
-__event_sched_in(struct perf_event *event,
+event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
@@ -684,6 +672,8 @@
return -EAGAIN;
}
+ event->tstamp_running += ctx->time - event->tstamp_stopped;
+
if (!is_software_event(event))
cpuctx->active_oncpu++;
ctx->nr_active++;
@@ -694,35 +684,6 @@
return 0;
}
-static inline int
-event_sched_in(struct perf_event *event,
- struct perf_cpu_context *cpuctx,
- struct perf_event_context *ctx)
-{
- int ret = __event_sched_in(event, cpuctx, ctx);
- if (ret)
- return ret;
- event->tstamp_running += ctx->time - event->tstamp_stopped;
- return 0;
-}
-
-static void
-group_commit_event_sched_in(struct perf_event *group_event,
- struct perf_cpu_context *cpuctx,
- struct perf_event_context *ctx)
-{
- struct perf_event *event;
- u64 now = ctx->time;
-
- group_event->tstamp_running += now - group_event->tstamp_stopped;
- /*
- * Schedule in siblings as one group (if any):
- */
- list_for_each_entry(event, &group_event->sibling_list, group_entry) {
- event->tstamp_running += now - event->tstamp_stopped;
- }
-}
-
static int
group_sched_in(struct perf_event *group_event,
struct perf_cpu_context *cpuctx,
@@ -730,19 +691,15 @@
{
struct perf_event *event, *partial_group = NULL;
struct pmu *pmu = group_event->pmu;
+ u64 now = ctx->time;
+ bool simulate = false;
if (group_event->state == PERF_EVENT_STATE_OFF)
return 0;
pmu->start_txn(pmu);
- /*
- * use __event_sched_in() to delay updating tstamp_running
- * until the transaction is committed. In case of failure
- * we will keep an unmodified tstamp_running which is a
- * requirement to get correct timing information
- */
- if (__event_sched_in(group_event, cpuctx, ctx)) {
+ if (event_sched_in(group_event, cpuctx, ctx)) {
pmu->cancel_txn(pmu);
return -EAGAIN;
}
@@ -751,31 +708,42 @@
* Schedule in siblings as one group (if any):
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
- if (__event_sched_in(event, cpuctx, ctx)) {
+ if (event_sched_in(event, cpuctx, ctx)) {
partial_group = event;
goto group_error;
}
}
- if (!pmu->commit_txn(pmu)) {
- /* commit tstamp_running */
- group_commit_event_sched_in(group_event, cpuctx, ctx);
+ if (!pmu->commit_txn(pmu))
return 0;
- }
+
group_error:
/*
* Groups can be scheduled in as one unit only, so undo any
* partial group before returning:
+ * The events up to the failed event are scheduled out normally,
+ * tstamp_stopped will be updated.
*
- * use __event_sched_out() to avoid updating tstamp_stopped
- * because the event never actually ran
+ * The failed events and the remaining siblings need to have
+ * their timings updated as if they had gone thru event_sched_in()
+ * and event_sched_out(). This is required to get consistent timings
+ * across the group. This also takes care of the case where the group
+ * could never be scheduled by ensuring tstamp_stopped is set to mark
+ * the time the event was actually stopped, such that time delta
+ * calculation in update_event_times() is correct.
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
if (event == partial_group)
- break;
- __event_sched_out(event, cpuctx, ctx);
+ simulate = true;
+
+ if (simulate) {
+ event->tstamp_running += now - event->tstamp_stopped;
+ event->tstamp_stopped = now;
+ } else {
+ event_sched_out(event, cpuctx, ctx);
+ }
}
- __event_sched_out(group_event, cpuctx, ctx);
+ event_sched_out(group_event, cpuctx, ctx);
pmu->cancel_txn(pmu);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index f02a9df..18f4be0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -229,18 +229,20 @@
do {
if (pending & 1) {
+ unsigned int vec_nr = h - softirq_vec;
int prev_count = preempt_count();
- kstat_incr_softirqs_this_cpu(h - softirq_vec);
- trace_softirq_entry(h, softirq_vec);
+ kstat_incr_softirqs_this_cpu(vec_nr);
+
+ trace_softirq_entry(vec_nr);
h->action(h);
- trace_softirq_exit(h, softirq_vec);
+ trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) {
- printk(KERN_ERR "huh, entered softirq %td %s %p"
+ printk(KERN_ERR "huh, entered softirq %u %s %p"
"with preempt_count %08x,"
- " exited with %08x?\n", h - softirq_vec,
- softirq_to_name[h - softirq_vec],
- h->action, prev_count, preempt_count());
+ " exited with %08x?\n", vec_nr,
+ softirq_to_name[vec_nr], h->action,
+ prev_count, preempt_count());
preempt_count() = prev_count;
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c3dab05..9ed509a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -224,6 +224,9 @@
RB_LEN_TIME_STAMP = 16,
};
+#define skip_time_extend(event) \
+ ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+
static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@
return length + RB_EVNT_HDR_SIZE;
}
-/* inline for ring buffer fast paths */
-static unsigned
+/*
+ * Return the length of the given event. Will return
+ * the length of the time extend if the event is a
+ * time extend.
+ */
+static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
switch (event->type_len) {
@@ -274,13 +281,41 @@
return 0;
}
+/*
+ * Return total length of time extend and data,
+ * or just the event length for all other events.
+ */
+static inline unsigned
+rb_event_ts_length(struct ring_buffer_event *event)
+{
+ unsigned len = 0;
+
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ /* time extends include the data event after it */
+ len = RB_LEN_TIME_EXTEND;
+ event = skip_time_extend(event);
+ }
+ return len + rb_event_length(event);
+}
+
/**
* ring_buffer_event_length - return the length of the event
* @event: the event to get the length of
+ *
+ * Returns the size of the data load of a data event.
+ * If the event is something other than a data event, it
+ * returns the size of the event itself. With the exception
+ * of a TIME EXTEND, where it still returns the size of the
+ * data load of the data event after it.
*/
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
- unsigned length = rb_event_length(event);
+ unsigned length;
+
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
+
+ length = rb_event_length(event);
if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
return length;
length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@
static void *
rb_event_data(struct ring_buffer_event *event)
{
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
if (event->type_len)
@@ -404,9 +441,6 @@
/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
-/* Max number of timestamps that can fit on a page */
-#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
-
int ring_buffer_print_page_header(struct trace_seq *s)
{
struct buffer_data_page field;
@@ -1546,6 +1580,25 @@
iter->head = 0;
}
+/* Slow path, do not inline */
+static noinline struct ring_buffer_event *
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+{
+ event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+
+ /* Not the first event on the page? */
+ if (rb_event_index(event)) {
+ event->time_delta = delta & TS_MASK;
+ event->array[0] = delta >> TS_SHIFT;
+ } else {
+ /* nope, just zero it */
+ event->time_delta = 0;
+ event->array[0] = 0;
+ }
+
+ return skip_time_extend(event);
+}
+
/**
* ring_buffer_update_event - update event type and data
* @event: the even to update
@@ -1558,28 +1611,31 @@
* data field.
*/
static void
-rb_update_event(struct ring_buffer_event *event,
- unsigned type, unsigned length)
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event, unsigned length,
+ int add_timestamp, u64 delta)
{
- event->type_len = type;
+ /* Only a commit updates the timestamp */
+ if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+ delta = 0;
- switch (type) {
-
- case RINGBUF_TYPE_PADDING:
- case RINGBUF_TYPE_TIME_EXTEND:
- case RINGBUF_TYPE_TIME_STAMP:
- break;
-
- case 0:
- length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
- event->array[0] = length;
- else
- event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
- break;
- default:
- BUG();
+ /*
+ * If we need to add a timestamp, then we
+ * add it to the start of the resevered space.
+ */
+ if (unlikely(add_timestamp)) {
+ event = rb_add_time_stamp(event, delta);
+ length -= RB_LEN_TIME_EXTEND;
+ delta = 0;
}
+
+ event->time_delta = delta;
+ length -= RB_EVNT_HDR_SIZE;
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+ event->type_len = 0;
+ event->array[0] = length;
+ } else
+ event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
}
/*
@@ -1823,10 +1879,13 @@
local_sub(length, &tail_page->write);
}
-static struct ring_buffer_event *
+/*
+ * This is the slow path, force gcc not to inline it.
+ */
+static noinline struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length, unsigned long tail,
- struct buffer_page *tail_page, u64 *ts)
+ struct buffer_page *tail_page, u64 ts)
{
struct buffer_page *commit_page = cpu_buffer->commit_page;
struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@
* Nested commits always have zero deltas, so
* just reread the time stamp
*/
- *ts = rb_time_stamp(buffer);
- next_page->page->time_stamp = *ts;
+ ts = rb_time_stamp(buffer);
+ next_page->page->time_stamp = ts;
}
out_again:
@@ -1929,12 +1988,21 @@
static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
- unsigned type, unsigned long length, u64 *ts)
+ unsigned long length, u64 ts,
+ u64 delta, int add_timestamp)
{
struct buffer_page *tail_page;
struct ring_buffer_event *event;
unsigned long tail, write;
+ /*
+ * If the time delta since the last event is too big to
+ * hold in the time field of the event, then we append a
+ * TIME EXTEND event ahead of the data event.
+ */
+ if (unlikely(add_timestamp))
+ length += RB_LEN_TIME_EXTEND;
+
tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write);
@@ -1943,7 +2011,7 @@
tail = write - length;
/* See if we shot pass the end of this buffer page */
- if (write > BUF_PAGE_SIZE)
+ if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, length, tail,
tail_page, ts);
@@ -1951,18 +2019,16 @@
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
- rb_update_event(event, type, length);
+ rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
- /* The passed in type is zero for DATA */
- if (likely(!type))
- local_inc(&tail_page->entries);
+ local_inc(&tail_page->entries);
/*
* If this is the first commit on the page, then update
* its timestamp.
*/
if (!tail)
- tail_page->page->time_stamp = *ts;
+ tail_page->page->time_stamp = ts;
return event;
}
@@ -1977,7 +2043,7 @@
unsigned long addr;
new_index = rb_event_index(event);
- old_index = new_index + rb_event_length(event);
+ old_index = new_index + rb_event_ts_length(event);
addr = (unsigned long)event;
addr &= PAGE_MASK;
@@ -2003,76 +2069,13 @@
return 0;
}
-static int
-rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
- u64 *ts, u64 *delta)
-{
- struct ring_buffer_event *event;
- int ret;
-
- WARN_ONCE(*delta > (1ULL << 59),
- KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
- (unsigned long long)*delta,
- (unsigned long long)*ts,
- (unsigned long long)cpu_buffer->write_stamp);
-
- /*
- * The delta is too big, we to add a
- * new timestamp.
- */
- event = __rb_reserve_next(cpu_buffer,
- RINGBUF_TYPE_TIME_EXTEND,
- RB_LEN_TIME_EXTEND,
- ts);
- if (!event)
- return -EBUSY;
-
- if (PTR_ERR(event) == -EAGAIN)
- return -EAGAIN;
-
- /* Only a commited time event can update the write stamp */
- if (rb_event_is_commit(cpu_buffer, event)) {
- /*
- * If this is the first on the page, then it was
- * updated with the page itself. Try to discard it
- * and if we can't just make it zero.
- */
- if (rb_event_index(event)) {
- event->time_delta = *delta & TS_MASK;
- event->array[0] = *delta >> TS_SHIFT;
- } else {
- /* try to discard, since we do not need this */
- if (!rb_try_to_discard(cpu_buffer, event)) {
- /* nope, just zero it */
- event->time_delta = 0;
- event->array[0] = 0;
- }
- }
- cpu_buffer->write_stamp = *ts;
- /* let the caller know this was the commit */
- ret = 1;
- } else {
- /* Try to discard the event */
- if (!rb_try_to_discard(cpu_buffer, event)) {
- /* Darn, this is just wasted space */
- event->time_delta = 0;
- event->array[0] = 0;
- }
- ret = 0;
- }
-
- *delta = 0;
-
- return ret;
-}
-
static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
local_inc(&cpu_buffer->committing);
local_inc(&cpu_buffer->commits);
}
-static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long commits;
@@ -2110,9 +2113,10 @@
unsigned long length)
{
struct ring_buffer_event *event;
- u64 ts, delta = 0;
- int commit = 0;
+ u64 ts, delta;
int nr_loops = 0;
+ int add_timestamp;
+ u64 diff;
rb_start_commit(cpu_buffer);
@@ -2133,6 +2137,9 @@
length = rb_calculate_event_length(length);
again:
+ add_timestamp = 0;
+ delta = 0;
+
/*
* We allow for interrupts to reenter here and do a trace.
* If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@
goto out_fail;
ts = rb_time_stamp(cpu_buffer->buffer);
+ diff = ts - cpu_buffer->write_stamp;
- /*
- * Only the first commit can update the timestamp.
- * Yes there is a race here. If an interrupt comes in
- * just after the conditional and it traces too, then it
- * will also check the deltas. More than one timestamp may
- * also be made. But only the entry that did the actual
- * commit will be something other than zero.
- */
- if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
- rb_page_write(cpu_buffer->tail_page) ==
- rb_commit_index(cpu_buffer))) {
- u64 diff;
+ /* make sure this diff is calculated here */
+ barrier();
- diff = ts - cpu_buffer->write_stamp;
-
- /* make sure this diff is calculated here */
- barrier();
-
- /* Did the write stamp get updated already? */
- if (unlikely(ts < cpu_buffer->write_stamp))
- goto get_event;
-
+ /* Did the write stamp get updated already? */
+ if (likely(ts >= cpu_buffer->write_stamp)) {
delta = diff;
if (unlikely(test_time_stamp(delta))) {
-
- commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
- if (commit == -EBUSY)
- goto out_fail;
-
- if (commit == -EAGAIN)
- goto again;
-
- RB_WARN_ON(cpu_buffer, commit < 0);
+ WARN_ONCE(delta > (1ULL << 59),
+ KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+ (unsigned long long)delta,
+ (unsigned long long)ts,
+ (unsigned long long)cpu_buffer->write_stamp);
+ add_timestamp = 1;
}
}
- get_event:
- event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+ event = __rb_reserve_next(cpu_buffer, length, ts,
+ delta, add_timestamp);
if (unlikely(PTR_ERR(event) == -EAGAIN))
goto again;
if (!event)
goto out_fail;
- if (!rb_event_is_commit(cpu_buffer, event))
- delta = 0;
-
- event->time_delta = delta;
-
return event;
out_fail:
@@ -2207,13 +2190,9 @@
#define TRACE_RECURSIVE_DEPTH 16
-static int trace_recursive_lock(void)
+/* Keep this code out of the fast path cache */
+static noinline void trace_recursive_fail(void)
{
- current->trace_recursion++;
-
- if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
- return 0;
-
/* Disable all tracing before we do anything else */
tracing_off_permanent();
@@ -2225,10 +2204,21 @@
in_nmi());
WARN_ON_ONCE(1);
+}
+
+static inline int trace_recursive_lock(void)
+{
+ current->trace_recursion++;
+
+ if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+ return 0;
+
+ trace_recursive_fail();
+
return -1;
}
-static void trace_recursive_unlock(void)
+static inline void trace_recursive_unlock(void)
{
WARN_ON_ONCE(!current->trace_recursion);
@@ -2308,12 +2298,28 @@
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
+ u64 delta;
+
/*
* The event first in the commit queue updates the
* time stamp.
*/
- if (rb_event_is_commit(cpu_buffer, event))
- cpu_buffer->write_stamp += event->time_delta;
+ if (rb_event_is_commit(cpu_buffer, event)) {
+ /*
+ * A commit event that is first on a page
+ * updates the write timestamp with the page stamp
+ */
+ if (!rb_event_index(event))
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
+ else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ delta = event->array[0];
+ delta <<= TS_SHIFT;
+ delta += event->time_delta;
+ cpu_buffer->write_stamp += delta;
+ } else
+ cpu_buffer->write_stamp += event->time_delta;
+ }
}
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2353,6 +2359,9 @@
static inline void rb_event_discard(struct ring_buffer_event *event)
{
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
+
/* array[0] holds the actual length for the discarded event */
event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
event->type_len = RINGBUF_TYPE_PADDING;
@@ -3049,12 +3058,12 @@
again:
/*
- * We repeat when a timestamp is encountered. It is possible
- * to get multiple timestamps from an interrupt entering just
- * as one timestamp is about to be written, or from discarded
- * commits. The most that we can have is the number on a single page.
+ * We repeat when a time extend is encountered.
+ * Since the time extend is always attached to a data event,
+ * we should never loop more than once.
+ * (We never hit the following condition more than twice).
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;
reader = rb_get_reader_page(cpu_buffer);
@@ -3130,14 +3139,12 @@
return NULL;
/*
- * We repeat when a timestamp is encountered.
- * We can get multiple timestamps by nested interrupts or also
- * if filtering is on (discarding commits). Since discarding
- * commits can be frequent we can get a lot of timestamps.
- * But we limit them by not adding timestamps if they begin
- * at the start of a page.
+ * We repeat when a time extend is encountered.
+ * Since the time extend is always attached to a data event,
+ * we should never loop more than once.
+ * (We never hit the following condition more than twice).
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;
if (rb_per_cpu_empty(cpu_buffer))
@@ -3835,7 +3842,8 @@
if (len > (commit - read))
len = (commit - read);
- size = rb_event_length(event);
+ /* Always keep the time extend and data together */
+ size = rb_event_ts_length(event);
if (len < size)
goto out_unlock;
@@ -3857,7 +3865,8 @@
break;
event = rb_reader_event(cpu_buffer);
- size = rb_event_length(event);
+ /* Always keep the time extend and data together */
+ size = rb_event_ts_length(event);
} while (len > size);
/* update bpage */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 001bcd2..82d9b81 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3996,13 +3996,9 @@
{
struct dentry *d_percpu = tracing_dentry_percpu();
struct dentry *d_cpu;
- /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
- char cpu_dir[7];
+ char cpu_dir[30]; /* 30 characters should be more than enough */
- if (cpu > 999 || cpu < 0)
- return;
-
- sprintf(cpu_dir, "cpu%ld", cpu);
+ snprintf(cpu_dir, 30, "cpu%ld", cpu);
d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
if (!d_cpu) {
pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 43e3dd2..399751b 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -15,6 +15,23 @@
This command displays the symbolic event types which can be selected in the
various perf commands with the -e option.
+EVENT MODIFIERS
+---------------
+
+Events can optionally have a modifer by appending a colon and one or
+more modifiers. Modifiers allow the user to restrict when events are
+counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
+
+The 'p' modifier can be used for specifying how precise the instruction
+address should be. The 'p' modifier is currently only implemented for
+Intel PEBS and can be specified multiple times:
+ 0 - SAMPLE_IP can have arbitrary skid
+ 1 - SAMPLE_IP must have constant skid
+ 2 - SAMPLE_IP requested to have 0 skid
+ 3 - SAMPLE_IP must have 0 skid
+
+The PEBS implementation now supports up to 2.
+
RAW HARDWARE EVENT DESCRIPTOR
-----------------------------
Even when an event is not available in a symbolic form within perf right now,
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 27d52da..62de1b7 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -16,7 +16,9 @@
or
'perf probe' --list
or
-'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
+'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
+or
+'perf probe' [options] --vars='PROBEPOINT'
DESCRIPTION
-----------
@@ -31,6 +33,11 @@
--vmlinux=PATH::
Specify vmlinux path which has debuginfo (Dwarf binary).
+-m::
+--module=MODNAME::
+ Specify module name in which perf-probe searches probe points
+ or lines.
+
-s::
--source=PATH::
Specify path to kernel source.
@@ -57,6 +64,15 @@
Show source code lines which can be probed. This needs an argument
which specifies a range of the source code. (see LINE SYNTAX for detail)
+-V::
+--vars=::
+ Show available local variables at given probe point. The argument
+ syntax is same as PROBE SYNTAX, but NO ARGs.
+
+--externs::
+ (Only for --vars) Show external defined variables in addition to local
+ variables.
+
-f::
--force::
Forcibly add events with existing name.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 3ee27dc..a91f9f9 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -83,6 +83,10 @@
--call-graph::
Do call-graph (stack chain/backtrace) recording.
+-q::
+--quiet::
+ Don't print any message, useful for scripting.
+
-v::
--verbose::
Be more verbose (show counter open errors, etc).
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 199d5e1..2e000c0 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -50,14 +50,17 @@
bool list_events;
bool force_add;
bool show_lines;
+ bool show_vars;
+ bool show_ext_vars;
+ bool mod_events;
int nevents;
struct perf_probe_event events[MAX_PROBES];
struct strlist *dellist;
struct line_range line_range;
+ const char *target_module;
int max_probe_points;
} params;
-
/* Parse an event definition. Note that any error must die. */
static int parse_probe_event(const char *str)
{
@@ -92,6 +95,7 @@
len = 0;
for (i = 0; i < argc; i++)
len += sprintf(&buf[len], "%s ", argv[i]);
+ params.mod_events = true;
ret = parse_probe_event(buf);
free(buf);
return ret;
@@ -100,9 +104,10 @@
static int opt_add_probe_event(const struct option *opt __used,
const char *str, int unset __used)
{
- if (str)
+ if (str) {
+ params.mod_events = true;
return parse_probe_event(str);
- else
+ } else
return 0;
}
@@ -110,6 +115,7 @@
const char *str, int unset __used)
{
if (str) {
+ params.mod_events = true;
if (!params.dellist)
params.dellist = strlist__new(true, NULL);
strlist__add(params.dellist, str);
@@ -130,6 +136,25 @@
return ret;
}
+
+static int opt_show_vars(const struct option *opt __used,
+ const char *str, int unset __used)
+{
+ struct perf_probe_event *pev = ¶ms.events[params.nevents];
+ int ret;
+
+ if (!str)
+ return 0;
+
+ ret = parse_probe_event(str);
+ if (!ret && pev->nargs != 0) {
+ pr_err(" Error: '--vars' doesn't accept arguments.\n");
+ return -EINVAL;
+ }
+ params.show_vars = true;
+
+ return ret;
+}
#endif
static const char * const probe_usage[] = {
@@ -138,7 +163,8 @@
"perf probe [<options>] --del '[GROUP:]EVENT' ...",
"perf probe --list",
#ifdef DWARF_SUPPORT
- "perf probe --line 'LINEDESC'",
+ "perf probe [<options>] --line 'LINEDESC'",
+ "perf probe [<options>] --vars 'PROBEPOINT'",
#endif
NULL
};
@@ -180,10 +206,17 @@
OPT_CALLBACK('L', "line", NULL,
"FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
"Show source code lines.", opt_show_lines),
+ OPT_CALLBACK('V', "vars", NULL,
+ "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
+ "Show accessible variables on PROBEDEF", opt_show_vars),
+ OPT_BOOLEAN('\0', "externs", ¶ms.show_ext_vars,
+ "Show external variables too (with --vars only)"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
OPT_STRING('s', "source", &symbol_conf.source_prefix,
"directory", "path to kernel source"),
+ OPT_STRING('m', "module", ¶ms.target_module,
+ "modname", "target module name"),
#endif
OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", ¶ms.max_probe_points,
@@ -217,7 +250,7 @@
usage_with_options(probe_usage, options);
if (params.list_events) {
- if (params.nevents != 0 || params.dellist) {
+ if (params.mod_events) {
pr_err(" Error: Don't use --list with --add/--del.\n");
usage_with_options(probe_usage, options);
}
@@ -225,6 +258,10 @@
pr_err(" Error: Don't use --list with --line.\n");
usage_with_options(probe_usage, options);
}
+ if (params.show_vars) {
+ pr_err(" Error: Don't use --list with --vars.\n");
+ usage_with_options(probe_usage, options);
+ }
ret = show_perf_probe_events();
if (ret < 0)
pr_err(" Error: Failed to show event list. (%d)\n",
@@ -234,17 +271,35 @@
#ifdef DWARF_SUPPORT
if (params.show_lines) {
- if (params.nevents != 0 || params.dellist) {
- pr_warning(" Error: Don't use --line with"
- " --add/--del.\n");
+ if (params.mod_events) {
+ pr_err(" Error: Don't use --line with"
+ " --add/--del.\n");
+ usage_with_options(probe_usage, options);
+ }
+ if (params.show_vars) {
+ pr_err(" Error: Don't use --line with --vars.\n");
usage_with_options(probe_usage, options);
}
- ret = show_line_range(¶ms.line_range);
+ ret = show_line_range(¶ms.line_range, params.target_module);
if (ret < 0)
pr_err(" Error: Failed to show lines. (%d)\n", ret);
return ret;
}
+ if (params.show_vars) {
+ if (params.mod_events) {
+ pr_err(" Error: Don't use --vars with"
+ " --add/--del.\n");
+ usage_with_options(probe_usage, options);
+ }
+ ret = show_available_vars(params.events, params.nevents,
+ params.max_probe_points,
+ params.target_module,
+ params.show_ext_vars);
+ if (ret < 0)
+ pr_err(" Error: Failed to show vars. (%d)\n", ret);
+ return ret;
+ }
#endif
if (params.dellist) {
@@ -258,8 +313,9 @@
if (params.nevents) {
ret = add_perf_probe_events(params.events, params.nevents,
- params.force_add,
- params.max_probe_points);
+ params.max_probe_points,
+ params.target_module,
+ params.force_add);
if (ret < 0) {
pr_err(" Error: Failed to add events. (%d)\n", ret);
return ret;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ff77b80..4e75583 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -353,7 +353,7 @@
}
if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) {
- perror("Unable to read perf file descriptor\n");
+ perror("Unable to read perf file descriptor");
exit(-1);
}
@@ -626,7 +626,7 @@
nr_cpus = read_cpu_map(cpu_list);
if (nr_cpus < 1) {
- perror("failed to collect number of CPUs\n");
+ perror("failed to collect number of CPUs");
return -1;
}
@@ -761,6 +761,9 @@
}
}
+ if (quiet)
+ return 0;
+
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
/*
@@ -820,6 +823,7 @@
"do call-graph (stack chain/backtrace) recording"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
+ OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
OPT_BOOLEAN('s', "stat", &inherit_stat,
"per thread counts"),
OPT_BOOLEAN('d', "data", &sample_address,
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 40a6a29..2f8df45 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -46,9 +46,6 @@
static void setup_scripting(void)
{
- /* make sure PERF_EXEC_PATH is set for scripts */
- perf_set_argv_exec_path(perf_exec_path());
-
setup_perl_scripting();
setup_python_scripting();
@@ -285,7 +282,7 @@
script++;
} else {
script = str;
- ext = strchr(script, '.');
+ ext = strrchr(script, '.');
if (!ext) {
fprintf(stderr, "invalid script extension");
return -1;
@@ -593,6 +590,9 @@
suffix = REPORT_SUFFIX;
}
+ /* make sure PERF_EXEC_PATH is set for scripts */
+ perf_set_argv_exec_path(perf_exec_path());
+
if (!suffix && argc >= 2 && strncmp(argv[1], "-", strlen("-")) != 0) {
char *record_script_path, *report_script_path;
int live_pipe[2];
@@ -625,12 +625,13 @@
dup2(live_pipe[1], 1);
close(live_pipe[0]);
- __argv = malloc(5 * sizeof(const char *));
+ __argv = malloc(6 * sizeof(const char *));
__argv[0] = "/bin/sh";
__argv[1] = record_script_path;
- __argv[2] = "-o";
- __argv[3] = "-";
- __argv[4] = NULL;
+ __argv[2] = "-q";
+ __argv[3] = "-o";
+ __argv[4] = "-";
+ __argv[5] = NULL;
execvp("/bin/sh", (char **)__argv);
exit(-1);
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report
index e3a5e55..4028d92 100644
--- a/tools/perf/scripts/perl/bin/failed-syscalls-report
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-report
@@ -7,4 +7,4 @@
shift
fi
fi
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
index d83070b..ba25f4d 100644
--- a/tools/perf/scripts/perl/bin/rw-by-file-report
+++ b/tools/perf/scripts/perl/bin/rw-by-file-report
@@ -7,7 +7,7 @@
fi
comm=$1
shift
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
index 7ef4698..641a3f5 100644
--- a/tools/perf/scripts/perl/bin/rw-by-pid-report
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-report
@@ -1,6 +1,6 @@
#!/bin/bash
# description: system-wide r/w activity
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
diff --git a/tools/perf/scripts/perl/bin/rwtop-report b/tools/perf/scripts/perl/bin/rwtop-report
index 93e698c..4918dba 100644
--- a/tools/perf/scripts/perl/bin/rwtop-report
+++ b/tools/perf/scripts/perl/bin/rwtop-report
@@ -17,7 +17,7 @@
interval=$1
shift
fi
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/rwtop.pl $interval
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
index a0d898f..49052eb 100644
--- a/tools/perf/scripts/perl/bin/wakeup-latency-report
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-report
@@ -1,6 +1,6 @@
#!/bin/bash
# description: system-wide min/max/avg wakeup latency
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
index 3508113..df0c65f 100644
--- a/tools/perf/scripts/perl/bin/workqueue-stats-report
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-report
@@ -1,6 +1,6 @@
#!/bin/bash
# description: workqueue stats (ins/exe/create/destroy)
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index 9689bc0..13cc02b 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -6,6 +6,14 @@
# Public License ("GPL") version 2 as published by the Free Software
# Foundation.
+import errno, os
+
+FUTEX_WAIT = 0
+FUTEX_WAKE = 1
+FUTEX_PRIVATE_FLAG = 128
+FUTEX_CLOCK_REALTIME = 256
+FUTEX_CMD_MASK = ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
+
NSECS_PER_SEC = 1000000000
def avg(total, n):
@@ -24,5 +32,55 @@
str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)),
return str
+def add_stats(dict, key, value):
+ if not dict.has_key(key):
+ dict[key] = (value, value, value, 1)
+ else:
+ min, max, avg, count = dict[key]
+ if value < min:
+ min = value
+ if value > max:
+ max = value
+ avg = (avg + value) / 2
+ dict[key] = (min, max, avg, count + 1)
+
def clear_term():
print("\x1b[H\x1b[2J")
+
+audit_package_warned = False
+
+try:
+ import audit
+ machine_to_id = {
+ 'x86_64': audit.MACH_86_64,
+ 'alpha' : audit.MACH_ALPHA,
+ 'ia64' : audit.MACH_IA64,
+ 'ppc' : audit.MACH_PPC,
+ 'ppc64' : audit.MACH_PPC64,
+ 's390' : audit.MACH_S390,
+ 's390x' : audit.MACH_S390X,
+ 'i386' : audit.MACH_X86,
+ 'i586' : audit.MACH_X86,
+ 'i686' : audit.MACH_X86,
+ }
+ try:
+ machine_to_id['armeb'] = audit.MACH_ARMEB
+ except:
+ pass
+ machine_id = machine_to_id[os.uname()[4]]
+except:
+ if not audit_package_warned:
+ audit_package_warned = True
+ print "Install the audit-libs-python package to get syscall names"
+
+def syscall_name(id):
+ try:
+ return audit.audit_syscall_to_name(id, machine_id)
+ except:
+ return str(id)
+
+def strerror(nr):
+ try:
+ return errno.errorcode[abs(nr)]
+ except:
+ return "Unknown %d errno" % nr
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
index 3029354..0358702 100644
--- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
@@ -7,4 +7,4 @@
shift
fi
fi
-perf trace $@ -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/futex-contention-record b/tools/perf/scripts/python/bin/futex-contention-record
new file mode 100644
index 0000000..5ecbb43
--- /dev/null
+++ b/tools/perf/scripts/python/bin/futex-contention-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -a -e syscalls:sys_enter_futex -e syscalls:sys_exit_futex $@
diff --git a/tools/perf/scripts/python/bin/futex-contention-report b/tools/perf/scripts/python/bin/futex-contention-report
new file mode 100644
index 0000000..c826813
--- /dev/null
+++ b/tools/perf/scripts/python/bin/futex-contention-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: futext contention measurement
+
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report
index c3d0a63..4ad361b 100644
--- a/tools/perf/scripts/python/bin/netdev-times-report
+++ b/tools/perf/scripts/python/bin/netdev-times-report
@@ -2,4 +2,4 @@
# description: display a process of packet and processing time
# args: [tx] [rx] [dev=] [debug]
-perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@
+perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
diff --git a/tools/perf/scripts/python/bin/sched-migration-report b/tools/perf/scripts/python/bin/sched-migration-report
index 61d05f7..df1791f 100644
--- a/tools/perf/scripts/python/bin/sched-migration-report
+++ b/tools/perf/scripts/python/bin/sched-migration-report
@@ -1,3 +1,3 @@
#!/bin/bash
# description: sched migration overview
-perf trace $@ -s ~/libexec/perf-core/scripts/python/sched-migration.py
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
diff --git a/tools/perf/scripts/python/bin/sctop-report b/tools/perf/scripts/python/bin/sctop-report
index b01c842..36b409c 100644
--- a/tools/perf/scripts/python/bin/sctop-report
+++ b/tools/perf/scripts/python/bin/sctop-report
@@ -21,4 +21,4 @@
interval=$1
shift
fi
-perf trace $@ -s ~/libexec/perf-core/scripts/python/sctop.py $comm $interval
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
index 9e9d8dd..4eb88c9 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
@@ -7,4 +7,4 @@
shift
fi
fi
-perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report
index dc076b6..cb2f9c5 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-report
@@ -7,4 +7,4 @@
shift
fi
fi
-perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts.py $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py
index 0ca0227..acd7848 100644
--- a/tools/perf/scripts/python/failed-syscalls-by-pid.py
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -13,21 +13,26 @@
from perf_trace_context import *
from Core import *
+from Util import *
-usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
+usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";
for_comm = None
+for_pid = None
if len(sys.argv) > 2:
sys.exit(usage)
if len(sys.argv) > 1:
- for_comm = sys.argv[1]
+ try:
+ for_pid = int(sys.argv[1])
+ except:
+ for_comm = sys.argv[1]
syscalls = autodict()
def trace_begin():
- pass
+ print "Press control+C to stop and show the summary"
def trace_end():
print_error_totals()
@@ -35,9 +40,9 @@
def raw_syscalls__sys_exit(event_name, context, common_cpu,
common_secs, common_nsecs, common_pid, common_comm,
id, ret):
- if for_comm is not None:
- if common_comm != for_comm:
- return
+ if (for_comm and common_comm != for_comm) or \
+ (for_pid and common_pid != for_pid ):
+ return
if ret < 0:
try:
@@ -62,7 +67,7 @@
print "\n%s [%d]\n" % (comm, pid),
id_keys = syscalls[comm][pid].keys()
for id in id_keys:
- print " syscall: %-16d\n" % (id),
+ print " syscall: %-16s\n" % syscall_name(id),
ret_keys = syscalls[comm][pid][id].keys()
for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True):
- print " err = %-20d %10d\n" % (ret, val),
+ print " err = %-20s %10d\n" % (strerror(ret), val),
diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py
new file mode 100644
index 0000000..11e70a3
--- /dev/null
+++ b/tools/perf/scripts/python/futex-contention.py
@@ -0,0 +1,50 @@
+# futex contention
+# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Translation of:
+#
+# http://sourceware.org/systemtap/wiki/WSFutexContention
+#
+# to perf python scripting.
+#
+# Measures futex contention
+
+import os, sys
+sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+from Util import *
+
+process_names = {}
+thread_thislock = {}
+thread_blocktime = {}
+
+lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
+process_names = {} # long-lived pid-to-execname mapping
+
+def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm,
+ nr, uaddr, op, val, utime, uaddr2, val3):
+ cmd = op & FUTEX_CMD_MASK
+ if cmd != FUTEX_WAIT:
+ return # we don't care about originators of WAKE events
+
+ process_names[tid] = comm
+ thread_thislock[tid] = uaddr
+ thread_blocktime[tid] = nsecs(s, ns)
+
+def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm,
+ nr, ret):
+ if thread_blocktime.has_key(tid):
+ elapsed = nsecs(s, ns) - thread_blocktime[tid]
+ add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
+ del thread_blocktime[tid]
+ del thread_thislock[tid]
+
+def trace_begin():
+ print "Press control+C to stop and show the summary"
+
+def trace_end():
+ for (tid, lock) in lock_waits:
+ min, max, avg, count = lock_waits[tid, lock]
+ print "%s[%d] lock %x contended %d times, %d avg ns" % \
+ (process_names[tid], tid, lock, count, avg)
+
diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py
index 6cafad4..7a6ec2c 100644
--- a/tools/perf/scripts/python/sctop.py
+++ b/tools/perf/scripts/python/sctop.py
@@ -8,10 +8,7 @@
# will be refreshed every [interval] seconds. The default interval is
# 3 seconds.
-import thread
-import time
-import os
-import sys
+import os, sys, thread, time
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
@@ -20,7 +17,7 @@
from Core import *
from Util import *
-usage = "perf trace -s syscall-counts.py [comm] [interval]\n";
+usage = "perf trace -s sctop.py [comm] [interval]\n";
for_comm = None
default_interval = 3
@@ -71,7 +68,7 @@
for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
reverse = True):
try:
- print "%-40d %10d\n" % (id, val),
+ print "%-40s %10d\n" % (syscall_name(id), val),
except TypeError:
pass
syscalls.clear()
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py
index af722d6..d1ee3ec 100644
--- a/tools/perf/scripts/python/syscall-counts-by-pid.py
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -5,29 +5,33 @@
# Displays system-wide system call totals, broken down by syscall.
# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
-import os
-import sys
+import os, sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import *
from Core import *
+from Util import syscall_name
usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
for_comm = None
+for_pid = None
if len(sys.argv) > 2:
sys.exit(usage)
if len(sys.argv) > 1:
- for_comm = sys.argv[1]
+ try:
+ for_pid = int(sys.argv[1])
+ except:
+ for_comm = sys.argv[1]
syscalls = autodict()
def trace_begin():
- pass
+ print "Press control+C to stop and show the summary"
def trace_end():
print_syscall_totals()
@@ -35,9 +39,10 @@
def raw_syscalls__sys_enter(event_name, context, common_cpu,
common_secs, common_nsecs, common_pid, common_comm,
id, args):
- if for_comm is not None:
- if common_comm != for_comm:
- return
+
+ if (for_comm and common_comm != for_comm) or \
+ (for_pid and common_pid != for_pid ):
+ return
try:
syscalls[common_comm][common_pid][id] += 1
except TypeError:
@@ -61,4 +66,4 @@
id_keys = syscalls[comm][pid].keys()
for id, val in sorted(syscalls[comm][pid].iteritems(), \
key = lambda(k, v): (v, k), reverse = True):
- print " %-38d %10d\n" % (id, val),
+ print " %-38s %10d\n" % (syscall_name(id), val),
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py
index f977e85..ea183dc 100644
--- a/tools/perf/scripts/python/syscall-counts.py
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -13,6 +13,7 @@
from perf_trace_context import *
from Core import *
+from Util import syscall_name
usage = "perf trace -s syscall-counts.py [comm]\n";
@@ -27,7 +28,7 @@
syscalls = autodict()
def trace_begin():
- pass
+ print "Press control+C to stop and show the summary"
def trace_end():
print_syscall_totals()
@@ -55,4 +56,4 @@
for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
reverse = True):
- print "%-40d %10d\n" % (id, val),
+ print "%-40s %10d\n" % (syscall_name(id), val),
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index f9c7e3a..c8d81b0 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -12,8 +12,8 @@
#include "debug.h"
#include "util.h"
-int verbose = 0;
-bool dump_trace = false;
+int verbose;
+bool dump_trace = false, quiet = false;
int eprintf(int level, const char *fmt, ...)
{
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 7a17ee0..7b51408 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -6,7 +6,7 @@
#include "event.h"
extern int verbose;
-extern bool dump_trace;
+extern bool quiet, dump_trace;
int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
void trace_event(event_t *event);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 7857579..b397c03 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -215,6 +215,16 @@
return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter);
}
+static inline
+struct symbol *machine__find_kernel_function_by_name(struct machine *self,
+ const char *name,
+ struct map **mapp,
+ symbol_filter_t filter)
+{
+ return map_groups__find_function_by_name(&self->kmaps, name, mapp,
+ filter);
+}
+
int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
int verbose, FILE *fp);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index fcc16e4..3b6a529 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -74,10 +74,9 @@
static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
static struct machine machine;
-/* Initialize symbol maps and path of vmlinux */
+/* Initialize symbol maps and path of vmlinux/modules */
static int init_vmlinux(void)
{
- struct dso *kernel;
int ret;
symbol_conf.sort_by_name = true;
@@ -91,33 +90,61 @@
goto out;
}
- ret = machine__init(&machine, "/", 0);
+ ret = machine__init(&machine, "", HOST_KERNEL_ID);
if (ret < 0)
goto out;
- kernel = dso__new_kernel(symbol_conf.vmlinux_name);
- if (kernel == NULL)
- die("Failed to create kernel dso.");
-
- ret = __machine__create_kernel_maps(&machine, kernel);
- if (ret < 0)
- pr_debug("Failed to create kernel maps.\n");
-
+ if (machine__create_kernel_maps(&machine) < 0) {
+ pr_debug("machine__create_kernel_maps ");
+ goto out;
+ }
out:
if (ret < 0)
pr_warning("Failed to init vmlinux path.\n");
return ret;
}
-#ifdef DWARF_SUPPORT
-static int open_vmlinux(void)
+static struct symbol *__find_kernel_function_by_name(const char *name,
+ struct map **mapp)
{
- if (map__load(machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
- pr_debug("Failed to load kernel map.\n");
- return -EINVAL;
+ return machine__find_kernel_function_by_name(&machine, name, mapp,
+ NULL);
+}
+
+const char *kernel_get_module_path(const char *module)
+{
+ struct dso *dso;
+
+ if (module) {
+ list_for_each_entry(dso, &machine.kernel_dsos, node) {
+ if (strncmp(dso->short_name + 1, module,
+ dso->short_name_len - 2) == 0)
+ goto found;
+ }
+ pr_debug("Failed to find module %s.\n", module);
+ return NULL;
+ } else {
+ dso = machine.vmlinux_maps[MAP__FUNCTION]->dso;
+ if (dso__load_vmlinux_path(dso,
+ machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
+ pr_debug("Failed to load kernel map.\n");
+ return NULL;
+ }
}
- pr_debug("Try to open %s\n", machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name);
- return open(machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name, O_RDONLY);
+found:
+ return dso->long_name;
+}
+
+#ifdef DWARF_SUPPORT
+static int open_vmlinux(const char *module)
+{
+ const char *path = kernel_get_module_path(module);
+ if (!path) {
+ pr_err("Failed to find path of %s module", module ?: "kernel");
+ return -ENOENT;
+ }
+ pr_debug("Try to open %s\n", path);
+ return open(path, O_RDONLY);
}
/*
@@ -125,20 +152,19 @@
* Currently only handles kprobes.
*/
static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
- struct perf_probe_point *pp)
+ struct perf_probe_point *pp)
{
struct symbol *sym;
- int fd, ret = -ENOENT;
+ struct map *map;
+ u64 addr;
+ int ret = -ENOENT;
- sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION],
- tp->symbol, NULL);
+ sym = __find_kernel_function_by_name(tp->symbol, &map);
if (sym) {
- fd = open_vmlinux();
- if (fd >= 0) {
- ret = find_perf_probe_point(fd,
- sym->start + tp->offset, pp);
- close(fd);
- }
+ addr = map->unmap_ip(map, sym->start + tp->offset);
+ pr_debug("try to find %s+%ld@%llx\n", tp->symbol,
+ tp->offset, addr);
+ ret = find_perf_probe_point((unsigned long)addr, pp);
}
if (ret <= 0) {
pr_debug("Failed to find corresponding probes from "
@@ -156,12 +182,12 @@
/* Try to find perf_probe_event with debuginfo */
static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs,
- int max_tevs)
+ int max_tevs, const char *module)
{
bool need_dwarf = perf_probe_event_need_dwarf(pev);
int fd, ntevs;
- fd = open_vmlinux();
+ fd = open_vmlinux(module);
if (fd < 0) {
if (need_dwarf) {
pr_warning("Failed to open debuginfo file.\n");
@@ -300,7 +326,7 @@
* Show line-range always requires debuginfo to find source file and
* line number.
*/
-int show_line_range(struct line_range *lr)
+int show_line_range(struct line_range *lr, const char *module)
{
int l = 1;
struct line_node *ln;
@@ -313,7 +339,7 @@
if (ret < 0)
return ret;
- fd = open_vmlinux();
+ fd = open_vmlinux(module);
if (fd < 0) {
pr_warning("Failed to open debuginfo file.\n");
return fd;
@@ -378,11 +404,84 @@
return ret;
}
+static int show_available_vars_at(int fd, struct perf_probe_event *pev,
+ int max_vls, bool externs)
+{
+ char *buf;
+ int ret, i;
+ struct str_node *node;
+ struct variable_list *vls = NULL, *vl;
+
+ buf = synthesize_perf_probe_point(&pev->point);
+ if (!buf)
+ return -EINVAL;
+ pr_debug("Searching variables at %s\n", buf);
+
+ ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
+ if (ret > 0) {
+ /* Some variables were found */
+ fprintf(stdout, "Available variables at %s\n", buf);
+ for (i = 0; i < ret; i++) {
+ vl = &vls[i];
+ /*
+ * A probe point might be converted to
+ * several trace points.
+ */
+ fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
+ vl->point.offset);
+ free(vl->point.symbol);
+ if (vl->vars) {
+ strlist__for_each(node, vl->vars)
+ fprintf(stdout, "\t\t%s\n", node->s);
+ strlist__delete(vl->vars);
+ } else
+ fprintf(stdout, "(No variables)\n");
+ }
+ free(vls);
+ } else
+ pr_err("Failed to find variables at %s (%d)\n", buf, ret);
+
+ free(buf);
+ return ret;
+}
+
+/* Show available variables on given probe point */
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+ int max_vls, const char *module, bool externs)
+{
+ int i, fd, ret = 0;
+
+ ret = init_vmlinux();
+ if (ret < 0)
+ return ret;
+
+ fd = open_vmlinux(module);
+ if (fd < 0) {
+ pr_warning("Failed to open debuginfo file.\n");
+ return fd;
+ }
+
+ setup_pager();
+
+ for (i = 0; i < npevs && ret >= 0; i++)
+ ret = show_available_vars_at(fd, &pevs[i], max_vls, externs);
+
+ close(fd);
+ return ret;
+}
+
#else /* !DWARF_SUPPORT */
static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
- struct perf_probe_point *pp)
+ struct perf_probe_point *pp)
{
+ struct symbol *sym;
+
+ sym = __find_kernel_function_by_name(tp->symbol, NULL);
+ if (!sym) {
+ pr_err("Failed to find symbol %s in kernel.\n", tp->symbol);
+ return -ENOENT;
+ }
pp->function = strdup(tp->symbol);
if (pp->function == NULL)
return -ENOMEM;
@@ -394,7 +493,7 @@
static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs __unused,
- int max_tevs __unused)
+ int max_tevs __unused, const char *mod __unused)
{
if (perf_probe_event_need_dwarf(pev)) {
pr_warning("Debuginfo-analysis is not supported.\n");
@@ -403,12 +502,19 @@
return 0;
}
-int show_line_range(struct line_range *lr __unused)
+int show_line_range(struct line_range *lr __unused, const char *module __unused)
{
pr_warning("Debuginfo-analysis is not supported.\n");
return -ENOSYS;
}
+int show_available_vars(struct perf_probe_event *pevs __unused,
+ int npevs __unused, int max_vls __unused,
+ const char *module __unused, bool externs __unused)
+{
+ pr_warning("Debuginfo-analysis is not supported.\n");
+ return -ENOSYS;
+}
#endif
int parse_line_range_desc(const char *arg, struct line_range *lr)
@@ -1087,7 +1193,7 @@
}
static int convert_to_perf_probe_event(struct probe_trace_event *tev,
- struct perf_probe_event *pev)
+ struct perf_probe_event *pev)
{
char buf[64] = "";
int i, ret;
@@ -1516,14 +1622,14 @@
static int convert_to_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs,
- int max_tevs)
+ int max_tevs, const char *module)
{
struct symbol *sym;
int ret = 0, i;
struct probe_trace_event *tev;
/* Convert perf_probe_event with debuginfo */
- ret = try_to_find_probe_trace_events(pev, tevs, max_tevs);
+ ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);
if (ret != 0)
return ret;
@@ -1572,8 +1678,7 @@
}
/* Currently just checking function name from symbol map */
- sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION],
- tev->point.symbol, NULL);
+ sym = __find_kernel_function_by_name(tev->point.symbol, NULL);
if (!sym) {
pr_warning("Kernel symbol \'%s\' not found.\n",
tev->point.symbol);
@@ -1596,7 +1701,7 @@
};
int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
- bool force_add, int max_tevs)
+ int max_tevs, const char *module, bool force_add)
{
int i, j, ret;
struct __event_package *pkgs;
@@ -1617,7 +1722,9 @@
pkgs[i].pev = &pevs[i];
/* Convert with or without debuginfo */
ret = convert_to_probe_trace_events(pkgs[i].pev,
- &pkgs[i].tevs, max_tevs);
+ &pkgs[i].tevs,
+ max_tevs,
+ module);
if (ret < 0)
goto end;
pkgs[i].ntevs = ret;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5af3924..5accbed 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -90,6 +90,12 @@
struct list_head line_list; /* Visible lines */
};
+/* List of variables */
+struct variable_list {
+ struct probe_trace_point point; /* Actual probepoint */
+ struct strlist *vars; /* Available variables */
+};
+
/* Command string to events */
extern int parse_perf_probe_command(const char *cmd,
struct perf_probe_event *pev);
@@ -109,12 +115,18 @@
/* Command string to line-range */
extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
+/* Internal use: Return kernel/module path */
+extern const char *kernel_get_module_path(const char *module);
extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
- bool force_add, int max_probe_points);
+ int max_probe_points, const char *module,
+ bool force_add);
extern int del_perf_probe_events(struct strlist *dellist);
extern int show_perf_probe_events(void);
-extern int show_line_range(struct line_range *lr);
+extern int show_line_range(struct line_range *lr, const char *module);
+extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
+ int max_probe_points, const char *module,
+ bool externs);
/* Maximum index number of event-name postfix */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 32b81f7..3991d73 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -116,6 +116,101 @@
}
}
+/* Dwarf FL wrappers */
+
+static int __linux_kernel_find_elf(Dwfl_Module *mod,
+ void **userdata,
+ const char *module_name,
+ Dwarf_Addr base,
+ char **file_name, Elf **elfp)
+{
+ int fd;
+ const char *path = kernel_get_module_path(module_name);
+
+ if (path) {
+ fd = open(path, O_RDONLY);
+ if (fd >= 0) {
+ *file_name = strdup(path);
+ return fd;
+ }
+ }
+ /* If failed, try to call standard method */
+ return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
+ file_name, elfp);
+}
+
+static char *debuginfo_path; /* Currently dummy */
+
+static const Dwfl_Callbacks offline_callbacks = {
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+ .debuginfo_path = &debuginfo_path,
+
+ .section_address = dwfl_offline_section_address,
+
+ /* We use this table for core files too. */
+ .find_elf = dwfl_build_id_find_elf,
+};
+
+static const Dwfl_Callbacks kernel_callbacks = {
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+ .debuginfo_path = &debuginfo_path,
+
+ .find_elf = __linux_kernel_find_elf,
+ .section_address = dwfl_linux_kernel_module_section_address,
+};
+
+/* Get a Dwarf from offline image */
+static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias)
+{
+ Dwfl_Module *mod;
+ Dwarf *dbg = NULL;
+
+ if (!dwflp)
+ return NULL;
+
+ *dwflp = dwfl_begin(&offline_callbacks);
+ if (!*dwflp)
+ return NULL;
+
+ mod = dwfl_report_offline(*dwflp, "", "", fd);
+ if (!mod)
+ goto error;
+
+ dbg = dwfl_module_getdwarf(mod, bias);
+ if (!dbg) {
+error:
+ dwfl_end(*dwflp);
+ *dwflp = NULL;
+ }
+ return dbg;
+}
+
+/* Get a Dwarf from live kernel image */
+static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
+ Dwarf_Addr *bias)
+{
+ Dwarf *dbg;
+
+ if (!dwflp)
+ return NULL;
+
+ *dwflp = dwfl_begin(&kernel_callbacks);
+ if (!*dwflp)
+ return NULL;
+
+ /* Load the kernel dwarves: Don't care the result here */
+ dwfl_linux_kernel_report_kernel(*dwflp);
+ dwfl_linux_kernel_report_modules(*dwflp);
+
+ dbg = dwfl_addrdwarf(*dwflp, addr, bias);
+ /* Here, check whether we could get a real dwarf */
+ if (!dbg) {
+ dwfl_end(*dwflp);
+ *dwflp = NULL;
+ }
+ return dbg;
+}
+
/* Dwarf wrappers */
/* Find the realpath of the target file. */
@@ -160,26 +255,44 @@
return name ? (strcmp(tname, name) == 0) : false;
}
-/* Get type die, but skip qualifiers and typedef */
-static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+/* Get type die */
+static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
{
Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
+ dwarf_formref_die(&attr, die_mem))
+ return die_mem;
+ else
+ return NULL;
+}
+
+/* Get a type die, but skip qualifiers */
+static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
int tag;
do {
- if (dwarf_attr(vr_die, DW_AT_type, &attr) == NULL ||
- dwarf_formref_die(&attr, die_mem) == NULL)
- return NULL;
-
- tag = dwarf_tag(die_mem);
- vr_die = die_mem;
+ vr_die = die_get_type(vr_die, die_mem);
+ if (!vr_die)
+ break;
+ tag = dwarf_tag(vr_die);
} while (tag == DW_TAG_const_type ||
tag == DW_TAG_restrict_type ||
tag == DW_TAG_volatile_type ||
- tag == DW_TAG_shared_type ||
- tag == DW_TAG_typedef);
+ tag == DW_TAG_shared_type);
- return die_mem;
+ return vr_die;
+}
+
+/* Get a type die, but skip qualifiers and typedef */
+static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+ do {
+ vr_die = __die_get_real_type(vr_die, die_mem);
+ } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
+
+ return vr_die;
}
static bool die_is_signed_type(Dwarf_Die *tp_die)
@@ -320,25 +433,35 @@
return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
}
+struct __find_variable_param {
+ const char *name;
+ Dwarf_Addr addr;
+};
+
static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
{
- const char *name = data;
+ struct __find_variable_param *fvp = data;
int tag;
tag = dwarf_tag(die_mem);
if ((tag == DW_TAG_formal_parameter ||
tag == DW_TAG_variable) &&
- die_compare_name(die_mem, name))
+ die_compare_name(die_mem, fvp->name))
return DIE_FIND_CB_FOUND;
- return DIE_FIND_CB_CONTINUE;
+ if (dwarf_haspc(die_mem, fvp->addr))
+ return DIE_FIND_CB_CONTINUE;
+ else
+ return DIE_FIND_CB_SIBLING;
}
-/* Find a variable called 'name' */
-static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name,
- Dwarf_Die *die_mem)
+/* Find a variable called 'name' at given address */
+static Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+ Dwarf_Addr addr, Dwarf_Die *die_mem)
{
- return die_find_child(sp_die, __die_find_variable_cb, (void *)name,
+ struct __find_variable_param fvp = { .name = name, .addr = addr};
+
+ return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
die_mem);
}
@@ -361,6 +484,60 @@
die_mem);
}
+/* Get the name of given variable DIE */
+static int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
+{
+ Dwarf_Die type;
+ int tag, ret, ret2;
+ const char *tmp = "";
+
+ if (__die_get_real_type(vr_die, &type) == NULL)
+ return -ENOENT;
+
+ tag = dwarf_tag(&type);
+ if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
+ tmp = "*";
+ else if (tag == DW_TAG_subroutine_type) {
+ /* Function pointer */
+ ret = snprintf(buf, len, "(function_type)");
+ return (ret >= len) ? -E2BIG : ret;
+ } else {
+ if (!dwarf_diename(&type))
+ return -ENOENT;
+ if (tag == DW_TAG_union_type)
+ tmp = "union ";
+ else if (tag == DW_TAG_structure_type)
+ tmp = "struct ";
+ /* Write a base name */
+ ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
+ return (ret >= len) ? -E2BIG : ret;
+ }
+ ret = die_get_typename(&type, buf, len);
+ if (ret > 0) {
+ ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
+ ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+ }
+ return ret;
+}
+
+/* Get the name and type of given variable DIE, stored as "type\tname" */
+static int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
+{
+ int ret, ret2;
+
+ ret = die_get_typename(vr_die, buf, len);
+ if (ret < 0) {
+ pr_debug("Failed to get type, make it unknown.\n");
+ ret = snprintf(buf, len, "(unknown_type)");
+ }
+ if (ret > 0) {
+ ret2 = snprintf(buf + ret, len - ret, "\t%s",
+ dwarf_diename(vr_die));
+ ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+ }
+ return ret;
+}
+
/*
* Probe finder related functions
*/
@@ -374,8 +551,13 @@
return ref;
}
-/* Show a location */
-static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
+/*
+ * Convert a location into trace_arg.
+ * If tvar == NULL, this just checks variable can be converted.
+ */
+static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
+ Dwarf_Op *fb_ops,
+ struct probe_trace_arg *tvar)
{
Dwarf_Attribute attr;
Dwarf_Op *op;
@@ -384,20 +566,23 @@
Dwarf_Word offs = 0;
bool ref = false;
const char *regs;
- struct probe_trace_arg *tvar = pf->tvar;
int ret;
+ if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
+ goto static_var;
+
/* TODO: handle more than 1 exprs */
if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL ||
- dwarf_getlocation_addr(&attr, pf->addr, &op, &nops, 1) <= 0 ||
+ dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0 ||
nops == 0) {
/* TODO: Support const_value */
- pr_err("Failed to find the location of %s at this address.\n"
- " Perhaps, it has been optimized out.\n", pf->pvar->var);
return -ENOENT;
}
if (op->atom == DW_OP_addr) {
+static_var:
+ if (!tvar)
+ return 0;
/* Static variables on memory (not stack), make @varname */
ret = strlen(dwarf_diename(vr_die));
tvar->value = zalloc(ret + 2);
@@ -412,14 +597,11 @@
/* If this is based on frame buffer, set the offset */
if (op->atom == DW_OP_fbreg) {
- if (pf->fb_ops == NULL) {
- pr_warning("The attribute of frame base is not "
- "supported.\n");
+ if (fb_ops == NULL)
return -ENOTSUP;
- }
ref = true;
offs = op->number;
- op = &pf->fb_ops[0];
+ op = &fb_ops[0];
}
if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
@@ -435,13 +617,18 @@
} else if (op->atom == DW_OP_regx) {
regn = op->number;
} else {
- pr_warning("DW_OP %x is not supported.\n", op->atom);
+ pr_debug("DW_OP %x is not supported.\n", op->atom);
return -ENOTSUP;
}
+ if (!tvar)
+ return 0;
+
regs = get_arch_regstr(regn);
if (!regs) {
- pr_warning("Mapping for DWARF register number %u missing on this architecture.", regn);
+ /* This should be a bug in DWARF or this tool */
+ pr_warning("Mapping for DWARF register number %u "
+ "missing on this architecture.", regn);
return -ERANGE;
}
@@ -666,8 +853,14 @@
pr_debug("Converting variable %s into trace event.\n",
dwarf_diename(vr_die));
- ret = convert_variable_location(vr_die, pf);
- if (ret == 0 && pf->pvar->field) {
+ ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
+ pf->tvar);
+ if (ret == -ENOENT)
+ pr_err("Failed to find the location of %s at this address.\n"
+ " Perhaps, it has been optimized out.\n", pf->pvar->var);
+ else if (ret == -ENOTSUP)
+ pr_err("Sorry, we don't support this variable location yet.\n");
+ else if (pf->pvar->field) {
ret = convert_variable_fields(vr_die, pf->pvar->var,
pf->pvar->field, &pf->tvar->ref,
&die_mem);
@@ -722,56 +915,39 @@
pr_debug("Searching '%s' variable in context.\n",
pf->pvar->var);
/* Search child die for local variables and parameters. */
- if (die_find_variable(sp_die, pf->pvar->var, &vr_die))
+ if (die_find_variable_at(sp_die, pf->pvar->var, pf->addr, &vr_die))
ret = convert_variable(&vr_die, pf);
else {
/* Search upper class */
nscopes = dwarf_getscopes_die(sp_die, &scopes);
- if (nscopes > 0) {
- ret = dwarf_getscopevar(scopes, nscopes, pf->pvar->var,
- 0, NULL, 0, 0, &vr_die);
- if (ret >= 0)
+ while (nscopes-- > 1) {
+ pr_debug("Searching variables in %s\n",
+ dwarf_diename(&scopes[nscopes]));
+ /* We should check this scope, so give dummy address */
+ if (die_find_variable_at(&scopes[nscopes],
+ pf->pvar->var, 0,
+ &vr_die)) {
ret = convert_variable(&vr_die, pf);
- else
- ret = -ENOENT;
+ goto found;
+ }
+ }
+ if (scopes)
free(scopes);
- } else
- ret = -ENOENT;
+ ret = -ENOENT;
}
+found:
if (ret < 0)
pr_warning("Failed to find '%s' in this function.\n",
pf->pvar->var);
return ret;
}
-/* Show a probe point to output buffer */
-static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
+/* Convert subprogram DIE to trace point */
+static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
+ bool retprobe, struct probe_trace_point *tp)
{
- struct probe_trace_event *tev;
Dwarf_Addr eaddr;
- Dwarf_Die die_mem;
const char *name;
- int ret, i;
- Dwarf_Attribute fb_attr;
- size_t nops;
-
- if (pf->ntevs == pf->max_tevs) {
- pr_warning("Too many( > %d) probe point found.\n",
- pf->max_tevs);
- return -ERANGE;
- }
- tev = &pf->tevs[pf->ntevs++];
-
- /* If no real subprogram, find a real one */
- if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
- sp_die = die_find_real_subprogram(&pf->cu_die,
- pf->addr, &die_mem);
- if (!sp_die) {
- pr_warning("Failed to find probe point in any "
- "functions.\n");
- return -ENOENT;
- }
- }
/* Copy the name of probe point */
name = dwarf_diename(sp_die);
@@ -781,26 +957,45 @@
dwarf_diename(sp_die));
return -ENOENT;
}
- tev->point.symbol = strdup(name);
- if (tev->point.symbol == NULL)
+ tp->symbol = strdup(name);
+ if (tp->symbol == NULL)
return -ENOMEM;
- tev->point.offset = (unsigned long)(pf->addr - eaddr);
+ tp->offset = (unsigned long)(paddr - eaddr);
} else
/* This function has no name. */
- tev->point.offset = (unsigned long)pf->addr;
+ tp->offset = (unsigned long)paddr;
/* Return probe must be on the head of a subprogram */
- if (pf->pev->point.retprobe) {
- if (tev->point.offset != 0) {
+ if (retprobe) {
+ if (eaddr != paddr) {
pr_warning("Return probe must be on the head of"
" a real function\n");
return -EINVAL;
}
- tev->point.retprobe = true;
+ tp->retprobe = true;
}
- pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
- tev->point.offset);
+ return 0;
+}
+
+/* Call probe_finder callback with real subprogram DIE */
+static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+ Dwarf_Die die_mem;
+ Dwarf_Attribute fb_attr;
+ size_t nops;
+ int ret;
+
+ /* If no real subprogram, find a real one */
+ if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
+ sp_die = die_find_real_subprogram(&pf->cu_die,
+ pf->addr, &die_mem);
+ if (!sp_die) {
+ pr_warning("Failed to find probe point in any "
+ "functions.\n");
+ return -ENOENT;
+ }
+ }
/* Get the frame base attribute/ops */
dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr);
@@ -820,22 +1015,13 @@
#endif
}
- /* Find each argument */
- tev->nargs = pf->pev->nargs;
- tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
- if (tev->args == NULL)
- return -ENOMEM;
- for (i = 0; i < pf->pev->nargs; i++) {
- pf->pvar = &pf->pev->args[i];
- pf->tvar = &tev->args[i];
- ret = find_variable(sp_die, pf);
- if (ret != 0)
- return ret;
- }
+ /* Call finder's callback handler */
+ ret = pf->callback(sp_die, pf);
/* *pf->fb_ops will be cached in libdw. Don't free it. */
pf->fb_ops = NULL;
- return 0;
+
+ return ret;
}
/* Find probe point from its line number */
@@ -871,7 +1057,7 @@
(int)i, lineno, (uintmax_t)addr);
pf->addr = addr;
- ret = convert_probe_point(NULL, pf);
+ ret = call_probe_finder(NULL, pf);
/* Continuing, because target line might be inlined. */
}
return ret;
@@ -984,7 +1170,7 @@
(int)i, lineno, (unsigned long long)addr);
pf->addr = addr;
- ret = convert_probe_point(sp_die, pf);
+ ret = call_probe_finder(sp_die, pf);
/* Continuing, because target line might be inlined. */
}
/* TODO: deallocate lines, but how? */
@@ -1019,7 +1205,7 @@
pr_debug("found inline addr: 0x%jx\n",
(uintmax_t)pf->addr);
- param->retval = convert_probe_point(in_die, pf);
+ param->retval = call_probe_finder(in_die, pf);
if (param->retval < 0)
return DWARF_CB_ABORT;
}
@@ -1057,7 +1243,7 @@
}
pf->addr += pp->offset;
/* TODO: Check the address in this function */
- param->retval = convert_probe_point(sp_die, pf);
+ param->retval = call_probe_finder(sp_die, pf);
}
} else {
struct dwarf_callback_param _param = {.data = (void *)pf,
@@ -1079,90 +1265,276 @@
return _param.retval;
}
-/* Find probe_trace_events specified by perf_probe_event from debuginfo */
-int find_probe_trace_events(int fd, struct perf_probe_event *pev,
- struct probe_trace_event **tevs, int max_tevs)
+/* Find probe points from debuginfo */
+static int find_probes(int fd, struct probe_finder *pf)
{
- struct probe_finder pf = {.pev = pev, .max_tevs = max_tevs};
- struct perf_probe_point *pp = &pev->point;
+ struct perf_probe_point *pp = &pf->pev->point;
Dwarf_Off off, noff;
size_t cuhl;
Dwarf_Die *diep;
- Dwarf *dbg;
+ Dwarf *dbg = NULL;
+ Dwfl *dwfl;
+ Dwarf_Addr bias; /* Currently ignored */
int ret = 0;
- pf.tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
- if (pf.tevs == NULL)
- return -ENOMEM;
- *tevs = pf.tevs;
- pf.ntevs = 0;
-
- dbg = dwarf_begin(fd, DWARF_C_READ);
+ dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
if (!dbg) {
pr_warning("No dwarf info found in the vmlinux - "
"please rebuild with CONFIG_DEBUG_INFO=y.\n");
- free(pf.tevs);
- *tevs = NULL;
return -EBADF;
}
#if _ELFUTILS_PREREQ(0, 142)
/* Get the call frame information from this dwarf */
- pf.cfi = dwarf_getcfi(dbg);
+ pf->cfi = dwarf_getcfi(dbg);
#endif
off = 0;
- line_list__init(&pf.lcache);
+ line_list__init(&pf->lcache);
/* Loop on CUs (Compilation Unit) */
while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) &&
ret >= 0) {
/* Get the DIE(Debugging Information Entry) of this CU */
- diep = dwarf_offdie(dbg, off + cuhl, &pf.cu_die);
+ diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die);
if (!diep)
continue;
/* Check if target file is included. */
if (pp->file)
- pf.fname = cu_find_realpath(&pf.cu_die, pp->file);
+ pf->fname = cu_find_realpath(&pf->cu_die, pp->file);
else
- pf.fname = NULL;
+ pf->fname = NULL;
- if (!pp->file || pf.fname) {
+ if (!pp->file || pf->fname) {
if (pp->function)
- ret = find_probe_point_by_func(&pf);
+ ret = find_probe_point_by_func(pf);
else if (pp->lazy_line)
- ret = find_probe_point_lazy(NULL, &pf);
+ ret = find_probe_point_lazy(NULL, pf);
else {
- pf.lno = pp->line;
- ret = find_probe_point_by_line(&pf);
+ pf->lno = pp->line;
+ ret = find_probe_point_by_line(pf);
}
}
off = noff;
}
- line_list__free(&pf.lcache);
- dwarf_end(dbg);
+ line_list__free(&pf->lcache);
+ if (dwfl)
+ dwfl_end(dwfl);
- return (ret < 0) ? ret : pf.ntevs;
+ return ret;
+}
+
+/* Add a found probe point into trace event list */
+static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+ struct trace_event_finder *tf =
+ container_of(pf, struct trace_event_finder, pf);
+ struct probe_trace_event *tev;
+ int ret, i;
+
+ /* Check number of tevs */
+ if (tf->ntevs == tf->max_tevs) {
+ pr_warning("Too many( > %d) probe point found.\n",
+ tf->max_tevs);
+ return -ERANGE;
+ }
+ tev = &tf->tevs[tf->ntevs++];
+
+ ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
+ &tev->point);
+ if (ret < 0)
+ return ret;
+
+ pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
+ tev->point.offset);
+
+ /* Find each argument */
+ tev->nargs = pf->pev->nargs;
+ tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+ if (tev->args == NULL)
+ return -ENOMEM;
+ for (i = 0; i < pf->pev->nargs; i++) {
+ pf->pvar = &pf->pev->args[i];
+ pf->tvar = &tev->args[i];
+ ret = find_variable(sp_die, pf);
+ if (ret != 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int find_probe_trace_events(int fd, struct perf_probe_event *pev,
+ struct probe_trace_event **tevs, int max_tevs)
+{
+ struct trace_event_finder tf = {
+ .pf = {.pev = pev, .callback = add_probe_trace_event},
+ .max_tevs = max_tevs};
+ int ret;
+
+ /* Allocate result tevs array */
+ *tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
+ if (*tevs == NULL)
+ return -ENOMEM;
+
+ tf.tevs = *tevs;
+ tf.ntevs = 0;
+
+ ret = find_probes(fd, &tf.pf);
+ if (ret < 0) {
+ free(*tevs);
+ *tevs = NULL;
+ return ret;
+ }
+
+ return (ret < 0) ? ret : tf.ntevs;
+}
+
+#define MAX_VAR_LEN 64
+
+/* Collect available variables in this scope */
+static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+ struct available_var_finder *af = data;
+ struct variable_list *vl;
+ char buf[MAX_VAR_LEN];
+ int tag, ret;
+
+ vl = &af->vls[af->nvls - 1];
+
+ tag = dwarf_tag(die_mem);
+ if (tag == DW_TAG_formal_parameter ||
+ tag == DW_TAG_variable) {
+ ret = convert_variable_location(die_mem, af->pf.addr,
+ af->pf.fb_ops, NULL);
+ if (ret == 0) {
+ ret = die_get_varname(die_mem, buf, MAX_VAR_LEN);
+ pr_debug2("Add new var: %s\n", buf);
+ if (ret > 0)
+ strlist__add(vl->vars, buf);
+ }
+ }
+
+ if (af->child && dwarf_haspc(die_mem, af->pf.addr))
+ return DIE_FIND_CB_CONTINUE;
+ else
+ return DIE_FIND_CB_SIBLING;
+}
+
+/* Add a found vars into available variables list */
+static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+ struct available_var_finder *af =
+ container_of(pf, struct available_var_finder, pf);
+ struct variable_list *vl;
+ Dwarf_Die die_mem, *scopes = NULL;
+ int ret, nscopes;
+
+ /* Check number of tevs */
+ if (af->nvls == af->max_vls) {
+ pr_warning("Too many( > %d) probe point found.\n", af->max_vls);
+ return -ERANGE;
+ }
+ vl = &af->vls[af->nvls++];
+
+ ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
+ &vl->point);
+ if (ret < 0)
+ return ret;
+
+ pr_debug("Probe point found: %s+%lu\n", vl->point.symbol,
+ vl->point.offset);
+
+ /* Find local variables */
+ vl->vars = strlist__new(true, NULL);
+ if (vl->vars == NULL)
+ return -ENOMEM;
+ af->child = true;
+ die_find_child(sp_die, collect_variables_cb, (void *)af, &die_mem);
+
+ /* Find external variables */
+ if (!af->externs)
+ goto out;
+ /* Don't need to search child DIE for externs. */
+ af->child = false;
+ nscopes = dwarf_getscopes_die(sp_die, &scopes);
+ while (nscopes-- > 1)
+ die_find_child(&scopes[nscopes], collect_variables_cb,
+ (void *)af, &die_mem);
+ if (scopes)
+ free(scopes);
+
+out:
+ if (strlist__empty(vl->vars)) {
+ strlist__delete(vl->vars);
+ vl->vars = NULL;
+ }
+
+ return ret;
+}
+
+/* Find available variables at given probe point */
+int find_available_vars_at(int fd, struct perf_probe_event *pev,
+ struct variable_list **vls, int max_vls,
+ bool externs)
+{
+ struct available_var_finder af = {
+ .pf = {.pev = pev, .callback = add_available_vars},
+ .max_vls = max_vls, .externs = externs};
+ int ret;
+
+ /* Allocate result vls array */
+ *vls = zalloc(sizeof(struct variable_list) * max_vls);
+ if (*vls == NULL)
+ return -ENOMEM;
+
+ af.vls = *vls;
+ af.nvls = 0;
+
+ ret = find_probes(fd, &af.pf);
+ if (ret < 0) {
+ /* Free vlist for error */
+ while (af.nvls--) {
+ if (af.vls[af.nvls].point.symbol)
+ free(af.vls[af.nvls].point.symbol);
+ if (af.vls[af.nvls].vars)
+ strlist__delete(af.vls[af.nvls].vars);
+ }
+ free(af.vls);
+ *vls = NULL;
+ return ret;
+ }
+
+ return (ret < 0) ? ret : af.nvls;
}
/* Reverse search */
-int find_perf_probe_point(int fd, unsigned long addr,
- struct perf_probe_point *ppt)
+int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
{
Dwarf_Die cudie, spdie, indie;
- Dwarf *dbg;
+ Dwarf *dbg = NULL;
+ Dwfl *dwfl = NULL;
Dwarf_Line *line;
- Dwarf_Addr laddr, eaddr;
+ Dwarf_Addr laddr, eaddr, bias = 0;
const char *tmp;
int lineno, ret = 0;
bool found = false;
- dbg = dwarf_begin(fd, DWARF_C_READ);
- if (!dbg)
- return -EBADF;
+ /* Open the live linux kernel */
+ dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
+ if (!dbg) {
+ pr_warning("No dwarf info found in the vmlinux - "
+ "please rebuild with CONFIG_DEBUG_INFO=y.\n");
+ ret = -EINVAL;
+ goto end;
+ }
+ /* Adjust address with bias */
+ addr += bias;
/* Find cu die */
- if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr, &cudie)) {
+ if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
+ pr_warning("No CU DIE is found at %lx\n", addr);
ret = -EINVAL;
goto end;
}
@@ -1225,7 +1597,8 @@
}
end:
- dwarf_end(dbg);
+ if (dwfl)
+ dwfl_end(dwfl);
if (ret >= 0)
ret = found ? 1 : 0;
return ret;
@@ -1358,6 +1731,9 @@
struct line_finder *lf = param->data;
struct line_range *lr = lf->lr;
+ pr_debug("find (%llx) %s\n",
+ (unsigned long long)dwarf_dieoffset(sp_die),
+ dwarf_diename(sp_die));
if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
die_compare_name(sp_die, lr->function)) {
lf->fname = dwarf_decl_file(sp_die);
@@ -1401,10 +1777,12 @@
Dwarf_Off off = 0, noff;
size_t cuhl;
Dwarf_Die *diep;
- Dwarf *dbg;
+ Dwarf *dbg = NULL;
+ Dwfl *dwfl;
+ Dwarf_Addr bias; /* Currently ignored */
const char *comp_dir;
- dbg = dwarf_begin(fd, DWARF_C_READ);
+ dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
if (!dbg) {
pr_warning("No dwarf info found in the vmlinux - "
"please rebuild with CONFIG_DEBUG_INFO=y.\n");
@@ -1450,8 +1828,7 @@
}
pr_debug("path: %s\n", lr->path);
- dwarf_end(dbg);
-
+ dwfl_end(dwfl);
return (ret < 0) ? ret : lf.found;
}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 4507d51..bba69d4 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -22,20 +22,27 @@
int max_tevs);
/* Find a perf_probe_point from debuginfo */
-extern int find_perf_probe_point(int fd, unsigned long addr,
+extern int find_perf_probe_point(unsigned long addr,
struct perf_probe_point *ppt);
+/* Find a line range */
extern int find_line_range(int fd, struct line_range *lr);
+/* Find available variables */
+extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
+ struct variable_list **vls, int max_points,
+ bool externs);
+
#include <dwarf.h>
#include <libdw.h>
+#include <libdwfl.h>
#include <version.h>
struct probe_finder {
struct perf_probe_event *pev; /* Target probe event */
- struct probe_trace_event *tevs; /* Result trace events */
- int ntevs; /* Number of trace events */
- int max_tevs; /* Max number of trace events */
+
+ /* Callback when a probe point is found */
+ int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf);
/* For function searching */
int lno; /* Line number */
@@ -53,6 +60,22 @@
struct probe_trace_arg *tvar; /* Current result variable */
};
+struct trace_event_finder {
+ struct probe_finder pf;
+ struct probe_trace_event *tevs; /* Found trace events */
+ int ntevs; /* Number of trace events */
+ int max_tevs; /* Max number of trace events */
+};
+
+struct available_var_finder {
+ struct probe_finder pf;
+ struct variable_list *vls; /* Found variable lists */
+ int nvls; /* Number of variable lists */
+ int max_vls; /* Max no. of variable lists */
+ bool externs; /* Find external vars too */
+ bool child; /* Search child scopes */
+};
+
struct line_finder {
struct line_range *lr; /* Target line range */
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c
index 6d0df80..8bc010e 100644
--- a/tools/perf/util/ui/browser.c
+++ b/tools/perf/util/ui/browser.c
@@ -1,4 +1,3 @@
-#include <slang.h>
#include "libslang.h"
#include <linux/compiler.h>
#include <linux/list.h>