Merge tag 'v4.7-rc5' into perf/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/Makefile b/Makefile
index 6471f20..938089d 100644
--- a/Makefile
+++ b/Makefile
@@ -1038,7 +1038,7 @@
ifeq ($(has_libelf),1)
objtool_target := tools/objtool FORCE
else
- $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev or elfutils-libelf-devel")
+ $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
SKIP_STACK_VALIDATION := 1
export SKIP_STACK_VALIDATION
endif
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 33787ee..929655d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1622,6 +1622,29 @@
}
EXPORT_SYMBOL_GPL(events_sysfs_show);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_ht_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_ht_attr, attr);
+
+ /*
+ * Report conditional events depending on Hyper-Threading.
+ *
+ * This is overly conservative as usually the HT special
+ * handling is not needed if the other CPU thread is idle.
+ *
+ * Note this does not (and cannot) handle the case when thread
+ * siblings are invisible, for example with virtualization
+ * if they are owned by some other guest. The user tool
+ * has to re-read when a thread sibling gets onlined later.
+ */
+ return sprintf(page, "%s",
+ topology_max_smt_threads() > 1 ?
+ pmu_attr->event_str_ht :
+ pmu_attr->event_str_noht);
+}
+
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7c66695..3ed528c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -16,6 +16,7 @@
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
+#include <asm/intel-family.h>
#include <asm/apic.h>
#include "../perf_event.h"
@@ -177,7 +178,7 @@
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_skl_event_constraints[] = {
+static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
@@ -186,10 +187,8 @@
};
static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7,
- MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7,
- MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
EVENT_EXTRA_END
};
@@ -225,14 +224,51 @@
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
-struct attribute *snb_events_attrs[] = {
+/*
+ * topdown events for Intel Core CPUs.
+ *
+ * The events are all in slots, which is a free slot in a 4 wide
+ * pipeline. Some events are already reported in slots, for cycle
+ * events we multiply by the pipeline width (4).
+ *
+ * With Hyper Threading on, topdown metrics are either summed or averaged
+ * between the threads of a core: (count_t0 + count_t1).
+ *
+ * For the average case the metric is always scaled to pipeline width,
+ * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
+ */
+
+EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
+ "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */
+ "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */
+EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
+ "event=0xe,umask=0x1"); /* uops_issued.any */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
+ "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
+ "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
+ "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */
+ "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
+ "4", "2");
+
+static struct attribute *snb_events_attrs[] = {
EVENT_PTR(mem_ld_snb),
EVENT_PTR(mem_st_snb),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL,
};
@@ -258,7 +294,7 @@
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_bdw_event_constraints[] = {
+static struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
@@ -1332,6 +1368,29 @@
},
};
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
+/* no_alloc_cycles.not_delivered */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
+ "event=0xca,umask=0x50");
+EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
+ "event=0xc2,umask=0x10");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
+ "event=0xc2,umask=0x10");
+
+static struct attribute *slm_events_attrs[] = {
+ EVENT_PTR(td_total_slots_slm),
+ EVENT_PTR(td_total_slots_scale_slm),
+ EVENT_PTR(td_fetch_bubbles_slm),
+ EVENT_PTR(td_fetch_bubbles_scale_slm),
+ EVENT_PTR(td_slots_issued_slm),
+ EVENT_PTR(td_slots_retired_slm),
+ NULL
+};
+
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
@@ -3261,11 +3320,11 @@
u32 rev = UINT_MAX; /* default to broken for unknown models */
switch (cpu_data(cpu).x86_model) {
- case 42: /* SNB */
+ case INTEL_FAM6_SANDYBRIDGE:
rev = 0x28;
break;
- case 45: /* SNB-EP */
+ case INTEL_FAM6_SANDYBRIDGE_X:
switch (cpu_data(cpu).x86_mask) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
@@ -3437,6 +3496,13 @@
EVENT_PTR(cycles_ct),
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL
};
@@ -3508,15 +3574,15 @@
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_model) {
- case 14: /* 65nm Core "Yonah" */
+ case INTEL_FAM6_CORE_YONAH:
pr_cont("Core events, ");
break;
- case 15: /* 65nm Core2 "Merom" */
+ case INTEL_FAM6_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
- case 22: /* 65nm Core2 "Merom-L" */
- case 23: /* 45nm Core2 "Penryn" */
- case 29: /* 45nm Core2 "Dunnington (MP) */
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3527,9 +3593,9 @@
pr_cont("Core2 events, ");
break;
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3557,11 +3623,11 @@
pr_cont("Nehalem events, ");
break;
- case 28: /* 45nm Atom "Pineview" */
- case 38: /* 45nm Atom "Lincroft" */
- case 39: /* 32nm Atom "Penwell" */
- case 53: /* 32nm Atom "Cloverview" */
- case 54: /* 32nm Atom "Cedarview" */
+ case INTEL_FAM6_ATOM_PINEVIEW:
+ case INTEL_FAM6_ATOM_LINCROFT:
+ case INTEL_FAM6_ATOM_PENWELL:
+ case INTEL_FAM6_ATOM_CLOVERVIEW:
+ case INTEL_FAM6_ATOM_CEDARVIEW:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3573,9 +3639,9 @@
pr_cont("Atom events, ");
break;
- case 55: /* 22nm Atom "Silvermont" */
- case 76: /* 14nm Atom "Airmont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_AIRMONT:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -3587,11 +3653,12 @@
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.cpu_events = slm_events_attrs;
pr_cont("Silvermont events, ");
break;
- case 92: /* 14nm Atom "Goldmont" */
- case 95: /* 14nm Atom "Goldmont Denverton" */
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_ATOM_DENVERTON:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -3614,9 +3681,9 @@
pr_cont("Goldmont events, ");
break;
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3643,8 +3710,8 @@
pr_cont("Westmere events, ");
break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
x86_add_quirk(intel_sandybridge_quirk);
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
@@ -3657,7 +3724,7 @@
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- if (boot_cpu_data.x86_model == 45)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3679,8 +3746,8 @@
pr_cont("SandyBridge events, ");
break;
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3696,7 +3763,7 @@
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
- if (boot_cpu_data.x86_model == 62)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3714,10 +3781,10 @@
break;
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
x86_add_quirk(intel_ht_bug);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -3741,10 +3808,10 @@
pr_cont("Haswell events, ");
break;
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_X:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -3777,7 +3844,7 @@
pr_cont("Broadwell events, ");
break;
- case 87: /* Knights Landing Xeon Phi */
+ case INTEL_FAM6_XEON_PHI_KNL:
memcpy(hw_cache_event_ids,
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
@@ -3795,16 +3862,22 @@
pr_cont("Knights Landing events, ");
break;
- case 142: /* 14nm Kabylake Mobile */
- case 158: /* 14nm Kabylake Desktop */
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- case 85: /* 14nm Skylake Server */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
+ /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
+ event_attr_td_recovery_bubbles.event_str_noht =
+ "event=0xd,umask=0x1,cmask=1";
+ event_attr_td_recovery_bubbles.event_str_ht =
+ "event=0xd,umask=0x1,cmask=1,any=1";
+
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
@@ -3917,16 +3990,14 @@
*/
static __init int fixup_ht_bug(void)
{
- int cpu = smp_processor_id();
- int w, c;
+ int c;
/*
* problem not present on this CPU model, nothing to do
*/
if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
return 0;
- w = cpumask_weight(topology_sibling_cpumask(cpu));
- if (w > 1) {
+ if (topology_max_smt_threads() > 1) {
pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
return 0;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9ba4e41..4c7638b 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -89,6 +89,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
@@ -511,37 +512,37 @@
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
- X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */
- X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */
- X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
- X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */
- X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */
- X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
- X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */
- X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */
- X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */
- X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */
- X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
- X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
- X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */
- X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */
- X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
- X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */
- X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */
- X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */
- X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
- X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */
- X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index e30eef4..d0c58b3 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -55,6 +55,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
@@ -786,26 +787,27 @@
};
static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
- X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */
- X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */
- X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */
- X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */
- X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */
- X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */
- X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */
- X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */
- X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
- X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */
- X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index fce7406..dc965d2 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,4 +1,5 @@
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -882,7 +883,7 @@
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
+ struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
int phys_id, pkg, ret;
@@ -903,20 +904,37 @@
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+
/*
- * for performance monitoring unit with multiple boxes,
- * each box has a different function id.
+ * Some platforms, e.g. Knights Landing, use a common PCI device ID
+ * for multiple instances of an uncore PMU device type. We should check
+ * PCI slot and func to indicate the uncore box.
*/
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
- /* Knights Landing uses a common PCI device ID for multiple instances of
- * an uncore PMU device type. There is only one entry per device type in
- * the knl_uncore_pci_ids table inspite of multiple devices present for
- * some device types. Hence PCI device idx would be 0 for all devices.
- * So increment pmu pointer to point to an unused array element.
- */
- if (boot_cpu_data.x86_model == 87) {
- while (pmu->func_id >= 0)
- pmu++;
+ if (id->driver_data & ~0xffff) {
+ struct pci_driver *pci_drv = pdev->driver;
+ const struct pci_device_id *ids = pci_drv->id_table;
+ unsigned int devfn;
+
+ while (ids && ids->vendor) {
+ if ((ids->vendor == pdev->vendor) &&
+ (ids->device == pdev->device)) {
+ devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
+ UNCORE_PCI_DEV_FUNC(ids->driver_data));
+ if (devfn == pdev->devfn) {
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
+ break;
+ }
+ }
+ ids++;
+ }
+ if (pmu == NULL)
+ return -ENODEV;
+ } else {
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
@@ -956,7 +974,7 @@
static void uncore_pci_remove(struct pci_dev *pdev)
{
- struct intel_uncore_box *box = pci_get_drvdata(pdev);
+ struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
int i, phys_id, pkg;
@@ -1365,26 +1383,26 @@
};
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
- X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */
- X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */
- X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */
- X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */
- X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */
- X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */
- X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */
- X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */
- X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
- X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
- X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
- X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79766b9..66c3a36 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -15,7 +15,11 @@
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
+ ((dev << 24) | (func << 16) | (type << 8) | idx)
#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff)
+#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 874e8bd..824e540 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2164,21 +2164,101 @@
*/
static const struct pci_device_id knl_uncore_pci_ids[] = {
- { /* MC UClk */
+ { /* MC0 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0),
},
- { /* MC DClk Channel */
+ { /* MC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1),
+ },
+ { /* MC0 DClk CH 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0),
},
- { /* EDC UClk */
+ { /* MC0 DClk CH 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1),
+ },
+ { /* MC0 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2),
+ },
+ { /* MC1 DClk CH 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3),
+ },
+ { /* MC1 DClk CH 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4),
+ },
+ { /* MC1 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5),
+ },
+ { /* EDC0 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0),
},
- { /* EDC EClk */
+ { /* EDC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1),
+ },
+ { /* EDC2 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2),
+ },
+ { /* EDC3 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3),
+ },
+ { /* EDC4 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4),
+ },
+ { /* EDC5 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5),
+ },
+ { /* EDC6 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6),
+ },
+ { /* EDC7 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7),
+ },
+ { /* EDC0 EClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0),
+ },
+ { /* EDC1 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1),
+ },
+ { /* EDC2 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2),
+ },
+ { /* EDC3 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3),
+ },
+ { /* EDC4 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4),
+ },
+ { /* EDC5 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5),
+ },
+ { /* EDC6 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6),
+ },
+ { /* EDC7 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7),
},
{ /* M2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 85ef3c2..50b3a05 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,4 +1,5 @@
#include <linux/perf_event.h>
+#include <asm/intel-family.h>
enum perf_msr_id {
PERF_MSR_TSC = 0,
@@ -34,39 +35,43 @@
return false;
switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE2:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_X:
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_AIRMONT:
if (idx == PERF_MSR_SMI)
return true;
break;
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8bd764d..e2d7285 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -668,6 +668,14 @@
.event_str = str, \
};
+#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \
+static struct perf_pmu_events_ht_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\
+ .id = 0, \
+ .event_str_noht = noht, \
+ .event_str_ht = ht, \
+}
+
extern struct x86_pmu x86_pmu __read_mostly;
static inline bool x86_pmu_has_lbr_callstack(void)
@@ -803,6 +811,8 @@
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page);
#ifdef CONFIG_CPU_SUP_AMD
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 7f991bd5..e346572 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -129,6 +129,14 @@
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
+
+extern int __max_smt_threads;
+
+static inline int topology_max_smt_threads(void)
+{
+ return __max_smt_threads;
+}
+
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
extern int topology_phys_to_logical_pkg(unsigned int pkg);
#else
@@ -136,6 +144,7 @@
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 7788ce6..016f426 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -18,8 +18,16 @@
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/delay.h>
+#include <linux/perf_event.h>
#ifdef CONFIG_HARDLOCKUP_DETECTOR
+int hw_nmi_get_event(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ return PERF_COUNT_HW_REF_CPU_CYCLES;
+ return PERF_COUNT_HW_CPU_CYCLES;
+}
+
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
return (u64)(cpu_khz) * 1000 * watchdog_thresh;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fafe8b9..2ed0ec1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -105,6 +105,9 @@
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
+/* Maximum number of SMT threads on any online core */
+int __max_smt_threads __read_mostly;
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -493,7 +496,7 @@
bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
- int i;
+ int i, threads;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
@@ -550,6 +553,10 @@
if (match_die(c, o) && !topology_same_node(c, o))
primarily_use_numa_for_topology();
}
+
+ threads = cpumask_weight(topology_sibling_cpumask(cpu));
+ if (threads > __max_smt_threads)
+ __max_smt_threads = threads;
}
/* maps the cpu to the sched domain representing multi-core */
@@ -1441,6 +1448,21 @@
#ifdef CONFIG_HOTPLUG_CPU
+/* Recompute SMT state for all CPUs on offline */
+static void recompute_smt_state(void)
+{
+ int max_threads, cpu;
+
+ max_threads = 0;
+ for_each_online_cpu (cpu) {
+ int threads = cpumask_weight(topology_sibling_cpumask(cpu));
+
+ if (threads > max_threads)
+ max_threads = threads;
+ }
+ __max_smt_threads = max_threads;
+}
+
static void remove_siblinginfo(int cpu)
{
int sibling;
@@ -1465,6 +1487,7 @@
c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
+ recompute_smt_state();
}
static void remove_cpu_from_maps(int cpu)
diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c
index 2776bec..e57f923 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -26,6 +26,7 @@
#include <linux/seq_file.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include <asm/pmc_core.h>
#include "intel_pmc_core.h"
@@ -138,10 +139,10 @@
#endif /* CONFIG_DEBUG_FS */
static const struct x86_cpu_id intel_pmc_core_ids[] = {
- { X86_VENDOR_INTEL, 6, 0x4e, X86_FEATURE_MWAIT,
- (kernel_ulong_t)NULL}, /* Skylake CPUID Signature */
- { X86_VENDOR_INTEL, 6, 0x5e, X86_FEATURE_MWAIT,
- (kernel_ulong_t)NULL}, /* Skylake CPUID Signature */
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_MOBILE, X86_FEATURE_MWAIT,
+ (kernel_ulong_t)NULL},
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_DESKTOP, X86_FEATURE_MWAIT,
+ (kernel_ulong_t)NULL},
{}
};
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 4630eea..79858af 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -66,6 +66,7 @@
#ifdef CONFIG_LOCKUP_DETECTOR
u64 hw_nmi_get_sample_period(int watchdog_thresh);
+int hw_nmi_get_event(void);
extern int nmi_watchdog_enabled;
extern int soft_watchdog_enabled;
extern int watchdog_user_enabled;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1a827ce..7921f4f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -517,6 +517,11 @@
struct perf_cgroup;
struct ring_buffer;
+struct pmu_event_list {
+ raw_spinlock_t lock;
+ struct list_head list;
+};
+
/**
* struct perf_event - performance event kernel representation:
*/
@@ -675,6 +680,7 @@
int cgrp_defer_enabled;
#endif
+ struct list_head sb_list;
#endif /* CONFIG_PERF_EVENTS */
};
@@ -1074,7 +1080,7 @@
extern struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
u32 max_stack, bool crosstask, bool add_mark);
-extern int get_callchain_buffers(void);
+extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void);
extern int sysctl_perf_event_max_stack;
@@ -1326,6 +1332,13 @@
const char *event_str;
};
+struct perf_pmu_events_ht_attr {
+ struct device_attribute attr;
+ u64 id;
+ const char *event_str_ht;
+ const char *event_str_noht;
+};
+
ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 36ce552..c66a485 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -276,6 +276,9 @@
/*
* Hardware event_id to monitor via a performance monitoring event:
+ *
+ * @sample_max_stack: Max number of frame pointers in a callchain,
+ * should be < /proc/sys/kernel/perf_event_max_stack
*/
struct perf_event_attr {
@@ -385,7 +388,8 @@
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
- __u32 __reserved_2; /* align to __u64 */
+ __u16 sample_max_stack;
+ __u16 __reserved_2; /* align to __u64 */
};
#define perf_flags(attr) (*(&(attr)->read_format + 1))
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 080a2df..bf4495f 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -99,7 +99,7 @@
if (err)
goto free_smap;
- err = get_callchain_buffers();
+ err = get_callchain_buffers(sysctl_perf_event_max_stack);
if (err)
goto free_smap;
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 179ef46..e9fdb52 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -104,7 +104,7 @@
return -ENOMEM;
}
-int get_callchain_buffers(void)
+int get_callchain_buffers(int event_max_stack)
{
int err = 0;
int count;
@@ -121,6 +121,15 @@
/* If the allocation failed, give up */
if (!callchain_cpus_entries)
err = -ENOMEM;
+ /*
+ * If requesting per event more than the global cap,
+ * return a different error to help userspace figure
+ * this out.
+ *
+ * And also do it here so that we have &callchain_mutex held.
+ */
+ if (event_max_stack > sysctl_perf_event_max_stack)
+ err = -EOVERFLOW;
goto exit;
}
@@ -174,11 +183,12 @@
bool user = !event->attr.exclude_callchain_user;
/* Disallow cross-task user callchains. */
bool crosstask = event->ctx->task && event->ctx->task != current;
+ const u32 max_stack = event->attr.sample_max_stack;
if (!kernel && !user)
return NULL;
- return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true);
+ return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
}
struct perf_callchain_entry *
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9c51ec3..9345028 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -335,6 +335,7 @@
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
+static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -396,6 +397,13 @@
if (ret || !write)
return ret;
+ /*
+ * If throttling is disabled don't allow the write:
+ */
+ if (sysctl_perf_cpu_time_max_percent == 100 ||
+ sysctl_perf_cpu_time_max_percent == 0)
+ return -EINVAL;
+
max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
update_perf_cpu_limits();
@@ -3665,6 +3673,39 @@
static void ring_buffer_attach(struct perf_event *event,
struct ring_buffer *rb);
+static void detach_sb_event(struct perf_event *event)
+{
+ struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
+
+ raw_spin_lock(&pel->lock);
+ list_del_rcu(&event->sb_list);
+ raw_spin_unlock(&pel->lock);
+}
+
+static bool is_sb_event(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ if (event->parent)
+ return false;
+
+ if (event->attach_state & PERF_ATTACH_TASK)
+ return false;
+
+ if (attr->mmap || attr->mmap_data || attr->mmap2 ||
+ attr->comm || attr->comm_exec ||
+ attr->task ||
+ attr->context_switch)
+ return true;
+ return false;
+}
+
+static void unaccount_pmu_sb_event(struct perf_event *event)
+{
+ if (is_sb_event(event))
+ detach_sb_event(event);
+}
+
static void unaccount_event_cpu(struct perf_event *event, int cpu)
{
if (event->parent)
@@ -3728,6 +3769,8 @@
}
unaccount_event_cpu(event, event->cpu);
+
+ unaccount_pmu_sb_event(event);
}
static void perf_sched_delayed(struct work_struct *work)
@@ -5854,11 +5897,11 @@
perf_output_end(&handle);
}
-typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
+typedef void (perf_iterate_f)(struct perf_event *event, void *data);
static void
-perf_event_aux_ctx(struct perf_event_context *ctx,
- perf_event_aux_output_cb output,
+perf_iterate_ctx(struct perf_event_context *ctx,
+ perf_iterate_f output,
void *data, bool all)
{
struct perf_event *event;
@@ -5875,52 +5918,55 @@
}
}
-static void
-perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
- struct perf_event_context *task_ctx)
+static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
{
- rcu_read_lock();
- preempt_disable();
- perf_event_aux_ctx(task_ctx, output, data, false);
- preempt_enable();
- rcu_read_unlock();
+ struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
+ struct perf_event *event;
+
+ list_for_each_entry_rcu(event, &pel->list, sb_list) {
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
+ continue;
+ if (!event_filter_match(event))
+ continue;
+ output(event, data);
+ }
}
+/*
+ * Iterate all events that need to receive side-band events.
+ *
+ * For new callers; ensure that account_pmu_sb_event() includes
+ * your event, otherwise it might not get delivered.
+ */
static void
-perf_event_aux(perf_event_aux_output_cb output, void *data,
+perf_iterate_sb(perf_iterate_f output, void *data,
struct perf_event_context *task_ctx)
{
- struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
- struct pmu *pmu;
int ctxn;
+ rcu_read_lock();
+ preempt_disable();
+
/*
- * If we have task_ctx != NULL we only notify
- * the task context itself. The task_ctx is set
- * only for EXIT events before releasing task
+ * If we have task_ctx != NULL we only notify the task context itself.
+ * The task_ctx is set only for EXIT events before releasing task
* context.
*/
if (task_ctx) {
- perf_event_aux_task_ctx(output, data, task_ctx);
- return;
+ perf_iterate_ctx(task_ctx, output, data, false);
+ goto done;
}
- rcu_read_lock();
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
- if (cpuctx->unique_pmu != pmu)
- goto next;
- perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
- ctxn = pmu->task_ctx_nr;
- if (ctxn < 0)
- goto next;
+ perf_iterate_sb_cpu(output, data);
+
+ for_each_task_context_nr(ctxn) {
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
if (ctx)
- perf_event_aux_ctx(ctx, output, data, false);
-next:
- put_cpu_ptr(pmu->pmu_cpu_context);
+ perf_iterate_ctx(ctx, output, data, false);
}
+done:
+ preempt_enable();
rcu_read_unlock();
}
@@ -5969,7 +6015,7 @@
perf_event_enable_on_exec(ctxn);
- perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
+ perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
true);
}
rcu_read_unlock();
@@ -6013,9 +6059,9 @@
};
rcu_read_lock();
- perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
+ perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
if (cpuctx->task_ctx)
- perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+ perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
&ro, false);
rcu_read_unlock();
@@ -6144,7 +6190,7 @@
},
};
- perf_event_aux(perf_event_task_output,
+ perf_iterate_sb(perf_event_task_output,
&task_event,
task_ctx);
}
@@ -6223,7 +6269,7 @@
comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
- perf_event_aux(perf_event_comm_output,
+ perf_iterate_sb(perf_event_comm_output,
comm_event,
NULL);
}
@@ -6454,7 +6500,7 @@
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
- perf_event_aux(perf_event_mmap_output,
+ perf_iterate_sb(perf_event_mmap_output,
mmap_event,
NULL);
@@ -6537,7 +6583,7 @@
if (!ctx)
continue;
- perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
+ perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
}
rcu_read_unlock();
}
@@ -6724,7 +6770,7 @@
},
};
- perf_event_aux(perf_event_switch_output,
+ perf_iterate_sb(perf_event_switch_output,
&switch_event,
NULL);
}
@@ -8646,6 +8692,28 @@
return pmu;
}
+static void attach_sb_event(struct perf_event *event)
+{
+ struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
+
+ raw_spin_lock(&pel->lock);
+ list_add_rcu(&event->sb_list, &pel->list);
+ raw_spin_unlock(&pel->lock);
+}
+
+/*
+ * We keep a list of all !task (and therefore per-cpu) events
+ * that need to receive side-band records.
+ *
+ * This avoids having to scan all the various PMU per-cpu contexts
+ * looking for them.
+ */
+static void account_pmu_sb_event(struct perf_event *event)
+{
+ if (is_sb_event(event))
+ attach_sb_event(event);
+}
+
static void account_event_cpu(struct perf_event *event, int cpu)
{
if (event->parent)
@@ -8726,6 +8794,8 @@
enabled:
account_event_cpu(event, event->cpu);
+
+ account_pmu_sb_event(event);
}
/*
@@ -8874,7 +8944,7 @@
if (!event->parent) {
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
- err = get_callchain_buffers();
+ err = get_callchain_buffers(attr->sample_max_stack);
if (err)
goto err_addr_filters;
}
@@ -9196,6 +9266,9 @@
return -EINVAL;
}
+ if (!attr.sample_max_stack)
+ attr.sample_max_stack = sysctl_perf_event_max_stack;
+
/*
* In cgroup mode, the pid argument is used to pass the fd
* opened to the cgroup directory in cgroupfs. The cpu argument
@@ -9269,7 +9342,7 @@
if (is_sampling_event(event)) {
if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
- err = -ENOTSUPP;
+ err = -EOPNOTSUPP;
goto err_alloc;
}
}
@@ -10231,6 +10304,9 @@
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
+
+ INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
+ raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
}
}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9acb29f..8dd30fcd 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -315,6 +315,12 @@
#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/* Can be overriden by architecture */
+__weak int hw_nmi_get_event(void)
+{
+ return PERF_COUNT_HW_CPU_CYCLES;
+}
+
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -604,6 +610,7 @@
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+ wd_attr->config = hw_nmi_get_event();
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 316f308..67ff93e 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -10,6 +10,7 @@
CC = $(CROSS_COMPILE)gcc
AR = $(CROSS_COMPILE)ar
+LD = $(CROSS_COMPILE)ld
MAKEFLAGS += --no-print-directory
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
index 0e636c4..b0a035f 100644
--- a/tools/lib/api/fd/array.c
+++ b/tools/lib/api/fd/array.c
@@ -85,7 +85,8 @@
}
int fdarray__filter(struct fdarray *fda, short revents,
- void (*entry_destructor)(struct fdarray *fda, int fd))
+ void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+ void *arg)
{
int fd, nr = 0;
@@ -95,7 +96,7 @@
for (fd = 0; fd < fda->nr; ++fd) {
if (fda->entries[fd].revents & revents) {
if (entry_destructor)
- entry_destructor(fda, fd);
+ entry_destructor(fda, fd, arg);
continue;
}
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index 45db018..e87fd80 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -34,7 +34,8 @@
int fdarray__add(struct fdarray *fda, int fd, short revents);
int fdarray__poll(struct fdarray *fda, int timeout);
int fdarray__filter(struct fdarray *fda, short revents,
- void (*entry_destructor)(struct fdarray *fda, int fd));
+ void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+ void *arg);
int fdarray__grow(struct fdarray *fda, int extra);
int fdarray__fprintf(struct fdarray *fda, FILE *fp);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7e543c3..462e526 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1186,20 +1186,14 @@
return next;
}
-const char *
-bpf_object__get_name(struct bpf_object *obj)
+const char *bpf_object__name(struct bpf_object *obj)
{
- if (!obj)
- return ERR_PTR(-EINVAL);
- return obj->path;
+ return obj ? obj->path : ERR_PTR(-EINVAL);
}
-unsigned int
-bpf_object__get_kversion(struct bpf_object *obj)
+unsigned int bpf_object__kversion(struct bpf_object *obj)
{
- if (!obj)
- return 0;
- return obj->kern_version;
+ return obj ? obj->kern_version : 0;
}
struct bpf_program *
@@ -1224,9 +1218,8 @@
return &obj->programs[idx];
}
-int bpf_program__set_private(struct bpf_program *prog,
- void *priv,
- bpf_program_clear_priv_t clear_priv)
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv)
{
if (prog->priv && prog->clear_priv)
prog->clear_priv(prog, prog->priv);
@@ -1236,10 +1229,9 @@
return 0;
}
-int bpf_program__get_private(struct bpf_program *prog, void **ppriv)
+void *bpf_program__priv(struct bpf_program *prog)
{
- *ppriv = prog->priv;
- return 0;
+ return prog ? prog->priv : ERR_PTR(-EINVAL);
}
const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
@@ -1311,32 +1303,23 @@
return fd;
}
-int bpf_map__get_fd(struct bpf_map *map)
+int bpf_map__fd(struct bpf_map *map)
{
- if (!map)
- return -EINVAL;
-
- return map->fd;
+ return map ? map->fd : -EINVAL;
}
-int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef)
+const struct bpf_map_def *bpf_map__def(struct bpf_map *map)
{
- if (!map || !pdef)
- return -EINVAL;
-
- *pdef = map->def;
- return 0;
+ return map ? &map->def : ERR_PTR(-EINVAL);
}
-const char *bpf_map__get_name(struct bpf_map *map)
+const char *bpf_map__name(struct bpf_map *map)
{
- if (!map)
- return NULL;
- return map->name;
+ return map ? map->name : NULL;
}
-int bpf_map__set_private(struct bpf_map *map, void *priv,
- bpf_map_clear_priv_t clear_priv)
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv)
{
if (!map)
return -EINVAL;
@@ -1351,14 +1334,9 @@
return 0;
}
-int bpf_map__get_private(struct bpf_map *map, void **ppriv)
+void *bpf_map__priv(struct bpf_map *map)
{
- if (!map)
- return -EINVAL;
-
- if (ppriv)
- *ppriv = map->priv;
- return 0;
+ return map ? map->priv : ERR_PTR(-EINVAL);
}
struct bpf_map *
@@ -1389,7 +1367,7 @@
}
struct bpf_map *
-bpf_object__get_map_by_name(struct bpf_object *obj, const char *name)
+bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
{
struct bpf_map *pos;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a51594c..722f46b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -55,8 +55,8 @@
/* Load/unload object into/from kernel */
int bpf_object__load(struct bpf_object *obj);
int bpf_object__unload(struct bpf_object *obj);
-const char *bpf_object__get_name(struct bpf_object *obj);
-unsigned int bpf_object__get_kversion(struct bpf_object *obj);
+const char *bpf_object__name(struct bpf_object *obj);
+unsigned int bpf_object__kversion(struct bpf_object *obj);
struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
@@ -78,11 +78,10 @@
typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
void *);
-int bpf_program__set_private(struct bpf_program *prog, void *priv,
- bpf_program_clear_priv_t clear_priv);
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv);
-int bpf_program__get_private(struct bpf_program *prog,
- void **ppriv);
+void *bpf_program__priv(struct bpf_program *prog);
const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
@@ -171,7 +170,7 @@
*/
struct bpf_map;
struct bpf_map *
-bpf_object__get_map_by_name(struct bpf_object *obj, const char *name);
+bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
struct bpf_map *
bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
@@ -180,13 +179,13 @@
(pos) != NULL; \
(pos) = bpf_map__next((pos), (obj)))
-int bpf_map__get_fd(struct bpf_map *map);
-int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef);
-const char *bpf_map__get_name(struct bpf_map *map);
+int bpf_map__fd(struct bpf_map *map);
+const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
+const char *bpf_map__name(struct bpf_map *map);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
-int bpf_map__set_private(struct bpf_map *map, void *priv,
- bpf_map_clear_priv_t clear_priv);
-int bpf_map__get_private(struct bpf_map *map, void **ppriv);
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv);
+void *bpf_map__priv(struct bpf_map *map);
#endif
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 3d1bb80..3db3db9 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -30,3 +30,4 @@
*.pyo
.config-detected
util/intel-pt-decoder/inat-tables.c
+arch/*/include/generated/
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 1d6092c..7349632 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -56,6 +56,9 @@
--all-user::
Configure all used events to run in user space.
+--ldload::
+ Specify desired latency for loads event.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 3a8a9ba..947db6f 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -109,6 +109,10 @@
Dry run. With this option, --add and --del doesn't execute actual
adding and removal operations.
+--cache::
+ Cache the probes (with --add option). Any events which successfully added
+ are also stored in the cache file.
+
--max-probes=NUM::
Set the maximum number of probe points for an event. Default is 128.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 8dbee83..5b46b1d 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -360,6 +360,13 @@
Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+--dry-run::
+Parse options then exit. --dry-run can be used to detect errors in cmdline
+options.
+
+'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
+in config file is set to true.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 4fc44c7..1f6c705 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -119,13 +119,13 @@
srcline, period, iregs, brstack, brstacksym, flags.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
- e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
+ e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
- perf script -f <fields>
+ perf script -F <fields>
is equivalent to:
- perf script -f trace:<fields> -f sw:<fields> -f hw:<fields>
+ perf script -F trace:<fields> -F sw:<fields> -F hw:<fields>
i.e., the specified fields apply to all event types if the type string
is not given.
@@ -133,9 +133,9 @@
The arguments are processed in the order received. A later usage can
reset a prior request. e.g.:
- -f trace: -f comm,tid,time,ip,sym
+ -F trace: -F comm,tid,time,ip,sym
- The first -f suppresses trace events (field list is ""), but then the
+ The first -F suppresses trace events (field list is ""), but then the
second invocation sets the fields to comm,tid,time,ip,sym. In this case a
warning is given to the user:
@@ -143,9 +143,9 @@
Alternatively, consider the order:
- -f comm,tid,time,ip,sym -f trace:
+ -F comm,tid,time,ip,sym -F trace:
- The first -f sets the fields for all events and the second -f
+ The first -F sets the fields for all events and the second -F
suppresses trace events. The user is given a warning message about
the override, and the result of the above is that only S/W and H/W
events are displayed with the given fields.
@@ -154,14 +154,14 @@
event type, a message is displayed to the user that the option is
ignored for that type. For example:
- $ perf script -f comm,tid,trace
+ $ perf script -F comm,tid,trace
'trace' not valid for hardware events. Ignoring.
'trace' not valid for software events. Ignoring.
Alternatively, if the type is given an invalid field is specified it
is an error. For example:
- perf script -v -f sw:comm,tid,trace
+ perf script -v -F sw:comm,tid,trace
'trace' not valid for software events.
At this point usage is displayed, and perf-script exits.
@@ -170,10 +170,19 @@
Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
call, return, conditional, system, asynchronous, interrupt,
transaction abort, trace begin, trace end, and in transaction,
- respectively.
+ respectively. Known combinations of flags are printed more nicely e.g.
+ "call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
+ "int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
+ "async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
+ "tr end" for "bE". However the "x" flag will be display separately in those
+ cases e.g. "jcc (x)" for a condition branch within a transaction.
+
+ The callindent field is synthesized and may have a value when
+ Instruction Trace decoding. For calls and returns, it will display the
+ name of the symbol indented with spaces to reflect the stack depth.
Finally, a user may not set fields to none for all event types.
- i.e., -f "" is not allowed.
+ i.e., -F "" is not allowed.
The brstack output includes branch related information with raw addresses using the
/v/v/v/v/ syntax in the following order:
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04f23b4..d96ccd4 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -204,6 +204,38 @@
--no-aggr::
Do not aggregate counts across all monitored CPUs.
+--topdown::
+Print top down level 1 metrics if supported by the CPU. This allows to
+determine bottle necks in the CPU pipeline for CPU bound workloads,
+by breaking the cycles consumed down into frontend bound, backend bound,
+bad speculation and retiring.
+
+Frontend bound means that the CPU cannot fetch and decode instructions fast
+enough. Backend bound means that computation or memory access is the bottle
+neck. Bad Speculation means that the CPU wasted cycles due to branch
+mispredictions and similar issues. Retiring means that the CPU computed without
+an apparently bottleneck. The bottleneck is only the real bottleneck
+if the workload is actually bound by the CPU and not by something else.
+
+For best results it is usually a good idea to use it with interval
+mode like -I 1000, as the bottleneck of workloads can change often.
+
+The top down metrics are collected per core instead of per
+CPU thread. Per core mode is automatically enabled
+and -a (global monitoring) is needed, requiring root rights or
+perf.perf_event_paranoid=-1.
+
+Topdown uses the full Performance Monitoring Unit, and needs
+disabling of the NMI watchdog (as root):
+echo 0 > /proc/sys/kernel/nmi_watchdog
+for best results. Otherwise the bottlenecks may be inconsistent
+on workload with changing phases.
+
+This enables --metric-only, unless overriden with --no-metric-only.
+
+To interpret the results it is usually needed to know on which
+CPUs the workload runs on. If needed the CPUs can be forced using
+taskset.
EXAMPLES
--------
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index bde8cba..d0a2cb1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -254,7 +254,8 @@
PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
- $(QUIET_GEN)CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
+ $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
+ CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
$(PYTHON_WORD) util/setup.py \
--quiet build_ext; \
mkdir -p $(OUTPUT)python && \
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
index d22e3d0..f98da17 100644
--- a/tools/perf/arch/arm/util/Build
+++ b/tools/perf/arch/arm/util/Build
@@ -1,4 +1,4 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o
-libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index e58123a8..02f41db 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,2 +1,2 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o
-libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
index a87afa9..c116b71 100644
--- a/tools/perf/arch/arm64/util/unwind-libunwind.c
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -1,11 +1,13 @@
+#ifndef REMOTE_UNWIND_LIBUNWIND
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
#include "../../util/debug.h"
+#endif
-int libunwind__arch_reg_id(int regnum)
+int LIBUNWIND__ARCH_REG_ID(int regnum)
{
switch (regnum) {
case UNW_AARCH64_X0:
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index e83c8ce..fa090a9 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -102,7 +102,7 @@
* Return architecture name in a normalized form.
* The conversion logic comes from the Makefile.
*/
-static const char *normalize_arch(char *arch)
+const char *normalize_arch(char *arch)
{
if (!strcmp(arch, "x86_64"))
return "x86";
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 7529cfb..6b01c73 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -6,5 +6,6 @@
extern const char *objdump_path;
int perf_env__lookup_objdump(struct perf_env *env);
+const char *normalize_arch(char *arch);
#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index d4aa567..5c76cc8 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -154,10 +154,6 @@
err = 0;
out_err:
- if (evlist) {
- perf_evlist__disable(evlist);
- perf_evlist__delete(evlist);
- }
-
+ perf_evlist__delete(evlist);
return err;
}
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 4659703..f95e6f4 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -3,11 +3,12 @@
libperf-y += pmu.o
libperf-y += kvm-stat.o
libperf-y += perf_regs.o
+libperf-y += group.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
-libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 7a78055..cc1d865 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -37,7 +37,7 @@
intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
if (evlist) {
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (intel_pt_pmu &&
evsel->attr.type == intel_pt_pmu->type)
found_pt = true;
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
new file mode 100644
index 0000000..37f92aa
--- /dev/null
+++ b/tools/perf/arch/x86/util/group.c
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/group.h"
+
+/*
+ * Check whether we can use a group for top down.
+ * Without a group may get bad results due to multiplexing.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+ int n;
+
+ if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
+ return false;
+ if (n > 0) {
+ *warn = true;
+ return false;
+ }
+ return true;
+}
+
+void arch_topdown_group_warn(void)
+{
+ fprintf(stderr,
+ "nmi_watchdog enabled with topdown. May give wrong results.\n"
+ "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
+}
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 7dc3063..5132775 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -124,7 +124,7 @@
btsr->evlist = evlist;
btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == intel_bts_pmu->type) {
if (intel_bts_evsel) {
pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
@@ -327,7 +327,7 @@
container_of(itr, struct intel_bts_recording, itr);
struct perf_evsel *evsel;
- evlist__for_each(btsr->evlist, evsel) {
+ evlist__for_each_entry(btsr->evlist, evsel) {
if (evsel->attr.type == btsr->intel_bts_pmu->type)
return perf_evsel__disable(evsel);
}
@@ -340,7 +340,7 @@
container_of(itr, struct intel_bts_recording, itr);
struct perf_evsel *evsel;
- evlist__for_each(btsr->evlist, evsel) {
+ evlist__for_each_entry(btsr->evlist, evsel) {
if (evsel->attr.type == btsr->intel_bts_pmu->type)
return perf_evsel__enable(evsel);
}
@@ -422,7 +422,7 @@
container_of(itr, struct intel_bts_recording, itr);
struct perf_evsel *evsel;
- evlist__for_each(btsr->evlist, evsel) {
+ evlist__for_each_entry(btsr->evlist, evsel) {
if (evsel->attr.type == btsr->intel_bts_pmu->type)
return perf_evlist__enable_event_idx(btsr->evlist,
evsel, idx);
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index a07b960..fb51457 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -131,7 +131,7 @@
if (!mask)
return -EINVAL;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == intel_pt_pmu->type) {
*res = intel_pt_masked_bits(mask, evsel->attr.config);
return 0;
@@ -511,7 +511,7 @@
ptr->evlist = evlist;
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == intel_pt_pmu->type) {
if (intel_pt_evsel) {
pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
@@ -725,7 +725,7 @@
container_of(itr, struct intel_pt_recording, itr);
struct perf_evsel *evsel;
- evlist__for_each(ptr->evlist, evsel) {
+ evlist__for_each_entry(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
return perf_evsel__disable(evsel);
}
@@ -738,7 +738,7 @@
container_of(itr, struct intel_pt_recording, itr);
struct perf_evsel *evsel;
- evlist__for_each(ptr->evlist, evsel) {
+ evlist__for_each_entry(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
return perf_evsel__enable(evsel);
}
@@ -1011,7 +1011,7 @@
container_of(itr, struct intel_pt_recording, itr);
struct perf_evsel *evsel;
- evlist__for_each(ptr->evlist, evsel) {
+ evlist__for_each_entry(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
return perf_evlist__enable_event_idx(ptr->evlist, evsel,
idx);
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 357f1b1..2e5567c 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -62,6 +62,8 @@
struct perf_tsc_conversion tc;
int err;
+ if (!pc)
+ return 0;
err = perf_read_tsc_conversion(pc, &tc);
if (err == -EOPNOTSUPP)
return 0;
diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c
index db25e93..4f16661 100644
--- a/tools/perf/arch/x86/util/unwind-libunwind.c
+++ b/tools/perf/arch/x86/util/unwind-libunwind.c
@@ -1,12 +1,14 @@
+#ifndef REMOTE_UNWIND_LIBUNWIND
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
#include "../../util/debug.h"
+#endif
#ifdef HAVE_ARCH_X86_64_SUPPORT
-int libunwind__arch_reg_id(int regnum)
+int LIBUNWIND__ARCH_REG_ID(int regnum)
{
int id;
@@ -70,7 +72,7 @@
return id;
}
#else
-int libunwind__arch_reg_id(int regnum)
+int LIBUNWIND__ARCH_REG_ID(int regnum)
{
int id;
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 25c8173..b15e768 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -75,7 +75,7 @@
sample->period = 1;
sample->weight = 1;
- he = __hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
+ he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
if (he == NULL)
return -ENOMEM;
@@ -236,7 +236,7 @@
perf_session__fprintf_dsos(session, stdout);
total_nr_samples = 0;
- evlist__for_each(session->evlist, pos) {
+ evlist__for_each_entry(session->evlist, pos) {
struct hists *hists = evsel__hists(pos);
u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index d75bded..76a4d03 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -209,7 +209,7 @@
if (err)
goto out;
- strlist__for_each(pos, list) {
+ strlist__for_each_entry(pos, list) {
err = build_id_cache__remove_s(pos->s);
pr_debug("Removing %s %s: %s\n", pos->s, pathname,
err ? "FAIL" : "Ok");
@@ -343,7 +343,7 @@
if (add_name_list_str) {
list = strlist__new(add_name_list_str, NULL);
if (list) {
- strlist__for_each(pos, list)
+ strlist__for_each_entry(pos, list)
if (build_id_cache__add_file(pos->s)) {
if (errno == EEXIST) {
pr_debug("%s already in the cache\n",
@@ -361,7 +361,7 @@
if (remove_name_list_str) {
list = strlist__new(remove_name_list_str, NULL);
if (list) {
- strlist__for_each(pos, list)
+ strlist__for_each_entry(pos, list)
if (build_id_cache__remove_file(pos->s)) {
if (errno == ENOENT) {
pr_debug("%s wasn't in the cache\n",
@@ -379,7 +379,7 @@
if (purge_name_list_str) {
list = strlist__new(purge_name_list_str, NULL);
if (list) {
- strlist__for_each(pos, list)
+ strlist__for_each_entry(pos, list)
if (build_id_cache__purge_path(pos->s)) {
if (errno == ENOENT) {
pr_debug("%s wasn't in the cache\n",
@@ -400,7 +400,7 @@
if (update_name_list_str) {
list = strlist__new(update_name_list_str, NULL);
if (list) {
- strlist__for_each(pos, list)
+ strlist__for_each_entry(pos, list)
if (build_id_cache__update_file(pos->s)) {
if (errno == ENOENT) {
pr_debug("%s wasn't in the cache\n",
@@ -419,8 +419,7 @@
pr_warning("Couldn't add %s\n", kcore_filename);
out:
- if (session)
- perf_session__delete(session);
+ perf_session__delete(session);
return ret;
}
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index fe1b77f..e4207a2 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -37,23 +37,16 @@
{
struct perf_config_section *section;
struct perf_config_item *item;
- struct list_head *sections;
if (set == NULL)
return -1;
- sections = &set->sections;
- if (list_empty(sections))
- return -1;
+ perf_config_set__for_each_entry(set, section, item) {
+ char *value = item->value;
- list_for_each_entry(section, sections, node) {
- list_for_each_entry(item, §ion->items, node) {
- char *value = item->value;
-
- if (value)
- printf("%s.%s=%s\n", section->name,
- item->name, value);
- }
+ if (value)
+ printf("%s.%s=%s\n", section->name,
+ item->name, value);
}
return 0;
@@ -80,6 +73,10 @@
else if (use_user_config)
config_exclusive_filename = user_config;
+ /*
+ * At only 'config' sub-command, individually use the config set
+ * because of reinitializing with options config file location.
+ */
set = perf_config_set__new();
if (!set) {
ret = -1;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index f7645a4..21ee753 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -310,16 +310,6 @@
return -1;
}
-static int hists__add_entry(struct hists *hists,
- struct addr_location *al,
- struct perf_sample *sample)
-{
- if (__hists__add_entry(hists, al, NULL, NULL, NULL,
- sample, true) != NULL)
- return 0;
- return -ENOMEM;
-}
-
static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -336,7 +326,7 @@
return -1;
}
- if (hists__add_entry(hists, &al, sample)) {
+ if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, true)) {
pr_warning("problem incrementing symbol period, skipping event\n");
goto out_put;
}
@@ -373,7 +363,7 @@
{
struct perf_evsel *e;
- evlist__for_each(evlist, e) {
+ evlist__for_each_entry(evlist, e) {
if (perf_evsel__match2(evsel, e))
return e;
}
@@ -385,7 +375,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
struct hists *hists = evsel__hists(evsel);
hists__collapse_resort(hists, NULL);
@@ -666,7 +656,8 @@
hists__precompute(hists);
hists__output_resort(hists, NULL);
- hists__fprintf(hists, true, 0, 0, 0, stdout);
+ hists__fprintf(hists, true, 0, 0, 0, stdout,
+ symbol_conf.use_callchain);
}
static void data__fprintf(void)
@@ -690,7 +681,7 @@
struct perf_evsel *evsel_base;
bool first = true;
- evlist__for_each(evlist_base, evsel_base) {
+ evlist__for_each_entry(evlist_base, evsel_base) {
struct hists *hists_base = evsel__hists(evsel_base);
struct data__file *d;
int i;
@@ -765,9 +756,7 @@
out_delete:
data__for_each_file(i, d) {
- if (d->session)
- perf_session__delete(d->session);
-
+ perf_session__delete(d->session);
data__free(d);
}
@@ -1044,7 +1033,7 @@
}
static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct perf_evsel *evsel __maybe_unused)
+ struct hists *hists __maybe_unused)
{
struct diff_hpp_fmt *dfmt =
container_of(fmt, struct diff_hpp_fmt, fmt);
@@ -1055,7 +1044,7 @@
static int hpp__width(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp __maybe_unused,
- struct perf_evsel *evsel __maybe_unused)
+ struct hists *hists __maybe_unused)
{
struct diff_hpp_fmt *dfmt =
container_of(fmt, struct diff_hpp_fmt, fmt);
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 8a31f51..e09c428 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -32,7 +32,7 @@
if (session == NULL)
return -1;
- evlist__for_each(session->evlist, pos) {
+ evlist__for_each_entry(session->evlist, pos) {
perf_evsel__fprintf(pos, details, stdout);
if (pos->attr.type == PERF_TYPE_TRACEPOINT)
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index f9830c9..268ab73 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -4,7 +4,7 @@
* Builtin help command
*/
#include "perf.h"
-#include "util/cache.h"
+#include "util/config.h"
#include "builtin.h"
#include <subcmd/exec-cmd.h>
#include "common-cmds.h"
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index e5afa8f..73c1c4c 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -562,7 +562,7 @@
inject->tool.context_switch = perf_event__drop;
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
evsel->handler = drop_sample;
}
@@ -590,7 +590,7 @@
if (!has_tracking(evsel_to_remove))
return true;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->handler != drop_sample) {
cnt += 1;
if ((evsel->attr.sample_type & COMPAT_MASK) ==
@@ -608,7 +608,7 @@
struct perf_evsel *evsel, *tmp;
/* Remove non-synthesized evsels if possible */
- evlist__for_each_safe(evlist, tmp, evsel) {
+ evlist__for_each_entry_safe(evlist, tmp, evsel) {
if (evsel->handler == drop_sample &&
ok_to_remove(evlist, evsel)) {
pr_debug("Deleting %s\n", perf_evsel__name(evsel));
@@ -643,7 +643,7 @@
} else if (inject->sched_stat) {
struct perf_evsel *evsel;
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
const char *name = perf_evsel__name(evsel);
if (!strcmp(name, "sched:sched_switch")) {
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 58adfee..b1d491c 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -4,7 +4,7 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/util.h"
-#include "util/cache.h"
+#include "util/config.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
@@ -1354,7 +1354,7 @@
goto out;
}
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
perf_evsel__field(evsel, "pfn")) {
use_pfn = true;
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 6487c06..f4efef9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -988,7 +988,7 @@
* Note: exclude_{guest,host} do not apply here.
* This command processes KVM tracepoints from host only
*/
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
struct perf_event_attr *attr = &pos->attr;
/* make sure these *are* set */
@@ -1426,11 +1426,9 @@
err = kvm_events_live_report(kvm);
out:
- if (kvm->session)
- perf_session__delete(kvm->session);
+ perf_session__delete(kvm->session);
kvm->session = NULL;
- if (kvm->evlist)
- perf_evlist__delete(kvm->evlist);
+ perf_evlist__delete(kvm->evlist);
return err;
}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 1dc140c..d608a2c 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -67,6 +67,7 @@
OPT_CALLBACK('e', "event", &mem, "event",
"event selector. use 'perf mem record -e list' to list available events",
parse_record_events),
+ OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"),
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 9af859b..3426232 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -389,7 +389,7 @@
ret = probe_file__get_events(kfd, filter, klist);
if (ret == 0) {
- strlist__for_each(ent, klist)
+ strlist__for_each_entry(ent, klist)
pr_info("Removed event: %s\n", ent->s);
ret = probe_file__del_strlist(kfd, klist);
@@ -399,7 +399,7 @@
ret2 = probe_file__get_events(ufd, filter, ulist);
if (ret2 == 0) {
- strlist__for_each(ent, ulist)
+ strlist__for_each_entry(ent, ulist)
pr_info("Removed event: %s\n", ent->s);
ret2 = probe_file__del_strlist(ufd, ulist);
@@ -512,6 +512,7 @@
"Enable symbol demangling"),
OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
"Enable kernel symbol demangling"),
+ OPT_BOOLEAN(0, "cache", &probe_conf.cache, "Manipulate probe cache"),
OPT_END()
};
int ret;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index dc3fcb5..81411b1 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -13,6 +13,7 @@
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
+#include "util/config.h"
#include "util/callchain.h"
#include "util/cgroup.h"
@@ -352,7 +353,7 @@
perf_evlist__config(evlist, opts, &callchain_param);
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
try_again:
if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
@@ -655,6 +656,13 @@
return 0;
}
+static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
+{
+ if (rec->evlist && rec->evlist->mmap && rec->evlist->mmap[0].base)
+ return rec->evlist->mmap[0].base;
+ return NULL;
+}
+
static int record__synthesize(struct record *rec)
{
struct perf_session *session = rec->session;
@@ -692,7 +700,7 @@
}
}
- err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
+ err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
process_synthesized_event, machine);
if (err)
goto out;
@@ -1267,6 +1275,8 @@
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
"\n\t\t\t\tDefault: fp";
+static bool dry_run;
+
/*
* XXX Will stay a global variable till we fix builtin-script.c to stop messing
* with it and switch to use the library functions in perf_evlist that came
@@ -1386,6 +1396,8 @@
"append timestamp to output filename"),
OPT_BOOLEAN(0, "switch-output", &record.switch_output,
"Switch output when receive SIGUSR2"),
+ OPT_BOOLEAN(0, "dry-run", &dry_run,
+ "Parse options then exit"),
OPT_END()
};
@@ -1455,6 +1467,9 @@
if (err)
return err;
+ if (dry_run)
+ return 0;
+
err = bpf__setup_stdout(rec->evlist);
if (err) {
bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index a87cb33..f6cb35798 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -8,7 +8,7 @@
#include "builtin.h"
#include "util/util.h"
-#include "util/cache.h"
+#include "util/config.h"
#include "util/annotate.h"
#include "util/color.h"
@@ -361,7 +361,7 @@
struct perf_evsel *pos;
fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples);
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
const char *evname = perf_evsel__name(pos);
@@ -370,7 +370,8 @@
continue;
hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
- hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout);
+ hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout,
+ symbol_conf.use_callchain);
fprintf(stdout, "\n\n");
}
@@ -477,7 +478,7 @@
ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
- evlist__for_each(rep->session->evlist, pos) {
+ evlist__for_each_entry(rep->session->evlist, pos) {
struct hists *hists = evsel__hists(pos);
if (pos->idx == 0)
@@ -510,7 +511,7 @@
ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
- evlist__for_each(rep->session->evlist, pos)
+ evlist__for_each_entry(rep->session->evlist, pos)
perf_evsel__output_resort(pos, &prog);
ui_progress__finish();
@@ -551,7 +552,7 @@
report__warn_kptr_restrict(rep);
- evlist__for_each(session->evlist, pos)
+ evlist__for_each_entry(session->evlist, pos)
rep->nr_entries += evsel__hists(pos)->nr_entries;
if (use_browser == 0) {
@@ -582,7 +583,7 @@
* might be changed during the collapse phase.
*/
rep->nr_entries = 0;
- evlist__for_each(session->evlist, pos)
+ evlist__for_each_entry(session->evlist, pos)
rep->nr_entries += evsel__hists(pos)->nr_entries;
if (rep->nr_entries == 0) {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e3ce2f3..971ff91 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -21,6 +21,7 @@
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/stat.h"
+#include "util/thread-stack.h"
#include <linux/bitmap.h>
#include <linux/stringify.h>
#include "asm/bug.h"
@@ -63,6 +64,7 @@
PERF_OUTPUT_DATA_SRC = 1U << 17,
PERF_OUTPUT_WEIGHT = 1U << 18,
PERF_OUTPUT_BPF_OUTPUT = 1U << 19,
+ PERF_OUTPUT_CALLINDENT = 1U << 20,
};
struct output_option {
@@ -89,6 +91,7 @@
{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
{.str = "weight", .field = PERF_OUTPUT_WEIGHT},
{.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT},
+ {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
};
/* default set to maintain compatibility with current format */
@@ -339,7 +342,7 @@
*/
static int perf_session__check_output_opt(struct perf_session *session)
{
- int j;
+ unsigned int j;
struct perf_evsel *evsel;
for (j = 0; j < PERF_TYPE_MAX; ++j) {
@@ -369,7 +372,7 @@
if (!no_callchain) {
bool use_callchain = false;
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
use_callchain = true;
break;
@@ -388,17 +391,20 @@
struct perf_event_attr *attr;
j = PERF_TYPE_TRACEPOINT;
- evsel = perf_session__find_first_evtype(session, j);
- if (evsel == NULL)
- goto out;
- attr = &evsel->attr;
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->attr.type != j)
+ continue;
- if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
- output[j].fields |= PERF_OUTPUT_IP;
- output[j].fields |= PERF_OUTPUT_SYM;
- output[j].fields |= PERF_OUTPUT_DSO;
- set_print_ip_opts(attr);
+ attr = &evsel->attr;
+
+ if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
+ output[j].fields |= PERF_OUTPUT_IP;
+ output[j].fields |= PERF_OUTPUT_SYM;
+ output[j].fields |= PERF_OUTPUT_DSO;
+ set_print_ip_opts(attr);
+ goto out;
+ }
}
}
@@ -559,6 +565,62 @@
}
}
+static void print_sample_callindent(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct thread *thread,
+ struct addr_location *al)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ size_t depth = thread_stack__depth(thread);
+ struct addr_location addr_al;
+ const char *name = NULL;
+ static int spacing;
+ int len = 0;
+ u64 ip = 0;
+
+ /*
+ * The 'return' has already been popped off the stack so the depth has
+ * to be adjusted to match the 'call'.
+ */
+ if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
+ depth += 1;
+
+ if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
+ if (sample_addr_correlates_sym(attr)) {
+ thread__resolve(thread, &addr_al, sample);
+ if (addr_al.sym)
+ name = addr_al.sym->name;
+ else
+ ip = sample->addr;
+ } else {
+ ip = sample->addr;
+ }
+ } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
+ if (al->sym)
+ name = al->sym->name;
+ else
+ ip = sample->ip;
+ }
+
+ if (name)
+ len = printf("%*s%s", (int)depth * 4, "", name);
+ else if (ip)
+ len = printf("%*s%16" PRIx64, (int)depth * 4, "", ip);
+
+ if (len < 0)
+ return;
+
+ /*
+ * Try to keep the output length from changing frequently so that the
+ * output lines up more nicely.
+ */
+ if (len > spacing || (len && len < spacing - 52))
+ spacing = round_up(len + 4, 32);
+
+ if (len < spacing)
+ printf("%*s", spacing - len, "");
+}
+
static void print_sample_bts(struct perf_sample *sample,
struct perf_evsel *evsel,
struct thread *thread,
@@ -567,6 +629,9 @@
struct perf_event_attr *attr = &evsel->attr;
bool print_srcline_last = false;
+ if (PRINT_FIELD(CALLINDENT))
+ print_sample_callindent(sample, evsel, thread, al);
+
/* print branch_from information */
if (PRINT_FIELD(IP)) {
unsigned int print_opts = output[attr->type].print_ip_opts;
@@ -603,13 +668,42 @@
printf("\n");
}
+static struct {
+ u32 flags;
+ const char *name;
+} sample_flags[] = {
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"},
+ {PERF_IP_FLAG_BRANCH, "jmp"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT, "hw int"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
+ {0, NULL}
+};
+
static void print_sample_flags(u32 flags)
{
const char *chars = PERF_IP_FLAG_CHARS;
const int n = strlen(PERF_IP_FLAG_CHARS);
+ bool in_tx = flags & PERF_IP_FLAG_IN_TX;
+ const char *name = NULL;
char str[33];
int i, pos = 0;
+ for (i = 0; sample_flags[i].name ; i++) {
+ if (sample_flags[i].flags == (flags & ~PERF_IP_FLAG_IN_TX)) {
+ name = sample_flags[i].name;
+ break;
+ }
+ }
+
for (i = 0; i < n; i++, flags >>= 1) {
if (flags & 1)
str[pos++] = chars[i];
@@ -619,7 +713,11 @@
str[pos++] = '?';
}
str[pos] = 0;
- printf(" %-4s ", str);
+
+ if (name)
+ printf(" %-7s%4s ", name, in_tx ? "(x)" : "");
+ else
+ printf(" %-11s ", str);
}
struct printer_data {
@@ -717,7 +815,7 @@
struct perf_evsel *evsel;
int max = 0;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
int len = strlen(perf_evsel__name(evsel));
max = MAX(len, max);
@@ -942,7 +1040,7 @@
if (evsel->attr.type >= PERF_TYPE_MAX)
return 0;
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
if (pos->attr.type == evsel->attr.type && pos != evsel)
return 0;
}
@@ -1668,7 +1766,7 @@
snprintf(evname, len + 1, "%s", p);
match = 0;
- evlist__for_each(session->evlist, pos) {
+ evlist__for_each_entry(session->evlist, pos) {
if (!strcmp(perf_evsel__name(pos), evname)) {
match = 1;
break;
@@ -1870,7 +1968,7 @@
struct stat_round_event *round = &event->stat_round;
struct perf_evsel *counter;
- evlist__for_each(session->evlist, counter) {
+ evlist__for_each_entry(session->evlist, counter) {
perf_stat_process_counter(&stat_config, counter);
process_stat(counter, round->time);
}
@@ -2017,7 +2115,8 @@
"comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
- "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields),
+ "addr,symoff,period,iregs,brstack,brstacksym,flags,"
+ "callindent", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
@@ -2256,6 +2355,9 @@
script.session = session;
script__setup_sample_type(&script);
+ if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
+ itrace_synth_opts.thread_stack = true;
+
session->itrace_synth_opts = &itrace_synth_opts;
if (cpu_list) {
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ee7ada7..c367a43 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,10 +59,13 @@
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
+#include "util/group.h"
#include "util/session.h"
#include "util/tool.h"
+#include "util/group.h"
#include "asm/bug.h"
+#include <api/fs/fs.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>
@@ -98,6 +101,15 @@
"}"
};
+static const char * topdown_attrs[] = {
+ "topdown-total-slots",
+ "topdown-slots-retired",
+ "topdown-recovery-bubbles",
+ "topdown-fetch-bubbles",
+ "topdown-slots-issued",
+ NULL,
+};
+
static struct perf_evlist *evsel_list;
static struct target target = {
@@ -112,6 +124,7 @@
static bool null_run = false;
static int detailed_run = 0;
static bool transaction_run;
+static bool topdown_run = false;
static bool big_num = true;
static int big_num_opt = -1;
static const char *csv_sep = NULL;
@@ -124,6 +137,7 @@
static unsigned int unit_width = 4; /* strlen("unit") */
static bool forever = false;
static bool metric_only = false;
+static bool force_metric_only = false;
static struct timespec ref_time;
static struct cpu_map *aggr_map;
static aggr_get_id_t aggr_get_id;
@@ -317,7 +331,7 @@
{
struct perf_evsel *counter;
- evlist__for_each(evsel_list, counter) {
+ evlist__for_each_entry(evsel_list, counter) {
if (read_counter(counter))
pr_debug("failed to read counter %s\n", counter->name);
@@ -403,7 +417,7 @@
* Synthesize other events stuff not carried within
* attr event - unit, scale, name
*/
- evlist__for_each(evsel_list, counter) {
+ evlist__for_each_entry(evsel_list, counter) {
if (!counter->supported)
continue;
@@ -536,7 +550,7 @@
if (group)
perf_evlist__set_leader(evsel_list);
- evlist__for_each(evsel_list, counter) {
+ evlist__for_each_entry(evsel_list, counter) {
try_again:
if (create_perf_stat_counter(counter) < 0) {
/*
@@ -1120,7 +1134,7 @@
for (s = 0; s < aggr_map->nr; s++) {
id = aggr_map->map[s];
- evlist__for_each(evsel_list, counter) {
+ evlist__for_each_entry(evsel_list, counter) {
val = 0;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
s2 = aggr_get_id(evsel_list->cpus, cpu);
@@ -1159,7 +1173,7 @@
id = aggr_map->map[s];
first = true;
- evlist__for_each(evsel_list, counter) {
+ evlist__for_each_entry(evsel_list, counter) {
val = ena = run = 0;
nr = 0;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
@@ -1278,7 +1292,7 @@
if (prefix)
fputs(prefix, stat_config.output);
- evlist__for_each(evsel_list, counter) {
+ evlist__for_each_entry(evsel_list, counter) {
if (first) {
aggr_printout(counter, cpu, 0);
first = false;
@@ -1302,7 +1316,15 @@
[AGGR_GLOBAL] = 0,
};
-static void print_metric_headers(char *prefix)
+static const char *aggr_header_csv[] = {
+ [AGGR_CORE] = "core,cpus,",
+ [AGGR_SOCKET] = "socket,cpus",
+ [AGGR_NONE] = "cpu,",
+ [AGGR_THREAD] = "comm-pid,",
+ [AGGR_GLOBAL] = ""
+};
+
+static void print_metric_headers(const char *prefix, bool no_indent)
{
struct perf_stat_output_ctx out;
struct perf_evsel *counter;
@@ -1313,12 +1335,18 @@
if (prefix)
fprintf(stat_config.output, "%s", prefix);
- if (!csv_output)
+ if (!csv_output && !no_indent)
fprintf(stat_config.output, "%*s",
aggr_header_lens[stat_config.aggr_mode], "");
+ if (csv_output) {
+ if (stat_config.interval)
+ fputs("time,", stat_config.output);
+ fputs(aggr_header_csv[stat_config.aggr_mode],
+ stat_config.output);
+ }
/* Print metrics headers only */
- evlist__for_each(evsel_list, counter) {
+ evlist__for_each_entry(evsel_list, counter) {
os.evsel = counter;
out.ctx = &os;
out.print_metric = print_metric_header;
@@ -1338,28 +1366,40 @@
sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
- if (num_print_interval == 0 && !csv_output && !metric_only) {
+ if (num_print_interval == 0 && !csv_output) {
switch (stat_config.aggr_mode) {
case AGGR_SOCKET:
- fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time socket cpus");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
case AGGR_CORE:
- fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time core cpus");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
case AGGR_NONE:
- fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time CPU");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
case AGGR_THREAD:
- fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time comm-pid");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
case AGGR_GLOBAL:
default:
- fprintf(output, "# time counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
case AGGR_UNSET:
break;
}
}
+ if (num_print_interval == 0 && metric_only)
+ print_metric_headers(" ", true);
if (++num_print_interval == 25)
num_print_interval = 0;
}
@@ -1428,8 +1468,8 @@
if (metric_only) {
static int num_print_iv;
- if (num_print_iv == 0)
- print_metric_headers(prefix);
+ if (num_print_iv == 0 && !interval)
+ print_metric_headers(prefix, false);
if (num_print_iv++ == 25)
num_print_iv = 0;
if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
@@ -1442,11 +1482,11 @@
print_aggr(prefix);
break;
case AGGR_THREAD:
- evlist__for_each(evsel_list, counter)
+ evlist__for_each_entry(evsel_list, counter)
print_aggr_thread(counter, prefix);
break;
case AGGR_GLOBAL:
- evlist__for_each(evsel_list, counter)
+ evlist__for_each_entry(evsel_list, counter)
print_counter_aggr(counter, prefix);
if (metric_only)
fputc('\n', stat_config.output);
@@ -1455,7 +1495,7 @@
if (metric_only)
print_no_aggr_metric(prefix);
else {
- evlist__for_each(evsel_list, counter)
+ evlist__for_each_entry(evsel_list, counter)
print_counter(counter, prefix);
}
break;
@@ -1520,6 +1560,14 @@
return 0;
}
+static int enable_metric_only(const struct option *opt __maybe_unused,
+ const char *s __maybe_unused, int unset)
+{
+ force_metric_only = true;
+ metric_only = !unset;
+ return 0;
+}
+
static const struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1578,8 +1626,10 @@
"aggregate counts per thread", AGGR_THREAD),
OPT_UINTEGER('D', "delay", &initial_delay,
"ms to wait before starting measurement after program start"),
- OPT_BOOLEAN(0, "metric-only", &metric_only,
- "Only print computed metrics. No raw values"),
+ OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+ "Only print computed metrics. No raw values", enable_metric_only),
+ OPT_BOOLEAN(0, "topdown", &topdown_run,
+ "measure topdown level 1 statistics"),
OPT_END()
};
@@ -1772,12 +1822,62 @@
return 0;
}
+static int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+ int off = 0;
+ int i;
+ int len = 0;
+ char *s;
+
+ for (i = 0; attr[i]; i++) {
+ if (pmu_have_event("cpu", attr[i])) {
+ len += strlen(attr[i]) + 1;
+ attr[i - off] = attr[i];
+ } else
+ off++;
+ }
+ attr[i - off] = NULL;
+
+ *str = malloc(len + 1 + 2);
+ if (!*str)
+ return -1;
+ s = *str;
+ if (i - off == 0) {
+ *s = 0;
+ return 0;
+ }
+ if (use_group)
+ *s++ = '{';
+ for (i = 0; attr[i]; i++) {
+ strcpy(s, attr[i]);
+ s += strlen(s);
+ *s++ = ',';
+ }
+ if (use_group) {
+ s[-1] = '}';
+ *s = 0;
+ } else
+ s[-1] = 0;
+ return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)
+{
+ *warn = false;
+ return false;
+}
+
+__weak void arch_topdown_group_warn(void)
+{
+}
+
/*
* Add default attributes, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
*/
static int add_default_attributes(void)
{
+ int err;
struct perf_event_attr default_attrs0[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@@ -1896,7 +1996,6 @@
return 0;
if (transaction_run) {
- int err;
if (pmu_have_event("cpu", "cycles-ct") &&
pmu_have_event("cpu", "el-start"))
err = parse_events(evsel_list, transaction_attrs, NULL);
@@ -1909,6 +2008,46 @@
return 0;
}
+ if (topdown_run) {
+ char *str = NULL;
+ bool warn = false;
+
+ if (stat_config.aggr_mode != AGGR_GLOBAL &&
+ stat_config.aggr_mode != AGGR_CORE) {
+ pr_err("top down event configuration requires --per-core mode\n");
+ return -1;
+ }
+ stat_config.aggr_mode = AGGR_CORE;
+ if (nr_cgroups || !target__has_cpu(&target)) {
+ pr_err("top down event configuration requires system-wide mode (-a)\n");
+ return -1;
+ }
+
+ if (!force_metric_only)
+ metric_only = true;
+ if (topdown_filter_events(topdown_attrs, &str,
+ arch_topdown_check_group(&warn)) < 0) {
+ pr_err("Out of memory\n");
+ return -1;
+ }
+ if (topdown_attrs[0] && str) {
+ if (warn)
+ arch_topdown_group_warn();
+ err = parse_events(evsel_list, str, NULL);
+ if (err) {
+ fprintf(stderr,
+ "Cannot set up top down events %s: %d\n",
+ str, err);
+ free(str);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "System does not support topdown\n");
+ return -1;
+ }
+ free(str);
+ }
+
if (!evsel_list->nr_entries) {
if (target__has_cpu(&target))
default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
@@ -2010,7 +2149,7 @@
const char **argv = session->header.env.cmdline_argv;
int argc = session->header.env.nr_cmdline;
- evlist__for_each(evsel_list, counter)
+ evlist__for_each_entry(evsel_list, counter)
perf_stat_process_counter(&stat_config, counter);
if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 2a6cc25..07fc792 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -22,7 +22,7 @@
#include "perf.h"
#include "util/annotate.h"
-#include "util/cache.h"
+#include "util/config.h"
#include "util/color.h"
#include "util/evlist.h"
#include "util/evsel.h"
@@ -295,7 +295,7 @@
hists__output_recalc_col_len(hists, top->print_entries - printed);
putchar('\n');
hists__fprintf(hists, false, top->print_entries - printed, win_width,
- top->min_percent, stdout);
+ top->min_percent, stdout, symbol_conf.use_callchain);
}
static void prompt_integer(int *target, const char *msg)
@@ -479,7 +479,7 @@
fprintf(stderr, "\nAvailable events:");
- evlist__for_each(top->evlist, top->sym_evsel)
+ evlist__for_each_entry(top->evlist, top->sym_evsel)
fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
prompt_integer(&counter, "Enter details event counter");
@@ -490,7 +490,7 @@
sleep(1);
break;
}
- evlist__for_each(top->evlist, top->sym_evsel)
+ evlist__for_each_entry(top->evlist, top->sym_evsel)
if (top->sym_evsel->idx == counter)
break;
} else
@@ -583,7 +583,7 @@
* Zooming in/out UIDs. For now juse use whatever the user passed
* via --uid.
*/
- evlist__for_each(top->evlist, pos) {
+ evlist__for_each_entry(top->evlist, pos) {
struct hists *hists = evsel__hists(pos);
hists->uid_filter_str = top->record_opts.target.uid_str;
}
@@ -888,7 +888,7 @@
perf_evlist__config(evlist, opts, &callchain_param);
- evlist__for_each(evlist, counter) {
+ evlist__for_each_entry(evlist, counter) {
try_again:
if (perf_evsel__open(counter, top->evlist->cpus,
top->evlist->threads) < 0) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5c50fe7..cf90de8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1247,7 +1247,7 @@
i = 0;
- strlist__for_each(pos, trace->ev_qualifier) {
+ strlist__for_each_entry(pos, trace->ev_qualifier) {
const char *sc = pos->s;
int id = syscalltbl__id(trace->sctbl, sc);
@@ -2483,7 +2483,7 @@
goto out;
}
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
(evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
@@ -2550,7 +2550,7 @@
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
- resort_rb__for_each(nd, syscall_stats) {
+ resort_rb__for_each_entry(nd, syscall_stats) {
struct stats *stats = syscall_stats_entry->stats;
if (stats) {
double min = (double)(stats->min) / NSEC_PER_MSEC;
@@ -2627,7 +2627,7 @@
return 0;
}
- resort_rb__for_each(nd, threads)
+ resort_rb__for_each_entry(nd, threads)
printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
resort_rb__delete(threads);
@@ -2714,7 +2714,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
evsel->handler = handler;
}
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 5ad0255..534c811 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -73,17 +73,25 @@
#
# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
#
+
+libunwind_arch_set_flags = $(eval $(libunwind_arch_set_flags_code))
+define libunwind_arch_set_flags_code
+ FEATURE_CHECK_CFLAGS-libunwind-$(1) = -I$(LIBUNWIND_DIR)/include
+ FEATURE_CHECK_LDFLAGS-libunwind-$(1) = -L$(LIBUNWIND_DIR)/lib
+endef
+
ifdef LIBUNWIND_DIR
LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include
LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
+ LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64
+ $(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch)))
endif
-LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
# Set per-feature check compilation flags
FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
-FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
-FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
ifeq ($(NO_PERF_REGS),0)
CFLAGS += -DHAVE_PERF_REGS_SUPPORT
@@ -249,7 +257,7 @@
LIBC_SUPPORT := 1
endif
ifeq ($(LIBC_SUPPORT),1)
- msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install elfutils-libelf-devel/libelf-dev);
+ msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install libelf-dev, libelf-devel or elfutils-libelf-devel);
NO_LIBELF := 1
NO_DWARF := 1
@@ -351,10 +359,42 @@
endif
ifndef NO_LIBUNWIND
+ have_libunwind :=
+
+ ifeq ($(feature-libunwind-x86), 1)
+ $(call detected,CONFIG_LIBUNWIND_X86)
+ CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT
+ LDFLAGS += -lunwind-x86
+ EXTLIBS_LIBUNWIND += -lunwind-x86
+ have_libunwind = 1
+ endif
+
+ ifeq ($(feature-libunwind-aarch64), 1)
+ $(call detected,CONFIG_LIBUNWIND_AARCH64)
+ CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT
+ LDFLAGS += -lunwind-aarch64
+ EXTLIBS_LIBUNWIND += -lunwind-aarch64
+ have_libunwind = 1
+ $(call feature_check,libunwind-debug-frame-aarch64)
+ ifneq ($(feature-libunwind-debug-frame-aarch64), 1)
+ msg := $(warning No debug_frame support found in libunwind-aarch64);
+ CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME_AARCH64
+ endif
+ endif
+
ifneq ($(feature-libunwind), 1)
msg := $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR);
+ NO_LOCAL_LIBUNWIND := 1
+ else
+ have_libunwind := 1
+ $(call detected,CONFIG_LOCAL_LIBUNWIND)
+ endif
+
+ ifneq ($(have_libunwind), 1)
NO_LIBUNWIND := 1
endif
+else
+ NO_LOCAL_LIBUNWIND := 1
endif
ifndef NO_LIBBPF
@@ -392,7 +432,7 @@
NO_DWARF_UNWIND := 1
endif
-ifndef NO_LIBUNWIND
+ifndef NO_LOCAL_LIBUNWIND
ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
$(call feature_check,libunwind-debug-frame)
ifneq ($(feature-libunwind-debug-frame), 1)
@@ -403,10 +443,15 @@
# non-ARM has no dwarf_find_debug_frame() function:
CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
endif
- CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
EXTLIBS += $(LIBUNWIND_LIBS)
+ LDFLAGS += $(LIBUNWIND_LIBS)
+endif
+
+ifndef NO_LIBUNWIND
+ CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
CFLAGS += $(LIBUNWIND_CFLAGS)
LDFLAGS += $(LIBUNWIND_LDFLAGS)
+ EXTLIBS += $(EXTLIBS_LIBUNWIND)
endif
ifndef NO_LIBAUDIT
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 15982ce..8f21922 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -10,7 +10,7 @@
#include "util/env.h"
#include <subcmd/exec-cmd.h>
-#include "util/cache.h"
+#include "util/config.h"
#include "util/quote.h"
#include <subcmd/run-command.h>
#include "util/parse-events.h"
@@ -139,8 +139,6 @@
OPT_ARGUMENT("html-path", "html-path"),
OPT_ARGUMENT("paginate", "paginate"),
OPT_ARGUMENT("no-pager", "no-pager"),
- OPT_ARGUMENT("perf-dir", "perf-dir"),
- OPT_ARGUMENT("work-tree", "work-tree"),
OPT_ARGUMENT("debugfs-dir", "debugfs-dir"),
OPT_ARGUMENT("buildid-dir", "buildid-dir"),
OPT_ARGUMENT("list-cmds", "list-cmds"),
@@ -200,35 +198,6 @@
use_pager = 0;
if (envchanged)
*envchanged = 1;
- } else if (!strcmp(cmd, "--perf-dir")) {
- if (*argc < 2) {
- fprintf(stderr, "No directory given for --perf-dir.\n");
- usage(perf_usage_string);
- }
- setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1);
- if (envchanged)
- *envchanged = 1;
- (*argv)++;
- (*argc)--;
- handled++;
- } else if (!prefixcmp(cmd, CMD_PERF_DIR)) {
- setenv(PERF_DIR_ENVIRONMENT, cmd + strlen(CMD_PERF_DIR), 1);
- if (envchanged)
- *envchanged = 1;
- } else if (!strcmp(cmd, "--work-tree")) {
- if (*argc < 2) {
- fprintf(stderr, "No directory given for --work-tree.\n");
- usage(perf_usage_string);
- }
- setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1);
- if (envchanged)
- *envchanged = 1;
- (*argv)++;
- (*argc)--;
- } else if (!prefixcmp(cmd, CMD_WORK_TREE)) {
- setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + strlen(CMD_WORK_TREE), 1);
- if (envchanged)
- *envchanged = 1;
} else if (!strcmp(cmd, "--debugfs-dir")) {
if (*argc < 2) {
fprintf(stderr, "No directory given for --debugfs-dir.\n");
@@ -363,11 +332,6 @@
#define RUN_SETUP (1<<0)
#define USE_PAGER (1<<1)
-/*
- * require working tree to be present -- anything uses this needs
- * RUN_SETUP for reading from the configuration file.
- */
-#define NEED_WORK_TREE (1<<2)
static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
{
@@ -391,6 +355,7 @@
perf_env__set_cmdline(&perf_env, argc, argv);
status = p->fn(argc, argv, prefix);
+ perf_config__exit();
exit_browser(status);
perf_env__exit(&perf_env);
bpf__clear();
@@ -558,6 +523,7 @@
srandom(time(NULL));
+ perf_config__init();
perf_config(perf_default_config, NULL);
set_buildid_dir(NULL);
diff --git a/tools/perf/scripts/python/bin/stackcollapse-record b/tools/perf/scripts/python/bin/stackcollapse-record
new file mode 100755
index 0000000..9d8f9f0
--- /dev/null
+++ b/tools/perf/scripts/python/bin/stackcollapse-record
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+#
+# stackcollapse.py can cover all type of perf samples including
+# the tracepoints, so no special record requirements, just record what
+# you want to analyze.
+#
+perf record "$@"
diff --git a/tools/perf/scripts/python/bin/stackcollapse-report b/tools/perf/scripts/python/bin/stackcollapse-report
new file mode 100755
index 0000000..356b965
--- /dev/null
+++ b/tools/perf/scripts/python/bin/stackcollapse-report
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: produce callgraphs in short form for scripting use
+perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
diff --git a/tools/perf/scripts/python/stackcollapse.py b/tools/perf/scripts/python/stackcollapse.py
new file mode 100755
index 0000000..5a605f7
--- /dev/null
+++ b/tools/perf/scripts/python/stackcollapse.py
@@ -0,0 +1,125 @@
+# stackcollapse.py - format perf samples with one line per distinct call stack
+#
+# This script's output has two space-separated fields. The first is a semicolon
+# separated stack including the program name (from the "comm" field) and the
+# function names from the call stack. The second is a count:
+#
+# swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2
+#
+# The file is sorted according to the first field.
+#
+# Input may be created and processed using:
+#
+# perf record -a -g -F 99 sleep 60
+# perf script report stackcollapse > out.stacks-folded
+#
+# (perf script record stackcollapse works too).
+#
+# Written by Paolo Bonzini <pbonzini@redhat.com>
+# Based on Brendan Gregg's stackcollapse-perf.pl script.
+
+import os
+import sys
+from collections import defaultdict
+from optparse import OptionParser, make_option
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from EventClass import *
+
+# command line parsing
+
+option_list = [
+ # formatting options for the bottom entry of the stack
+ make_option("--include-tid", dest="include_tid",
+ action="store_true", default=False,
+ help="include thread id in stack"),
+ make_option("--include-pid", dest="include_pid",
+ action="store_true", default=False,
+ help="include process id in stack"),
+ make_option("--no-comm", dest="include_comm",
+ action="store_false", default=True,
+ help="do not separate stacks according to comm"),
+ make_option("--tidy-java", dest="tidy_java",
+ action="store_true", default=False,
+ help="beautify Java signatures"),
+ make_option("--kernel", dest="annotate_kernel",
+ action="store_true", default=False,
+ help="annotate kernel functions with _[k]")
+]
+
+parser = OptionParser(option_list=option_list)
+(opts, args) = parser.parse_args()
+
+if len(args) != 0:
+ parser.error("unexpected command line argument")
+if opts.include_tid and not opts.include_comm:
+ parser.error("requesting tid but not comm is invalid")
+if opts.include_pid and not opts.include_comm:
+ parser.error("requesting pid but not comm is invalid")
+
+# event handlers
+
+lines = defaultdict(lambda: 0)
+
+def process_event(param_dict):
+ def tidy_function_name(sym, dso):
+ if sym is None:
+ sym = '[unknown]'
+
+ sym = sym.replace(';', ':')
+ if opts.tidy_java:
+ # the original stackcollapse-perf.pl script gives the
+ # example of converting this:
+ # Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V
+ # to this:
+ # org/mozilla/javascript/MemberBox:.init
+ sym = sym.replace('<', '')
+ sym = sym.replace('>', '')
+ if sym[0] == 'L' and sym.find('/'):
+ sym = sym[1:]
+ try:
+ sym = sym[:sym.index('(')]
+ except ValueError:
+ pass
+
+ if opts.annotate_kernel and dso == '[kernel.kallsyms]':
+ return sym + '_[k]'
+ else:
+ return sym
+
+ stack = list()
+ if 'callchain' in param_dict:
+ for entry in param_dict['callchain']:
+ entry.setdefault('sym', dict())
+ entry['sym'].setdefault('name', None)
+ entry.setdefault('dso', None)
+ stack.append(tidy_function_name(entry['sym']['name'],
+ entry['dso']))
+ else:
+ param_dict.setdefault('symbol', None)
+ param_dict.setdefault('dso', None)
+ stack.append(tidy_function_name(param_dict['symbol'],
+ param_dict['dso']))
+
+ if opts.include_comm:
+ comm = param_dict["comm"].replace(' ', '_')
+ sep = "-"
+ if opts.include_pid:
+ comm = comm + sep + str(param_dict['sample']['pid'])
+ sep = "/"
+ if opts.include_tid:
+ comm = comm + sep + str(param_dict['sample']['tid'])
+ stack.append(comm)
+
+ stack_string = ';'.join(reversed(stack))
+ lines[stack_string] = lines[stack_string] + 1
+
+def trace_end():
+ list = lines.keys()
+ list.sort()
+ for stack in list:
+ print "%s %d" % (stack, lines[stack])
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index d9ba991..e70313f 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -118,7 +118,7 @@
perf_evlist__config(evlist, &opts, NULL);
/* Set backward bit, ring buffer should be writing from end */
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
evsel->attr.write_backward = 1;
err = perf_evlist__open(evlist);
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
index 95fb744..9f5698a 100644
--- a/tools/perf/tests/event-times.c
+++ b/tools/perf/tests/event-times.c
@@ -200,8 +200,7 @@
count.ena, count.run);
out_err:
- if (evlist)
- perf_evlist__delete(evlist);
+ perf_evlist__delete(evlist);
return !err ? TEST_OK : TEST_FAIL;
}
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 2de4a4f..60926a1 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -80,7 +80,7 @@
}
err = 0;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) {
--err;
pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]);
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index c809463..59dbd05 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -36,7 +36,7 @@
}
fdarray__init_revents(fda, POLLIN);
- nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
if (nr_fds != fda->nr_alloc) {
pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything",
nr_fds, fda->nr_alloc);
@@ -44,7 +44,7 @@
}
fdarray__init_revents(fda, POLLHUP);
- nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
if (nr_fds != 0) {
pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds",
nr_fds, fda->nr_alloc);
@@ -57,7 +57,7 @@
pr_debug("\nfiltering all but fda->entries[2]:");
fdarray__fprintf_prefix(fda, "before", stderr);
- nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
fdarray__fprintf_prefix(fda, " after", stderr);
if (nr_fds != 1) {
pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds);
@@ -78,7 +78,7 @@
pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
fdarray__fprintf_prefix(fda, "before", stderr);
- nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
fdarray__fprintf_prefix(fda, " after", stderr);
if (nr_fds != 2) {
pr_debug("\nfdarray__filter()=%d != 2, should have left just two events",
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index e846f8c..62efb14 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -56,7 +56,7 @@
* (perf [perf] main) will be collapsed to an existing entry
* so total 9 entries will be in the tree.
*/
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
struct hist_entry_iter iter = {
.evsel = evsel,
@@ -136,7 +136,7 @@
if (err < 0)
goto out;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
struct hists *hists = evsel__hists(evsel);
hists__collapse_resort(hists, NULL);
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index acf5a13..eddc740 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -72,7 +72,7 @@
* However the second evsel also has a collapsed entry for
* "bash [libc] malloc" so total 9 entries will be in the tree.
*/
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
struct hists *hists = evsel__hists(evsel);
for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
@@ -84,7 +84,7 @@
if (machine__resolve(machine, &al, &sample) < 0)
goto out;
- he = __hists__add_entry(hists, &al, NULL,
+ he = hists__add_entry(hists, &al, NULL,
NULL, NULL, &sample, true);
if (he == NULL) {
addr_location__put(&al);
@@ -103,7 +103,7 @@
if (machine__resolve(machine, &al, &sample) < 0)
goto out;
- he = __hists__add_entry(hists, &al, NULL,
+ he = hists__add_entry(hists, &al, NULL,
NULL, NULL, &sample, true);
if (he == NULL) {
addr_location__put(&al);
@@ -301,7 +301,7 @@
if (err < 0)
goto out;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
hists = evsel__hists(evsel);
hists__collapse_resort(hists, NULL);
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 359e98f..aea33f5 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -126,7 +126,7 @@
}
err = 0;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
pr_debug("expected %d %s events, got %d\n",
expected_nr_events[evsel->idx],
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 7865f68..20c2e64 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -32,7 +32,7 @@
TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
TEST_ASSERT_VAL("wrong type",
PERF_TYPE_TRACEPOINT == evsel->attr.type);
TEST_ASSERT_VAL("wrong sample_type",
@@ -207,7 +207,7 @@
TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
TEST_ASSERT_VAL("wrong exclude_user",
!evsel->attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel",
@@ -1783,8 +1783,8 @@
struct evlist_test e;
char name[MAX_NAME];
- if (!strcmp(ent->d_name, ".") ||
- !strcmp(ent->d_name, ".."))
+ /* Names containing . are special and cannot be used directly */
+ if (strchr(ent->d_name, '.'))
continue;
snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name);
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
index 294c76b..81c6eea 100644
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -44,8 +44,7 @@
for (i = 0; i < count && !err; i++)
err = process_event(&evlist, events[i]);
- if (evlist)
- perf_evlist__delete(evlist);
+ perf_evlist__delete(evlist);
return err;
}
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 39a689b..7ddbe26 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -432,7 +432,7 @@
}
/* Check non-tracking events are not tracking */
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel != tracking_evsel) {
if (evsel->attr.mmap || evsel->attr.comm) {
pr_debug("Non-tracking event is tracking\n");
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index af68a9d..3eb3edb 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -1,5 +1,5 @@
#include "../util.h"
-#include "../cache.h"
+#include "../config.h"
#include "../../perf.h"
#include "libslang.h"
#include "ui.h"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 4fc208e..0e106bb 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -8,6 +8,7 @@
#include "../../util/sort.h"
#include "../../util/symbol.h"
#include "../../util/evsel.h"
+#include "../../util/config.h"
#include <pthread.h>
struct disasm_line_samples {
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 538bae8..e08b8f7 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -12,35 +12,17 @@
#include "../../util/top.h"
#include "../../arch/common.h"
-#include "../browser.h"
+#include "../browsers/hists.h"
#include "../helpline.h"
#include "../util.h"
#include "../ui.h"
#include "map.h"
#include "annotate.h"
-struct hist_browser {
- struct ui_browser b;
- struct hists *hists;
- struct hist_entry *he_selection;
- struct map_symbol *selection;
- struct hist_browser_timer *hbt;
- struct pstack *pstack;
- struct perf_env *env;
- int print_seq;
- bool show_dso;
- bool show_headers;
- float min_pcnt;
- u64 nr_non_filtered_entries;
- u64 nr_hierarchy_entries;
- u64 nr_callchain_rows;
-};
-
extern void hist_browser__init_hpp(void);
-static int hists__browser_title(struct hists *hists,
- struct hist_browser_timer *hbt,
- char *bf, size_t size);
+static int perf_evsel_browser_title(struct hist_browser *browser,
+ char *bf, size_t size);
static void hist_browser__update_nr_entries(struct hist_browser *hb);
static struct rb_node *hists__filter_entries(struct rb_node *nd,
@@ -585,7 +567,12 @@
"Or reduce the sampling frequency.");
}
-static int hist_browser__run(struct hist_browser *browser, const char *help)
+static int hist_browser__title(struct hist_browser *browser, char *bf, size_t size)
+{
+ return browser->title ? browser->title(browser, bf, size) : 0;
+}
+
+int hist_browser__run(struct hist_browser *browser, const char *help)
{
int key;
char title[160];
@@ -595,7 +582,7 @@
browser->b.entries = &browser->hists->entries;
browser->b.nr_entries = hist_browser__nr_entries(browser);
- hists__browser_title(browser->hists, hbt, title, sizeof(title));
+ hist_browser__title(browser, title, sizeof(title));
if (ui_browser__show(&browser->b, title, "%s", help) < 0)
return -1;
@@ -621,8 +608,7 @@
ui_browser__warn_lost_events(&browser->b);
}
- hists__browser_title(browser->hists,
- hbt, title, sizeof(title));
+ hist_browser__title(browser, title, sizeof(title));
ui_browser__show_title(&browser->b, title);
continue;
}
@@ -1470,7 +1456,7 @@
column++ < browser->b.horiz_scroll)
continue;
- ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists));
+ ret = fmt->width(fmt, NULL, browser->hists);
if (first) {
/* for folded sign */
@@ -1531,7 +1517,7 @@
if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll)
continue;
- ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+ ret = fmt->header(fmt, &dummy_hpp, hists);
if (advance_hpp_check(&dummy_hpp, ret))
break;
@@ -1568,7 +1554,7 @@
if (column++ < browser->b.horiz_scroll)
continue;
- ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+ ret = fmt->header(fmt, &dummy_hpp, hists);
if (advance_hpp_check(&dummy_hpp, ret))
break;
@@ -1605,7 +1591,7 @@
}
first_col = false;
- ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+ ret = fmt->header(fmt, &dummy_hpp, hists);
dummy_hpp.buf[ret] = '\0';
start = trim(dummy_hpp.buf);
@@ -1622,21 +1608,38 @@
return ret;
}
-static void hist_browser__show_headers(struct hist_browser *browser)
+static void hists_browser__hierarchy_headers(struct hist_browser *browser)
{
char headers[1024];
- if (symbol_conf.report_hierarchy)
- hists_browser__scnprintf_hierarchy_headers(browser, headers,
- sizeof(headers));
- else
- hists_browser__scnprintf_headers(browser, headers,
- sizeof(headers));
+ hists_browser__scnprintf_hierarchy_headers(browser, headers,
+ sizeof(headers));
+
ui_browser__gotorc(&browser->b, 0, 0);
ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
}
+static void hists_browser__headers(struct hist_browser *browser)
+{
+ char headers[1024];
+
+ hists_browser__scnprintf_headers(browser, headers,
+ sizeof(headers));
+
+ ui_browser__gotorc(&browser->b, 0, 0);
+ ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
+}
+
+static void hist_browser__show_headers(struct hist_browser *browser)
+{
+ if (symbol_conf.report_hierarchy)
+ hists_browser__hierarchy_headers(browser);
+ else
+ hists_browser__headers(browser);
+}
+
static void ui_browser__hists_init_top(struct ui_browser *browser)
{
if (browser->top == NULL) {
@@ -2039,27 +2042,50 @@
return 0;
}
-static struct hist_browser *hist_browser__new(struct hists *hists,
- struct hist_browser_timer *hbt,
- struct perf_env *env)
+void hist_browser__init(struct hist_browser *browser,
+ struct hists *hists)
+{
+ struct perf_hpp_fmt *fmt;
+
+ browser->hists = hists;
+ browser->b.refresh = hist_browser__refresh;
+ browser->b.refresh_dimensions = hist_browser__refresh_dimensions;
+ browser->b.seek = ui_browser__hists_seek;
+ browser->b.use_navkeypressed = true;
+ browser->show_headers = symbol_conf.show_hist_headers;
+
+ hists__for_each_format(hists, fmt) {
+ perf_hpp__reset_width(fmt, hists);
+ ++browser->b.columns;
+ }
+}
+
+struct hist_browser *hist_browser__new(struct hists *hists)
{
struct hist_browser *browser = zalloc(sizeof(*browser));
- if (browser) {
- browser->hists = hists;
- browser->b.refresh = hist_browser__refresh;
- browser->b.refresh_dimensions = hist_browser__refresh_dimensions;
- browser->b.seek = ui_browser__hists_seek;
- browser->b.use_navkeypressed = true;
- browser->show_headers = symbol_conf.show_hist_headers;
- browser->hbt = hbt;
- browser->env = env;
- }
+ if (browser)
+ hist_browser__init(browser, hists);
return browser;
}
-static void hist_browser__delete(struct hist_browser *browser)
+static struct hist_browser *
+perf_evsel_browser__new(struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct perf_env *env)
+{
+ struct hist_browser *browser = hist_browser__new(evsel__hists(evsel));
+
+ if (browser) {
+ browser->hbt = hbt;
+ browser->env = env;
+ browser->title = perf_evsel_browser_title;
+ }
+ return browser;
+}
+
+void hist_browser__delete(struct hist_browser *browser)
{
free(browser);
}
@@ -2080,10 +2106,11 @@
return timer == NULL;
}
-static int hists__browser_title(struct hists *hists,
- struct hist_browser_timer *hbt,
+static int perf_evsel_browser_title(struct hist_browser *browser,
char *bf, size_t size)
{
+ struct hist_browser_timer *hbt = browser->hbt;
+ struct hists *hists = browser->hists;
char unit;
int printed;
const struct dso *dso = hists->dso_filter;
@@ -2640,7 +2667,7 @@
struct perf_env *env)
{
struct hists *hists = evsel__hists(evsel);
- struct hist_browser *browser = hist_browser__new(hists, hbt, env);
+ struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
struct branch_info *bi;
#define MAX_OPTIONS 16
char *options[MAX_OPTIONS];
@@ -2649,7 +2676,6 @@
int key = -1;
char buf[64];
int delay_secs = hbt ? hbt->refresh : 0;
- struct perf_hpp_fmt *fmt;
#define HIST_BROWSER_HELP_COMMON \
"h/?/F1 Show this window\n" \
@@ -2708,18 +2734,6 @@
memset(options, 0, sizeof(options));
memset(actions, 0, sizeof(actions));
- hists__for_each_format(browser->hists, fmt) {
- perf_hpp__reset_width(fmt, hists);
- /*
- * This is done just once, and activates the horizontal scrolling
- * code in the ui_browser code, it would be better to have a the
- * counter in the perf_hpp code, but I couldn't find doing it here
- * works, FIXME by setting this in hist_browser__new, for now, be
- * clever 8-)
- */
- ++browser->b.columns;
- }
-
if (symbol_conf.col_width_list_str)
perf_hpp__set_user_width(symbol_conf.col_width_list_str);
@@ -3185,7 +3199,7 @@
ui_helpline__push("Press ESC to exit");
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
const char *ev_name = perf_evsel__name(pos);
size_t line_len = strlen(ev_name) + 7;
@@ -3216,7 +3230,7 @@
struct perf_evsel *pos;
nr_entries = 0;
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
if (perf_evsel__is_group_leader(pos))
nr_entries++;
}
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
new file mode 100644
index 0000000..39bd0f2
--- /dev/null
+++ b/tools/perf/ui/browsers/hists.h
@@ -0,0 +1,32 @@
+#ifndef _PERF_UI_BROWSER_HISTS_H_
+#define _PERF_UI_BROWSER_HISTS_H_ 1
+
+#include "ui/browser.h"
+
+struct hist_browser {
+ struct ui_browser b;
+ struct hists *hists;
+ struct hist_entry *he_selection;
+ struct map_symbol *selection;
+ struct hist_browser_timer *hbt;
+ struct pstack *pstack;
+ struct perf_env *env;
+ int print_seq;
+ bool show_dso;
+ bool show_headers;
+ float min_pcnt;
+ u64 nr_non_filtered_entries;
+ u64 nr_hierarchy_entries;
+ u64 nr_callchain_rows;
+
+ /* Get title string. */
+ int (*title)(struct hist_browser *browser,
+ char *bf, size_t size);
+};
+
+struct hist_browser *hist_browser__new(struct hists *hists);
+void hist_browser__delete(struct hist_browser *browser);
+int hist_browser__run(struct hist_browser *browser, const char *help);
+void hist_browser__init(struct hist_browser *browser,
+ struct hists *hists);
+#endif /* _PERF_UI_BROWSER_HISTS_H_ */
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 932adfa..c5f3677 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -549,7 +549,7 @@
strcat(buf, "+");
first_col = false;
- fmt->header(fmt, &hpp, hists_to_evsel(hists));
+ fmt->header(fmt, &hpp, hists);
strcat(buf, ltrim(rtrim(hpp.buf)));
}
}
@@ -627,7 +627,7 @@
gtk_container_add(GTK_CONTAINER(window), vbox);
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
const char *evname = perf_evsel__name(pos);
GtkWidget *scrolled_window;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index af07ffb..4274969 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -215,9 +215,10 @@
static int hpp__width_fn(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp __maybe_unused,
- struct perf_evsel *evsel)
+ struct hists *hists)
{
int len = fmt->user_len ?: fmt->len;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
if (symbol_conf.event_group)
len = max(len, evsel->nr_members * fmt->len);
@@ -229,9 +230,9 @@
}
static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct perf_evsel *evsel)
+ struct hists *hists)
{
- int len = hpp__width_fn(fmt, hpp, evsel);
+ int len = hpp__width_fn(fmt, hpp, hists);
return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
}
@@ -632,7 +633,7 @@
else
ret += 2;
- ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
+ ret += fmt->width(fmt, &dummy_hpp, hists);
}
if (verbose && hists__has(hists, sym)) /* Addr + origin */
@@ -657,7 +658,7 @@
else
ret += 2;
- ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
+ ret += fmt->width(fmt, &dummy_hpp, hists);
}
return ret;
@@ -765,7 +766,7 @@
if (!symbol_conf.report_hierarchy)
return 0;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
hists = evsel__hists(evsel);
perf_hpp_list__for_each_sort_list(list, fmt) {
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 560eb47..f04a631 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -492,14 +492,15 @@
}
static int hist_entry__fprintf(struct hist_entry *he, size_t size,
- struct hists *hists,
- char *bf, size_t bfsz, FILE *fp)
+ char *bf, size_t bfsz, FILE *fp,
+ bool use_callchain)
{
int ret;
struct perf_hpp hpp = {
.buf = bf,
.size = size,
};
+ struct hists *hists = he->hists;
u64 total_period = hists->stats.total_period;
if (size == 0 || size > bfsz)
@@ -512,7 +513,7 @@
ret = fprintf(fp, "%s\n", bf);
- if (symbol_conf.use_callchain)
+ if (use_callchain)
ret += hist_entry_callchain__fprintf(he, total_period, 0, fp);
return ret;
@@ -548,7 +549,7 @@
struct perf_hpp_list_node, list);
perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
- fmt->header(fmt, hpp, hists_to_evsel(hists));
+ fmt->header(fmt, hpp, hists);
fprintf(fp, "%s%s", hpp->buf, sep ?: " ");
}
@@ -568,7 +569,7 @@
header_width += fprintf(fp, "+");
first_col = false;
- fmt->header(fmt, hpp, hists_to_evsel(hists));
+ fmt->header(fmt, hpp, hists);
header_width += fprintf(fp, "%s", trim(hpp->buf));
}
@@ -589,7 +590,7 @@
fprintf(fp, "%s", sep ?: "..");
first_col = false;
- width = fmt->width(fmt, hpp, hists_to_evsel(hists));
+ width = fmt->width(fmt, hpp, hists);
fprintf(fp, "%.*s", width, dots);
}
@@ -606,7 +607,7 @@
width++; /* for '+' sign between column header */
first_col = false;
- width += fmt->width(fmt, hpp, hists_to_evsel(hists));
+ width += fmt->width(fmt, hpp, hists);
}
if (width > header_width)
@@ -622,47 +623,31 @@
return 2;
}
-size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
- int max_cols, float min_pcnt, FILE *fp)
+static int
+hists__fprintf_hierarchy_headers(struct hists *hists,
+ struct perf_hpp *hpp,
+ FILE *fp)
+{
+ struct perf_hpp_list_node *fmt_node;
+ struct perf_hpp_fmt *fmt;
+
+ list_for_each_entry(fmt_node, &hists->hpp_formats, list) {
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
+ perf_hpp__reset_width(fmt, hists);
+ }
+
+ return print_hierarchy_header(hists, hpp, symbol_conf.field_sep, fp);
+}
+
+static int
+hists__fprintf_standard_headers(struct hists *hists,
+ struct perf_hpp *hpp,
+ FILE *fp)
{
struct perf_hpp_fmt *fmt;
- struct perf_hpp_list_node *fmt_node;
- struct rb_node *nd;
- size_t ret = 0;
unsigned int width;
const char *sep = symbol_conf.field_sep;
- int nr_rows = 0;
- char bf[96];
- struct perf_hpp dummy_hpp = {
- .buf = bf,
- .size = sizeof(bf),
- };
bool first = true;
- size_t linesz;
- char *line = NULL;
- unsigned indent;
-
- init_rem_hits();
-
- hists__for_each_format(hists, fmt)
- perf_hpp__reset_width(fmt, hists);
-
- if (symbol_conf.col_width_list_str)
- perf_hpp__set_user_width(symbol_conf.col_width_list_str);
-
- if (!show_header)
- goto print_entries;
-
- fprintf(fp, "# ");
-
- if (symbol_conf.report_hierarchy) {
- list_for_each_entry(fmt_node, &hists->hpp_formats, list) {
- perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
- perf_hpp__reset_width(fmt, hists);
- }
- nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp);
- goto print_entries;
- }
hists__for_each_format(hists, fmt) {
if (perf_hpp__should_skip(fmt, hists))
@@ -673,16 +658,14 @@
else
first = false;
- fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
- fprintf(fp, "%s", bf);
+ fmt->header(fmt, hpp, hists);
+ fprintf(fp, "%s", hpp->buf);
}
fprintf(fp, "\n");
- if (max_rows && ++nr_rows >= max_rows)
- goto out;
if (sep)
- goto print_entries;
+ return 1;
first = true;
@@ -699,20 +682,60 @@
else
first = false;
- width = fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
+ width = fmt->width(fmt, hpp, hists);
for (i = 0; i < width; i++)
fprintf(fp, ".");
}
fprintf(fp, "\n");
- if (max_rows && ++nr_rows >= max_rows)
- goto out;
-
fprintf(fp, "#\n");
- if (max_rows && ++nr_rows >= max_rows)
+ return 3;
+}
+
+static int hists__fprintf_headers(struct hists *hists, FILE *fp)
+{
+ char bf[96];
+ struct perf_hpp dummy_hpp = {
+ .buf = bf,
+ .size = sizeof(bf),
+ };
+
+ fprintf(fp, "# ");
+
+ if (symbol_conf.report_hierarchy)
+ return hists__fprintf_hierarchy_headers(hists, &dummy_hpp, fp);
+ else
+ return hists__fprintf_standard_headers(hists, &dummy_hpp, fp);
+
+}
+
+size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
+ int max_cols, float min_pcnt, FILE *fp,
+ bool use_callchain)
+{
+ struct perf_hpp_fmt *fmt;
+ struct rb_node *nd;
+ size_t ret = 0;
+ const char *sep = symbol_conf.field_sep;
+ int nr_rows = 0;
+ size_t linesz;
+ char *line = NULL;
+ unsigned indent;
+
+ init_rem_hits();
+
+ hists__for_each_format(hists, fmt)
+ perf_hpp__reset_width(fmt, hists);
+
+ if (symbol_conf.col_width_list_str)
+ perf_hpp__set_user_width(symbol_conf.col_width_list_str);
+
+ if (show_header)
+ nr_rows += hists__fprintf_headers(hists, fp);
+
+ if (max_rows && nr_rows >= max_rows)
goto out;
-print_entries:
linesz = hists__sort_list_width(hists) + 3 + 1;
linesz += perf_hpp__color_overhead();
line = malloc(linesz);
@@ -734,7 +757,7 @@
if (percent < min_pcnt)
continue;
- ret += hist_entry__fprintf(h, max_cols, hists, line, linesz, fp);
+ ret += hist_entry__fprintf(h, max_cols, line, linesz, fp, use_callchain);
if (max_rows && ++nr_rows >= max_rows)
break;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8c6c8a0..fced833 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -99,7 +99,10 @@
libperf-$(CONFIG_DWARF) += dwarf-aux.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
+libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c
index c0b43ee..6c80f83 100644
--- a/tools/perf/util/alias.c
+++ b/tools/perf/util/alias.c
@@ -1,4 +1,5 @@
#include "cache.h"
+#include "config.h"
static const char *alias_key;
static char *alias_val;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 767989e..ac5f0d7 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -63,6 +63,7 @@
* @calls: limit branch samples to calls (can be combined with @returns)
* @returns: limit branch samples to returns (can be combined with @calls)
* @callchain: add callchain to 'instructions' events
+ * @thread_stack: feed branches to the thread_stack
* @last_branch: add branch context to 'instruction' events
* @callchain_sz: maximum callchain size
* @last_branch_sz: branch context size
@@ -82,6 +83,7 @@
bool calls;
bool returns;
bool callchain;
+ bool thread_stack;
bool last_branch;
unsigned int callchain_sz;
unsigned int last_branch_sz;
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 493307d..dcc8845 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -339,7 +339,7 @@
}
pr_debug("bpf: config '%s' is ok\n", config_str);
- err = bpf_program__set_private(prog, priv, clear_prog_priv);
+ err = bpf_program__set_priv(prog, priv, clear_prog_priv);
if (err) {
pr_debug("Failed to set priv for program '%s'\n", config_str);
goto errout;
@@ -380,15 +380,14 @@
struct bpf_insn *orig_insns, int orig_insns_cnt,
struct bpf_prog_prep_result *res)
{
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
struct probe_trace_event *tev;
struct perf_probe_event *pev;
- struct bpf_prog_priv *priv;
struct bpf_insn *buf;
size_t prologue_cnt = 0;
int i, err;
- err = bpf_program__get_private(prog, (void **)&priv);
- if (err || !priv)
+ if (IS_ERR(priv) || !priv)
goto errout;
pev = &priv->pev;
@@ -535,13 +534,12 @@
static int hook_load_preprocessor(struct bpf_program *prog)
{
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
struct perf_probe_event *pev;
- struct bpf_prog_priv *priv;
bool need_prologue = false;
int err, i;
- err = bpf_program__get_private(prog, (void **)&priv);
- if (err || !priv) {
+ if (IS_ERR(priv) || !priv) {
pr_debug("Internal error when hook preprocessor\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -607,9 +605,11 @@
if (err)
goto out;
- err = bpf_program__get_private(prog, (void **)&priv);
- if (err || !priv)
+ priv = bpf_program__priv(prog);
+ if (IS_ERR(priv) || !priv) {
+ err = PTR_ERR(priv);
goto out;
+ }
pev = &priv->pev;
err = convert_perf_probe_events(pev, 1);
@@ -645,13 +645,12 @@
{
int err, ret = 0;
struct bpf_program *prog;
- struct bpf_prog_priv *priv;
bpf_object__for_each_program(prog, obj) {
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
int i;
- err = bpf_program__get_private(prog, (void **)&priv);
- if (err || !priv)
+ if (IS_ERR(priv) || !priv)
continue;
for (i = 0; i < priv->pev.ntevs; i++) {
@@ -702,14 +701,12 @@
int err;
bpf_object__for_each_program(prog, obj) {
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
struct probe_trace_event *tev;
struct perf_probe_event *pev;
- struct bpf_prog_priv *priv;
int i, fd;
- err = bpf_program__get_private(prog,
- (void **)&priv);
- if (err || !priv) {
+ if (IS_ERR(priv) || !priv) {
pr_debug("bpf: failed to get private field\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -897,15 +894,12 @@
static int
bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
{
- struct bpf_map_priv *priv;
- const char *map_name;
- int err;
+ const char *map_name = bpf_map__name(map);
+ struct bpf_map_priv *priv = bpf_map__priv(map);
- map_name = bpf_map__get_name(map);
- err = bpf_map__get_private(map, (void **)&priv);
- if (err) {
+ if (IS_ERR(priv)) {
pr_debug("Failed to get private from map %s\n", map_name);
- return err;
+ return PTR_ERR(priv);
}
if (!priv) {
@@ -916,7 +910,7 @@
}
INIT_LIST_HEAD(&priv->ops_list);
- if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) {
+ if (bpf_map__set_priv(map, priv, bpf_map_priv__clear)) {
free(priv);
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -948,30 +942,26 @@
__bpf_map__config_value(struct bpf_map *map,
struct parse_events_term *term)
{
- struct bpf_map_def def;
struct bpf_map_op *op;
- const char *map_name;
- int err;
+ const char *map_name = bpf_map__name(map);
+ const struct bpf_map_def *def = bpf_map__def(map);
- map_name = bpf_map__get_name(map);
-
- err = bpf_map__get_def(map, &def);
- if (err) {
+ if (IS_ERR(def)) {
pr_debug("Unable to get map definition from '%s'\n",
map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
- if (def.type != BPF_MAP_TYPE_ARRAY) {
+ if (def->type != BPF_MAP_TYPE_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}
- if (def.key_size < sizeof(unsigned int)) {
+ if (def->key_size < sizeof(unsigned int)) {
pr_debug("Map %s has incorrect key size\n", map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
}
- switch (def.value_size) {
+ switch (def->value_size) {
case 1:
case 2:
case 4:
@@ -1014,12 +1004,10 @@
struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
- struct bpf_map_def def;
+ const struct bpf_map_def *def;
struct bpf_map_op *op;
- const char *map_name;
- int err;
+ const char *map_name = bpf_map__name(map);
- map_name = bpf_map__get_name(map);
evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
if (!evsel) {
pr_debug("Event (for '%s') '%s' doesn't exist\n",
@@ -1027,18 +1015,18 @@
return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
}
- err = bpf_map__get_def(map, &def);
- if (err) {
+ def = bpf_map__def(map);
+ if (IS_ERR(def)) {
pr_debug("Unable to get map definition from '%s'\n",
map_name);
- return err;
+ return PTR_ERR(def);
}
/*
* No need to check key_size and value_size:
* kernel has already checked them.
*/
- if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1087,9 +1075,8 @@
const char *map_name)
{
struct parse_events_array *array = &term->array;
- struct bpf_map_def def;
+ const struct bpf_map_def *def;
unsigned int i;
- int err;
if (!array->nr_ranges)
return 0;
@@ -1099,8 +1086,8 @@
return -BPF_LOADER_ERRNO__INTERNAL;
}
- err = bpf_map__get_def(map, &def);
- if (err) {
+ def = bpf_map__def(map);
+ if (IS_ERR(def)) {
pr_debug("ERROR: Unable to get map definition from '%s'\n",
map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
@@ -1111,7 +1098,7 @@
size_t length = array->ranges[i].length;
unsigned int idx = start + length - 1;
- if (idx >= def.max_entries) {
+ if (idx >= def->max_entries) {
pr_debug("ERROR: index %d too large\n", idx);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
}
@@ -1147,7 +1134,7 @@
goto out;
}
- map = bpf_object__get_map_by_name(obj, map_name);
+ map = bpf_object__find_map_by_name(obj, map_name);
if (!map) {
pr_debug("ERROR: Map %s doesn't exist\n", map_name);
err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
@@ -1204,14 +1191,14 @@
}
typedef int (*map_config_func_t)(const char *name, int map_fd,
- struct bpf_map_def *pdef,
+ const struct bpf_map_def *pdef,
struct bpf_map_op *op,
void *pkey, void *arg);
static int
foreach_key_array_all(map_config_func_t func,
void *arg, const char *name,
- int map_fd, struct bpf_map_def *pdef,
+ int map_fd, const struct bpf_map_def *pdef,
struct bpf_map_op *op)
{
unsigned int i;
@@ -1231,7 +1218,7 @@
static int
foreach_key_array_ranges(map_config_func_t func, void *arg,
const char *name, int map_fd,
- struct bpf_map_def *pdef,
+ const struct bpf_map_def *pdef,
struct bpf_map_op *op)
{
unsigned int i, j;
@@ -1261,15 +1248,12 @@
void *arg)
{
int err, map_fd;
- const char *name;
struct bpf_map_op *op;
- struct bpf_map_def def;
- struct bpf_map_priv *priv;
+ const struct bpf_map_def *def;
+ const char *name = bpf_map__name(map);
+ struct bpf_map_priv *priv = bpf_map__priv(map);
- name = bpf_map__get_name(map);
-
- err = bpf_map__get_private(map, (void **)&priv);
- if (err) {
+ if (IS_ERR(priv)) {
pr_debug("ERROR: failed to get private from map %s\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -1278,29 +1262,29 @@
return 0;
}
- err = bpf_map__get_def(map, &def);
- if (err) {
+ def = bpf_map__def(map);
+ if (IS_ERR(def)) {
pr_debug("ERROR: failed to get definition from map %s\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
- map_fd = bpf_map__get_fd(map);
+ map_fd = bpf_map__fd(map);
if (map_fd < 0) {
pr_debug("ERROR: failed to get fd from map %s\n", name);
return map_fd;
}
list_for_each_entry(op, &priv->ops_list, list) {
- switch (def.type) {
+ switch (def->type) {
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
switch (op->key_type) {
case BPF_MAP_KEY_ALL:
err = foreach_key_array_all(func, arg, name,
- map_fd, &def, op);
+ map_fd, def, op);
break;
case BPF_MAP_KEY_RANGES:
err = foreach_key_array_ranges(func, arg, name,
- map_fd, &def,
+ map_fd, def,
op);
break;
default:
@@ -1410,7 +1394,7 @@
static int
apply_obj_config_map_for_key(const char *name, int map_fd,
- struct bpf_map_def *pdef __maybe_unused,
+ const struct bpf_map_def *pdef,
struct bpf_map_op *op,
void *pkey, void *arg __maybe_unused)
{
@@ -1475,9 +1459,9 @@
#define bpf__for_each_stdout_map(pos, obj, objtmp) \
bpf__for_each_map(pos, obj, objtmp) \
- if (bpf_map__get_name(pos) && \
+ if (bpf_map__name(pos) && \
(strcmp("__bpf_stdout__", \
- bpf_map__get_name(pos)) == 0))
+ bpf_map__name(pos)) == 0))
int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
{
@@ -1489,10 +1473,9 @@
bool need_init = false;
bpf__for_each_stdout_map(map, obj, tmp) {
- struct bpf_map_priv *priv;
+ struct bpf_map_priv *priv = bpf_map__priv(map);
- err = bpf_map__get_private(map, (void **)&priv);
- if (err)
+ if (IS_ERR(priv))
return -BPF_LOADER_ERRNO__INTERNAL;
/*
@@ -1520,10 +1503,9 @@
}
bpf__for_each_stdout_map(map, obj, tmp) {
- struct bpf_map_priv *priv;
+ struct bpf_map_priv *priv = bpf_map__priv(map);
- err = bpf_map__get_private(map, (void **)&priv);
- if (err)
+ if (IS_ERR(priv))
return -BPF_LOADER_ERRNO__INTERNAL;
if (priv)
continue;
@@ -1533,7 +1515,7 @@
if (!priv)
return -ENOMEM;
- err = bpf_map__set_private(map, priv, bpf_map_priv__clear);
+ err = bpf_map__set_priv(map, priv, bpf_map_priv__clear);
if (err) {
bpf_map_priv__clear(map, priv);
return err;
@@ -1677,7 +1659,7 @@
{
bpf__strerror_head(err, buf, size);
case LIBBPF_ERRNO__KVER: {
- unsigned int obj_kver = bpf_object__get_kversion(obj);
+ unsigned int obj_kver = bpf_object__kversion(obj);
unsigned int real_kver;
if (fetch_kernel_version(&real_kver, NULL, 0)) {
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 67e5966..62b1473 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -144,7 +144,29 @@
return ret;
}
-static char *build_id__filename(const char *sbuild_id, char *bf, size_t size)
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+ size_t size)
+{
+ bool retry_old = true;
+
+ snprintf(bf, size, "%s/%s/%s/kallsyms",
+ buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+retry:
+ if (!access(bf, F_OK))
+ return bf;
+ if (retry_old) {
+ /* Try old style kallsyms cache */
+ snprintf(bf, size, "%s/%s/%s",
+ buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+ retry_old = false;
+ goto retry;
+ }
+
+ return NULL;
+}
+
+static char *build_id_cache__linkname(const char *sbuild_id, char *bf,
+ size_t size)
{
char *tmp = bf;
int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
@@ -154,23 +176,52 @@
return bf;
}
+static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso)
+{
+ return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : "elf");
+}
+
char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
{
- char build_id_hex[SBUILD_ID_SIZE];
+ bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
+ bool is_vdso = dso__is_vdso((struct dso *)dso);
+ char sbuild_id[SBUILD_ID_SIZE];
+ char *linkname;
+ bool alloc = (bf == NULL);
+ int ret;
if (!dso->has_build_id)
return NULL;
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex);
- return build_id__filename(build_id_hex, bf, size);
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+ if (!linkname)
+ return NULL;
+
+ /* Check if old style build_id cache */
+ if (is_regular_file(linkname))
+ ret = asnprintf(&bf, size, "%s", linkname);
+ else
+ ret = asnprintf(&bf, size, "%s/%s", linkname,
+ build_id_cache__basename(is_kallsyms, is_vdso));
+ if (ret < 0 || (!alloc && size < (unsigned int)ret))
+ bf = NULL;
+ free(linkname);
+
+ return bf;
}
bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
{
- char *id_name, *ch;
+ char *id_name = NULL, *ch;
struct stat sb;
+ char sbuild_id[SBUILD_ID_SIZE];
- id_name = dso__build_id_filename(dso, bf, size);
+ if (!dso->has_build_id)
+ goto err;
+
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ id_name = build_id_cache__linkname(sbuild_id, NULL, 0);
if (!id_name)
goto err;
if (access(id_name, F_OK))
@@ -194,18 +245,14 @@
if (ch - 3 < bf)
goto err;
+ free(id_name);
return strncmp(".ko", ch - 3, 3) == 0;
err:
- /*
- * If dso__build_id_filename work, get id_name again,
- * because id_name points to bf and is broken.
- */
- if (id_name)
- id_name = dso__build_id_filename(dso, bf, size);
pr_err("Invalid build id: %s\n", id_name ? :
dso->long_name ? :
dso->short_name ? :
"[unknown]");
+ free(id_name);
return false;
}
@@ -340,8 +387,8 @@
no_buildid_cache = true;
}
-static char *build_id_cache__dirname_from_path(const char *name,
- bool is_kallsyms, bool is_vdso)
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+ bool is_kallsyms, bool is_vdso)
{
char *realname = (char *)name, *filename;
bool slash = is_kallsyms || is_vdso;
@@ -352,8 +399,9 @@
return NULL;
}
- if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "",
- is_vdso ? DSO__NAME_VDSO : realname) < 0)
+ if (asprintf(&filename, "%s%s%s%s%s", buildid_dir, slash ? "/" : "",
+ is_vdso ? DSO__NAME_VDSO : realname,
+ sbuild_id ? "/" : "", sbuild_id ?: "") < 0)
filename = NULL;
if (!slash)
@@ -368,7 +416,7 @@
char *dir_name;
int ret = 0;
- dir_name = build_id_cache__dirname_from_path(pathname, false, false);
+ dir_name = build_id_cache__cachedir(NULL, pathname, false, false);
if (!dir_name)
return -ENOMEM;
@@ -385,7 +433,7 @@
{
const size_t size = PATH_MAX;
char *realname = NULL, *filename = NULL, *dir_name = NULL,
- *linkname = zalloc(size), *targetname, *tmp;
+ *linkname = zalloc(size), *tmp;
int err = -1;
if (!is_kallsyms) {
@@ -394,14 +442,22 @@
goto out_free;
}
- dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso);
+ dir_name = build_id_cache__cachedir(sbuild_id, name,
+ is_kallsyms, is_vdso);
if (!dir_name)
goto out_free;
+ /* Remove old style build-id cache */
+ if (is_regular_file(dir_name))
+ if (unlink(dir_name))
+ goto out_free;
+
if (mkdir_p(dir_name, 0755))
goto out_free;
- if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) {
+ /* Save the allocated buildid dirname */
+ if (asprintf(&filename, "%s/%s", dir_name,
+ build_id_cache__basename(is_kallsyms, is_vdso)) < 0) {
filename = NULL;
goto out_free;
}
@@ -415,7 +471,7 @@
goto out_free;
}
- if (!build_id__filename(sbuild_id, linkname, size))
+ if (!build_id_cache__linkname(sbuild_id, linkname, size))
goto out_free;
tmp = strrchr(linkname, '/');
*tmp = '\0';
@@ -424,10 +480,10 @@
goto out_free;
*tmp = '/';
- targetname = filename + strlen(buildid_dir) - 5;
- memcpy(targetname, "../..", 5);
+ tmp = dir_name + strlen(buildid_dir) - 5;
+ memcpy(tmp, "../..", 5);
- if (symlink(targetname, linkname) == 0)
+ if (symlink(tmp, linkname) == 0)
err = 0;
out_free:
if (!is_kallsyms)
@@ -452,7 +508,7 @@
bool build_id_cache__cached(const char *sbuild_id)
{
bool ret = false;
- char *filename = build_id__filename(sbuild_id, NULL, 0);
+ char *filename = build_id_cache__linkname(sbuild_id, NULL, 0);
if (filename && !access(filename, F_OK))
ret = true;
@@ -471,7 +527,7 @@
if (filename == NULL || linkname == NULL)
goto out_free;
- if (!build_id__filename(sbuild_id, linkname, size))
+ if (!build_id_cache__linkname(sbuild_id, linkname, size))
goto out_free;
if (access(linkname, F_OK))
@@ -489,7 +545,7 @@
tmp = strrchr(linkname, '/') + 1;
snprintf(tmp, size - (tmp - linkname), "%s", filename);
- if (unlink(linkname))
+ if (rm_rf(linkname))
goto out_free;
err = 0;
@@ -501,7 +557,7 @@
static int dso__cache_build_id(struct dso *dso, struct machine *machine)
{
- bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
+ bool is_kallsyms = dso__is_kallsyms(dso);
bool is_vdso = dso__is_vdso(dso);
const char *name = dso->long_name;
char nm[PATH_MAX];
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 64af3e2..d8c7f2f 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -14,6 +14,8 @@
int build_id__sprintf(const u8 *build_id, int len, char *bf);
int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+ size_t size);
char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
@@ -28,6 +30,8 @@
int perf_session__write_buildid_table(struct perf_session *session, int fd);
int perf_session__cache_build_ids(struct perf_session *session);
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+ bool is_kallsyms, bool is_vdso);
int build_id_cache__list_build_ids(const char *pathname,
struct strlist **result);
bool build_id_cache__cached(const char *sbuild_id);
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 0d814bb..9f90e36 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -11,30 +11,13 @@
#include <linux/string.h>
#define CMD_EXEC_PATH "--exec-path"
-#define CMD_PERF_DIR "--perf-dir="
-#define CMD_WORK_TREE "--work-tree="
#define CMD_DEBUGFS_DIR "--debugfs-dir="
-#define PERF_DIR_ENVIRONMENT "PERF_DIR"
-#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
-#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
#define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
-extern const char *config_exclusive_filename;
-
-typedef int (*config_fn_t)(const char *, const char *, void *);
-int perf_default_config(const char *, const char *, void *);
-int perf_config(config_fn_t fn, void *);
-int perf_config_int(const char *, const char *);
-u64 perf_config_u64(const char *, const char *);
-int perf_config_bool(const char *, const char *);
-int config_error_nonbool(const char *);
-const char *perf_config_dirname(const char *, const char *);
-const char *perf_etc_perfconfig(void);
-
char *alias_lookup(const char *alias);
int split_cmdline(char *cmdline, const char ***argv);
@@ -45,9 +28,6 @@
return path[0] == '/';
}
-char *strip_path_suffix(const char *path, const char *suffix);
-
char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 65e2a4f..a70f6b5 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -94,6 +94,7 @@
enum perf_call_graph_mode record_mode;
u32 dump_size;
enum chain_mode mode;
+ u16 max_stack;
u32 print_limit;
double min_percent;
sort_chain_func_t sort;
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 90aa1b4..8fdee24 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -81,7 +81,7 @@
/*
* check if cgrp is already defined, if so we reuse it
*/
- evlist__for_each(evlist, counter) {
+ evlist__for_each_entry(evlist, counter) {
cgrp = counter->cgrp;
if (!cgrp)
continue;
@@ -110,7 +110,7 @@
* if add cgroup N, then need to find event N
*/
n = 0;
- evlist__for_each(evlist, counter) {
+ evlist__for_each_entry(evlist, counter) {
if (n == nr_cgroups)
goto found;
n++;
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index 43e84aa..1210ba5 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -1,5 +1,6 @@
#include <linux/kernel.h>
#include "cache.h"
+#include "config.h"
#include "color.h"
#include <math.h>
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index dad7d82..18dae74 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -26,6 +26,7 @@
static const char *config_file_name;
static int config_linenr;
static int config_file_eof;
+static struct perf_config_set *config_set;
const char *config_exclusive_filename;
@@ -275,7 +276,8 @@
break;
}
}
- die("bad config file line %d in %s", config_linenr, config_file_name);
+ pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
+ return -1;
}
static int parse_unit_factor(const char *end, unsigned long *val)
@@ -371,7 +373,7 @@
return !!perf_config_bool_or_int(name, value, &discard);
}
-const char *perf_config_dirname(const char *name, const char *value)
+static const char *perf_config_dirname(const char *name, const char *value)
{
if (!name)
return NULL;
@@ -477,54 +479,6 @@
return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
}
-int perf_config(config_fn_t fn, void *data)
-{
- int ret = 0, found = 0;
- const char *home = NULL;
-
- /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
- if (config_exclusive_filename)
- return perf_config_from_file(fn, config_exclusive_filename, data);
- if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
- ret += perf_config_from_file(fn, perf_etc_perfconfig(),
- data);
- found += 1;
- }
-
- home = getenv("HOME");
- if (perf_config_global() && home) {
- char *user_config = strdup(mkpath("%s/.perfconfig", home));
- struct stat st;
-
- if (user_config == NULL) {
- warning("Not enough memory to process %s/.perfconfig, "
- "ignoring it.", home);
- goto out;
- }
-
- if (stat(user_config, &st) < 0)
- goto out_free;
-
- if (st.st_uid && (st.st_uid != geteuid())) {
- warning("File %s not owned by current user or root, "
- "ignoring it.", user_config);
- goto out_free;
- }
-
- if (!st.st_size)
- goto out_free;
-
- ret += perf_config_from_file(fn, user_config, data);
- found += 1;
-out_free:
- free(user_config);
- }
-out:
- if (found == 0)
- return -1;
- return ret;
-}
-
static struct perf_config_section *find_section(struct list_head *sections,
const char *section_name)
{
@@ -609,8 +563,12 @@
struct perf_config_section *section = NULL;
struct perf_config_item *item = NULL;
struct perf_config_set *set = perf_config_set;
- struct list_head *sections = &set->sections;
+ struct list_head *sections;
+ if (set == NULL)
+ return -1;
+
+ sections = &set->sections;
key = ptr = strdup(var);
if (!key) {
pr_debug("%s: strdup failed\n", __func__);
@@ -641,22 +599,115 @@
out_free:
free(key);
- perf_config_set__delete(set);
return -1;
}
+static int perf_config_set__init(struct perf_config_set *set)
+{
+ int ret = -1;
+ const char *home = NULL;
+
+ /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+ if (config_exclusive_filename)
+ return perf_config_from_file(collect_config, config_exclusive_filename, set);
+ if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+ if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0)
+ goto out;
+ }
+
+ home = getenv("HOME");
+ if (perf_config_global() && home) {
+ char *user_config = strdup(mkpath("%s/.perfconfig", home));
+ struct stat st;
+
+ if (user_config == NULL) {
+ warning("Not enough memory to process %s/.perfconfig, "
+ "ignoring it.", home);
+ goto out;
+ }
+
+ if (stat(user_config, &st) < 0)
+ goto out_free;
+
+ if (st.st_uid && (st.st_uid != geteuid())) {
+ warning("File %s not owned by current user or root, "
+ "ignoring it.", user_config);
+ goto out_free;
+ }
+
+ if (!st.st_size)
+ goto out_free;
+
+ ret = perf_config_from_file(collect_config, user_config, set);
+
+out_free:
+ free(user_config);
+ }
+out:
+ return ret;
+}
+
struct perf_config_set *perf_config_set__new(void)
{
struct perf_config_set *set = zalloc(sizeof(*set));
if (set) {
INIT_LIST_HEAD(&set->sections);
- perf_config(collect_config, set);
+ if (perf_config_set__init(set) < 0) {
+ perf_config_set__delete(set);
+ set = NULL;
+ }
}
return set;
}
+int perf_config(config_fn_t fn, void *data)
+{
+ int ret = 0;
+ char key[BUFSIZ];
+ struct perf_config_section *section;
+ struct perf_config_item *item;
+
+ if (config_set == NULL)
+ return -1;
+
+ perf_config_set__for_each_entry(config_set, section, item) {
+ char *value = item->value;
+
+ if (value) {
+ scnprintf(key, sizeof(key), "%s.%s",
+ section->name, item->name);
+ ret = fn(key, value, data);
+ if (ret < 0) {
+ pr_err("Error: wrong config key-value pair %s=%s\n",
+ key, value);
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+void perf_config__init(void)
+{
+ if (config_set == NULL)
+ config_set = perf_config_set__new();
+}
+
+void perf_config__exit(void)
+{
+ perf_config_set__delete(config_set);
+ config_set = NULL;
+}
+
+void perf_config__refresh(void)
+{
+ perf_config__exit();
+ perf_config__init();
+}
+
static void perf_config_item__delete(struct perf_config_item *item)
{
zfree(&item->name);
@@ -693,6 +744,9 @@
void perf_config_set__delete(struct perf_config_set *set)
{
+ if (set == NULL)
+ return;
+
perf_config_set__purge(set);
free(set);
}
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 22ec626..6f813d4 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -20,7 +20,47 @@
struct list_head sections;
};
+extern const char *config_exclusive_filename;
+
+typedef int (*config_fn_t)(const char *, const char *, void *);
+int perf_default_config(const char *, const char *, void *);
+int perf_config(config_fn_t fn, void *);
+int perf_config_int(const char *, const char *);
+u64 perf_config_u64(const char *, const char *);
+int perf_config_bool(const char *, const char *);
+int config_error_nonbool(const char *);
+const char *perf_etc_perfconfig(void);
+
struct perf_config_set *perf_config_set__new(void);
void perf_config_set__delete(struct perf_config_set *set);
+void perf_config__init(void);
+void perf_config__exit(void);
+void perf_config__refresh(void);
+
+/**
+ * perf_config_sections__for_each - iterate thru all the sections
+ * @list: list_head instance to iterate
+ * @section: struct perf_config_section iterator
+ */
+#define perf_config_sections__for_each_entry(list, section) \
+ list_for_each_entry(section, list, node)
+
+/**
+ * perf_config_items__for_each - iterate thru all the items
+ * @list: list_head instance to iterate
+ * @item: struct perf_config_item iterator
+ */
+#define perf_config_items__for_each_entry(list, item) \
+ list_for_each_entry(item, list, node)
+
+/**
+ * perf_config_set__for_each - iterate thru all the config section-item pairs
+ * @set: evlist instance to iterate
+ * @section: struct perf_config_section iterator
+ * @item: struct perf_config_item iterator
+ */
+#define perf_config_set__for_each_entry(set, section, item) \
+ perf_config_sections__for_each_entry(&set->sections, section) \
+ perf_config_items__for_each_entry(§ion->items, item)
#endif /* __PERF_CONFIG_H */
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 9f53020..4b59879 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -997,7 +997,7 @@
struct perf_evsel *evsel;
int ret;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
ret = add_event(cw, evsel);
if (ret)
return ret;
@@ -1010,7 +1010,7 @@
struct perf_evlist *evlist = session->evlist;
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
struct evsel_priv *priv;
priv = evsel->priv;
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index c9a6dc1..b0c2b5c 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -233,17 +233,6 @@
return 0;
}
-static struct thread *get_main_thread(struct machine *machine, struct thread *thread)
-{
- if (thread->pid_ == thread->tid)
- return thread__get(thread);
-
- if (thread->pid_ == -1)
- return NULL;
-
- return machine__find_thread(machine, thread->pid_, thread->pid_);
-}
-
static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
u64 *dso_db_id, u64 *sym_db_id, u64 *offset)
{
@@ -382,7 +371,7 @@
if (err)
return err;
- main_thread = get_main_thread(al->machine, thread);
+ main_thread = thread__main_thread(al->machine, thread);
if (main_thread)
comm = machine__thread_exec_comm(al->machine, main_thread);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 0953280..76d79d0 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -349,6 +349,11 @@
dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE;
}
+static inline bool dso__is_kallsyms(struct dso *dso)
+{
+ return dso->kernel && dso->long_name[0] != '/';
+}
+
void dso__free_a2l(struct dso *dso);
enum dso_type dso__type(struct dso *dso, struct machine *machine);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e82ba90..1135077 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -100,7 +100,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
perf_evsel__calc_id_pos(evsel);
perf_evlist__set_id_pos(evlist);
@@ -110,7 +110,7 @@
{
struct perf_evsel *pos, *n;
- evlist__for_each_safe(evlist, n, pos) {
+ evlist__for_each_entry_safe(evlist, n, pos) {
list_del_init(&pos->node);
pos->evlist = NULL;
perf_evsel__delete(pos);
@@ -127,6 +127,9 @@
void perf_evlist__delete(struct perf_evlist *evlist)
{
+ if (evlist == NULL)
+ return;
+
perf_evlist__munmap(evlist);
perf_evlist__close(evlist);
cpu_map__put(evlist->cpus);
@@ -161,7 +164,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
__perf_evlist__propagate_maps(evlist, evsel);
}
@@ -190,7 +193,7 @@
{
struct perf_evsel *evsel, *temp;
- __evlist__for_each_safe(list, temp, evsel) {
+ __evlist__for_each_entry_safe(list, temp, evsel) {
list_del_init(&evsel->node);
perf_evlist__add(evlist, evsel);
}
@@ -205,7 +208,7 @@
leader->nr_members = evsel->idx - leader->idx + 1;
- __evlist__for_each(list, evsel) {
+ __evlist__for_each_entry(list, evsel) {
evsel->leader = leader;
}
}
@@ -296,7 +299,7 @@
return 0;
out_delete_partial_list:
- __evlist__for_each_safe(&head, n, evsel)
+ __evlist__for_each_entry_safe(&head, n, evsel)
perf_evsel__delete(evsel);
return -1;
}
@@ -317,7 +320,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
(int)evsel->attr.config == id)
return evsel;
@@ -332,7 +335,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
(strcmp(evsel->name, name) == 0))
return evsel;
@@ -367,7 +370,7 @@
{
struct perf_evsel *pos;
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
if (!perf_evsel__is_group_leader(pos) || !pos->fd)
continue;
perf_evsel__disable(pos);
@@ -380,7 +383,7 @@
{
struct perf_evsel *pos;
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
if (!perf_evsel__is_group_leader(pos) || !pos->fd)
continue;
perf_evsel__enable(pos);
@@ -448,7 +451,7 @@
int nfds = 0;
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->system_wide)
nfds += nr_cpus;
else
@@ -462,9 +465,9 @@
return 0;
}
-static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
+static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx, short revent)
{
- int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
+ int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
/*
* Save the idx so that when we filter out fds POLLHUP'ed we can
* close the associated evlist->mmap[] entry.
@@ -480,10 +483,11 @@
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
- return __perf_evlist__add_pollfd(evlist, fd, -1);
+ return __perf_evlist__add_pollfd(evlist, fd, -1, POLLIN);
}
-static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
+static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
+ void *arg __maybe_unused)
{
struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
@@ -493,7 +497,7 @@
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
return fdarray__filter(&evlist->pollfd, revents_and_mask,
- perf_evlist__munmap_filtered);
+ perf_evlist__munmap_filtered, NULL);
}
int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
@@ -777,7 +781,7 @@
return event;
}
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
{
struct perf_mmap *md = &evlist->mmap[idx];
u64 head;
@@ -832,6 +836,13 @@
return perf_mmap__read(md, false, start, end, &md->prev);
}
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+{
+ if (!evlist->backward)
+ return perf_evlist__mmap_read_forward(evlist, idx);
+ return perf_evlist__mmap_read_backward(evlist, idx);
+}
+
void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
{
struct perf_mmap *md = &evlist->mmap[idx];
@@ -856,9 +867,11 @@
static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
- BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
+ struct perf_mmap *md = &evlist->mmap[idx];
- if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
+ BUG_ON(md->base && atomic_read(&md->refcnt) == 0);
+
+ if (atomic_dec_and_test(&md->refcnt))
__perf_evlist__munmap(evlist, idx);
}
@@ -936,9 +949,12 @@
if (cpu_map__empty(evlist->cpus))
evlist->nr_mmaps = thread_map__nr(evlist->threads);
evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+ if (!evlist->mmap)
+ return -ENOMEM;
+
for (i = 0; i < evlist->nr_mmaps; i++)
evlist->mmap[i].fd = -1;
- return evlist->mmap != NULL ? 0 : -ENOMEM;
+ return 0;
}
struct mmap_params {
@@ -983,15 +999,28 @@
return 0;
}
+static bool
+perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
+ struct perf_evsel *evsel)
+{
+ if (evsel->overwrite)
+ return false;
+ return true;
+}
+
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int cpu,
int thread, int *output)
{
struct perf_evsel *evsel;
+ int revent;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
int fd;
+ if (evsel->overwrite != (evlist->overwrite && evlist->backward))
+ continue;
+
if (evsel->system_wide && thread)
continue;
@@ -1008,6 +1037,8 @@
perf_evlist__mmap_get(evlist, idx);
}
+ revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;
+
/*
* The system_wide flag causes a selected event to be opened
* always without a pid. Consequently it will never get a
@@ -1016,7 +1047,7 @@
* Therefore don't add it for polling.
*/
if (!evsel->system_wide &&
- __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
+ __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) {
perf_evlist__mmap_put(evlist, idx);
return -1;
}
@@ -1231,7 +1262,7 @@
auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
auxtrace_pages, auxtrace_overwrite);
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
evsel->sample_id == NULL &&
perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
@@ -1307,7 +1338,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
__perf_evsel__set_sample_bit(evsel, bit);
}
@@ -1316,7 +1347,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
__perf_evsel__reset_sample_bit(evsel, bit);
}
@@ -1327,7 +1358,7 @@
const int ncpus = cpu_map__nr(evlist->cpus),
nthreads = thread_map__nr(evlist->threads);
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->filter == NULL)
continue;
@@ -1350,7 +1381,7 @@
struct perf_evsel *evsel;
int err = 0;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
continue;
@@ -1404,7 +1435,7 @@
if (evlist->id_pos < 0 || evlist->is_pos < 0)
return false;
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
if (pos->id_pos != evlist->id_pos ||
pos->is_pos != evlist->is_pos)
return false;
@@ -1420,7 +1451,7 @@
if (evlist->combined_sample_type)
return evlist->combined_sample_type;
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
evlist->combined_sample_type |= evsel->attr.sample_type;
return evlist->combined_sample_type;
@@ -1437,7 +1468,7 @@
struct perf_evsel *evsel;
u64 branch_type = 0;
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
branch_type |= evsel->attr.branch_sample_type;
return branch_type;
}
@@ -1448,7 +1479,7 @@
u64 read_format = first->attr.read_format;
u64 sample_type = first->attr.sample_type;
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
if (read_format != pos->attr.read_format)
return false;
}
@@ -1505,7 +1536,7 @@
{
struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
- evlist__for_each_continue(evlist, pos) {
+ evlist__for_each_entry_continue(evlist, pos) {
if (first->attr.sample_id_all != pos->attr.sample_id_all)
return false;
}
@@ -1532,7 +1563,7 @@
int nthreads = thread_map__nr(evlist->threads);
int n;
- evlist__for_each_reverse(evlist, evsel) {
+ evlist__for_each_entry_reverse(evlist, evsel) {
n = evsel->cpus ? evsel->cpus->nr : ncpus;
perf_evsel__close(evsel, n, nthreads);
}
@@ -1586,7 +1617,7 @@
perf_evlist__update_id_pos(evlist);
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
if (err < 0)
goto out_err;
@@ -1747,7 +1778,7 @@
struct perf_evsel *evsel;
size_t printed = 0;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
perf_evsel__name(evsel));
}
@@ -1849,7 +1880,7 @@
if (move_evsel == perf_evlist__first(evlist))
return;
- evlist__for_each_safe(evlist, n, evsel) {
+ evlist__for_each_entry_safe(evlist, n, evsel) {
if (evsel->leader == move_evsel->leader)
list_move_tail(&evsel->node, &move);
}
@@ -1865,7 +1896,7 @@
if (tracking_evsel->tracking)
return;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel != tracking_evsel)
evsel->tracking = false;
}
@@ -1879,7 +1910,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (!evsel->name)
continue;
if (strcmp(str, evsel->name) == 0)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index d740fb8..872912b 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -131,6 +131,8 @@
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
+union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
+ int idx);
union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
int idx);
void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
@@ -249,70 +251,70 @@
struct perf_evsel *move_evsel);
/**
- * __evlist__for_each - iterate thru all the evsels
+ * __evlist__for_each_entry - iterate thru all the evsels
* @list: list_head instance to iterate
* @evsel: struct evsel iterator
*/
-#define __evlist__for_each(list, evsel) \
+#define __evlist__for_each_entry(list, evsel) \
list_for_each_entry(evsel, list, node)
/**
- * evlist__for_each - iterate thru all the evsels
+ * evlist__for_each_entry - iterate thru all the evsels
* @evlist: evlist instance to iterate
* @evsel: struct evsel iterator
*/
-#define evlist__for_each(evlist, evsel) \
- __evlist__for_each(&(evlist)->entries, evsel)
+#define evlist__for_each_entry(evlist, evsel) \
+ __evlist__for_each_entry(&(evlist)->entries, evsel)
/**
- * __evlist__for_each_continue - continue iteration thru all the evsels
+ * __evlist__for_each_entry_continue - continue iteration thru all the evsels
* @list: list_head instance to iterate
* @evsel: struct evsel iterator
*/
-#define __evlist__for_each_continue(list, evsel) \
+#define __evlist__for_each_entry_continue(list, evsel) \
list_for_each_entry_continue(evsel, list, node)
/**
- * evlist__for_each_continue - continue iteration thru all the evsels
+ * evlist__for_each_entry_continue - continue iteration thru all the evsels
* @evlist: evlist instance to iterate
* @evsel: struct evsel iterator
*/
-#define evlist__for_each_continue(evlist, evsel) \
- __evlist__for_each_continue(&(evlist)->entries, evsel)
+#define evlist__for_each_entry_continue(evlist, evsel) \
+ __evlist__for_each_entry_continue(&(evlist)->entries, evsel)
/**
- * __evlist__for_each_reverse - iterate thru all the evsels in reverse order
+ * __evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
* @list: list_head instance to iterate
* @evsel: struct evsel iterator
*/
-#define __evlist__for_each_reverse(list, evsel) \
+#define __evlist__for_each_entry_reverse(list, evsel) \
list_for_each_entry_reverse(evsel, list, node)
/**
- * evlist__for_each_reverse - iterate thru all the evsels in reverse order
+ * evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
* @evlist: evlist instance to iterate
* @evsel: struct evsel iterator
*/
-#define evlist__for_each_reverse(evlist, evsel) \
- __evlist__for_each_reverse(&(evlist)->entries, evsel)
+#define evlist__for_each_entry_reverse(evlist, evsel) \
+ __evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
/**
- * __evlist__for_each_safe - safely iterate thru all the evsels
+ * __evlist__for_each_entry_safe - safely iterate thru all the evsels
* @list: list_head instance to iterate
* @tmp: struct evsel temp iterator
* @evsel: struct evsel iterator
*/
-#define __evlist__for_each_safe(list, tmp, evsel) \
+#define __evlist__for_each_entry_safe(list, tmp, evsel) \
list_for_each_entry_safe(evsel, tmp, list, node)
/**
- * evlist__for_each_safe - safely iterate thru all the evsels
+ * evlist__for_each_entry_safe - safely iterate thru all the evsels
* @evlist: evlist instance to iterate
* @evsel: struct evsel iterator
* @tmp: struct evsel temp iterator
*/
-#define evlist__for_each_safe(evlist, tmp, evsel) \
- __evlist__for_each_safe(&(evlist)->entries, tmp, evsel)
+#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
+ __evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel)
void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
struct perf_evsel *tracking_evsel);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 5d7037e..1d8f2bb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -572,6 +572,8 @@
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ attr->sample_max_stack = param->max_stack;
+
if (param->record_mode == CALLCHAIN_LBR) {
if (!opts->branch_stack) {
if (attr->exclude_user) {
@@ -635,7 +637,8 @@
struct perf_event_attr *attr = &evsel->attr;
struct callchain_param param;
u32 dump_size = 0;
- char *callgraph_buf = NULL;
+ int max_stack = 0;
+ const char *callgraph_buf = NULL;
/* callgraph default */
param.record_mode = callchain_param.record_mode;
@@ -662,6 +665,9 @@
case PERF_EVSEL__CONFIG_TERM_STACK_USER:
dump_size = term->val.stack_user;
break;
+ case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
+ max_stack = term->val.max_stack;
+ break;
case PERF_EVSEL__CONFIG_TERM_INHERIT:
/*
* attr->inherit should has already been set by
@@ -677,7 +683,12 @@
}
/* User explicitly set per-event callgraph, clear the old setting and reset. */
- if ((callgraph_buf != NULL) || (dump_size > 0)) {
+ if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
+ if (max_stack) {
+ param.max_stack = max_stack;
+ if (callgraph_buf == NULL)
+ callgraph_buf = "fp";
+ }
/* parse callgraph parameters */
if (callgraph_buf != NULL) {
@@ -1329,6 +1340,7 @@
PRINT_ATTRf(clockid, p_signed);
PRINT_ATTRf(sample_regs_intr, p_hex);
PRINT_ATTRf(aux_watermark, p_unsigned);
+ PRINT_ATTRf(sample_max_stack, p_unsigned);
return ret;
}
@@ -1377,8 +1389,11 @@
if (perf_missing_features.lbr_flags)
evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
PERF_SAMPLE_BRANCH_NO_CYCLES);
- if (perf_missing_features.write_backward)
+ if (perf_missing_features.write_backward) {
+ if (evsel->overwrite)
+ return -EINVAL;
evsel->attr.write_backward = false;
+ }
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
@@ -1441,12 +1456,6 @@
err = -EINVAL;
goto out_close;
}
-
- if (evsel->overwrite &&
- perf_missing_features.write_backward) {
- err = -EINVAL;
- goto out_close;
- }
}
}
@@ -1484,7 +1493,10 @@
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
+ if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
+ perf_missing_features.write_backward = true;
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
perf_missing_features.clockid_wrong = true;
goto fallback_missing_features;
} else if (!perf_missing_features.clockid && evsel->attr.use_clockid) {
@@ -1509,12 +1521,7 @@
PERF_SAMPLE_BRANCH_NO_FLAGS))) {
perf_missing_features.lbr_flags = true;
goto fallback_missing_features;
- } else if (!perf_missing_features.write_backward &&
- evsel->attr.write_backward) {
- perf_missing_features.write_backward = true;
- goto fallback_missing_features;
}
-
out_close:
do {
while (--thread >= 0) {
@@ -2239,17 +2246,11 @@
return sample->raw_data + offset;
}
-u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
- const char *name)
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
+ bool needs_swap)
{
- struct format_field *field = perf_evsel__field(evsel, name);
- void *ptr;
u64 value;
-
- if (!field)
- return 0;
-
- ptr = sample->raw_data + field->offset;
+ void *ptr = sample->raw_data + field->offset;
switch (field->size) {
case 1:
@@ -2267,7 +2268,7 @@
return 0;
}
- if (!evsel->needs_swap)
+ if (!needs_swap)
return value;
switch (field->size) {
@@ -2284,6 +2285,17 @@
return 0;
}
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+ const char *name)
+{
+ struct format_field *field = perf_evsel__field(evsel, name);
+
+ if (!field)
+ return 0;
+
+ return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
+}
+
bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
char *msg, size_t msgsize)
{
@@ -2372,6 +2384,9 @@
"No such device - did you specify an out-of-range profile CPU?");
break;
case EOPNOTSUPP:
+ if (evsel->attr.sample_period != 0)
+ return scnprintf(msg, size, "%s",
+ "PMU Hardware doesn't support sampling/overflow-interrupts.");
if (evsel->attr.precise_ip)
return scnprintf(msg, size, "%s",
"\'precise\' request may not be supported. Try removing 'p' modifier.");
@@ -2389,6 +2404,8 @@
"We found oprofile daemon running, please stop it and try again.");
break;
case EINVAL:
+ if (evsel->overwrite && perf_missing_features.write_backward)
+ return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
if (perf_missing_features.clockid)
return scnprintf(msg, size, "clockid feature not supported.");
if (perf_missing_features.clockid_wrong)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index c1f1015..828ddd1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -44,6 +44,7 @@
PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_INHERIT,
+ PERF_EVSEL__CONFIG_TERM_MAX_STACK,
PERF_EVSEL__CONFIG_TERM_MAX,
};
@@ -56,6 +57,7 @@
bool time;
char *callgraph;
u64 stack_user;
+ int max_stack;
bool inherit;
} val;
};
@@ -259,6 +261,8 @@
struct format_field;
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
+
struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
#define perf_evsel__match(evsel, t, c) \
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
new file mode 100644
index 0000000..116debe
--- /dev/null
+++ b/tools/perf/util/group.h
@@ -0,0 +1,7 @@
+#ifndef GROUP_H
+#define GROUP_H 1
+
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+
+#endif
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 08852dd..c5cd269 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -336,7 +336,7 @@
if (ret < 0)
return ret;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
ret = do_write(fd, &evsel->attr, sz);
if (ret < 0)
return ret;
@@ -801,7 +801,7 @@
if (ret < 0)
return ret;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (perf_evsel__is_group_leader(evsel) &&
evsel->nr_members > 1) {
const char *name = evsel->group_name ?: "{anon_group}";
@@ -1425,7 +1425,7 @@
session = container_of(ph, struct perf_session, header);
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
if (perf_evsel__is_group_leader(evsel) &&
evsel->nr_members > 1) {
fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
@@ -1703,7 +1703,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->idx == idx)
return evsel;
}
@@ -2075,7 +2075,7 @@
session->evlist->nr_groups = nr_groups;
i = nr = 0;
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
if (evsel->idx == (int) desc[i].leader_idx) {
evsel->leader = evsel;
/* {anon_group} is a dummy name */
@@ -2383,7 +2383,7 @@
lseek(fd, sizeof(f_header), SEEK_SET);
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
evsel->id_offset = lseek(fd, 0, SEEK_CUR);
err = do_write(fd, evsel->id, evsel->ids * sizeof(u64));
if (err < 0) {
@@ -2394,7 +2394,7 @@
attr_offset = lseek(fd, 0, SEEK_CUR);
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
f_attr = (struct perf_file_attr){
.attr = evsel->attr,
.ids = {
@@ -2828,7 +2828,7 @@
{
struct perf_evsel *pos;
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
if (pos->attr.type == PERF_TYPE_TRACEPOINT &&
perf_evsel__prepare_tracepoint_event(pos, pevent))
return -1;
@@ -3127,7 +3127,7 @@
struct perf_evsel *evsel;
int err = 0;
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids,
evsel->id, process);
if (err) {
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index d62ccae..776e285 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -1,4 +1,5 @@
#include "cache.h"
+#include "config.h"
#include <subcmd/help.h>
#include "../builtin.h"
#include "levenshtein.h"
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index d1f19e0..e1fcc8d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -79,7 +79,7 @@
len = thread__comm_len(h->thread);
if (hists__new_col_len(hists, HISTC_COMM, len))
- hists__set_col_len(hists, HISTC_THREAD, len + 6);
+ hists__set_col_len(hists, HISTC_THREAD, len + 8);
if (h->ms.map) {
len = dso__name_len(h->ms.map->dso);
@@ -531,13 +531,13 @@
return he;
}
-struct hist_entry *__hists__add_entry(struct hists *hists,
- struct addr_location *al,
- struct symbol *sym_parent,
- struct branch_info *bi,
- struct mem_info *mi,
- struct perf_sample *sample,
- bool sample_self)
+struct hist_entry *hists__add_entry(struct hists *hists,
+ struct addr_location *al,
+ struct symbol *sym_parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct perf_sample *sample,
+ bool sample_self)
{
struct hist_entry entry = {
.thread = al->thread,
@@ -622,8 +622,8 @@
*/
sample->period = cost;
- he = __hists__add_entry(hists, al, iter->parent, NULL, mi,
- sample, true);
+ he = hists__add_entry(hists, al, iter->parent, NULL, mi,
+ sample, true);
if (!he)
return -ENOMEM;
@@ -727,8 +727,8 @@
sample->period = 1;
sample->weight = bi->flags.cycles ? bi->flags.cycles : 1;
- he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
- sample, true);
+ he = hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
+ sample, true);
if (he == NULL)
return -ENOMEM;
@@ -764,8 +764,8 @@
struct perf_sample *sample = iter->sample;
struct hist_entry *he;
- he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
- sample, true);
+ he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+ sample, true);
if (he == NULL)
return -ENOMEM;
@@ -825,8 +825,8 @@
struct hist_entry *he;
int err = 0;
- he = __hists__add_entry(hists, al, iter->parent, NULL, NULL,
- sample, true);
+ he = hists__add_entry(hists, al, iter->parent, NULL, NULL,
+ sample, true);
if (he == NULL)
return -ENOMEM;
@@ -900,8 +900,8 @@
}
}
- he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
- sample, false);
+ he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+ sample, false);
if (he == NULL)
return -ENOMEM;
@@ -1081,7 +1081,7 @@
struct perf_hpp_fmt *fmt, int printed)
{
if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) {
- const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists));
+ const int width = fmt->width(fmt, hpp, he->hists);
if (printed < width) {
advance_hpp(hpp, printed);
printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " ");
@@ -2199,7 +2199,7 @@
struct perf_evsel *pos;
size_t ret = 0;
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp);
}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 7b54ccf..0a03e08 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -120,13 +120,13 @@
extern const struct hist_iter_ops hist_iter_mem;
extern const struct hist_iter_ops hist_iter_cumulative;
-struct hist_entry *__hists__add_entry(struct hists *hists,
- struct addr_location *al,
- struct symbol *parent,
- struct branch_info *bi,
- struct mem_info *mi,
- struct perf_sample *sample,
- bool sample_self);
+struct hist_entry *hists__add_entry(struct hists *hists,
+ struct addr_location *al,
+ struct symbol *parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct perf_sample *sample,
+ bool sample_self);
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
int max_stack_depth, void *arg);
@@ -159,7 +159,8 @@
size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
- int max_cols, float min_pcnt, FILE *fp);
+ int max_cols, float min_pcnt, FILE *fp,
+ bool use_callchain);
size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp);
void hists__filter_by_dso(struct hists *hists);
@@ -214,9 +215,9 @@
struct perf_hpp_fmt {
const char *name;
int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct perf_evsel *evsel);
+ struct hists *hists);
int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct perf_evsel *evsel);
+ struct hists *hists);
int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he);
int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 9df9960..749e6f2 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -422,7 +422,8 @@
}
static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
- struct auxtrace_buffer *buffer)
+ struct auxtrace_buffer *buffer,
+ struct thread *thread)
{
struct branch *branch;
size_t sz, bsz = sizeof(struct branch);
@@ -444,6 +445,12 @@
if (!branch->from && !branch->to)
continue;
intel_bts_get_branch_type(btsq, branch);
+ if (btsq->bts->synth_opts.thread_stack)
+ thread_stack__event(thread, btsq->sample_flags,
+ le64_to_cpu(branch->from),
+ le64_to_cpu(branch->to),
+ btsq->intel_pt_insn.length,
+ buffer->buffer_nr + 1);
if (filter && !(filter & btsq->sample_flags))
continue;
err = intel_bts_synth_branch_sample(btsq, branch);
@@ -507,12 +514,13 @@
goto out_put;
}
- if (!btsq->bts->synth_opts.callchain && thread &&
+ if (!btsq->bts->synth_opts.callchain &&
+ !btsq->bts->synth_opts.thread_stack && thread &&
(!old_buffer || btsq->bts->sampling_mode ||
(btsq->bts->snapshot_mode && !buffer->consecutive)))
thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
- err = intel_bts_process_buffer(btsq, buffer);
+ err = intel_bts_process_buffer(btsq, buffer, thread);
auxtrace_buffer__drop_data(buffer);
@@ -777,7 +785,7 @@
u64 id;
int err;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == bts->pmu_type && evsel->ids) {
found = true;
break;
@@ -905,10 +913,14 @@
if (dump_trace)
return 0;
- if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
bts->synth_opts = *session->itrace_synth_opts;
- else
+ } else {
itrace_synth_opts__set_default(&bts->synth_opts);
+ if (session->itrace_synth_opts)
+ bts->synth_opts.thread_stack =
+ session->itrace_synth_opts->thread_stack;
+ }
if (bts->synth_opts.calls)
bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 1371969..551ff6f 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -39,6 +39,7 @@
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"
+#include "config.h"
#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
@@ -556,7 +557,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(pt->session->evlist, evsel) {
+ evlist__for_each_entry(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
!evsel->attr.exclude_kernel)
return false;
@@ -572,7 +573,7 @@
if (!pt->noretcomp_bit)
return true;
- evlist__for_each(pt->session->evlist, evsel) {
+ evlist__for_each_entry(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, &config) &&
(config & pt->noretcomp_bit))
return false;
@@ -592,7 +593,7 @@
for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
config >>= 1;
- evlist__for_each(pt->session->evlist, evsel) {
+ evlist__for_each_entry(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, &config))
return (config & pt->mtc_freq_bits) >> shift;
}
@@ -608,7 +609,7 @@
if (!pt->tsc_bit || !pt->cap_user_time_zero)
return true;
- evlist__for_each(pt->session->evlist, evsel) {
+ evlist__for_each_entry(pt->session->evlist, evsel) {
if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
return true;
if (intel_pt_get_config(pt, &evsel->attr, &config)) {
@@ -625,7 +626,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(pt->session->evlist, evsel) {
+ evlist__for_each_entry(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
!evsel->attr.exclude_kernel)
return true;
@@ -642,7 +643,7 @@
if (!pt->tsc_bit)
return false;
- evlist__for_each(pt->session->evlist, evsel) {
+ evlist__for_each_entry(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, &config)) {
if (config & pt->tsc_bit)
have_tsc = true;
@@ -1233,7 +1234,7 @@
if (!(state->type & INTEL_PT_BRANCH))
return 0;
- if (pt->synth_opts.callchain)
+ if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
state->to_ip, ptq->insn_len,
state->trace_nr);
@@ -1850,7 +1851,7 @@
u64 id;
int err;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == pt->pmu_type && evsel->ids) {
found = true;
break;
@@ -1930,7 +1931,7 @@
pt->sample_transactions = true;
pt->transactions_id = id;
id += 1;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->id && evsel->id[0] == pt->transactions_id) {
if (evsel->name)
zfree(&evsel->name);
@@ -1968,7 +1969,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each_reverse(evlist, evsel) {
+ evlist__for_each_entry_reverse(evlist, evsel) {
const char *name = perf_evsel__name(evsel);
if (!strcmp(name, "sched:sched_switch"))
@@ -1982,7 +1983,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.context_switch)
return true;
}
@@ -2136,6 +2137,9 @@
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
}
+ if (session->itrace_synth_opts)
+ pt->synth_opts.thread_stack =
+ session->itrace_synth_opts->thread_stack;
}
if (pt->synth_opts.log)
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
index aa6877d..020b9ca 100644
--- a/tools/perf/util/intlist.h
+++ b/tools/perf/util/intlist.h
@@ -57,21 +57,21 @@
}
/**
- * intlist_for_each - iterate over a intlist
+ * intlist__for_each_entry - iterate over a intlist
* @pos: the &struct int_node to use as a loop cursor.
* @ilist: the &struct intlist for loop.
*/
-#define intlist__for_each(pos, ilist) \
+#define intlist__for_each_entry(pos, ilist) \
for (pos = intlist__first(ilist); pos; pos = intlist__next(pos))
/**
- * intlist_for_each_safe - iterate over a intlist safe against removal of
+ * intlist__for_each_entry_safe - iterate over a intlist safe against removal of
* int_node
* @pos: the &struct int_node to use as a loop cursor.
* @n: another &struct int_node to use as temporary storage.
* @ilist: the &struct intlist for loop.
*/
-#define intlist__for_each_safe(pos, n, ilist) \
+#define intlist__for_each_entry_safe(pos, n, ilist) \
for (pos = intlist__first(ilist), n = intlist__next(pos); pos;\
pos = n, n = intlist__next(n))
#endif /* __PERF_INTLIST_H */
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 86afe96..9f3305f 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -108,7 +108,7 @@
/*
* check that all events use CLOCK_MONOTONIC
*/
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC)
return -1;
}
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
new file mode 100644
index 0000000..6559bc5
--- /dev/null
+++ b/tools/perf/util/libunwind/arm64.c
@@ -0,0 +1,40 @@
+/*
+ * This file setups defines to compile arch specific binary from the
+ * generic one.
+ *
+ * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
+ * name and the defination of this function is included directly from
+ * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each arm64 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind, should be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum)
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_ARM64_PC
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-aarch64.h"
+#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#include "../../arch/arm64/util/unwind-libunwind.c"
+
+/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind,
+ * assign NO_LIBUNWIND_DEBUG_FRAME_AARCH64 to it for compiling arm64
+ * unwind methods.
+ */
+#undef NO_LIBUNWIND_DEBUG_FRAME
+#ifdef NO_LIBUNWIND_DEBUG_FRAME_AARCH64
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+arm64_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c
new file mode 100644
index 0000000..957ffff
--- /dev/null
+++ b/tools/perf/util/libunwind/x86_32.c
@@ -0,0 +1,43 @@
+/*
+ * This file setups defines to compile arch specific binary from the
+ * generic one.
+ *
+ * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
+ * name and the defination of this function is included directly from
+ * 'arch/x86/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each x86 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind, should be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum)
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_X86_IP
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-x86.h"
+#include <../../../../arch/x86/include/uapi/asm/perf_regs.h>
+
+/* HAVE_ARCH_X86_64_SUPPORT is used in'arch/x86/util/unwind-libunwind.c'
+ * for x86_32, we undef it to compile code for x86_32 only.
+ */
+#undef HAVE_ARCH_X86_64_SUPPORT
+#include "../../arch/x86/util/unwind-libunwind.c"
+
+/* Explicitly define NO_LIBUNWIND_DEBUG_FRAME, because non-ARM has no
+ * dwarf_find_debug_frame() function.
+ */
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 33071d6..40b6f72 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include "debug.h"
#include "llvm-utils.h"
+#include "config.h"
#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \
"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
@@ -42,6 +43,8 @@
llvm_param.kbuild_dir = strdup(value);
else if (!strcmp(var, "kbuild-opts"))
llvm_param.kbuild_opts = strdup(value);
+ else if (!strcmp(var, "dump-obj"))
+ llvm_param.dump_obj = !!perf_config_bool(var, value);
else
return -1;
llvm_param.user_set_param = true;
@@ -326,6 +329,42 @@
pr_debug("include option is set to %s\n", *kbuild_include_opts);
}
+static void
+dump_obj(const char *path, void *obj_buf, size_t size)
+{
+ char *obj_path = strdup(path);
+ FILE *fp;
+ char *p;
+
+ if (!obj_path) {
+ pr_warning("WARNING: No enough memory, skip object dumping\n");
+ return;
+ }
+
+ p = strrchr(obj_path, '.');
+ if (!p || (strcmp(p, ".c") != 0)) {
+ pr_warning("WARNING: invalid llvm source path: '%s', skip object dumping\n",
+ obj_path);
+ goto out;
+ }
+
+ p[1] = 'o';
+ fp = fopen(obj_path, "wb");
+ if (!fp) {
+ pr_warning("WARNING: failed to open '%s': %s, skip object dumping\n",
+ obj_path, strerror(errno));
+ goto out;
+ }
+
+ pr_info("LLVM: dumping %s\n", obj_path);
+ if (fwrite(obj_buf, size, 1, fp) != 1)
+ pr_warning("WARNING: failed to write to file '%s': %s, skip object dumping\n",
+ obj_path, strerror(errno));
+ fclose(fp);
+out:
+ free(obj_path);
+}
+
int llvm__compile_bpf(const char *path, void **p_obj_buf,
size_t *p_obj_buf_sz)
{
@@ -411,6 +450,10 @@
free(kbuild_dir);
free(kbuild_include_opts);
+
+ if (llvm_param.dump_obj)
+ dump_obj(path, obj_buf, obj_buf_sz);
+
if (!p_obj_buf)
free(obj_buf);
else
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
index 23b9a74..9f501ce 100644
--- a/tools/perf/util/llvm-utils.h
+++ b/tools/perf/util/llvm-utils.h
@@ -30,6 +30,11 @@
*/
const char *kbuild_opts;
/*
+ * Default is false. If set to true, write compiling result
+ * to object file.
+ */
+ bool dump_obj;
+ /*
* Default is false. If one of the above fields is set by user
* explicitly then user_set_llvm is set to true. This is used
* for perf test. If user doesn't set anything in .perfconfig
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b177218..bc2cdbd 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -138,8 +138,10 @@
void machine__delete(struct machine *machine)
{
- machine__exit(machine);
- free(machine);
+ if (machine) {
+ machine__exit(machine);
+ free(machine);
+ }
}
void machines__init(struct machines *machines)
@@ -1353,11 +1355,16 @@
if (map == NULL)
goto out_problem_map;
- thread__insert_map(thread, map);
+ ret = thread__insert_map(thread, map);
+ if (ret)
+ goto out_problem_insert;
+
thread__put(thread);
map__put(map);
return 0;
+out_problem_insert:
+ map__put(map);
out_problem_map:
thread__put(thread);
out_problem:
@@ -1403,11 +1410,16 @@
if (map == NULL)
goto out_problem_map;
- thread__insert_map(thread, map);
+ ret = thread__insert_map(thread, map);
+ if (ret)
+ goto out_problem_insert;
+
thread__put(thread);
map__put(map);
return 0;
+out_problem_insert:
+ map__put(map);
out_problem_map:
thread__put(thread);
out_problem:
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 75465f8..bbc368e 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -10,18 +10,33 @@
#include "debug.h"
#include "symbol.h"
+unsigned int perf_mem_events__loads_ldlat = 30;
+
#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
- E("ldlat-loads", "cpu/mem-loads,ldlat=30/P", "mem-loads"),
+ E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"),
E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"),
};
#undef E
#undef E
+static char mem_loads_name[100];
+static bool mem_loads_name__init;
+
char *perf_mem_events__name(int i)
{
+ if (i == PERF_MEM_EVENTS__LOAD) {
+ if (!mem_loads_name__init) {
+ mem_loads_name__init = true;
+ scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ perf_mem_events[i].name,
+ perf_mem_events__loads_ldlat);
+ }
+ return mem_loads_name;
+ }
+
return (char *)perf_mem_events[i].name;
}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 5d6d930..7f69bf9 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -18,6 +18,7 @@
};
extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
+extern unsigned int perf_mem_events__loads_ldlat;
int perf_mem_events__parse(const char *str);
int perf_mem_events__init(void);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c6fd047..ebd87b7 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -900,6 +900,7 @@
[PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size",
[PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit",
[PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit",
+ [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack",
};
static bool config_term_shrinked;
@@ -995,6 +996,9 @@
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ CHECK_TYPE_VAL(NUM);
+ break;
default:
err->str = strdup("unknown term");
err->idx = term->err_term;
@@ -1040,6 +1044,7 @@
case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
case PARSE_EVENTS__TERM_TYPE_INHERIT:
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
return config_term_common(attr, term, err);
default:
if (err) {
@@ -1109,6 +1114,9 @@
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1);
break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
+ break;
default:
break;
}
@@ -1388,7 +1396,7 @@
if (!add && get_event_modifier(&mod, str, NULL))
return -EINVAL;
- __evlist__for_each(list, evsel) {
+ __evlist__for_each_entry(list, evsel) {
if (add && get_event_modifier(&mod, str, evsel))
return -EINVAL;
@@ -1414,7 +1422,7 @@
{
struct perf_evsel *evsel;
- __evlist__for_each(list, evsel) {
+ __evlist__for_each_entry(list, evsel) {
if (!evsel->name)
evsel->name = strdup(name);
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index d740c3c..46c05cc 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -68,6 +68,7 @@
PARSE_EVENTS__TERM_TYPE_STACKSIZE,
PARSE_EVENTS__TERM_TYPE_NOINHERIT,
PARSE_EVENTS__TERM_TYPE_INHERIT,
+ PARSE_EVENTS__TERM_TYPE_MAX_STACK,
__PARSE_EVENTS__TERM_TYPE_NR,
};
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 1477fbc..3c15b33 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -199,6 +199,7 @@
time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
+max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
, { return ','; }
@@ -259,6 +260,7 @@
cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
L1-dcache|l1-d|l1d|L1-data |
L1-icache|l1-i|l1i|L1-instruction |
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 3bf6bf8..cff8bf0 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -14,14 +14,8 @@
static char bad_path[] = "/bad-path/";
/*
- * Two hacks:
+ * One hack:
*/
-
-static const char *get_perf_dir(void)
-{
- return ".";
-}
-
static char *get_pathname(void)
{
static char pathname_array[4][PATH_MAX];
@@ -54,60 +48,3 @@
return bad_path;
return cleanup_path(pathname);
}
-
-char *perf_path(const char *fmt, ...)
-{
- const char *perf_dir = get_perf_dir();
- char *pathname = get_pathname();
- va_list args;
- unsigned len;
-
- len = strlen(perf_dir);
- if (len > PATH_MAX-100)
- return bad_path;
- memcpy(pathname, perf_dir, len);
- if (len && perf_dir[len-1] != '/')
- pathname[len++] = '/';
- va_start(args, fmt);
- len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
- va_end(args);
- if (len >= PATH_MAX)
- return bad_path;
- return cleanup_path(pathname);
-}
-
-/* strip arbitrary amount of directory separators at end of path */
-static inline int chomp_trailing_dir_sep(const char *path, int len)
-{
- while (len && is_dir_sep(path[len - 1]))
- len--;
- return len;
-}
-
-/*
- * If path ends with suffix (complete path components), returns the
- * part before suffix (sans trailing directory separators).
- * Otherwise returns NULL.
- */
-char *strip_path_suffix(const char *path, const char *suffix)
-{
- int path_len = strlen(path), suffix_len = strlen(suffix);
-
- while (suffix_len) {
- if (!path_len)
- return NULL;
-
- if (is_dir_sep(path[path_len - 1])) {
- if (!is_dir_sep(suffix[suffix_len - 1]))
- return NULL;
- path_len = chomp_trailing_dir_sep(path, path_len);
- suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
- }
- else if (path[--path_len] != suffix[--suffix_len])
- return NULL;
- }
-
- if (path_len && !is_dir_sep(path[path_len - 1]))
- return NULL;
- return strndup(path, chomp_trailing_dir_sep(path, path_len));
-}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 74401a2..55f41d5 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -67,7 +67,6 @@
return ret;
}
-static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
static struct machine *host_machine;
/* Initialize symbol maps and path of vmlinux/modules */
@@ -103,10 +102,8 @@
void exit_probe_symbol_maps(void)
{
- if (host_machine) {
- machine__delete(host_machine);
- host_machine = NULL;
- }
+ machine__delete(host_machine);
+ host_machine = NULL;
symbol__exit();
}
@@ -899,7 +896,7 @@
goto end;
}
- intlist__for_each(ln, lr->line_list) {
+ intlist__for_each_entry(ln, lr->line_list) {
for (; ln->i > l; l++) {
ret = show_one_line(fp, l - lr->offset);
if (ret < 0)
@@ -983,7 +980,7 @@
zfree(&vl->point.symbol);
nvars = 0;
if (vl->vars) {
- strlist__for_each(node, vl->vars) {
+ strlist__for_each_entry(node, vl->vars) {
var = strchr(node->s, '\t') + 1;
if (strfilter__compare(_filter, var)) {
fprintf(stdout, "\t\t%s\n", node->s);
@@ -1603,6 +1600,10 @@
p = strchr(argv[1], ':');
if (p) {
tp->module = strndup(argv[1], p - argv[1]);
+ if (!tp->module) {
+ ret = -ENOMEM;
+ goto out;
+ }
p++;
} else
p = argv[1];
@@ -1712,7 +1713,7 @@
}
/* Compose only probe point (not argument) */
-static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
+char *synthesize_perf_probe_point(struct perf_probe_point *pp)
{
struct strbuf buf;
char *tmp, *ret = NULL;
@@ -1751,30 +1752,36 @@
return ret;
}
-#if 0
char *synthesize_perf_probe_command(struct perf_probe_event *pev)
{
- char *buf;
- int i, len, ret;
+ struct strbuf buf;
+ char *tmp, *ret = NULL;
+ int i;
- buf = synthesize_perf_probe_point(&pev->point);
- if (!buf)
+ if (strbuf_init(&buf, 64))
return NULL;
+ if (pev->event)
+ if (strbuf_addf(&buf, "%s:%s=", pev->group ?: PERFPROBE_GROUP,
+ pev->event) < 0)
+ goto out;
- len = strlen(buf);
+ tmp = synthesize_perf_probe_point(&pev->point);
+ if (!tmp || strbuf_addstr(&buf, tmp) < 0)
+ goto out;
+ free(tmp);
+
for (i = 0; i < pev->nargs; i++) {
- ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s",
- pev->args[i].name);
- if (ret <= 0) {
- free(buf);
- return NULL;
- }
- len += ret;
+ tmp = synthesize_perf_probe_arg(pev->args + i);
+ if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0)
+ goto out;
+ free(tmp);
}
- return buf;
+ ret = strbuf_detach(&buf, NULL);
+out:
+ strbuf_release(&buf);
+ return ret;
}
-#endif
static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
struct strbuf *buf, int depth)
@@ -2026,6 +2033,79 @@
memset(pev, 0, sizeof(*pev));
}
+#define strdup_or_goto(str, label) \
+({ char *__p = NULL; if (str && !(__p = strdup(str))) goto label; __p; })
+
+static int perf_probe_point__copy(struct perf_probe_point *dst,
+ struct perf_probe_point *src)
+{
+ dst->file = strdup_or_goto(src->file, out_err);
+ dst->function = strdup_or_goto(src->function, out_err);
+ dst->lazy_line = strdup_or_goto(src->lazy_line, out_err);
+ dst->line = src->line;
+ dst->retprobe = src->retprobe;
+ dst->offset = src->offset;
+ return 0;
+
+out_err:
+ clear_perf_probe_point(dst);
+ return -ENOMEM;
+}
+
+static int perf_probe_arg__copy(struct perf_probe_arg *dst,
+ struct perf_probe_arg *src)
+{
+ struct perf_probe_arg_field *field, **ppfield;
+
+ dst->name = strdup_or_goto(src->name, out_err);
+ dst->var = strdup_or_goto(src->var, out_err);
+ dst->type = strdup_or_goto(src->type, out_err);
+
+ field = src->field;
+ ppfield = &(dst->field);
+ while (field) {
+ *ppfield = zalloc(sizeof(*field));
+ if (!*ppfield)
+ goto out_err;
+ (*ppfield)->name = strdup_or_goto(field->name, out_err);
+ (*ppfield)->index = field->index;
+ (*ppfield)->ref = field->ref;
+ field = field->next;
+ ppfield = &((*ppfield)->next);
+ }
+ return 0;
+out_err:
+ return -ENOMEM;
+}
+
+int perf_probe_event__copy(struct perf_probe_event *dst,
+ struct perf_probe_event *src)
+{
+ int i;
+
+ dst->event = strdup_or_goto(src->event, out_err);
+ dst->group = strdup_or_goto(src->group, out_err);
+ dst->target = strdup_or_goto(src->target, out_err);
+ dst->uprobes = src->uprobes;
+
+ if (perf_probe_point__copy(&dst->point, &src->point) < 0)
+ goto out_err;
+
+ dst->args = zalloc(sizeof(struct perf_probe_arg) * src->nargs);
+ if (!dst->args)
+ goto out_err;
+ dst->nargs = src->nargs;
+
+ for (i = 0; i < src->nargs; i++)
+ if (perf_probe_arg__copy(&dst->args[i], &src->args[i]) < 0)
+ goto out_err;
+ return 0;
+
+out_err:
+ clear_perf_probe_event(dst);
+ return -ENOMEM;
+}
+
void clear_probe_trace_event(struct probe_trace_event *tev)
{
struct probe_trace_arg_ref *ref, *next;
@@ -2253,7 +2333,7 @@
if (!rawlist)
return -ENOMEM;
- strlist__for_each(ent, rawlist) {
+ strlist__for_each_entry(ent, rawlist) {
ret = parse_probe_trace_command(ent->s, &tev);
if (ret >= 0) {
if (!filter_probe_trace_event(&tev, filter))
@@ -2432,6 +2512,7 @@
{
int i, fd, ret;
struct probe_trace_event *tev = NULL;
+ struct probe_cache *cache = NULL;
struct strlist *namelist;
fd = probe_file__open(PF_FL_RW | (pev->uprobes ? PF_FL_UPROBE : 0));
@@ -2473,6 +2554,14 @@
}
if (ret == -EINVAL && pev->uprobes)
warn_uprobe_event_compat(tev);
+ if (ret == 0 && probe_conf.cache) {
+ cache = probe_cache__new(pev->target);
+ if (!cache ||
+ probe_cache__add_entry(cache, pev, tevs, ntevs) < 0 ||
+ probe_cache__commit(cache) < 0)
+ pr_warning("Failed to add event to probe cache\n");
+ probe_cache__delete(cache);
+ }
strlist__delete(namelist);
close_out:
@@ -2501,9 +2590,6 @@
return found;
}
-#define strdup_or_goto(str, label) \
- ({ char *__p = strdup(str); if (!__p) goto label; __p; })
-
void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
struct probe_trace_event *tev __maybe_unused,
struct map *map __maybe_unused,
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5a27eb4..432b690 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -12,6 +12,7 @@
bool show_location_range;
bool force_add;
bool no_inlines;
+ bool cache;
int max_probes;
};
extern struct probe_conf probe_conf;
@@ -121,6 +122,10 @@
char *synthesize_perf_probe_command(struct perf_probe_event *pev);
char *synthesize_probe_trace_command(struct probe_trace_event *tev);
char *synthesize_perf_probe_arg(struct perf_probe_arg *pa);
+char *synthesize_perf_probe_point(struct perf_probe_point *pp);
+
+int perf_probe_event__copy(struct perf_probe_event *dst,
+ struct perf_probe_event *src);
/* Check the perf_probe_event needs debuginfo */
bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 3fe6214..1c12c1a 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -14,6 +14,7 @@
* GNU General Public License for more details.
*
*/
+#include <sys/uio.h>
#include "util.h"
#include "event.h"
#include "strlist.h"
@@ -177,7 +178,7 @@
if (!rawlist)
return NULL;
sl = strlist__new(NULL, NULL);
- strlist__for_each(ent, rawlist) {
+ strlist__for_each_entry(ent, rawlist) {
ret = parse_probe_trace_command(ent->s, &tev);
if (ret < 0)
break;
@@ -280,7 +281,7 @@
if (!namelist)
return -ENOENT;
- strlist__for_each(ent, namelist) {
+ strlist__for_each_entry(ent, namelist) {
p = strchr(ent->s, ':');
if ((p && strfilter__compare(filter, p + 1)) ||
strfilter__compare(filter, ent->s)) {
@@ -298,7 +299,7 @@
int ret = 0;
struct str_node *ent;
- strlist__for_each(ent, namelist) {
+ strlist__for_each_entry(ent, namelist) {
ret = __del_trace_probe_event(fd, ent);
if (ret < 0)
break;
@@ -324,3 +325,333 @@
return ret;
}
+
+/* Caller must ensure to remove this entry from list */
+static void probe_cache_entry__delete(struct probe_cache_entry *entry)
+{
+ if (entry) {
+ BUG_ON(!list_empty(&entry->node));
+
+ strlist__delete(entry->tevlist);
+ clear_perf_probe_event(&entry->pev);
+ zfree(&entry->spev);
+ free(entry);
+ }
+}
+
+static struct probe_cache_entry *
+probe_cache_entry__new(struct perf_probe_event *pev)
+{
+ struct probe_cache_entry *entry = zalloc(sizeof(*entry));
+
+ if (entry) {
+ INIT_LIST_HEAD(&entry->node);
+ entry->tevlist = strlist__new(NULL, NULL);
+ if (!entry->tevlist)
+ zfree(&entry);
+ else if (pev) {
+ entry->spev = synthesize_perf_probe_command(pev);
+ if (!entry->spev ||
+ perf_probe_event__copy(&entry->pev, pev) < 0) {
+ probe_cache_entry__delete(entry);
+ return NULL;
+ }
+ }
+ }
+
+ return entry;
+}
+
+/* For the kernel probe caches, pass target = NULL */
+static int probe_cache__open(struct probe_cache *pcache, const char *target)
+{
+ char cpath[PATH_MAX];
+ char sbuildid[SBUILD_ID_SIZE];
+ char *dir_name;
+ bool is_kallsyms = !target;
+ int ret, fd;
+
+ if (target)
+ ret = filename__sprintf_build_id(target, sbuildid);
+ else {
+ target = DSO__NAME_KALLSYMS;
+ ret = sysfs__sprintf_build_id("/", sbuildid);
+ }
+ if (ret < 0) {
+ pr_debug("Failed to get build-id from %s.\n", target);
+ return ret;
+ }
+
+ /* If we have no buildid cache, make it */
+ if (!build_id_cache__cached(sbuildid)) {
+ ret = build_id_cache__add_s(sbuildid, target,
+ is_kallsyms, NULL);
+ if (ret < 0) {
+ pr_debug("Failed to add build-id cache: %s\n", target);
+ return ret;
+ }
+ }
+
+ dir_name = build_id_cache__cachedir(sbuildid, target, is_kallsyms,
+ false);
+ if (!dir_name)
+ return -ENOMEM;
+
+ snprintf(cpath, PATH_MAX, "%s/probes", dir_name);
+ fd = open(cpath, O_CREAT | O_RDWR, 0644);
+ if (fd < 0)
+ pr_debug("Failed to open cache(%d): %s\n", fd, cpath);
+ free(dir_name);
+ pcache->fd = fd;
+
+ return fd;
+}
+
+static int probe_cache__load(struct probe_cache *pcache)
+{
+ struct probe_cache_entry *entry = NULL;
+ char buf[MAX_CMDLEN], *p;
+ int ret = 0;
+ FILE *fp;
+
+ fp = fdopen(dup(pcache->fd), "r");
+ if (!fp)
+ return -EINVAL;
+
+ while (!feof(fp)) {
+ if (!fgets(buf, MAX_CMDLEN, fp))
+ break;
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+ if (buf[0] == '#') { /* #perf_probe_event */
+ entry = probe_cache_entry__new(NULL);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ entry->spev = strdup(buf + 1);
+ if (entry->spev)
+ ret = parse_perf_probe_command(buf + 1,
+ &entry->pev);
+ else
+ ret = -ENOMEM;
+ if (ret < 0) {
+ probe_cache_entry__delete(entry);
+ goto out;
+ }
+ list_add_tail(&entry->node, &pcache->entries);
+ } else { /* trace_probe_event */
+ if (!entry) {
+ ret = -EINVAL;
+ goto out;
+ }
+ strlist__add(entry->tevlist, buf);
+ }
+ }
+out:
+ fclose(fp);
+ return ret;
+}
+
+static struct probe_cache *probe_cache__alloc(void)
+{
+ struct probe_cache *pcache = zalloc(sizeof(*pcache));
+
+ if (pcache) {
+ INIT_LIST_HEAD(&pcache->entries);
+ pcache->fd = -EINVAL;
+ }
+ return pcache;
+}
+
+void probe_cache__purge(struct probe_cache *pcache)
+{
+ struct probe_cache_entry *entry, *n;
+
+ list_for_each_entry_safe(entry, n, &pcache->entries, node) {
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+}
+
+void probe_cache__delete(struct probe_cache *pcache)
+{
+ if (!pcache)
+ return;
+
+ probe_cache__purge(pcache);
+ if (pcache->fd > 0)
+ close(pcache->fd);
+ free(pcache);
+}
+
+struct probe_cache *probe_cache__new(const char *target)
+{
+ struct probe_cache *pcache = probe_cache__alloc();
+ int ret;
+
+ if (!pcache)
+ return NULL;
+
+ ret = probe_cache__open(pcache, target);
+ if (ret < 0) {
+ pr_debug("Cache open error: %d\n", ret);
+ goto out_err;
+ }
+
+ ret = probe_cache__load(pcache);
+ if (ret < 0) {
+ pr_debug("Cache read error: %d\n", ret);
+ goto out_err;
+ }
+
+ return pcache;
+
+out_err:
+ probe_cache__delete(pcache);
+ return NULL;
+}
+
+static bool streql(const char *a, const char *b)
+{
+ if (a == b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ return !strcmp(a, b);
+}
+
+static struct probe_cache_entry *
+probe_cache__find(struct probe_cache *pcache, struct perf_probe_event *pev)
+{
+ struct probe_cache_entry *entry = NULL;
+ char *cmd = synthesize_perf_probe_command(pev);
+
+ if (!cmd)
+ return NULL;
+
+ list_for_each_entry(entry, &pcache->entries, node) {
+ /* Hit if same event name or same command-string */
+ if ((pev->event &&
+ (streql(entry->pev.group, pev->group) &&
+ streql(entry->pev.event, pev->event))) ||
+ (!strcmp(entry->spev, cmd)))
+ goto found;
+ }
+ entry = NULL;
+
+found:
+ free(cmd);
+ return entry;
+}
+
+int probe_cache__add_entry(struct probe_cache *pcache,
+ struct perf_probe_event *pev,
+ struct probe_trace_event *tevs, int ntevs)
+{
+ struct probe_cache_entry *entry = NULL;
+ char *command;
+ int i, ret = 0;
+
+ if (!pcache || !pev || !tevs || ntevs <= 0) {
+ ret = -EINVAL;
+ goto out_err;
+ }
+
+ /* Remove old cache entry */
+ entry = probe_cache__find(pcache, pev);
+ if (entry) {
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+
+ ret = -ENOMEM;
+ entry = probe_cache_entry__new(pev);
+ if (!entry)
+ goto out_err;
+
+ for (i = 0; i < ntevs; i++) {
+ if (!tevs[i].point.symbol)
+ continue;
+
+ command = synthesize_probe_trace_command(&tevs[i]);
+ if (!command)
+ goto out_err;
+ strlist__add(entry->tevlist, command);
+ free(command);
+ }
+ list_add_tail(&entry->node, &pcache->entries);
+ pr_debug("Added probe cache: %d\n", ntevs);
+ return 0;
+
+out_err:
+ pr_debug("Failed to add probe caches\n");
+ probe_cache_entry__delete(entry);
+ return ret;
+}
+
+static int probe_cache_entry__write(struct probe_cache_entry *entry, int fd)
+{
+ struct str_node *snode;
+ struct stat st;
+ struct iovec iov[3];
+ int ret;
+ /* Save stat for rollback */
+ ret = fstat(fd, &st);
+ if (ret < 0)
+ return ret;
+
+ pr_debug("Writing cache: #%s\n", entry->spev);
+ iov[0].iov_base = (void *)"#"; iov[0].iov_len = 1;
+ iov[1].iov_base = entry->spev; iov[1].iov_len = strlen(entry->spev);
+ iov[2].iov_base = (void *)"\n"; iov[2].iov_len = 1;
+ ret = writev(fd, iov, 3);
+ if (ret < (int)iov[1].iov_len + 2)
+ goto rollback;
+
+ strlist__for_each_entry(snode, entry->tevlist) {
+ iov[0].iov_base = (void *)snode->s;
+ iov[0].iov_len = strlen(snode->s);
+ iov[1].iov_base = (void *)"\n"; iov[1].iov_len = 1;
+ ret = writev(fd, iov, 2);
+ if (ret < (int)iov[0].iov_len + 1)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ /* Rollback to avoid cache file corruption */
+ if (ret > 0)
+ ret = -1;
+ if (ftruncate(fd, st.st_size) < 0)
+ ret = -2;
+
+ return ret;
+}
+
+int probe_cache__commit(struct probe_cache *pcache)
+{
+ struct probe_cache_entry *entry;
+ int ret = 0;
+
+ /* TBD: if we do not update existing entries, skip it */
+ ret = lseek(pcache->fd, 0, SEEK_SET);
+ if (ret < 0)
+ goto out;
+
+ ret = ftruncate(pcache->fd, 0);
+ if (ret < 0)
+ goto out;
+
+ list_for_each_entry(entry, &pcache->entries, node) {
+ ret = probe_cache_entry__write(entry, pcache->fd);
+ pr_debug("Cache committed: %d\n", ret);
+ if (ret < 0)
+ break;
+ }
+out:
+ return ret;
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index 18ac9cf..d872e3d 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -5,6 +5,19 @@
#include "strfilter.h"
#include "probe-event.h"
+/* Cache of probe definitions */
+struct probe_cache_entry {
+ struct list_head node;
+ struct perf_probe_event pev;
+ char *spev;
+ struct strlist *tevlist;
+};
+
+struct probe_cache {
+ int fd;
+ struct list_head entries;
+};
+
#define PF_FL_UPROBE 1
#define PF_FL_RW 2
@@ -18,5 +31,12 @@
struct strlist *plist);
int probe_file__del_strlist(int fd, struct strlist *namelist);
+struct probe_cache *probe_cache__new(const char *target);
+int probe_cache__add_entry(struct probe_cache *pcache,
+ struct perf_probe_event *pev,
+ struct probe_trace_event *tevs, int ntevs);
+int probe_cache__commit(struct probe_cache *pcache);
+void probe_cache__purge(struct probe_cache *pcache);
+void probe_cache__delete(struct probe_cache *pcache);
#endif
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 98f127a..65c6c73 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -957,7 +957,7 @@
if (i >= pevlist->evlist.nr_entries)
return NULL;
- evlist__for_each(&pevlist->evlist, pos) {
+ evlist__for_each_entry(&pevlist->evlist, pos) {
if (i-- == 0)
break;
}
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
index abc76e3..808cc45 100644
--- a/tools/perf/util/rb_resort.h
+++ b/tools/perf/util/rb_resort.h
@@ -35,7 +35,7 @@
struct rb_node *nd;
- resort_rb__for_each(nd, threads) {
+ resort_rb__for_each_entry(nd, threads) {
struct thread *t = threads_entry;
printf("%s: %d\n", t->shortname, t->tid);
}
@@ -123,7 +123,7 @@
struct __name##_sorted_entry *__name##_entry; \
struct __name##_sorted *__name = __name##_sorted__new
-#define resort_rb__for_each(__nd, __name) \
+#define resort_rb__for_each_entry(__nd, __name) \
for (__nd = rb_first(&__name->entries); \
__name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \
rb_node), __nd; \
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 481792c..98bf584 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -148,7 +148,7 @@
use_comm_exec = perf_can_comm_exec();
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
perf_evsel__config(evsel, opts, callchain);
if (evsel->tracking && use_comm_exec)
evsel->attr.comm_exec = 1;
@@ -161,18 +161,18 @@
* match the id.
*/
use_sample_identifier = perf_can_sample_identifier();
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
perf_evsel__set_sample_id(evsel, use_sample_identifier);
} else if (evlist->nr_entries > 1) {
struct perf_evsel *first = perf_evlist__first(evlist);
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.sample_type == first->attr.sample_type)
continue;
use_sample_identifier = perf_can_sample_identifier();
break;
}
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
perf_evsel__set_sample_id(evsel, use_sample_identifier);
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5214974..078d496 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -83,7 +83,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
if (evsel->attr.comm_exec)
return true;
}
@@ -178,6 +178,8 @@
void perf_session__delete(struct perf_session *session)
{
+ if (session == NULL)
+ return;
auxtrace__free(session);
auxtrace_index__free(&session->auxtrace_index);
perf_session__destroy_kernel_maps(session);
@@ -593,6 +595,7 @@
if (bswap_safe(f, 0)) \
attr->f = bswap_##sz(attr->f); \
} while(0)
+#define bswap_field_16(f) bswap_field(f, 16)
#define bswap_field_32(f) bswap_field(f, 32)
#define bswap_field_64(f) bswap_field(f, 64)
@@ -608,6 +611,7 @@
bswap_field_64(sample_regs_user);
bswap_field_32(sample_stack_user);
bswap_field_32(aux_watermark);
+ bswap_field_16(sample_max_stack);
/*
* After read_format are bitfields. Check read_format because
@@ -1868,7 +1872,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(session->evlist, evsel) {
+ evlist__for_each_entry(session->evlist, evsel) {
if (evsel->attr.type == PERF_TYPE_TRACEPOINT)
return true;
}
@@ -1950,7 +1954,7 @@
{
struct perf_evsel *pos;
- evlist__for_each(session->evlist, pos) {
+ evlist__for_each_entry(session->evlist, pos) {
if (pos->attr.type == type)
return pos;
}
@@ -2105,7 +2109,7 @@
max_nr = (UINT16_MAX - sizeof(struct id_index_event)) /
sizeof(struct id_index_entry);
- evlist__for_each(evlist, evsel)
+ evlist__for_each_entry(evlist, evsel)
nr += evsel->ids;
n = nr > max_nr ? max_nr : nr;
@@ -2118,7 +2122,7 @@
ev->id_index.header.size = sz;
ev->id_index.nr = n;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
u32 j;
for (j = 0; j < evsel->ids; j++) {
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index c4e9bd7..5854b46 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -79,8 +79,8 @@
{
const char *comm = thread__comm_str(he->thread);
- width = max(7U, width) - 6;
- return repsep_snprintf(bf, size, "%5d:%-*.*s", he->thread->tid,
+ width = max(7U, width) - 8;
+ return repsep_snprintf(bf, size, "%7d:%-*.*s", he->thread->tid,
width, width, comm ?: "");
}
@@ -95,7 +95,7 @@
}
struct sort_entry sort_thread = {
- .se_header = " Pid:Command",
+ .se_header = " Pid:Command",
.se_cmp = sort__thread_cmp,
.se_snprintf = hist_entry__thread_snprintf,
.se_filter = hist_entry__thread_filter,
@@ -1218,7 +1218,7 @@
.se_header = "Data Object",
.se_cmp = sort__dso_daddr_cmp,
.se_snprintf = hist_entry__dso_daddr_snprintf,
- .se_width_idx = HISTC_MEM_DADDR_SYMBOL,
+ .se_width_idx = HISTC_MEM_DADDR_DSO,
};
struct sort_entry sort_mem_locked = {
@@ -1488,7 +1488,7 @@
}
static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct perf_evsel *evsel)
+ struct hists *hists)
{
struct hpp_sort_entry *hse;
size_t len = fmt->user_len;
@@ -1496,14 +1496,14 @@
hse = container_of(fmt, struct hpp_sort_entry, hpp);
if (!len)
- len = hists__col_len(evsel__hists(evsel), hse->se->se_width_idx);
+ len = hists__col_len(hists, hse->se->se_width_idx);
return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name);
}
static int __sort__hpp_width(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp __maybe_unused,
- struct perf_evsel *evsel)
+ struct hists *hists)
{
struct hpp_sort_entry *hse;
size_t len = fmt->user_len;
@@ -1511,7 +1511,7 @@
hse = container_of(fmt, struct hpp_sort_entry, hpp);
if (!len)
- len = hists__col_len(evsel__hists(evsel), hse->se->se_width_idx);
+ len = hists__col_len(hists, hse->se->se_width_idx);
return len;
}
@@ -1793,7 +1793,7 @@
}
static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct perf_evsel *evsel __maybe_unused)
+ struct hists *hists __maybe_unused)
{
struct hpp_dynamic_entry *hde;
size_t len = fmt->user_len;
@@ -1808,7 +1808,7 @@
static int __sort__hde_width(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp __maybe_unused,
- struct perf_evsel *evsel __maybe_unused)
+ struct hists *hists __maybe_unused)
{
struct hpp_dynamic_entry *hde;
size_t len = fmt->user_len;
@@ -2069,7 +2069,7 @@
}
full_name = !!strchr(event_name, ':');
- evlist__for_each(evlist, pos) {
+ evlist__for_each_entry(evlist, pos) {
/* case 2 */
if (full_name && !strcmp(pos->name, event_name))
return pos;
@@ -2125,7 +2125,7 @@
int ret;
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
continue;
@@ -2143,7 +2143,7 @@
struct perf_evsel *evsel;
struct format_field *field;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
continue;
@@ -2456,7 +2456,7 @@
if (evlist == NULL)
goto out_no_evlist;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
use_trace = false;
break;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index aa9efe0..8a2bbd2 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -36,6 +36,11 @@
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static bool have_frontend_stalled;
struct stats walltime_nsecs_stats;
@@ -82,6 +87,11 @@
sizeof(runtime_transaction_stats));
memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+ memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
+ memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
+ memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
+ memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
+ memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
}
/*
@@ -105,6 +115,16 @@
update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, ELISION_START))
update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
+ update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
+ update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
+ update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
+ update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
+ update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -302,6 +322,107 @@
out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
+/*
+ * High level "TopDown" CPU core pipe line bottleneck break down.
+ *
+ * Basic concept following
+ * Yasin, A Top Down Method for Performance analysis and Counter architecture
+ * ISPASS14
+ *
+ * The CPU pipeline is divided into 4 areas that can be bottlenecks:
+ *
+ * Frontend -> Backend -> Retiring
+ * BadSpeculation in addition means out of order execution that is thrown away
+ * (for example branch mispredictions)
+ * Frontend is instruction decoding.
+ * Backend is execution, like computation and accessing data in memory
+ * Retiring is good execution that is not directly bottlenecked
+ *
+ * The formulas are computed in slots.
+ * A slot is an entry in the pipeline each for the pipeline width
+ * (for example a 4-wide pipeline has 4 slots for each cycle)
+ *
+ * Formulas:
+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
+ * TotalSlots
+ * Retiring = SlotsRetired / TotalSlots
+ * FrontendBound = FetchBubbles / TotalSlots
+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
+ *
+ * The kernel provides the mapping to the low level CPU events and any scaling
+ * needed for the CPU pipeline width, for example:
+ *
+ * TotalSlots = Cycles * 4
+ *
+ * The scaling factor is communicated in the sysfs unit.
+ *
+ * In some cases the CPU may not be able to measure all the formulas due to
+ * missing events. In this case multiple formulas are combined, as possible.
+ *
+ * Full TopDown supports more levels to sub-divide each area: for example
+ * BackendBound into computing bound and memory bound. For now we only
+ * support Level 1 TopDown.
+ */
+
+static double sanitize_val(double x)
+{
+ if (x < 0 && x >= -0.02)
+ return 0.0;
+ return x;
+}
+
+static double td_total_slots(int ctx, int cpu)
+{
+ return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
+}
+
+static double td_bad_spec(int ctx, int cpu)
+{
+ double bad_spec = 0;
+ double total_slots;
+ double total;
+
+ total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
+ avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
+ avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
+ total_slots = td_total_slots(ctx, cpu);
+ if (total_slots)
+ bad_spec = total / total_slots;
+ return sanitize_val(bad_spec);
+}
+
+static double td_retiring(int ctx, int cpu)
+{
+ double retiring = 0;
+ double total_slots = td_total_slots(ctx, cpu);
+ double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
+
+ if (total_slots)
+ retiring = ret_slots / total_slots;
+ return retiring;
+}
+
+static double td_fe_bound(int ctx, int cpu)
+{
+ double fe_bound = 0;
+ double total_slots = td_total_slots(ctx, cpu);
+ double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
+
+ if (total_slots)
+ fe_bound = fetch_bub / total_slots;
+ return fe_bound;
+}
+
+static double td_be_bound(int ctx, int cpu)
+{
+ double sum = (td_fe_bound(ctx, cpu) +
+ td_bad_spec(ctx, cpu) +
+ td_retiring(ctx, cpu));
+ if (sum == 0)
+ return 0;
+ return sanitize_val(1.0 - sum);
+}
+
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out)
@@ -309,6 +430,7 @@
void *ctxp = out->ctx;
print_metric_t print_metric = out->print_metric;
double total, ratio = 0.0, total2;
+ const char *color = NULL;
int ctx = evsel_context(evsel);
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
@@ -452,6 +574,46 @@
avg / ratio);
else
print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
+ double fe_bound = td_fe_bound(ctx, cpu);
+
+ if (fe_bound > 0.2)
+ color = PERF_COLOR_RED;
+ print_metric(ctxp, color, "%8.1f%%", "frontend bound",
+ fe_bound * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
+ double retiring = td_retiring(ctx, cpu);
+
+ if (retiring > 0.7)
+ color = PERF_COLOR_GREEN;
+ print_metric(ctxp, color, "%8.1f%%", "retiring",
+ retiring * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
+ double bad_spec = td_bad_spec(ctx, cpu);
+
+ if (bad_spec > 0.1)
+ color = PERF_COLOR_RED;
+ print_metric(ctxp, color, "%8.1f%%", "bad speculation",
+ bad_spec * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
+ double be_bound = td_be_bound(ctx, cpu);
+ const char *name = "backend bound";
+ static int have_recovery_bubbles = -1;
+
+ /* In case the CPU does not support topdown-recovery-bubbles */
+ if (have_recovery_bubbles < 0)
+ have_recovery_bubbles = pmu_have_event("cpu",
+ "topdown-recovery-bubbles");
+ if (!have_recovery_bubbles)
+ name = "backend bound/bad spec";
+
+ if (be_bound > 0.2)
+ color = PERF_COLOR_RED;
+ if (td_total_slots(ctx, cpu) > 0)
+ print_metric(ctxp, color, "%8.1f%%", name,
+ be_bound * 100.);
+ else
+ print_metric(ctxp, NULL, NULL, name, 0);
} else if (runtime_nsecs_stats[cpu].n != 0) {
char unit = 'M';
char unit_buf[10];
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index ffa1d06..39345c2d 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -79,6 +79,11 @@
ID(TRANSACTION_START, cpu/tx-start/),
ID(ELISION_START, cpu/el-start/),
ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
+ ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
+ ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
+ ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
+ ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
+ ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
};
#undef ID
@@ -157,7 +162,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
if (perf_evsel__alloc_stats(evsel, alloc_raw))
goto out_free;
}
@@ -173,7 +178,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
perf_evsel__free_stat_priv(evsel);
perf_evsel__free_counts(evsel);
perf_evsel__free_prev_raw_counts(evsel);
@@ -184,7 +189,7 @@
{
struct perf_evsel *evsel;
- evlist__for_each(evlist, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
perf_evsel__reset_stat_priv(evsel);
perf_evsel__reset_counts(evsel);
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0150e78..c29bb94 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -17,6 +17,11 @@
PERF_STAT_EVSEL_ID__TRANSACTION_START,
PERF_STAT_EVSEL_ID__ELISION_START,
PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+ PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
+ PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
+ PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
+ PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
+ PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
PERF_STAT_EVSEL_ID__MAX,
};
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index ca99002..19207e5 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -73,7 +73,7 @@
* @pos: the &struct str_node to use as a loop cursor.
* @slist: the &struct strlist for loop.
*/
-#define strlist__for_each(pos, slist) \
+#define strlist__for_each_entry(pos, slist) \
for (pos = strlist__first(slist); pos; pos = strlist__next(pos))
/**
@@ -83,7 +83,7 @@
* @n: another &struct str_node to use as temporary storage.
* @slist: the &struct strlist for loop.
*/
-#define strlist__for_each_safe(pos, n, slist) \
+#define strlist__for_each_entry_safe(pos, n, slist) \
for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
pos = n, n = strlist__next(n))
#endif /* __PERF_STRLIST_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 54c4ff2..b044f1a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1626,7 +1626,7 @@
if (!dirs)
return -1;
- strlist__for_each(nd, dirs) {
+ strlist__for_each_entry(nd, dirs) {
scnprintf(kallsyms_filename, sizeof(kallsyms_filename),
"%s/%s/kallsyms", dir, nd->s);
if (!validate_kcore_addresses(kallsyms_filename, map)) {
@@ -1641,6 +1641,20 @@
return ret;
}
+/*
+ * Use open(O_RDONLY) to check readability directly instead of access(R_OK)
+ * since access(R_OK) only checks with real UID/GID but open() use effective
+ * UID/GID and actual capabilities (e.g. /proc/kcore requires CAP_SYS_RAWIO).
+ */
+static bool filename__readable(const char *file)
+{
+ int fd = open(file, O_RDONLY);
+ if (fd < 0)
+ return false;
+ close(fd);
+ return true;
+}
+
static char *dso__find_kallsyms(struct dso *dso, struct map *map)
{
u8 host_build_id[BUILD_ID_SIZE];
@@ -1660,58 +1674,43 @@
sizeof(host_build_id)) == 0)
is_host = dso__build_id_equal(dso, host_build_id);
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
-
- scnprintf(path, sizeof(path), "%s/%s/%s", buildid_dir,
- DSO__NAME_KCORE, sbuild_id);
-
- /* Use /proc/kallsyms if possible */
+ /* Try a fast path for /proc/kallsyms if possible */
if (is_host) {
- DIR *d;
- int fd;
-
- /* If no cached kcore go with /proc/kallsyms */
- d = opendir(path);
- if (!d)
- goto proc_kallsyms;
- closedir(d);
-
/*
- * Do not check the build-id cache, until we know we cannot use
- * /proc/kcore.
+ * Do not check the build-id cache, unless we know we cannot use
+ * /proc/kcore or module maps don't match to /proc/kallsyms.
+ * To check readability of /proc/kcore, do not use access(R_OK)
+ * since /proc/kcore requires CAP_SYS_RAWIO to read and access
+ * can't check it.
*/
- fd = open("/proc/kcore", O_RDONLY);
- if (fd != -1) {
- close(fd);
- /* If module maps match go with /proc/kallsyms */
- if (!validate_kcore_addresses("/proc/kallsyms", map))
- goto proc_kallsyms;
- }
-
- /* Find kallsyms in build-id cache with kcore */
- if (!find_matching_kcore(map, path, sizeof(path)))
- return strdup(path);
-
- goto proc_kallsyms;
+ if (filename__readable("/proc/kcore") &&
+ !validate_kcore_addresses("/proc/kallsyms", map))
+ goto proc_kallsyms;
}
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+
/* Find kallsyms in build-id cache with kcore */
+ scnprintf(path, sizeof(path), "%s/%s/%s",
+ buildid_dir, DSO__NAME_KCORE, sbuild_id);
+
if (!find_matching_kcore(map, path, sizeof(path)))
return strdup(path);
- scnprintf(path, sizeof(path), "%s/%s/%s",
- buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+ /* Use current /proc/kallsyms if possible */
+ if (is_host) {
+proc_kallsyms:
+ return strdup("/proc/kallsyms");
+ }
- if (access(path, F_OK)) {
+ /* Finally, find a cache of kallsyms */
+ if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) {
pr_err("No kallsyms or vmlinux with build-id %s was found\n",
sbuild_id);
return NULL;
}
return strdup(path);
-
-proc_kallsyms:
- return strdup("/proc/kallsyms");
}
static int dso__load_kernel_sym(struct dso *dso, struct map *map,
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 825086a..d330152 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -616,3 +616,10 @@
return err;
}
+
+size_t thread_stack__depth(struct thread *thread)
+{
+ if (!thread->ts)
+ return 0;
+ return thread->ts->cnt;
+}
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index ad44c79..b7e41c4 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -87,6 +87,7 @@
size_t sz, u64 ip);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread);
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 45fcb71..f30f956 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -43,9 +43,6 @@
thread->cpu = -1;
INIT_LIST_HEAD(&thread->comm_list);
- if (unwind__prepare_access(thread) < 0)
- goto err_thread;
-
comm_str = malloc(32);
if (!comm_str)
goto err_thread;
@@ -201,10 +198,18 @@
map_groups__fprintf(thread->mg, fp);
}
-void thread__insert_map(struct thread *thread, struct map *map)
+int thread__insert_map(struct thread *thread, struct map *map)
{
+ int ret;
+
+ ret = unwind__prepare_access(thread, map);
+ if (ret)
+ return ret;
+
map_groups__fixup_overlappings(thread->mg, map, stderr);
map_groups__insert(thread->mg, map);
+
+ return 0;
}
static int thread__clone_map_groups(struct thread *thread,
@@ -265,3 +270,14 @@
break;
}
}
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
+{
+ if (thread->pid_ == thread->tid)
+ return thread__get(thread);
+
+ if (thread->pid_ == -1)
+ return NULL;
+
+ return machine__find_thread(machine, thread->pid_, thread->pid_);
+}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 45fba13..99263cb 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -9,11 +9,9 @@
#include "symbol.h"
#include <strlist.h>
#include <intlist.h>
-#ifdef HAVE_LIBUNWIND_SUPPORT
-#include <libunwind.h>
-#endif
struct thread_stack;
+struct unwind_libunwind_ops;
struct thread {
union {
@@ -36,7 +34,8 @@
void *priv;
struct thread_stack *ts;
#ifdef HAVE_LIBUNWIND_SUPPORT
- unw_addr_space_t addr_space;
+ void *addr_space;
+ struct unwind_libunwind_ops *unwind_libunwind_ops;
#endif
};
@@ -77,10 +76,12 @@
struct comm *thread__comm(const struct thread *thread);
struct comm *thread__exec_comm(const struct thread *thread);
const char *thread__comm_str(const struct thread *thread);
-void thread__insert_map(struct thread *thread, struct map *map);
+int thread__insert_map(struct thread *thread, struct map *map);
int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
size_t thread__fprintf(struct thread *thread, FILE *fp);
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
+
void thread__find_addr_map(struct thread *thread,
u8 cpumode, enum map_type type, u64 addr,
struct addr_location *al);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 5654fe1..40585f5 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -202,7 +202,7 @@
if (!slist)
return NULL;
- strlist__for_each(pos, slist) {
+ strlist__for_each_entry(pos, slist) {
pid = strtol(pos->s, &end_ptr, 10);
if (pid == INT_MIN || pid == INT_MAX ||
@@ -278,7 +278,7 @@
if (!slist)
return NULL;
- strlist__for_each(pos, slist) {
+ strlist__for_each_entry(pos, slist) {
tid = strtol(pos->s, &end_ptr, 10);
if (tid == INT_MIN || tid == INT_MAX ||
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
new file mode 100644
index 0000000..97c0f8f
--- /dev/null
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -0,0 +1,699 @@
+/*
+ * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
+ *
+ * Lots of this code have been borrowed or heavily inspired from parts of
+ * the libunwind 0.99 code which are (amongst other contributors I may have
+ * forgotten):
+ *
+ * Copyright (C) 2002-2007 Hewlett-Packard Co
+ * Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * And the bugs have been added by:
+ *
+ * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
+ *
+ */
+
+#include <elf.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/list.h>
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include <libunwind-ptrace.h>
+#endif
+#include "callchain.h"
+#include "thread.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "unwind.h"
+#include "symbol.h"
+#include "util.h"
+#include "debug.h"
+#include "asm/bug.h"
+
+extern int
+UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+ unw_word_t ip,
+ unw_word_t segbase,
+ const char *obj_name, unw_word_t start,
+ unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */
+#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */
+
+/* Pointer-encoding formats: */
+#define DW_EH_PE_omit 0xff
+#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */
+#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */
+#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */
+#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */
+#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */
+
+/* Pointer-encoding application: */
+#define DW_EH_PE_absptr 0x00 /* absolute value */
+#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */
+
+/*
+ * The following are not documented by LSB v1.3, yet they are used by
+ * GCC, presumably they aren't documented by LSB since they aren't
+ * used on Linux:
+ */
+#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */
+#define DW_EH_PE_aligned 0x50 /* aligned pointer */
+
+/* Flags intentionaly not handled, since they're not needed:
+ * #define DW_EH_PE_indirect 0x80
+ * #define DW_EH_PE_uleb128 0x01
+ * #define DW_EH_PE_udata2 0x02
+ * #define DW_EH_PE_sleb128 0x09
+ * #define DW_EH_PE_sdata2 0x0a
+ * #define DW_EH_PE_textrel 0x20
+ * #define DW_EH_PE_datarel 0x30
+ */
+
+struct unwind_info {
+ struct perf_sample *sample;
+ struct machine *machine;
+ struct thread *thread;
+};
+
+#define dw_read(ptr, type, end) ({ \
+ type *__p = (type *) ptr; \
+ type __v; \
+ if ((__p + 1) > (type *) end) \
+ return -EINVAL; \
+ __v = *__p++; \
+ ptr = (typeof(ptr)) __p; \
+ __v; \
+ })
+
+static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
+ u8 encoding)
+{
+ u8 *cur = *p;
+ *val = 0;
+
+ switch (encoding) {
+ case DW_EH_PE_omit:
+ *val = 0;
+ goto out;
+ case DW_EH_PE_ptr:
+ *val = dw_read(cur, unsigned long, end);
+ goto out;
+ default:
+ break;
+ }
+
+ switch (encoding & DW_EH_PE_APPL_MASK) {
+ case DW_EH_PE_absptr:
+ break;
+ case DW_EH_PE_pcrel:
+ *val = (unsigned long) cur;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ((encoding & 0x07) == 0x00)
+ encoding |= DW_EH_PE_udata4;
+
+ switch (encoding & DW_EH_PE_FORMAT_MASK) {
+ case DW_EH_PE_sdata4:
+ *val += dw_read(cur, s32, end);
+ break;
+ case DW_EH_PE_udata4:
+ *val += dw_read(cur, u32, end);
+ break;
+ case DW_EH_PE_sdata8:
+ *val += dw_read(cur, s64, end);
+ break;
+ case DW_EH_PE_udata8:
+ *val += dw_read(cur, u64, end);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ out:
+ *p = cur;
+ return 0;
+}
+
+#define dw_read_encoded_value(ptr, end, enc) ({ \
+ u64 __v; \
+ if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \
+ return -EINVAL; \
+ } \
+ __v; \
+ })
+
+static u64 elf_section_offset(int fd, const char *name)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ u64 offset = 0;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return 0;
+
+ do {
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ break;
+
+ if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
+ break;
+
+ offset = shdr.sh_offset;
+ } while (0);
+
+ elf_end(elf);
+ return offset;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int elf_is_exec(int fd, const char *name)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ int retval = 0;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return 0;
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out;
+
+ retval = (ehdr.e_type == ET_EXEC);
+
+out:
+ elf_end(elf);
+ pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval);
+ return retval;
+}
+#endif
+
+struct table_entry {
+ u32 start_ip_offset;
+ u32 fde_offset;
+};
+
+struct eh_frame_hdr {
+ unsigned char version;
+ unsigned char eh_frame_ptr_enc;
+ unsigned char fde_count_enc;
+ unsigned char table_enc;
+
+ /*
+ * The rest of the header is variable-length and consists of the
+ * following members:
+ *
+ * encoded_t eh_frame_ptr;
+ * encoded_t fde_count;
+ */
+
+ /* A single encoded pointer should not be more than 8 bytes. */
+ u64 enc[2];
+
+ /*
+ * struct {
+ * encoded_t start_ip;
+ * encoded_t fde_addr;
+ * } binary_search_table[fde_count];
+ */
+ char data[0];
+} __packed;
+
+static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
+ u64 offset, u64 *table_data, u64 *segbase,
+ u64 *fde_count)
+{
+ struct eh_frame_hdr hdr;
+ u8 *enc = (u8 *) &hdr.enc;
+ u8 *end = (u8 *) &hdr.data;
+ ssize_t r;
+
+ r = dso__data_read_offset(dso, machine, offset,
+ (u8 *) &hdr, sizeof(hdr));
+ if (r != sizeof(hdr))
+ return -EINVAL;
+
+ /* We dont need eh_frame_ptr, just skip it. */
+ dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
+
+ *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
+ *segbase = offset;
+ *table_data = (enc - (u8 *) &hdr) + offset;
+ return 0;
+}
+
+static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
+ u64 *table_data, u64 *segbase,
+ u64 *fde_count)
+{
+ int ret = -EINVAL, fd;
+ u64 offset = dso->data.eh_frame_hdr_offset;
+
+ if (offset == 0) {
+ fd = dso__data_get_fd(dso, machine);
+ if (fd < 0)
+ return -EINVAL;
+
+ /* Check the .eh_frame section for unwinding info */
+ offset = elf_section_offset(fd, ".eh_frame_hdr");
+ dso->data.eh_frame_hdr_offset = offset;
+ dso__data_put_fd(dso);
+ }
+
+ if (offset)
+ ret = unwind_spec_ehframe(dso, machine, offset,
+ table_data, segbase,
+ fde_count);
+
+ return ret;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int read_unwind_spec_debug_frame(struct dso *dso,
+ struct machine *machine, u64 *offset)
+{
+ int fd;
+ u64 ofs = dso->data.debug_frame_offset;
+
+ if (ofs == 0) {
+ fd = dso__data_get_fd(dso, machine);
+ if (fd < 0)
+ return -EINVAL;
+
+ /* Check the .debug_frame section for unwinding info */
+ ofs = elf_section_offset(fd, ".debug_frame");
+ dso->data.debug_frame_offset = ofs;
+ dso__data_put_fd(dso);
+ }
+
+ *offset = ofs;
+ if (*offset)
+ return 0;
+
+ return -EINVAL;
+}
+#endif
+
+static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
+{
+ struct addr_location al;
+
+ thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+ MAP__FUNCTION, ip, &al);
+ if (!al.map) {
+ /*
+ * We've seen cases (softice) where DWARF unwinder went
+ * through non executable mmaps, which we need to lookup
+ * in MAP__VARIABLE tree.
+ */
+ thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+ MAP__VARIABLE, ip, &al);
+ }
+ return al.map;
+}
+
+static int
+find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
+ int need_unwind_info, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct map *map;
+ unw_dyn_info_t di;
+ u64 table_data, segbase, fde_count;
+ int ret = -EINVAL;
+
+ map = find_map(ip, ui);
+ if (!map || !map->dso)
+ return -EINVAL;
+
+ pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
+
+ /* Check the .eh_frame section for unwinding info */
+ if (!read_unwind_spec_eh_frame(map->dso, ui->machine,
+ &table_data, &segbase, &fde_count)) {
+ memset(&di, 0, sizeof(di));
+ di.format = UNW_INFO_FORMAT_REMOTE_TABLE;
+ di.start_ip = map->start;
+ di.end_ip = map->end;
+ di.u.rti.segbase = map->start + segbase;
+ di.u.rti.table_data = map->start + table_data;
+ di.u.rti.table_len = fde_count * sizeof(struct table_entry)
+ / sizeof(unw_word_t);
+ ret = dwarf_search_unwind_table(as, ip, &di, pi,
+ need_unwind_info, arg);
+ }
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+ /* Check the .debug_frame section for unwinding info */
+ if (ret < 0 &&
+ !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
+ int fd = dso__data_get_fd(map->dso, ui->machine);
+ int is_exec = elf_is_exec(fd, map->dso->name);
+ unw_word_t base = is_exec ? 0 : map->start;
+ const char *symfile;
+
+ if (fd >= 0)
+ dso__data_put_fd(map->dso);
+
+ symfile = map->dso->symsrc_filename ?: map->dso->name;
+
+ memset(&di, 0, sizeof(di));
+ if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
+ map->start, map->end))
+ return dwarf_search_unwind_table(as, ip, &di, pi,
+ need_unwind_info, arg);
+ }
+#endif
+
+ return ret;
+}
+
+static int access_fpreg(unw_addr_space_t __maybe_unused as,
+ unw_regnum_t __maybe_unused num,
+ unw_fpreg_t __maybe_unused *val,
+ int __maybe_unused __write,
+ void __maybe_unused *arg)
+{
+ pr_err("unwind: access_fpreg unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
+ unw_word_t __maybe_unused *dil_addr,
+ void __maybe_unused *arg)
+{
+ return -UNW_ENOINFO;
+}
+
+static int resume(unw_addr_space_t __maybe_unused as,
+ unw_cursor_t __maybe_unused *cu,
+ void __maybe_unused *arg)
+{
+ pr_err("unwind: resume unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int
+get_proc_name(unw_addr_space_t __maybe_unused as,
+ unw_word_t __maybe_unused addr,
+ char __maybe_unused *bufp, size_t __maybe_unused buf_len,
+ unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
+{
+ pr_err("unwind: get_proc_name unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
+ unw_word_t *data)
+{
+ struct map *map;
+ ssize_t size;
+
+ map = find_map(addr, ui);
+ if (!map) {
+ pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+ return -1;
+ }
+
+ if (!map->dso)
+ return -1;
+
+ size = dso__data_read_addr(map->dso, map, ui->machine,
+ addr, (u8 *) data, sizeof(*data));
+
+ return !(size == sizeof(*data));
+}
+
+static int access_mem(unw_addr_space_t __maybe_unused as,
+ unw_word_t addr, unw_word_t *valp,
+ int __write, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct stack_dump *stack = &ui->sample->user_stack;
+ u64 start, end;
+ int offset;
+ int ret;
+
+ /* Don't support write, probably not needed. */
+ if (__write || !stack || !ui->sample->user_regs.regs) {
+ *valp = 0;
+ return 0;
+ }
+
+ ret = perf_reg_value(&start, &ui->sample->user_regs,
+ LIBUNWIND__ARCH_REG_SP);
+ if (ret)
+ return ret;
+
+ end = start + stack->size;
+
+ /* Check overflow. */
+ if (addr + sizeof(unw_word_t) < addr)
+ return -EINVAL;
+
+ if (addr < start || addr + sizeof(unw_word_t) >= end) {
+ ret = access_dso_mem(ui, addr, valp);
+ if (ret) {
+ pr_debug("unwind: access_mem %p not inside range"
+ " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+ (void *) (uintptr_t) addr, start, end);
+ *valp = 0;
+ return ret;
+ }
+ return 0;
+ }
+
+ offset = addr - start;
+ *valp = *(unw_word_t *)&stack->data[offset];
+ pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
+ (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
+ return 0;
+}
+
+static int access_reg(unw_addr_space_t __maybe_unused as,
+ unw_regnum_t regnum, unw_word_t *valp,
+ int __write, void *arg)
+{
+ struct unwind_info *ui = arg;
+ int id, ret;
+ u64 val;
+
+ /* Don't support write, I suspect we don't need it. */
+ if (__write) {
+ pr_err("unwind: access_reg w %d\n", regnum);
+ return 0;
+ }
+
+ if (!ui->sample->user_regs.regs) {
+ *valp = 0;
+ return 0;
+ }
+
+ id = LIBUNWIND__ARCH_REG_ID(regnum);
+ if (id < 0)
+ return -EINVAL;
+
+ ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+ if (ret) {
+ pr_err("unwind: can't read reg %d\n", regnum);
+ return ret;
+ }
+
+ *valp = (unw_word_t) val;
+ pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
+ return 0;
+}
+
+static void put_unwind_info(unw_addr_space_t __maybe_unused as,
+ unw_proc_info_t *pi __maybe_unused,
+ void *arg __maybe_unused)
+{
+ pr_debug("unwind: put_unwind_info called\n");
+}
+
+static int entry(u64 ip, struct thread *thread,
+ unwind_entry_cb_t cb, void *arg)
+{
+ struct unwind_entry e;
+ struct addr_location al;
+
+ thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
+ MAP__FUNCTION, ip, &al);
+
+ e.ip = ip;
+ e.map = al.map;
+ e.sym = al.sym;
+
+ pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+ al.sym ? al.sym->name : "''",
+ ip,
+ al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+
+ return cb(&e, arg);
+}
+
+static void display_error(int err)
+{
+ switch (err) {
+ case UNW_EINVAL:
+ pr_err("unwind: Only supports local.\n");
+ break;
+ case UNW_EUNSPEC:
+ pr_err("unwind: Unspecified error.\n");
+ break;
+ case UNW_EBADREG:
+ pr_err("unwind: Register unavailable.\n");
+ break;
+ default:
+ break;
+ }
+}
+
+static unw_accessors_t accessors = {
+ .find_proc_info = find_proc_info,
+ .put_unwind_info = put_unwind_info,
+ .get_dyn_info_list_addr = get_dyn_info_list_addr,
+ .access_mem = access_mem,
+ .access_reg = access_reg,
+ .access_fpreg = access_fpreg,
+ .resume = resume,
+ .get_proc_name = get_proc_name,
+};
+
+static int _unwind__prepare_access(struct thread *thread)
+{
+ if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ return 0;
+
+ thread->addr_space = unw_create_addr_space(&accessors, 0);
+ if (!thread->addr_space) {
+ pr_err("unwind: Can't create unwind address space.\n");
+ return -ENOMEM;
+ }
+
+ unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
+ return 0;
+}
+
+static void _unwind__flush_access(struct thread *thread)
+{
+ if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ return;
+
+ unw_flush_cache(thread->addr_space, 0, 0);
+}
+
+static void _unwind__finish_access(struct thread *thread)
+{
+ if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ return;
+
+ unw_destroy_addr_space(thread->addr_space);
+}
+
+static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
+ void *arg, int max_stack)
+{
+ u64 val;
+ unw_word_t ips[max_stack];
+ unw_addr_space_t addr_space;
+ unw_cursor_t c;
+ int ret, i = 0;
+
+ ret = perf_reg_value(&val, &ui->sample->user_regs,
+ LIBUNWIND__ARCH_REG_IP);
+ if (ret)
+ return ret;
+
+ ips[i++] = (unw_word_t) val;
+
+ /*
+ * If we need more than one entry, do the DWARF
+ * unwind itself.
+ */
+ if (max_stack - 1 > 0) {
+ WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+ addr_space = ui->thread->addr_space;
+
+ if (addr_space == NULL)
+ return -1;
+
+ ret = unw_init_remote(&c, addr_space, ui);
+ if (ret)
+ display_error(ret);
+
+ while (!ret && (unw_step(&c) > 0) && i < max_stack) {
+ unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+ ++i;
+ }
+
+ max_stack = i;
+ }
+
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < max_stack && !ret; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = max_stack - i - 1;
+ ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
+ }
+
+ return ret;
+}
+
+static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack)
+{
+ struct unwind_info ui = {
+ .sample = data,
+ .thread = thread,
+ .machine = thread->mg->machine,
+ };
+
+ if (!data->user_regs.regs)
+ return -EINVAL;
+
+ if (max_stack <= 0)
+ return -EINVAL;
+
+ return get_entries(&ui, cb, arg, max_stack);
+}
+
+static struct unwind_libunwind_ops
+_unwind_libunwind_ops = {
+ .prepare_access = _unwind__prepare_access,
+ .flush_access = _unwind__flush_access,
+ .finish_access = _unwind__finish_access,
+ .get_entries = _unwind__get_entries,
+};
+
+#ifndef REMOTE_UNWIND_LIBUNWIND
+struct unwind_libunwind_ops *
+local_unwind_libunwind_ops = &_unwind_libunwind_ops;
+#endif
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 63687d3..8547119 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -1,682 +1,76 @@
-/*
- * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
- *
- * Lots of this code have been borrowed or heavily inspired from parts of
- * the libunwind 0.99 code which are (amongst other contributors I may have
- * forgotten):
- *
- * Copyright (C) 2002-2007 Hewlett-Packard Co
- * Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * And the bugs have been added by:
- *
- * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
- * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
- *
- */
-
-#include <elf.h>
-#include <gelf.h>
-#include <fcntl.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <linux/list.h>
-#include <libunwind.h>
-#include <libunwind-ptrace.h>
-#include "callchain.h"
+#include "unwind.h"
#include "thread.h"
#include "session.h"
-#include "perf_regs.h"
-#include "unwind.h"
-#include "symbol.h"
-#include "util.h"
#include "debug.h"
-#include "asm/bug.h"
+#include "arch/common.h"
-extern int
-UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
- unw_word_t ip,
- unw_dyn_info_t *di,
- unw_proc_info_t *pi,
- int need_unwind_info, void *arg);
+struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
-#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
-
-extern int
-UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
- unw_word_t ip,
- unw_word_t segbase,
- const char *obj_name, unw_word_t start,
- unw_word_t end);
-
-#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
-
-#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */
-#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */
-
-/* Pointer-encoding formats: */
-#define DW_EH_PE_omit 0xff
-#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */
-#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */
-#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */
-#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */
-#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */
-
-/* Pointer-encoding application: */
-#define DW_EH_PE_absptr 0x00 /* absolute value */
-#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */
-
-/*
- * The following are not documented by LSB v1.3, yet they are used by
- * GCC, presumably they aren't documented by LSB since they aren't
- * used on Linux:
- */
-#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */
-#define DW_EH_PE_aligned 0x50 /* aligned pointer */
-
-/* Flags intentionaly not handled, since they're not needed:
- * #define DW_EH_PE_indirect 0x80
- * #define DW_EH_PE_uleb128 0x01
- * #define DW_EH_PE_udata2 0x02
- * #define DW_EH_PE_sleb128 0x09
- * #define DW_EH_PE_sdata2 0x0a
- * #define DW_EH_PE_textrel 0x20
- * #define DW_EH_PE_datarel 0x30
- */
-
-struct unwind_info {
- struct perf_sample *sample;
- struct machine *machine;
- struct thread *thread;
-};
-
-#define dw_read(ptr, type, end) ({ \
- type *__p = (type *) ptr; \
- type __v; \
- if ((__p + 1) > (type *) end) \
- return -EINVAL; \
- __v = *__p++; \
- ptr = (typeof(ptr)) __p; \
- __v; \
- })
-
-static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
- u8 encoding)
+static void unwind__register_ops(struct thread *thread,
+ struct unwind_libunwind_ops *ops)
{
- u8 *cur = *p;
- *val = 0;
-
- switch (encoding) {
- case DW_EH_PE_omit:
- *val = 0;
- goto out;
- case DW_EH_PE_ptr:
- *val = dw_read(cur, unsigned long, end);
- goto out;
- default:
- break;
- }
-
- switch (encoding & DW_EH_PE_APPL_MASK) {
- case DW_EH_PE_absptr:
- break;
- case DW_EH_PE_pcrel:
- *val = (unsigned long) cur;
- break;
- default:
- return -EINVAL;
- }
-
- if ((encoding & 0x07) == 0x00)
- encoding |= DW_EH_PE_udata4;
-
- switch (encoding & DW_EH_PE_FORMAT_MASK) {
- case DW_EH_PE_sdata4:
- *val += dw_read(cur, s32, end);
- break;
- case DW_EH_PE_udata4:
- *val += dw_read(cur, u32, end);
- break;
- case DW_EH_PE_sdata8:
- *val += dw_read(cur, s64, end);
- break;
- case DW_EH_PE_udata8:
- *val += dw_read(cur, u64, end);
- break;
- default:
- return -EINVAL;
- }
-
- out:
- *p = cur;
- return 0;
+ thread->unwind_libunwind_ops = ops;
}
-#define dw_read_encoded_value(ptr, end, enc) ({ \
- u64 __v; \
- if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \
- return -EINVAL; \
- } \
- __v; \
- })
-
-static u64 elf_section_offset(int fd, const char *name)
+int unwind__prepare_access(struct thread *thread, struct map *map)
{
- Elf *elf;
- GElf_Ehdr ehdr;
- GElf_Shdr shdr;
- u64 offset = 0;
+ const char *arch;
+ enum dso_type dso_type;
+ struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
- elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
- if (elf == NULL)
+ if (thread->addr_space) {
+ pr_debug("unwind: thread map already set, dso=%s\n",
+ map->dso->name);
+ return 0;
+ }
+
+ /* env->arch is NULL for live-mode (i.e. perf top) */
+ if (!thread->mg->machine->env || !thread->mg->machine->env->arch)
+ goto out_register;
+
+ dso_type = dso__type(map->dso, thread->mg->machine);
+ if (dso_type == DSO__TYPE_UNKNOWN)
return 0;
- do {
- if (gelf_getehdr(elf, &ehdr) == NULL)
- break;
+ arch = normalize_arch(thread->mg->machine->env->arch);
- if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
- break;
-
- offset = shdr.sh_offset;
- } while (0);
-
- elf_end(elf);
- return offset;
-}
-
-#ifndef NO_LIBUNWIND_DEBUG_FRAME
-static int elf_is_exec(int fd, const char *name)
-{
- Elf *elf;
- GElf_Ehdr ehdr;
- int retval = 0;
-
- elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
- if (elf == NULL)
- return 0;
- if (gelf_getehdr(elf, &ehdr) == NULL)
- goto out;
-
- retval = (ehdr.e_type == ET_EXEC);
-
-out:
- elf_end(elf);
- pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval);
- return retval;
-}
-#endif
-
-struct table_entry {
- u32 start_ip_offset;
- u32 fde_offset;
-};
-
-struct eh_frame_hdr {
- unsigned char version;
- unsigned char eh_frame_ptr_enc;
- unsigned char fde_count_enc;
- unsigned char table_enc;
-
- /*
- * The rest of the header is variable-length and consists of the
- * following members:
- *
- * encoded_t eh_frame_ptr;
- * encoded_t fde_count;
- */
-
- /* A single encoded pointer should not be more than 8 bytes. */
- u64 enc[2];
-
- /*
- * struct {
- * encoded_t start_ip;
- * encoded_t fde_addr;
- * } binary_search_table[fde_count];
- */
- char data[0];
-} __packed;
-
-static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
- u64 offset, u64 *table_data, u64 *segbase,
- u64 *fde_count)
-{
- struct eh_frame_hdr hdr;
- u8 *enc = (u8 *) &hdr.enc;
- u8 *end = (u8 *) &hdr.data;
- ssize_t r;
-
- r = dso__data_read_offset(dso, machine, offset,
- (u8 *) &hdr, sizeof(hdr));
- if (r != sizeof(hdr))
- return -EINVAL;
-
- /* We dont need eh_frame_ptr, just skip it. */
- dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
-
- *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
- *segbase = offset;
- *table_data = (enc - (u8 *) &hdr) + offset;
- return 0;
-}
-
-static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
- u64 *table_data, u64 *segbase,
- u64 *fde_count)
-{
- int ret = -EINVAL, fd;
- u64 offset = dso->data.eh_frame_hdr_offset;
-
- if (offset == 0) {
- fd = dso__data_get_fd(dso, machine);
- if (fd < 0)
- return -EINVAL;
-
- /* Check the .eh_frame section for unwinding info */
- offset = elf_section_offset(fd, ".eh_frame_hdr");
- dso->data.eh_frame_hdr_offset = offset;
- dso__data_put_fd(dso);
+ if (!strcmp(arch, "x86")) {
+ if (dso_type != DSO__TYPE_64BIT)
+ ops = x86_32_unwind_libunwind_ops;
+ } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+ if (dso_type == DSO__TYPE_64BIT)
+ ops = arm64_unwind_libunwind_ops;
}
- if (offset)
- ret = unwind_spec_ehframe(dso, machine, offset,
- table_data, segbase,
- fde_count);
-
- return ret;
-}
-
-#ifndef NO_LIBUNWIND_DEBUG_FRAME
-static int read_unwind_spec_debug_frame(struct dso *dso,
- struct machine *machine, u64 *offset)
-{
- int fd;
- u64 ofs = dso->data.debug_frame_offset;
-
- if (ofs == 0) {
- fd = dso__data_get_fd(dso, machine);
- if (fd < 0)
- return -EINVAL;
-
- /* Check the .debug_frame section for unwinding info */
- ofs = elf_section_offset(fd, ".debug_frame");
- dso->data.debug_frame_offset = ofs;
- dso__data_put_fd(dso);
- }
-
- *offset = ofs;
- if (*offset)
- return 0;
-
- return -EINVAL;
-}
-#endif
-
-static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
-{
- struct addr_location al;
-
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, ip, &al);
- if (!al.map) {
- /*
- * We've seen cases (softice) where DWARF unwinder went
- * through non executable mmaps, which we need to lookup
- * in MAP__VARIABLE tree.
- */
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__VARIABLE, ip, &al);
- }
- return al.map;
-}
-
-static int
-find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
- int need_unwind_info, void *arg)
-{
- struct unwind_info *ui = arg;
- struct map *map;
- unw_dyn_info_t di;
- u64 table_data, segbase, fde_count;
- int ret = -EINVAL;
-
- map = find_map(ip, ui);
- if (!map || !map->dso)
- return -EINVAL;
-
- pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
-
- /* Check the .eh_frame section for unwinding info */
- if (!read_unwind_spec_eh_frame(map->dso, ui->machine,
- &table_data, &segbase, &fde_count)) {
- memset(&di, 0, sizeof(di));
- di.format = UNW_INFO_FORMAT_REMOTE_TABLE;
- di.start_ip = map->start;
- di.end_ip = map->end;
- di.u.rti.segbase = map->start + segbase;
- di.u.rti.table_data = map->start + table_data;
- di.u.rti.table_len = fde_count * sizeof(struct table_entry)
- / sizeof(unw_word_t);
- ret = dwarf_search_unwind_table(as, ip, &di, pi,
- need_unwind_info, arg);
- }
-
-#ifndef NO_LIBUNWIND_DEBUG_FRAME
- /* Check the .debug_frame section for unwinding info */
- if (ret < 0 &&
- !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
- int fd = dso__data_get_fd(map->dso, ui->machine);
- int is_exec = elf_is_exec(fd, map->dso->name);
- unw_word_t base = is_exec ? 0 : map->start;
- const char *symfile;
-
- if (fd >= 0)
- dso__data_put_fd(map->dso);
-
- symfile = map->dso->symsrc_filename ?: map->dso->name;
-
- memset(&di, 0, sizeof(di));
- if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
- map->start, map->end))
- return dwarf_search_unwind_table(as, ip, &di, pi,
- need_unwind_info, arg);
- }
-#endif
-
- return ret;
-}
-
-static int access_fpreg(unw_addr_space_t __maybe_unused as,
- unw_regnum_t __maybe_unused num,
- unw_fpreg_t __maybe_unused *val,
- int __maybe_unused __write,
- void __maybe_unused *arg)
-{
- pr_err("unwind: access_fpreg unsupported\n");
- return -UNW_EINVAL;
-}
-
-static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
- unw_word_t __maybe_unused *dil_addr,
- void __maybe_unused *arg)
-{
- return -UNW_ENOINFO;
-}
-
-static int resume(unw_addr_space_t __maybe_unused as,
- unw_cursor_t __maybe_unused *cu,
- void __maybe_unused *arg)
-{
- pr_err("unwind: resume unsupported\n");
- return -UNW_EINVAL;
-}
-
-static int
-get_proc_name(unw_addr_space_t __maybe_unused as,
- unw_word_t __maybe_unused addr,
- char __maybe_unused *bufp, size_t __maybe_unused buf_len,
- unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
-{
- pr_err("unwind: get_proc_name unsupported\n");
- return -UNW_EINVAL;
-}
-
-static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
- unw_word_t *data)
-{
- struct map *map;
- ssize_t size;
-
- map = find_map(addr, ui);
- if (!map) {
- pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+ if (!ops) {
+ pr_err("unwind: target platform=%s is not supported\n", arch);
return -1;
}
+out_register:
+ unwind__register_ops(thread, ops);
- if (!map->dso)
- return -1;
-
- size = dso__data_read_addr(map->dso, map, ui->machine,
- addr, (u8 *) data, sizeof(*data));
-
- return !(size == sizeof(*data));
-}
-
-static int access_mem(unw_addr_space_t __maybe_unused as,
- unw_word_t addr, unw_word_t *valp,
- int __write, void *arg)
-{
- struct unwind_info *ui = arg;
- struct stack_dump *stack = &ui->sample->user_stack;
- u64 start, end;
- int offset;
- int ret;
-
- /* Don't support write, probably not needed. */
- if (__write || !stack || !ui->sample->user_regs.regs) {
- *valp = 0;
- return 0;
- }
-
- ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
- if (ret)
- return ret;
-
- end = start + stack->size;
-
- /* Check overflow. */
- if (addr + sizeof(unw_word_t) < addr)
- return -EINVAL;
-
- if (addr < start || addr + sizeof(unw_word_t) >= end) {
- ret = access_dso_mem(ui, addr, valp);
- if (ret) {
- pr_debug("unwind: access_mem %p not inside range"
- " 0x%" PRIx64 "-0x%" PRIx64 "\n",
- (void *) (uintptr_t) addr, start, end);
- *valp = 0;
- return ret;
- }
- return 0;
- }
-
- offset = addr - start;
- *valp = *(unw_word_t *)&stack->data[offset];
- pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
- (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
- return 0;
-}
-
-static int access_reg(unw_addr_space_t __maybe_unused as,
- unw_regnum_t regnum, unw_word_t *valp,
- int __write, void *arg)
-{
- struct unwind_info *ui = arg;
- int id, ret;
- u64 val;
-
- /* Don't support write, I suspect we don't need it. */
- if (__write) {
- pr_err("unwind: access_reg w %d\n", regnum);
- return 0;
- }
-
- if (!ui->sample->user_regs.regs) {
- *valp = 0;
- return 0;
- }
-
- id = libunwind__arch_reg_id(regnum);
- if (id < 0)
- return -EINVAL;
-
- ret = perf_reg_value(&val, &ui->sample->user_regs, id);
- if (ret) {
- pr_err("unwind: can't read reg %d\n", regnum);
- return ret;
- }
-
- *valp = (unw_word_t) val;
- pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
- return 0;
-}
-
-static void put_unwind_info(unw_addr_space_t __maybe_unused as,
- unw_proc_info_t *pi __maybe_unused,
- void *arg __maybe_unused)
-{
- pr_debug("unwind: put_unwind_info called\n");
-}
-
-static int entry(u64 ip, struct thread *thread,
- unwind_entry_cb_t cb, void *arg)
-{
- struct unwind_entry e;
- struct addr_location al;
-
- thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, ip, &al);
-
- e.ip = ip;
- e.map = al.map;
- e.sym = al.sym;
-
- pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
- al.sym ? al.sym->name : "''",
- ip,
- al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
-
- return cb(&e, arg);
-}
-
-static void display_error(int err)
-{
- switch (err) {
- case UNW_EINVAL:
- pr_err("unwind: Only supports local.\n");
- break;
- case UNW_EUNSPEC:
- pr_err("unwind: Unspecified error.\n");
- break;
- case UNW_EBADREG:
- pr_err("unwind: Register unavailable.\n");
- break;
- default:
- break;
- }
-}
-
-static unw_accessors_t accessors = {
- .find_proc_info = find_proc_info,
- .put_unwind_info = put_unwind_info,
- .get_dyn_info_list_addr = get_dyn_info_list_addr,
- .access_mem = access_mem,
- .access_reg = access_reg,
- .access_fpreg = access_fpreg,
- .resume = resume,
- .get_proc_name = get_proc_name,
-};
-
-int unwind__prepare_access(struct thread *thread)
-{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return 0;
-
- thread->addr_space = unw_create_addr_space(&accessors, 0);
- if (!thread->addr_space) {
- pr_err("unwind: Can't create unwind address space.\n");
- return -ENOMEM;
- }
-
- unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
- return 0;
+ return thread->unwind_libunwind_ops->prepare_access(thread);
}
void unwind__flush_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return;
-
- unw_flush_cache(thread->addr_space, 0, 0);
+ if (thread->unwind_libunwind_ops)
+ thread->unwind_libunwind_ops->flush_access(thread);
}
void unwind__finish_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return;
-
- unw_destroy_addr_space(thread->addr_space);
-}
-
-static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
- void *arg, int max_stack)
-{
- u64 val;
- unw_word_t ips[max_stack];
- unw_addr_space_t addr_space;
- unw_cursor_t c;
- int ret, i = 0;
-
- ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP);
- if (ret)
- return ret;
-
- ips[i++] = (unw_word_t) val;
-
- /*
- * If we need more than one entry, do the DWARF
- * unwind itself.
- */
- if (max_stack - 1 > 0) {
- WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
- addr_space = ui->thread->addr_space;
-
- if (addr_space == NULL)
- return -1;
-
- ret = unw_init_remote(&c, addr_space, ui);
- if (ret)
- display_error(ret);
-
- while (!ret && (unw_step(&c) > 0) && i < max_stack) {
- unw_get_reg(&c, UNW_REG_IP, &ips[i]);
- ++i;
- }
-
- max_stack = i;
- }
-
- /*
- * Display what we got based on the order setup.
- */
- for (i = 0; i < max_stack && !ret; i++) {
- int j = i;
-
- if (callchain_param.order == ORDER_CALLER)
- j = max_stack - i - 1;
- ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
- }
-
- return ret;
+ if (thread->unwind_libunwind_ops)
+ thread->unwind_libunwind_ops->finish_access(thread);
}
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
- struct thread *thread,
- struct perf_sample *data, int max_stack)
+ struct thread *thread,
+ struct perf_sample *data, int max_stack)
{
- struct unwind_info ui = {
- .sample = data,
- .thread = thread,
- .machine = thread->mg->machine,
- };
-
- if (!data->user_regs.regs)
- return -EINVAL;
-
- if (max_stack <= 0)
- return -EINVAL;
-
- return get_entries(&ui, cb, arg, max_stack);
+ if (thread->unwind_libunwind_ops)
+ return thread->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack);
+ return 0;
}
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
index 12790cf..84c6d44 100644
--- a/tools/perf/util/unwind.h
+++ b/tools/perf/util/unwind.h
@@ -14,18 +14,40 @@
typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg);
+struct unwind_libunwind_ops {
+ int (*prepare_access)(struct thread *thread);
+ void (*flush_access)(struct thread *thread);
+ void (*finish_access)(struct thread *thread);
+ int (*get_entries)(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack);
+};
+
#ifdef HAVE_DWARF_UNWIND_SUPPORT
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
struct perf_sample *data, int max_stack);
/* libunwind specific */
#ifdef HAVE_LIBUNWIND_SUPPORT
-int libunwind__arch_reg_id(int regnum);
-int unwind__prepare_access(struct thread *thread);
+#ifndef LIBUNWIND__ARCH_REG_ID
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum)
+#endif
+
+#ifndef LIBUNWIND__ARCH_REG_SP
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_SP
+#endif
+
+#ifndef LIBUNWIND__ARCH_REG_IP
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_IP
+#endif
+
+int LIBUNWIND__ARCH_REG_ID(int regnum);
+int unwind__prepare_access(struct thread *thread, struct map *map);
void unwind__flush_access(struct thread *thread);
void unwind__finish_access(struct thread *thread);
#else
-static inline int unwind__prepare_access(struct thread *thread __maybe_unused)
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+ struct map *map __maybe_unused)
{
return 0;
}
@@ -44,7 +66,8 @@
return 0;
}
-static inline int unwind__prepare_access(struct thread *thread __maybe_unused)
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+ struct map *map __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 23504ad..e08b9a0 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -97,20 +97,17 @@
scnprintf(namebuf, sizeof(namebuf), "%s/%s",
path, d->d_name);
- ret = stat(namebuf, &statbuf);
+ /* We have to check symbolic link itself */
+ ret = lstat(namebuf, &statbuf);
if (ret < 0) {
pr_debug("stat failed: %s\n", namebuf);
break;
}
- if (S_ISREG(statbuf.st_mode))
- ret = unlink(namebuf);
- else if (S_ISDIR(statbuf.st_mode))
+ if (S_ISDIR(statbuf.st_mode))
ret = rm_rf(namebuf);
- else {
- pr_debug("unknown file: %s\n", namebuf);
- ret = -1;
- }
+ else
+ ret = unlink(namebuf);
}
closedir(dir);
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 44d440d..7bdcad4 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -134,8 +134,6 @@
return dso;
}
-#if BITS_PER_LONG == 64
-
static enum dso_type machine__thread_dso_type(struct machine *machine,
struct thread *thread)
{
@@ -156,6 +154,8 @@
return dso_type;
}
+#if BITS_PER_LONG == 64
+
static int vdso__do_copy_compat(FILE *f, int fd)
{
char buf[4096];
@@ -283,8 +283,38 @@
#endif
+static struct dso *machine__find_vdso(struct machine *machine,
+ struct thread *thread)
+{
+ struct dso *dso = NULL;
+ enum dso_type dso_type;
+
+ dso_type = machine__thread_dso_type(machine, thread);
+ switch (dso_type) {
+ case DSO__TYPE_32BIT:
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO32, true);
+ if (!dso) {
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO,
+ true);
+ if (dso && dso_type != dso__type(dso, machine))
+ dso = NULL;
+ }
+ break;
+ case DSO__TYPE_X32BIT:
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true);
+ break;
+ case DSO__TYPE_64BIT:
+ case DSO__TYPE_UNKNOWN:
+ default:
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+ break;
+ }
+
+ return dso;
+}
+
struct dso *machine__findnew_vdso(struct machine *machine,
- struct thread *thread __maybe_unused)
+ struct thread *thread)
{
struct vdso_info *vdso_info;
struct dso *dso = NULL;
@@ -297,6 +327,10 @@
if (!vdso_info)
goto out_unlock;
+ dso = machine__find_vdso(machine, thread);
+ if (dso)
+ goto out_unlock;
+
#if BITS_PER_LONG == 64
if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
goto out_unlock;