Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull perf fixes from Ingo Molnar: "Mostly tooling fixes, plus two uncore-PMU fixes, an uprobes fix, a perf-cgroups fix and an AUX events fix" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: perf/x86/intel/uncore: Add enable_box for client MSR uncore perf/x86/intel/uncore: Fix uncore num_counters uprobes/x86: Fix RIP-relative handling of EVEX-encoded instructions perf/core: Set cgroup in CPU contexts for new cgroup events perf/core: Fix sideband list-iteration vs. event ordering NULL pointer deference crash perf probe ppc64le: Fix probe location when using DWARF perf probe: Add function to post process kernel trace events tools: Sync cpufeatures headers with the kernel toops: Sync tools/include/uapi/linux/bpf.h with the kernel tools: Sync cpufeatures.h and vmx.h with the kernel perf probe: Support signedness casting perf stat: Avoid skew when reading events perf probe: Fix module name matching perf probe: Adjust map->reloc offset when finding kernel symbol from map perf hists: Trim libtraceevent trace_seq buffers perf script: Add 'bpf-output' field to usage message

commit: ad83242a8f061194198fa380177e42713e0a7b8c [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Fri Aug 12 13:21:18 2016 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Fri Aug 12 13:21:18 2016 -0700
tree: 398a60cd83d72347d100361144d065d0a6894c88
parent: 1f8083c640bf08aab8762a9e10326ce767c66492 [diff]
parent: 95f3be798472f63b495ca4712af005ea5ac7aa47 [diff]
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 97a69db..9d35ec0 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c

@@ -100,6 +100,12 @@
 	}
 }
 
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+		SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@
 
 static struct intel_uncore_ops snb_uncore_msr_ops = {
 	.init_box	= snb_uncore_msr_init_box,
+	.enable_box	= snb_uncore_msr_enable_box,
 	.exit_box	= snb_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@
 	}
 }
 
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+		SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@
 
 static struct intel_uncore_ops skl_uncore_msr_ops = {
 	.init_box	= skl_uncore_msr_init_box,
+	.enable_box	= skl_uncore_msr_enable_box,
 	.exit_box	= skl_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 824e540..8aee83b 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c

@@ -2626,7 +2626,7 @@
 
 static struct intel_uncore_type hswep_uncore_ha = {
 	.name		= "ha",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 2,
 	.perf_ctr_bits	= 48,
 	SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2645,7 @@
 
 static struct intel_uncore_type hswep_uncore_imc = {
 	.name		= "imc",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 8,
 	.perf_ctr_bits	= 48,
 	.fixed_ctr_bits	= 48,
@@ -2691,7 +2691,7 @@
 
 static struct intel_uncore_type hswep_uncore_qpi = {
 	.name			= "qpi",
-	.num_counters		= 5,
+	.num_counters		= 4,
 	.num_boxes		= 3,
 	.perf_ctr_bits		= 48,
 	.perf_ctr		= SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2773,7 @@
 
 static struct intel_uncore_type hswep_uncore_r3qpi = {
 	.name		= "r3qpi",
-	.num_counters   = 4,
+	.num_counters   = 3,
 	.num_boxes	= 3,
 	.perf_ctr_bits	= 44,
 	.constraints	= hswep_uncore_r3qpi_constraints,
@@ -2972,7 +2972,7 @@
 
 static struct intel_uncore_type bdx_uncore_imc = {
 	.name		= "imc",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 8,
 	.perf_ctr_bits	= 48,
 	.fixed_ctr_bits	= 48,

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6c1ff31..495c776 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c

@@ -357,20 +357,22 @@
 		*cursor &= 0xfe;
 	}
 	/*
-	 * Similar treatment for VEX3 prefix.
-	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 * Similar treatment for VEX3/EVEX prefix.
+	 * TODO: add XOP treatment when insn decoder supports them
 	 */
-	if (insn->vex_prefix.nbytes == 3) {
+	if (insn->vex_prefix.nbytes >= 3) {
 		/*
 		 * vex2:     c5    rvvvvLpp   (has no b bit)
 		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
 		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
-		 *   (evex will need setting of both b and x since
-		 *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
-		 * Setting VEX3.b (setting because it has inverted meaning):
+		 * Setting VEX3.b (setting because it has inverted meaning).
+		 * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+		 * is the 4th bit of MODRM.rm, and needs the same treatment.
+		 * For VEX3-encoded insns, VEX3.x value has no effect in
+		 * non-SIB encoding, the change is superfluous but harmless.
 		 */
 		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
-		*cursor |= 0x20;
+		*cursor |= 0x60;
 	}
 
 	/*
@@ -415,12 +417,10 @@
 
 	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
 	reg2 = 0xff;		/* Fetch vex.vvvv */
-	if (insn->vex_prefix.nbytes == 2)
-		reg2 = insn->vex_prefix.bytes[1];
-	else if (insn->vex_prefix.nbytes == 3)
+	if (insn->vex_prefix.nbytes)
 		reg2 = insn->vex_prefix.bytes[2];
 	/*
-	 * TODO: add XOP, EXEV vvvv reading.
+	 * TODO: add XOP vvvv reading.
 	 *
 	 * vex.vvvv field is in bits 6-3, bits are inverted.
 	 * But in 32-bit mode, high-order bit may be ignored.

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8ed43261..2b6b43c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h

@@ -743,7 +743,9 @@
 	u64				parent_gen;
 	u64				generation;
 	int				pin_count;
+#ifdef CONFIG_CGROUP_PERF
 	int				nr_cgroups;	 /* cgroup evts */
+#endif
 	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
 };
@@ -769,7 +771,9 @@
 	unsigned int			hrtimer_active;
 
 	struct pmu			*unique_pmu;
+#ifdef CONFIG_CGROUP_PERF
 	struct perf_cgroup		*cgrp;
+#endif
 };
 
 struct perf_output_handle {

diff --git a/kernel/events/core.c b/kernel/events/core.c
index a19550d..1903b8f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c

@@ -843,6 +843,32 @@
 		}
 	}
 }
+
+/*
+ * Update cpuctx->cgrp so that it is set when first cgroup event is added and
+ * cleared when last cgroup event is removed.
+ */
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+			 struct perf_event_context *ctx, bool add)
+{
+	struct perf_cpu_context *cpuctx;
+
+	if (!is_cgroup_event(event))
+		return;
+
+	if (add && ctx->nr_cgroups++)
+		return;
+	else if (!add && --ctx->nr_cgroups)
+		return;
+	/*
+	 * Because cgroup events are always per-cpu events,
+	 * this will always be called from the right CPU.
+	 */
+	cpuctx = __get_cpu_context(ctx);
+	cpuctx->cgrp = add ? event->cgrp : NULL;
+}
+
 #else /* !CONFIG_CGROUP_PERF */
 
 static inline bool
@@ -920,6 +946,13 @@
 			 struct perf_event_context *ctx)
 {
 }
+
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+			 struct perf_event_context *ctx, bool add)
+{
+}
+
 #endif
 
 /*
@@ -1392,6 +1425,7 @@
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+
 	lockdep_assert_held(&ctx->lock);
 
 	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1446,7 @@
 		list_add_tail(&event->group_entry, list);
 	}
 
-	if (is_cgroup_event(event))
-		ctx->nr_cgroups++;
+	list_update_cgroup_event(event, ctx, true);
 
 	list_add_rcu(&event->event_entry, &ctx->event_list);
 	ctx->nr_events++;
@@ -1581,8 +1614,6 @@
 static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-	struct perf_cpu_context *cpuctx;
-
 	WARN_ON_ONCE(event->ctx != ctx);
 	lockdep_assert_held(&ctx->lock);
 
@@ -1594,20 +1625,7 @@
 
 	event->attach_state &= ~PERF_ATTACH_CONTEXT;
 
-	if (is_cgroup_event(event)) {
-		ctx->nr_cgroups--;
-		/*
-		 * Because cgroup events are always per-cpu events, this will
-		 * always be called from the right CPU.
-		 */
-		cpuctx = __get_cpu_context(ctx);
-		/*
-		 * If there are no more cgroup events then clear cgrp to avoid
-		 * stale pointer in update_cgrp_time_from_cpuctx().
-		 */
-		if (!ctx->nr_cgroups)
-			cpuctx->cgrp = NULL;
-	}
+	list_update_cgroup_event(event, ctx, false);
 
 	ctx->nr_events--;
 	if (event->attr.inherit_stat)
@@ -1716,8 +1734,8 @@
 static inline int
 event_filter_match(struct perf_event *event)
 {
-	return (event->cpu == -1 || event->cpu == smp_processor_id())
-	    && perf_cgroup_match(event) && pmu_filter_match(event);
+	return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
+	       perf_cgroup_match(event) && pmu_filter_match(event);
 }
 
 static void
@@ -1737,8 +1755,8 @@
 	 * maintained, otherwise bogus information is return
 	 * via read() for time_enabled, time_running:
 	 */
-	if (event->state == PERF_EVENT_STATE_INACTIVE
-	    && !event_filter_match(event)) {
+	if (event->state == PERF_EVENT_STATE_INACTIVE &&
+	    !event_filter_match(event)) {
 		delta = tstamp - event->tstamp_stopped;
 		event->tstamp_running += delta;
 		event->tstamp_stopped = tstamp;
@@ -2236,10 +2254,15 @@
 
 	lockdep_assert_held(&ctx->mutex);
 
-	event->ctx = ctx;
 	if (event->cpu != -1)
 		event->cpu = cpu;
 
+	/*
+	 * Ensures that if we can observe event->ctx, both the event and ctx
+	 * will be 'complete'. See perf_iterate_sb_cpu().
+	 */
+	smp_store_release(&event->ctx, ctx);
+
 	if (!task) {
 		cpu_function_call(cpu, __perf_install_in_context, event);
 		return;
@@ -5969,6 +5992,14 @@
 	struct perf_event *event;
 
 	list_for_each_entry_rcu(event, &pel->list, sb_list) {
+		/*
+		 * Skip events that are not fully formed yet; ensure that
+		 * if we observe event->ctx, both event and ctx will be
+		 * complete enough. See perf_install_in_context().
+		 */
+		if (!smp_load_acquire(&event->ctx))
+			continue;
+
 		if (event->state < PERF_EVENT_STATE_INACTIVE)
 			continue;
 		if (!event_filter_match(event))

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 4a41348..92a8308 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h

@@ -225,7 +225,6 @@
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
@@ -301,10 +300,6 @@
 #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG	X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE	X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
 #ifdef CONFIG_X86_32
 /*
  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
@@ -312,5 +307,7 @@
  */
 #define X86_BUG_ESPFIX		X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
 #endif
-
+#define X86_BUG_NULL_SEG	X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
 #endif /* _ASM_X86_CPUFEATURES_H */

diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 911e935..85599ad 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h

@@ -56,5 +56,7 @@
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK17	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */

diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index 4916144..fac9a5c 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h

@@ -99,5 +99,7 @@
 #define REQUIRED_MASK14	0
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	0
+#define REQUIRED_MASK17	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */

diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index 5b15d94..37fee27 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h

@@ -78,7 +78,6 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -127,8 +126,7 @@
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-	{ EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 406459b..da218fe 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h

@@ -84,6 +84,7 @@
 	BPF_MAP_TYPE_PERCPU_HASH,
 	BPF_MAP_TYPE_PERCPU_ARRAY,
 	BPF_MAP_TYPE_STACK_TRACE,
+	BPF_MAP_TYPE_CGROUP_ARRAY,
 };
 
 enum bpf_prog_type {
@@ -93,6 +94,7 @@
 	BPF_PROG_TYPE_SCHED_CLS,
 	BPF_PROG_TYPE_SCHED_ACT,
 	BPF_PROG_TYPE_TRACEPOINT,
+	BPF_PROG_TYPE_XDP,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
@@ -313,6 +315,66 @@
 	 */
 	BPF_FUNC_skb_get_tunnel_opt,
 	BPF_FUNC_skb_set_tunnel_opt,
+
+	/**
+	 * bpf_skb_change_proto(skb, proto, flags)
+	 * Change protocol of the skb. Currently supported is
+	 * v4 -> v6, v6 -> v4 transitions. The helper will also
+	 * resize the skb. eBPF program is expected to fill the
+	 * new headers via skb_store_bytes and lX_csum_replace.
+	 * @skb: pointer to skb
+	 * @proto: new skb->protocol type
+	 * @flags: reserved
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_proto,
+
+	/**
+	 * bpf_skb_change_type(skb, type)
+	 * Change packet type of skb.
+	 * @skb: pointer to skb
+	 * @type: new skb->pkt_type type
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_type,
+
+	/**
+	 * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+	 * @skb: pointer to skb
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 skb failed the cgroup2 descendant test
+	 *   == 1 skb succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_skb_in_cgroup,
+
+	/**
+	 * bpf_get_hash_recalc(skb)
+	 * Retrieve and possibly recalculate skb->hash.
+	 * @skb: pointer to skb
+	 * Return: hash
+	 */
+	BPF_FUNC_get_hash_recalc,
+
+	/**
+	 * u64 bpf_get_current_task(void)
+	 * Returns current task_struct
+	 * Return: current
+	 */
+	BPF_FUNC_get_current_task,
+
+	/**
+	 * bpf_probe_write_user(void *dst, void *src, int len)
+	 * safely attempt to write to a location
+	 * @dst: destination address in userspace
+	 * @src: source address on stack
+	 * @len: number of bytes to copy
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_probe_write_user,
+
 	__BPF_FUNC_MAX_ID,
 };
 
@@ -347,9 +409,11 @@
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
 
-/* BPF_FUNC_perf_event_output flags. */
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
 #define BPF_F_INDEX_MASK		0xffffffffULL
 #define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
@@ -386,4 +450,24 @@
 	__u32 tunnel_label;
 };
 
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+	XDP_ABORTED = 0,
+	XDP_DROP,
+	XDP_PASS,
+	XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+	__u32 data;
+	__u32 data_end;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */

diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 736da44..b303bcd 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt

@@ -176,10 +176,18 @@
 
 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
 '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail)
 
 On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
 
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe.
+String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is;
+
+ b<bit-width>@<bit-offset>/<container-size>
+
 LINE SYNTAX
 -----------
 Line range is described by following syntax.

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 1f6c705..053bbbd 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt

@@ -116,8 +116,8 @@
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-	srcline, period, iregs, brstack, brstacksym, flags.
-        Field list can be prepended with the type, trace, sw or hw,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+        callindent. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
 

diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index c6d0f91..8d4dc97 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c

@@ -54,10 +54,6 @@
 #endif
 
 #if defined(_CALL_ELF) && _CALL_ELF == 2
-bool arch__prefers_symtab(void)
-{
-	return true;
-}
 
 #ifdef HAVE_LIBELF_SUPPORT
 void arch__sym_update(struct symbol *s, GElf_Sym *sym)
@@ -100,4 +96,27 @@
 			tev->point.offset += lep_offset;
 	}
 }
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+					   int ntevs)
+{
+	struct probe_trace_event *tev;
+	struct map *map;
+	struct symbol *sym = NULL;
+	struct rb_node *tmp;
+	int i = 0;
+
+	map = get_target_map(pev->target, pev->uprobes);
+	if (!map || map__load(map, NULL) < 0)
+		return;
+
+	for (i = 0; i < ntevs; i++) {
+		tev = &pev->tevs[i];
+		map__for_each_symbol(map, sym, tmp) {
+			if (map->unmap_ip(map, sym->start) == tev->point.address)
+				arch__fix_tev_from_maps(pev, tev, map, sym);
+		}
+	}
+}
+
 #endif

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 971ff91..9c640a8 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c

@@ -2116,7 +2116,7 @@
 		     "Valid types: hw,sw,trace,raw. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-		     "callindent", parse_output_fields),
+		     "bpf-output,callindent", parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0c16d20..3c7452b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c

@@ -331,7 +331,7 @@
 	return 0;
 }
 
-static void read_counters(bool close_counters)
+static void read_counters(void)
 {
 	struct perf_evsel *counter;
 
@@ -341,11 +341,6 @@
 
 		if (perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
-
-		if (close_counters) {
-			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
-					     thread_map__nr(evsel_list->threads));
-		}
 	}
 }
 
@@ -353,7 +348,7 @@
 {
 	struct timespec ts, rs;
 
-	read_counters(false);
+	read_counters();
 
 	clock_gettime(CLOCK_MONOTONIC, &ts);
 	diff_timespec(&rs, &ts, &ref_time);
@@ -380,6 +375,17 @@
 		perf_evlist__enable(evsel_list);
 }
 
+static void disable_counters(void)
+{
+	/*
+	 * If we don't have tracee (attaching to task or cpu), counters may
+	 * still be running. To get accurate group ratios, we must stop groups
+	 * from counting before reading their constituent counters.
+	 */
+	if (!target__none(&target))
+		perf_evlist__disable(evsel_list);
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -657,11 +663,20 @@
 		}
 	}
 
+	disable_counters();
+
 	t1 = rdclock();
 
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
-	read_counters(true);
+	/*
+	 * Closing a group leader splits the group, and as we only disable
+	 * group leaders, results in remaining events becoming enabled. To
+	 * avoid arbitrary skew, we must read all counters before closing any
+	 * group leaders.
+	 */
+	read_counters();
+	perf_evlist__close(evsel_list);
 
 	return WEXITSTATUS(status);
 }

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 953dc1a..2873396 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c

@@ -170,15 +170,17 @@
 		module = "kernel";
 
 	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+		/* short_name is "[module]" */
 		if (strncmp(pos->dso->short_name + 1, module,
-			    pos->dso->short_name_len - 2) == 0) {
+			    pos->dso->short_name_len - 2) == 0 &&
+		    module[pos->dso->short_name_len - 2] == '\0') {
 			return pos;
 		}
 	}
 	return NULL;
 }
 
-static struct map *get_target_map(const char *target, bool user)
+struct map *get_target_map(const char *target, bool user)
 {
 	/* Init maps of given executable or kernel */
 	if (user)
@@ -385,7 +387,7 @@
 		if (uprobes)
 			address = sym->start;
 		else
-			address = map->unmap_ip(map, sym->start);
+			address = map->unmap_ip(map, sym->start) - map->reloc;
 		break;
 	}
 	if (!address) {
@@ -664,22 +666,14 @@
 	return ret;
 }
 
-/* Post processing the probe events */
-static int post_process_probe_trace_events(struct probe_trace_event *tevs,
-					   int ntevs, const char *module,
-					   bool uprobe)
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+				       int ntevs)
 {
 	struct ref_reloc_sym *reloc_sym;
 	char *tmp;
 	int i, skipped = 0;
 
-	if (uprobe)
-		return add_exec_to_probe_trace_events(tevs, ntevs, module);
-
-	/* Note that currently ref_reloc_sym based probe is not for drivers */
-	if (module)
-		return add_module_to_probe_trace_events(tevs, ntevs, module);
-
 	reloc_sym = kernel_get_ref_reloc_sym();
 	if (!reloc_sym) {
 		pr_warning("Relocated base symbol is not found!\n");
@@ -711,6 +705,34 @@
 	return skipped;
 }
 
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+				      int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+					   struct probe_trace_event *tevs,
+					   int ntevs, const char *module,
+					   bool uprobe)
+{
+	int ret;
+
+	if (uprobe)
+		ret = add_exec_to_probe_trace_events(tevs, ntevs, module);
+	else if (module)
+		/* Currently ref_reloc_sym based probe is not for drivers */
+		ret = add_module_to_probe_trace_events(tevs, ntevs, module);
+	else
+		ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+	if (ret >= 0)
+		arch__post_process_probe_trace_events(pev, ntevs);
+
+	return ret;
+}
+
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 					  struct probe_trace_event **tevs)
@@ -749,7 +771,7 @@
 
 	if (ntevs > 0) {	/* Succeeded to find trace events */
 		pr_debug("Found %d probe_trace_events.\n", ntevs);
-		ret = post_process_probe_trace_events(*tevs, ntevs,
+		ret = post_process_probe_trace_events(pev, *tevs, ntevs,
 						pev->target, pev->uprobes);
 		if (ret < 0 || ret == ntevs) {
 			clear_probe_trace_events(*tevs, ntevs);
@@ -2936,8 +2958,6 @@
 	return err;
 }
 
-bool __weak arch__prefers_symtab(void) { return false; }
-
 /* Concatinate two arrays */
 static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
 {
@@ -3158,12 +3178,6 @@
 	if (ret > 0 || pev->sdt)	/* SDT can be found only in the cache */
 		return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
 
-	if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
-		ret = find_probe_trace_events_from_map(pev, tevs);
-		if (ret > 0)
-			return ret; /* Found in symbol table */
-	}
-
 	/* Convert perf_probe_event with debuginfo */
 	ret = try_to_find_probe_trace_events(pev, tevs);
 	if (ret != 0)

diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e18ea9f..f4f45db 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h

@@ -158,7 +158,6 @@
 int show_available_vars(struct perf_probe_event *pevs, int npevs,
 			struct strfilter *filter);
 int show_available_funcs(const char *module, struct strfilter *filter, bool user);
-bool arch__prefers_symtab(void);
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 			     struct probe_trace_event *tev, struct map *map,
 			     struct symbol *sym);
@@ -173,4 +172,9 @@
 int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
 			    struct perf_probe_arg *pvar);
 
+struct map *get_target_map(const char *target, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+					   int ntevs);
+
 #endif /*_PROBE_EVENT_H */

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index f2d9ff0..5c290c6 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c

@@ -297,10 +297,13 @@
 	char sbuf[STRERR_BUFSIZE];
 	int bsize, boffs, total;
 	int ret;
+	char sign;
 
 	/* TODO: check all types */
-	if (cast && strcmp(cast, "string") != 0) {
+	if (cast && strcmp(cast, "string") != 0 &&
+	    strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
 		/* Non string type is OK */
+		/* and respect signedness cast */
 		tvar->type = strdup(cast);
 		return (tvar->type == NULL) ? -ENOMEM : 0;
 	}
@@ -361,6 +364,13 @@
 		return (tvar->type == NULL) ? -ENOMEM : 0;
 	}
 
+	if (cast && (strcmp(cast, "u") == 0))
+		sign = 'u';
+	else if (cast && (strcmp(cast, "s") == 0))
+		sign = 's';
+	else
+		sign = die_is_signed_type(&type) ? 's' : 'u';
+
 	ret = dwarf_bytesize(&type);
 	if (ret <= 0)
 		/* No size ... try to use default type */
@@ -373,8 +383,7 @@
 			dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
 		ret = MAX_BASIC_TYPE_BITS;
 	}
-	ret = snprintf(buf, 16, "%c%d",
-		       die_is_signed_type(&type) ? 's' : 'u', ret);
+	ret = snprintf(buf, 16, "%c%d", sign, ret);
 
 formatted:
 	if (ret < 0 || ret >= 16) {

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 947d21f..3d3cb83 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c

@@ -588,7 +588,11 @@
 	} else {
 		pevent_event_info(&seq, evsel->tp_format, &rec);
 	}
-	return seq.buffer;
+	/*
+	 * Trim the buffer, it starts at 4KB and we're not going to
+	 * add anything more to this buffer.
+	 */
+	return realloc(seq.buffer, seq.len + 1);
 }
 
 static int64_t
commit	ad83242a8f061194198fa380177e42713e0a7b8c	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Fri Aug 12 13:21:18 2016 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Fri Aug 12 13:21:18 2016 -0700
tree	398a60cd83d72347d100361144d065d0a6894c88
parent	1f8083c640bf08aab8762a9e10326ce767c66492 [diff]
parent	95f3be798472f63b495ca4712af005ea5ac7aa47 [diff]