Port ftrace to markers
Porting ftrace to the marker infrastructure.
Don't need to chain to the wakeup tracer from the sched tracer, because markers
support multiple probes connected.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 360ca99..c0b1c69 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2131,38 +2131,6 @@
}
#endif
-#ifdef CONFIG_CONTEXT_SWITCH_TRACER
-extern void
-ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next);
-extern void
-ftrace_wake_up_task(void *rq, struct task_struct *wakee,
- struct task_struct *curr);
-extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data);
-extern void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
-#else
-static inline void
-ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next)
-{
-}
-static inline void
-sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3)
-{
-}
-static inline void
-ftrace_wake_up_task(void *rq, struct task_struct *wakee,
- struct task_struct *curr)
-{
-}
-static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data)
-{
-}
-static inline void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-}
-#endif
-
extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
diff --git a/kernel/sched.c b/kernel/sched.c
index ad95cca..e2e985e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2500,7 +2500,9 @@
success = 1;
out_running:
- ftrace_wake_up_task(rq, p, rq->curr);
+ trace_mark(kernel_sched_wakeup,
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
p->state = TASK_RUNNING;
@@ -2631,7 +2633,9 @@
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
}
- ftrace_wake_up_task(rq, p, rq->curr);
+ trace_mark(kernel_sched_wakeup_new,
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
@@ -2804,7 +2808,11 @@
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
- ftrace_ctx_switch(rq, prev, next);
+ trace_mark(kernel_sched_schedule,
+ "prev_pid %d next_pid %d prev_state %ld "
+ "## rq %p prev %p next %p",
+ prev->pid, next->pid, prev->state,
+ rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8845033..f5de060 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -234,25 +234,10 @@
extern cycle_t ftrace_now(int cpu);
-#ifdef CONFIG_SCHED_TRACER
-extern void
-wakeup_sched_switch(struct task_struct *prev, struct task_struct *next);
-extern void
-wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr);
-#else
-static inline void
-wakeup_sched_switch(struct task_struct *prev, struct task_struct *next)
-{
-}
-static inline void
-wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr)
-{
-}
-#endif
-
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
typedef void
(*tracer_switch_func_t)(void *private,
+ void *__rq,
struct task_struct *prev,
struct task_struct *next);
@@ -262,9 +247,6 @@
struct tracer_switch_ops *next;
};
-extern int register_tracer_switch(struct tracer_switch_ops *ops);
-extern int unregister_tracer_switch(struct tracer_switch_ops *ops);
-
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index a337647..d25ffa5 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,11 +16,14 @@
static struct trace_array *ctx_trace;
static int __read_mostly tracer_enabled;
+static atomic_t sched_ref;
static void
-ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next)
+sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+ struct task_struct *next)
{
- struct trace_array *tr = ctx_trace;
+ struct trace_array **ptr = private;
+ struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
@@ -41,10 +44,40 @@
local_irq_restore(flags);
}
-static void
-wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr)
+static notrace void
+sched_switch_callback(void *probe_data, void *call_data,
+ const char *format, va_list *args)
{
- struct trace_array *tr = ctx_trace;
+ struct task_struct *prev;
+ struct task_struct *next;
+ struct rq *__rq;
+
+ if (!atomic_read(&sched_ref))
+ return;
+
+ /* skip prev_pid %d next_pid %d prev_state %ld */
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, long);
+ __rq = va_arg(*args, typeof(__rq));
+ prev = va_arg(*args, typeof(prev));
+ next = va_arg(*args, typeof(next));
+
+ tracing_record_cmdline(prev);
+
+ /*
+ * If tracer_switch_func only points to the local
+ * switch func, it still needs the ptr passed to it.
+ */
+ sched_switch_func(probe_data, __rq, prev, next);
+}
+
+static void
+wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
+ task_struct *curr)
+{
+ struct trace_array **ptr = private;
+ struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
@@ -67,35 +100,29 @@
local_irq_restore(flags);
}
-void
-ftrace_ctx_switch(void *__rq, struct task_struct *prev,
- struct task_struct *next)
+static notrace void
+wake_up_callback(void *probe_data, void *call_data,
+ const char *format, va_list *args)
{
- if (unlikely(atomic_read(&trace_record_cmdline_enabled)))
- tracing_record_cmdline(prev);
+ struct task_struct *curr;
+ struct task_struct *task;
+ struct rq *__rq;
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- ctx_switch_func(__rq, prev, next);
+ if (likely(!tracer_enabled))
+ return;
- /*
- * Chain to the wakeup tracer (this is a NOP if disabled):
- */
- wakeup_sched_switch(prev, next);
-}
+ /* Skip pid %d state %ld */
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, long);
+ /* now get the meat: "rq %p task %p rq->curr %p" */
+ __rq = va_arg(*args, typeof(__rq));
+ task = va_arg(*args, typeof(task));
+ curr = va_arg(*args, typeof(curr));
-void
-ftrace_wake_up_task(void *__rq, struct task_struct *wakee,
- struct task_struct *curr)
-{
- wakeup_func(__rq, wakee, curr);
+ tracing_record_cmdline(task);
+ tracing_record_cmdline(curr);
- /*
- * Chain to the wakeup tracer (this is a NOP if disabled):
- */
- wakeup_sched_wakeup(wakee, curr);
+ wakeup_func(probe_data, __rq, task, curr);
}
void
@@ -132,15 +159,95 @@
tracing_reset(tr->data[cpu]);
}
+static int tracing_sched_register(void)
+{
+ int ret;
+
+ ret = marker_probe_register("kernel_sched_wakeup",
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ wake_up_callback,
+ &ctx_trace);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't add marker"
+ " probe to kernel_sched_wakeup\n");
+ return ret;
+ }
+
+ ret = marker_probe_register("kernel_sched_wakeup_new",
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ wake_up_callback,
+ &ctx_trace);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't add marker"
+ " probe to kernel_sched_wakeup_new\n");
+ goto fail_deprobe;
+ }
+
+ ret = marker_probe_register("kernel_sched_schedule",
+ "prev_pid %d next_pid %d prev_state %ld "
+ "## rq %p prev %p next %p",
+ sched_switch_callback,
+ &ctx_trace);
+ if (ret) {
+ pr_info("sched trace: Couldn't add marker"
+ " probe to kernel_sched_schedule\n");
+ goto fail_deprobe_wake_new;
+ }
+
+ return ret;
+fail_deprobe_wake_new:
+ marker_probe_unregister("kernel_sched_wakeup_new",
+ wake_up_callback,
+ &ctx_trace);
+fail_deprobe:
+ marker_probe_unregister("kernel_sched_wakeup",
+ wake_up_callback,
+ &ctx_trace);
+ return ret;
+}
+
+static void tracing_sched_unregister(void)
+{
+ marker_probe_unregister("kernel_sched_schedule",
+ sched_switch_callback,
+ &ctx_trace);
+ marker_probe_unregister("kernel_sched_wakeup_new",
+ wake_up_callback,
+ &ctx_trace);
+ marker_probe_unregister("kernel_sched_wakeup",
+ wake_up_callback,
+ &ctx_trace);
+}
+
+void tracing_start_sched_switch(void)
+{
+ long ref;
+
+ ref = atomic_inc_return(&sched_ref);
+ if (ref == 1)
+ tracing_sched_register();
+}
+
+void tracing_stop_sched_switch(void)
+{
+ long ref;
+
+ ref = atomic_dec_and_test(&sched_ref);
+ if (ref)
+ tracing_sched_unregister();
+}
+
static void start_sched_trace(struct trace_array *tr)
{
sched_switch_reset(tr);
atomic_inc(&trace_record_cmdline_enabled);
tracer_enabled = 1;
+ tracing_start_sched_switch();
}
static void stop_sched_trace(struct trace_array *tr)
{
+ tracing_stop_sched_switch();
atomic_dec(&trace_record_cmdline_enabled);
tracer_enabled = 0;
}
@@ -181,6 +288,14 @@
__init static int init_sched_switch_trace(void)
{
+ int ret = 0;
+
+ if (atomic_read(&sched_ref))
+ ret = tracing_sched_register();
+ if (ret) {
+ pr_info("error registering scheduler trace\n");
+ return ret;
+ }
return register_tracer(&sched_switch_trace);
}
device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 5948011..5d2fb48 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,6 +15,7 @@
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
+#include <linux/marker.h>
#include "trace.h"
@@ -44,11 +45,13 @@
return 1;
}
-void
-wakeup_sched_switch(struct task_struct *prev, struct task_struct *next)
+static void notrace
+wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+ struct task_struct *next)
{
unsigned long latency = 0, t0 = 0, t1 = 0;
- struct trace_array *tr = wakeup_trace;
+ struct trace_array **ptr = private;
+ struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
cycle_t T0, T1, delta;
unsigned long flags;
@@ -113,6 +116,31 @@
atomic_dec(&tr->data[cpu]->disabled);
}
+static notrace void
+sched_switch_callback(void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ struct task_struct *prev;
+ struct task_struct *next;
+ struct rq *__rq;
+
+ /* skip prev_pid %d next_pid %d prev_state %ld */
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, long);
+ __rq = va_arg(*args, typeof(__rq));
+ prev = va_arg(*args, typeof(prev));
+ next = va_arg(*args, typeof(next));
+
+ tracing_record_cmdline(prev);
+
+ /*
+ * If tracer_switch_func only points to the local
+ * switch func, it still needs the ptr passed to it.
+ */
+ wakeup_sched_switch(probe_data, __rq, prev, next);
+}
+
static void __wakeup_reset(struct trace_array *tr)
{
struct trace_array_cpu *data;
@@ -188,19 +216,68 @@
atomic_dec(&tr->data[cpu]->disabled);
}
-void wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr)
+static notrace void
+wake_up_callback(void *probe_data, void *call_data,
+ const char *format, va_list *args)
{
+ struct trace_array **ptr = probe_data;
+ struct trace_array *tr = *ptr;
+ struct task_struct *curr;
+ struct task_struct *task;
+ struct rq *__rq;
+
if (likely(!tracer_enabled))
return;
- tracing_record_cmdline(curr);
- tracing_record_cmdline(wakee);
+ /* Skip pid %d state %ld */
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, long);
+ /* now get the meat: "rq %p task %p rq->curr %p" */
+ __rq = va_arg(*args, typeof(__rq));
+ task = va_arg(*args, typeof(task));
+ curr = va_arg(*args, typeof(curr));
- wakeup_check_start(wakeup_trace, wakee, curr);
+ tracing_record_cmdline(task);
+ tracing_record_cmdline(curr);
+
+ wakeup_check_start(tr, task, curr);
}
static void start_wakeup_tracer(struct trace_array *tr)
{
+ int ret;
+
+ ret = marker_probe_register("kernel_sched_wakeup",
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ wake_up_callback,
+ &wakeup_trace);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't add marker"
+ " probe to kernel_sched_wakeup\n");
+ return;
+ }
+
+ ret = marker_probe_register("kernel_sched_wakeup_new",
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ wake_up_callback,
+ &wakeup_trace);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't add marker"
+ " probe to kernel_sched_wakeup_new\n");
+ goto fail_deprobe;
+ }
+
+ ret = marker_probe_register("kernel_sched_schedule",
+ "prev_pid %d next_pid %d prev_state %ld "
+ "## rq %p prev %p next %p",
+ sched_switch_callback,
+ &wakeup_trace);
+ if (ret) {
+ pr_info("sched trace: Couldn't add marker"
+ " probe to kernel_sched_schedule\n");
+ goto fail_deprobe_wake_new;
+ }
+
wakeup_reset(tr);
/*
@@ -215,11 +292,28 @@
tracer_enabled = 1;
return;
+fail_deprobe_wake_new:
+ marker_probe_unregister("kernel_sched_wakeup_new",
+ wake_up_callback,
+ &wakeup_trace);
+fail_deprobe:
+ marker_probe_unregister("kernel_sched_wakeup",
+ wake_up_callback,
+ &wakeup_trace);
}
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
+ marker_probe_unregister("kernel_sched_schedule",
+ sched_switch_callback,
+ &wakeup_trace);
+ marker_probe_unregister("kernel_sched_wakeup_new",
+ wake_up_callback,
+ &wakeup_trace);
+ marker_probe_unregister("kernel_sched_wakeup",
+ wake_up_callback,
+ &wakeup_trace);
}
static void wakeup_tracer_init(struct trace_array *tr)