blob: ef1a7df80ea21257ab141a3ef57142bad9ab47ae [file] [log] [blame]
Ingo Molnar425e0962007-07-09 18:51:58 +02001
2#ifdef CONFIG_SCHEDSTATS
/*
 * Version number printed at the top of the schedstat output.  Increase it
 * whenever the output format, or the meaning of an existing field, changes,
 * so that tools parsing the output can adapt (or abort).
 */
#define SCHEDSTAT_VERSION 14
8
9static int show_schedstat(struct seq_file *seq, void *v)
10{
11 int cpu;
12
13 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
14 seq_printf(seq, "timestamp %lu\n", jiffies);
15 for_each_online_cpu(cpu) {
16 struct rq *rq = cpu_rq(cpu);
17#ifdef CONFIG_SMP
18 struct sched_domain *sd;
Ingo Molnar2d723762007-10-15 17:00:12 +020019 int dcount = 0;
Ingo Molnar425e0962007-07-09 18:51:58 +020020#endif
21
22 /* runqueue-specific stats */
23 seq_printf(seq,
Ken Chen480b9432007-10-18 21:32:56 +020024 "cpu%d %u %u %u %u %u %u %u %u %u %llu %llu %lu",
Ingo Molnar425e0962007-07-09 18:51:58 +020025 cpu, rq->yld_both_empty,
Ingo Molnar2d723762007-10-15 17:00:12 +020026 rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
27 rq->sched_switch, rq->sched_count, rq->sched_goidle,
28 rq->ttwu_count, rq->ttwu_local,
Ingo Molnar425e0962007-07-09 18:51:58 +020029 rq->rq_sched_info.cpu_time,
Ingo Molnar2d723762007-10-15 17:00:12 +020030 rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
Ingo Molnar425e0962007-07-09 18:51:58 +020031
32 seq_printf(seq, "\n");
33
34#ifdef CONFIG_SMP
35 /* domain-specific stats */
36 preempt_disable();
37 for_each_domain(cpu, sd) {
38 enum cpu_idle_type itype;
39 char mask_str[NR_CPUS];
40
41 cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
Ingo Molnar2d723762007-10-15 17:00:12 +020042 seq_printf(seq, "domain%d %s", dcount++, mask_str);
Ingo Molnar425e0962007-07-09 18:51:58 +020043 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
44 itype++) {
Ken Chen480b9432007-10-18 21:32:56 +020045 seq_printf(seq, " %u %u %u %u %u %u %u %u",
Ingo Molnar2d723762007-10-15 17:00:12 +020046 sd->lb_count[itype],
Ingo Molnar425e0962007-07-09 18:51:58 +020047 sd->lb_balanced[itype],
48 sd->lb_failed[itype],
49 sd->lb_imbalance[itype],
50 sd->lb_gained[itype],
51 sd->lb_hot_gained[itype],
52 sd->lb_nobusyq[itype],
53 sd->lb_nobusyg[itype]);
54 }
Ken Chen480b9432007-10-18 21:32:56 +020055 seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u %u\n",
Ingo Molnar2d723762007-10-15 17:00:12 +020056 sd->alb_count, sd->alb_failed, sd->alb_pushed,
57 sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
58 sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
Ingo Molnar425e0962007-07-09 18:51:58 +020059 sd->ttwu_wake_remote, sd->ttwu_move_affine,
60 sd->ttwu_move_balance);
61 }
62 preempt_enable();
63#endif
64 }
65 return 0;
66}
67
68static int schedstat_open(struct inode *inode, struct file *file)
69{
70 unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
71 char *buf = kmalloc(size, GFP_KERNEL);
72 struct seq_file *m;
73 int res;
74
75 if (!buf)
76 return -ENOMEM;
77 res = single_open(file, show_schedstat, NULL);
78 if (!res) {
79 m = file->private_data;
80 m->buf = buf;
81 m->size = size;
82 } else
83 kfree(buf);
84 return res;
85}
86
87const struct file_operations proc_schedstat_operations = {
88 .open = schedstat_open,
89 .read = seq_read,
90 .llseek = seq_lseek,
91 .release = single_release,
92};
93
94/*
95 * Expects runqueue lock to be held for atomicity of update
96 */
97static inline void
98rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
99{
100 if (rq) {
101 rq->rq_sched_info.run_delay += delta;
Ingo Molnar2d723762007-10-15 17:00:12 +0200102 rq->rq_sched_info.pcount++;
Ingo Molnar425e0962007-07-09 18:51:58 +0200103 }
104}
105
106/*
107 * Expects runqueue lock to be held for atomicity of update
108 */
109static inline void
110rq_sched_info_depart(struct rq *rq, unsigned long long delta)
111{
112 if (rq)
113 rq->rq_sched_info.cpu_time += delta;
114}
/*
 * Statistics update helpers (CONFIG_SCHEDSTATS enabled):
 *   schedstat_inc(rq, field)      - increment rq->field by one
 *   schedstat_add(rq, field, amt) - add amt to rq->field
 *   schedstat_set(var, val)       - assign val to the lvalue var
 * Every macro argument is parenthesized so that arbitrary expressions can
 * be passed safely.
 */
# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
# define schedstat_set(var, val)	do { (var) = (val); } while (0)
Ingo Molnar425e0962007-07-09 18:51:58 +0200118#else /* !CONFIG_SCHEDSTATS */
/* Schedstats compiled out: runqueue arrival accounting is a no-op. */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
{
}
/* Schedstats compiled out: runqueue departure accounting is a no-op. */
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{
}
/*
 * Schedstats compiled out: the update helpers expand to empty statements
 * and do not evaluate their arguments.
 */
# define schedstat_inc(rq, field)	do { } while (0)
# define schedstat_add(rq, field, amt)	do { } while (0)
# define schedstat_set(var, val)	do { } while (0)
Ingo Molnar425e0962007-07-09 18:51:58 +0200128#endif
129
Ingo Molnar75d4ef12007-10-15 17:00:08 +0200130#ifdef CONFIG_SCHEDSTATS
Ingo Molnar425e0962007-07-09 18:51:58 +0200131/*
132 * Called when a process is dequeued from the active array and given
133 * the cpu. We should note that with the exception of interactive
134 * tasks, the expired queue will become the active queue after the active
135 * queue is empty, without explicitly dequeuing and requeuing tasks in the
136 * expired queue. (Interactive tasks may be requeued directly to the
137 * active queue, thus delaying tasks in the expired queue from running;
138 * see scheduler_tick()).
139 *
140 * This function is only called from sched_info_arrive(), rather than
141 * dequeue_task(). Even though a task may be queued and dequeued multiple
142 * times as it is shuffled about, we're really interested in knowing how
143 * long it was from the *first* time it was queued to the time that it
144 * finally hit a cpu.
145 */
146static inline void sched_info_dequeued(struct task_struct *t)
147{
148 t->sched_info.last_queued = 0;
149}
150
151/*
152 * Called when a task finally hits the cpu. We can now calculate how
153 * long it was waiting to run. We also note when it began so that we
154 * can keep stats on how long its timeslice is.
155 */
156static void sched_info_arrive(struct task_struct *t)
157{
158 unsigned long long now = sched_clock(), delta = 0;
159
160 if (t->sched_info.last_queued)
161 delta = now - t->sched_info.last_queued;
162 sched_info_dequeued(t);
163 t->sched_info.run_delay += delta;
164 t->sched_info.last_arrival = now;
Ingo Molnar2d723762007-10-15 17:00:12 +0200165 t->sched_info.pcount++;
Ingo Molnar425e0962007-07-09 18:51:58 +0200166
167 rq_sched_info_arrive(task_rq(t), delta);
168}
169
170/*
171 * Called when a process is queued into either the active or expired
172 * array. The time is noted and later used to determine how long we
173 * had to wait for us to reach the cpu. Since the expired queue will
174 * become the active queue after active queue is empty, without dequeuing
175 * and requeuing any tasks, we are interested in queuing to either. It
176 * is unusual but not impossible for tasks to be dequeued and immediately
177 * requeued in the same or another array: this can happen in sched_yield(),
178 * set_user_nice(), and even load_balance() as it moves tasks from runqueue
179 * to runqueue.
180 *
181 * This function is only called from enqueue_task(), but also only updates
182 * the timestamp if it is already not set. It's assumed that
183 * sched_info_dequeued() will clear that stamp when appropriate.
184 */
185static inline void sched_info_queued(struct task_struct *t)
186{
187 if (unlikely(sched_info_on()))
188 if (!t->sched_info.last_queued)
189 t->sched_info.last_queued = sched_clock();
190}
191
192/*
193 * Called when a process ceases being the active-running process, either
194 * voluntarily or involuntarily. Now we can calculate how long we ran.
195 */
196static inline void sched_info_depart(struct task_struct *t)
197{
198 unsigned long long delta = sched_clock() - t->sched_info.last_arrival;
199
200 t->sched_info.cpu_time += delta;
201 rq_sched_info_depart(task_rq(t), delta);
202}
203
204/*
205 * Called when tasks are switched involuntarily due, typically, to expiring
206 * their time slice. (This may also be called when switching to or from
207 * the idle task.) We are only called when prev != next.
208 */
209static inline void
210__sched_info_switch(struct task_struct *prev, struct task_struct *next)
211{
212 struct rq *rq = task_rq(prev);
213
214 /*
215 * prev now departs the cpu. It's not interesting to record
216 * stats about how efficient we were at scheduling the idle
217 * process, however.
218 */
219 if (prev != rq->idle)
220 sched_info_depart(prev);
221
222 if (next != rq->idle)
223 sched_info_arrive(next);
224}
/*
 * Context-switch accounting hook: does nothing unless sched_info_on()
 * reports that accounting is enabled, in which case the switch from @prev
 * to @next is recorded by __sched_info_switch().
 */
static inline void
sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
	if (likely(!sched_info_on()))
		return;

	__sched_info_switch(prev, next);
}
231#else
/*
 * Schedstats compiled out: the accounting hooks expand to empty statements
 * and do not evaluate their arguments.
 */
#define sched_info_queued(t)		do { } while (0)
#define sched_info_switch(t, next)	do { } while (0)
Ingo Molnar75d4ef12007-10-15 17:00:08 +0200234#endif /* CONFIG_SCHEDSTATS */
Ingo Molnar425e0962007-07-09 18:51:58 +0200235