sched: do not keep current in the tree and get rid of sched_entity::fair_key

Get rid of 'sched_entity::fair_key'.

As a side effect, 'current' is no longer kept within the tree for
SCHED_NORMAL/BATCH tasks. This simplifies some parts of the code
(e.g. entity_tick() and yield_task_fair()) and also somewhat optimizes
them (e.g. a single update_curr() call now vs. a dequeue_entity() plus
enqueue_entity() pair before in entity_tick()).
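
The resulting flow for a SCHED_NORMAL/BATCH entity looks roughly like
this (a condensed sketch of the sched_fair.c hunks below, not the
verbatim new code):

	/* pick_next_entity(): the entity becoming 'current' leaves the tree */
	se = __pick_next_entity(cfs_rq);
	if (se)
		__dequeue_entity(cfs_rq, se);
	set_next_entity(cfs_rq, se);

	/* put_prev_entity(): a still-runnable 'current' goes back in,
	 * keyed by its (freshly updated) vruntime */
	if (prev->on_rq) {
		update_stats_wait_start(cfs_rq, prev);
		__enqueue_entity(cfs_rq, prev);
	}

To make this possible, the load and nr_running accounting is split out
into account_entity_enqueue()/account_entity_dequeue(), so that
enqueue_entity()/dequeue_entity() can skip the tree operations for
'current' while still keeping cfs_rq->load and cfs_rq->nr_running
accurate.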

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 572df1b..f776a30 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -891,7 +891,6 @@
  *     6 se->load.weight
  */
 struct sched_entity {
-	s64			fair_key;
 	struct load_weight	load;		/* for load-balancing */
 	struct rb_node		run_node;
 	unsigned int		on_rq;
diff --git a/kernel/sched.c b/kernel/sched.c
index 6d18921..3b10463 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6610,7 +6610,6 @@
 
 	read_lock_irq(&tasklist_lock);
 	do_each_thread(g, p) {
-		p->se.fair_key			= 0;
 		p->se.exec_start		= 0;
 #ifdef CONFIG_SCHEDSTATS
 		p->se.wait_start		= 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index e3b6232..bb34b81 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -38,7 +38,7 @@
 
 	SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ",
 		p->comm, p->pid,
-		(long long)p->se.fair_key,
+		(long long)p->se.vruntime,
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 144f3ef..b9e426a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -139,7 +139,7 @@
 static inline s64
 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	return se->fair_key - cfs_rq->min_vruntime;
+	return se->vruntime - cfs_rq->min_vruntime;
 }
 
 /*
@@ -181,9 +181,6 @@
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
-	update_load_add(&cfs_rq->load, se->load.weight);
-	cfs_rq->nr_running++;
-	se->on_rq = 1;
 }
 
 static void
@@ -193,9 +190,6 @@
 		set_leftmost(cfs_rq, rb_next(&se->run_node));
 
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
-	update_load_sub(&cfs_rq->load, se->load.weight);
-	cfs_rq->nr_running--;
-	se->on_rq = 0;
 }
 
 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
@@ -341,10 +335,6 @@
 	 */
 	if (se != cfs_rq->curr)
 		update_stats_wait_start(cfs_rq, se);
-	/*
-	 * Update the key:
-	 */
-	se->fair_key = se->vruntime;
 }
 
 static void
@@ -392,6 +382,22 @@
  * Scheduling class queueing methods:
  */
 
+static void
+account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	update_load_add(&cfs_rq->load, se->load.weight);
+	cfs_rq->nr_running++;
+	se->on_rq = 1;
+}
+
+static void
+account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	update_load_sub(&cfs_rq->load, se->load.weight);
+	cfs_rq->nr_running--;
+	se->on_rq = 0;
+}
+
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHEDSTATS
@@ -479,7 +485,9 @@
 	}
 
 	update_stats_enqueue(cfs_rq, se);
-	__enqueue_entity(cfs_rq, se);
+	if (se != cfs_rq->curr)
+		__enqueue_entity(cfs_rq, se);
+	account_entity_enqueue(cfs_rq, se);
 }
 
 static void
@@ -498,7 +506,9 @@
 		}
 	}
 #endif
-	__dequeue_entity(cfs_rq, se);
+	if (se != cfs_rq->curr)
+		__dequeue_entity(cfs_rq, se);
+	account_entity_dequeue(cfs_rq, se);
 }
 
 /*
@@ -544,6 +554,10 @@
 {
 	struct sched_entity *se = __pick_next_entity(cfs_rq);
 
+	/* 'current' is not kept within the tree. */
+	if (se)
+		__dequeue_entity(cfs_rq, se);
+
 	set_next_entity(cfs_rq, se);
 
 	return se;
@@ -560,19 +574,20 @@
 
 	update_stats_curr_end(cfs_rq, prev);
 
-	if (prev->on_rq)
+	if (prev->on_rq) {
 		update_stats_wait_start(cfs_rq, prev);
+		/* Put 'current' back into the tree. */
+		__enqueue_entity(cfs_rq, prev);
+	}
 	cfs_rq->curr = NULL;
 }
 
 static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
 	/*
-	 * Dequeue and enqueue the task to update its
-	 * position within the tree:
+	 * Update run-time statistics of the 'current'.
 	 */
-	dequeue_entity(cfs_rq, curr, 0);
-	enqueue_entity(cfs_rq, curr, 0);
+	update_curr(cfs_rq);
 
 	if (cfs_rq->nr_running > 1)
 		check_preempt_tick(cfs_rq, curr);
@@ -749,7 +764,7 @@
 	/*
 	 * Minimally necessary key value to be last in the tree:
 	 */
-	se->fair_key = rightmost->fair_key + 1;
+	se->vruntime = rightmost->vruntime + 1;
 
 	if (cfs_rq->rb_leftmost == &se->run_node)
 		cfs_rq->rb_leftmost = rb_next(&se->run_node);
@@ -965,6 +980,7 @@
 
 	update_stats_enqueue(cfs_rq, se);
 	__enqueue_entity(cfs_rq, se);
+	account_entity_enqueue(cfs_rq, se);
 	resched_task(rq->curr);
 }