net: sched: avoid costly atomic operation in fq_dequeue()

Standard qdisc API to setup a timer implies an atomic operation on every
packet dequeue : qdisc_unthrottled()

It turns out this is not really needed for FQ, as FQ has no concept of
global qdisc throttling, being a qdisc handling many different flows,
some of them can be throttled, while others are not.

Fix is straightforward : add a 'bool throttle' to
qdisc_watchdog_schedule_ns(), and remove calls to qdisc_unthrottled()
in sch_fq.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c79a226..2cf61b3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -594,13 +594,14 @@
 }
 EXPORT_SYMBOL(qdisc_watchdog_init);
 
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
+void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle)
 {
 	if (test_bit(__QDISC_STATE_DEACTIVATED,
 		     &qdisc_root_sleeping(wd->qdisc)->state))
 		return;
 
-	qdisc_throttled(wd->qdisc);
+	if (throttle)
+		qdisc_throttled(wd->qdisc);
 
 	hrtimer_start(&wd->timer,
 		      ns_to_ktime(expires),
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index c9b9fcb..cbd7e1f 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -377,7 +377,6 @@
 		if (time_after(jiffies, f->age + q->flow_refill_delay))
 			f->credit = max_t(u32, f->credit, q->quantum);
 		q->inactive_flows--;
-		qdisc_unthrottled(sch);
 	}
 
 	/* Note: this overwrites f->age */
@@ -385,7 +384,6 @@
 
 	if (unlikely(f == &q->internal)) {
 		q->stat_internal_packets++;
-		qdisc_unthrottled(sch);
 	}
 	sch->q.qlen++;
 
@@ -433,7 +431,8 @@
 		if (!head->first) {
 			if (q->time_next_delayed_flow != ~0ULL)
 				qdisc_watchdog_schedule_ns(&q->watchdog,
-							   q->time_next_delayed_flow);
+							   q->time_next_delayed_flow,
+							   false);
 			return NULL;
 		}
 	}
@@ -495,7 +494,6 @@
 	}
 out:
 	qdisc_bstats_update(sch, skb);
-	qdisc_unthrottled(sch);
 	return skb;
 }
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 77edffe..a4afde1 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -268,7 +268,8 @@
 		}
 
 		qdisc_watchdog_schedule_ns(&q->watchdog,
-					   now + max_t(long, -toks, -ptoks));
+					   now + max_t(long, -toks, -ptoks),
+					   true);
 
 		/* Maybe we have a shorter packet in the queue,
 		   which can be sent now. It sounds cool,