mac80211: implement fair queueing per txq mac80211's software queues were designed to work very closely with device tx queues. They are required to make use of 802.11 packet aggregation easily and efficiently. Due to the way 802.11 aggregation is designed it only makes sense to keep fair queuing as close to hardware as possible to reduce induced latency and inertia and provide the best flow responsiveness. This change doesn't translate directly to immediate and significant gains. End result depends on driver's induced latency. Best results can be achieved if driver keeps its own tx queue/fifo fill level to a minimum. Signed-off-by: Michal Kazior <michal.kazior@tieto.com> Signed-off-by: Johannes Berg <johannes.berg@intel.com>

commit: fa962b92120bb70693a4db545f89067eb3373294 [log] [tgz]
author: Michal Kazior <michal.kazior@tieto.com> Thu May 19 10:37:49 2016 +0200
committer: Johannes Berg <johannes.berg@intel.com> Thu Jun 09 11:34:08 2016 +0200
tree: c8e159c99501f312756ad295088c884ef2a2361c
parent: 80a83cfc434b1e3afe38974570b460db4898bec6 [diff] [blame]
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3e77da1..1d8343f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c

@@ -25,6 +25,7 @@
 #include <net/cfg80211.h>
 #include <net/mac80211.h>
 #include <asm/unaligned.h>
+#include <net/fq_impl.h>
 
 #include "ieee80211_i.h"
 #include "driver-ops.h"
@@ -1266,46 +1267,121 @@
 	return to_txq_info(txq);
 }
 
+static struct sk_buff *fq_tin_dequeue_func(struct fq *fq,
+					   struct fq_tin *tin,
+					   struct fq_flow *flow)
+{
+	return fq_flow_dequeue(fq, flow);
+}
+
+static void fq_skb_free_func(struct fq *fq,
+			     struct fq_tin *tin,
+			     struct fq_flow *flow,
+			     struct sk_buff *skb)
+{
+	struct ieee80211_local *local;
+
+	local = container_of(fq, struct ieee80211_local, fq);
+	ieee80211_free_txskb(&local->hw, skb);
+}
+
+static struct fq_flow *fq_flow_get_default_func(struct fq *fq,
+						struct fq_tin *tin,
+						int idx,
+						struct sk_buff *skb)
+{
+	struct txq_info *txqi;
+
+	txqi = container_of(tin, struct txq_info, tin);
+	return &txqi->def_flow;
+}
+
 static void ieee80211_txq_enqueue(struct ieee80211_local *local,
 				  struct txq_info *txqi,
 				  struct sk_buff *skb)
 {
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(txqi->txq.vif);
+	struct fq *fq = &local->fq;
+	struct fq_tin *tin = &txqi->tin;
 
-	lockdep_assert_held(&txqi->queue.lock);
+	fq_tin_enqueue(fq, tin, skb,
+		       fq_skb_free_func,
+		       fq_flow_get_default_func);
+}
 
-	if (atomic_read(&sdata->num_tx_queued) >= TOTAL_MAX_TX_BUFFER ||
-	    txqi->queue.qlen >= STA_MAX_TX_BUFFER) {
-		ieee80211_free_txskb(&local->hw, skb);
-		return;
+void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
+			struct sta_info *sta,
+			struct txq_info *txqi, int tid)
+{
+	fq_tin_init(&txqi->tin);
+	fq_flow_init(&txqi->def_flow);
+
+	txqi->txq.vif = &sdata->vif;
+
+	if (sta) {
+		txqi->txq.sta = &sta->sta;
+		sta->sta.txq[tid] = &txqi->txq;
+		txqi->txq.tid = tid;
+		txqi->txq.ac = ieee802_1d_to_ac[tid & 7];
+	} else {
+		sdata->vif.txq = &txqi->txq;
+		txqi->txq.tid = 0;
+		txqi->txq.ac = IEEE80211_AC_BE;
 	}
+}
 
-	atomic_inc(&sdata->num_tx_queued);
-	txqi->byte_cnt += skb->len;
-	__skb_queue_tail(&txqi->queue, skb);
+void ieee80211_txq_purge(struct ieee80211_local *local,
+			 struct txq_info *txqi)
+{
+	struct fq *fq = &local->fq;
+	struct fq_tin *tin = &txqi->tin;
+
+	fq_tin_reset(fq, tin, fq_skb_free_func);
+}
+
+int ieee80211_txq_setup_flows(struct ieee80211_local *local)
+{
+	struct fq *fq = &local->fq;
+	int ret;
+
+	if (!local->ops->wake_tx_queue)
+		return 0;
+
+	ret = fq_init(fq, 4096);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+void ieee80211_txq_teardown_flows(struct ieee80211_local *local)
+{
+	struct fq *fq = &local->fq;
+
+	if (!local->ops->wake_tx_queue)
+		return;
+
+	fq_reset(fq, fq_skb_free_func);
 }
 
 struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 				     struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif);
 	struct txq_info *txqi = container_of(txq, struct txq_info, txq);
 	struct ieee80211_hdr *hdr;
 	struct sk_buff *skb = NULL;
+	struct fq *fq = &local->fq;
+	struct fq_tin *tin = &txqi->tin;
 
-	spin_lock_bh(&txqi->queue.lock);
+	spin_lock_bh(&fq->lock);
 
 	if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
 		goto out;
 
-	skb = __skb_dequeue(&txqi->queue);
+	skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
 	if (!skb)
 		goto out;
 
-	atomic_dec(&sdata->num_tx_queued);
-	txqi->byte_cnt -= skb->len;
-
 	hdr = (struct ieee80211_hdr *)skb->data;
 	if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
 		struct sta_info *sta = container_of(txq->sta, struct sta_info,
@@ -1320,7 +1396,7 @@
 	}
 
 out:
-	spin_unlock_bh(&txqi->queue.lock);
+	spin_unlock_bh(&fq->lock);
 
 	if (skb && skb_has_frag_list(skb) &&
 	    !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
@@ -1337,6 +1413,7 @@
 			       bool txpending)
 {
 	struct ieee80211_tx_control control = {};
+	struct fq *fq = &local->fq;
 	struct sk_buff *skb, *tmp;
 	struct txq_info *txqi;
 	unsigned long flags;
@@ -1359,9 +1436,9 @@
 
 			__skb_unlink(skb, skbs);
 
-			spin_lock_bh(&txqi->queue.lock);
+			spin_lock_bh(&fq->lock);
 			ieee80211_txq_enqueue(local, txqi, skb);
-			spin_unlock_bh(&txqi->queue.lock);
+			spin_unlock_bh(&fq->lock);
 
 			drv_wake_tx_queue(local, txqi);
 
@@ -2893,6 +2970,9 @@
 				      struct sk_buff *skb)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct fq *fq = &local->fq;
+	struct fq_tin *tin;
+	struct fq_flow *flow;
 	u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
 	struct ieee80211_txq *txq = sta->sta.txq[tid];
 	struct txq_info *txqi;
@@ -2904,6 +2984,7 @@
 	__be16 len;
 	void *data;
 	bool ret = false;
+	unsigned int orig_len;
 	int n = 1, nfrags;
 
 	if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
@@ -2920,12 +3001,20 @@
 		max_amsdu_len = min_t(int, max_amsdu_len,
 				      sta->sta.max_rc_amsdu_len);
 
-	spin_lock_bh(&txqi->queue.lock);
+	spin_lock_bh(&fq->lock);
 
-	head = skb_peek_tail(&txqi->queue);
+	/* TODO: Ideally aggregation should be done on dequeue to remain
+	 * responsive to environment changes.
+	 */
+
+	tin = &txqi->tin;
+	flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func);
+	head = skb_peek_tail(&flow->queue);
 	if (!head)
 		goto out;
 
+	orig_len = head->len;
+
 	if (skb->len + head->len > max_amsdu_len)
 		goto out;
 
@@ -2964,8 +3053,13 @@
 	head->data_len += skb->len;
 	*frag_tail = skb;
 
+	flow->backlog += head->len - orig_len;
+	tin->backlog_bytes += head->len - orig_len;
+
+	fq_recalc_backlog(fq, tin, flow);
+
 out:
-	spin_unlock_bh(&txqi->queue.lock);
+	spin_unlock_bh(&fq->lock);
 
 	return ret;
 }
commit	fa962b92120bb70693a4db545f89067eb3373294	[log] [tgz]
author	Michal Kazior <michal.kazior@tieto.com>	Thu May 19 10:37:49 2016 +0200
committer	Johannes Berg <johannes.berg@intel.com>	Thu Jun 09 11:34:08 2016 +0200
tree	c8e159c99501f312756ad295088c884ef2a2361c
parent	80a83cfc434b1e3afe38974570b460db4898bec6 [diff] [blame]