mac80211: fix aggregation to not require queue stop

Instead of stopping the entire AC queue when enabling aggregation
(which was only done for hardware with aggregation queues) buffer
the packets for each station, and release them to the pending skb
queue once aggregation is turned on successfully.

We get a little more code, but it becomes conceptually simpler and
we can remove the entire virtual queue mechanism from mac80211 in
a follow-up patch.

This changes how mac80211 behaves towards drivers that support
aggregation but have no hardware queues -- those drivers will now
not be handed packets while the aggregation session is being
established, but only after it has been fully established.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index fd718e2..64b839b 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -132,16 +132,6 @@
 	state = &sta->ampdu_mlme.tid_state_tx[tid];
 
 	if (local->hw.ampdu_queues) {
-		if (initiator) {
-			/*
-			 * Stop the AC queue to avoid issues where we send
-			 * unaggregated frames already before the delba.
-			 */
-			ieee80211_stop_queue_by_reason(&local->hw,
-				local->hw.queues + sta->tid_to_tx_q[tid],
-				IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
-		}
-
 		/*
 		 * Pretend the driver woke the queue, just in case
 		 * it disabled it before the session was stopped.
@@ -158,6 +148,10 @@
 	/* HW shall not deny going back to legacy */
 	if (WARN_ON(ret)) {
 		*state = HT_AGG_STATE_OPERATIONAL;
+		/*
+		 * We may have pending packets get stuck in this case...
+		 * Not bothering with a workaround for now.
+		 */
 	}
 
 	return ret;
@@ -226,13 +220,6 @@
 	       ra, tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
-	if (hw->ampdu_queues && ieee80211_ac_from_tid(tid) == 0) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "rejecting on voice AC\n");
-#endif
-		return -EINVAL;
-	}
-
 	rcu_read_lock();
 
 	sta = sta_info_get(local, ra);
@@ -267,6 +254,7 @@
 	}
 
 	spin_lock_bh(&sta->lock);
+	spin_lock(&local->ampdu_lock);
 
 	sdata = sta->sdata;
 
@@ -308,21 +296,19 @@
 			ret = -ENOSPC;
 			goto err_unlock_sta;
 		}
-
-		/*
-		 * If we successfully allocate the session, we can't have
-		 * anything going on on the queue this TID maps into, so
-		 * stop it for now. This is a "virtual" stop using the same
-		 * mechanism that drivers will use.
-		 *
-		 * XXX: queue up frames for this session in the sta_info
-		 *	struct instead to avoid hitting all other STAs.
-		 */
-		ieee80211_stop_queue_by_reason(
-			&local->hw, hw->queues + qn,
-			IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
 	}
 
+	/*
+	 * While we're asking the driver about the aggregation,
+	 * stop the AC queue so that we don't have to worry
+	 * about frames that came in while we were doing that,
+	 * which would require us to put them to the AC pending
+	 * afterwards which just makes the code more complex.
+	 */
+	ieee80211_stop_queue_by_reason(
+		&local->hw, ieee80211_ac_from_tid(tid),
+		IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+
 	/* prepare A-MPDU MLME for Tx aggregation */
 	sta->ampdu_mlme.tid_tx[tid] =
 			kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
@@ -336,6 +322,8 @@
 		goto err_return_queue;
 	}
 
+	skb_queue_head_init(&sta->ampdu_mlme.tid_tx[tid]->pending);
+
 	/* Tx timer */
 	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function =
 			sta_addba_resp_timer_expired;
@@ -362,6 +350,12 @@
 	}
 	sta->tid_to_tx_q[tid] = qn;
 
+	/* Driver vetoed or OKed, but we can take packets again now */
+	ieee80211_wake_queue_by_reason(
+		&local->hw, ieee80211_ac_from_tid(tid),
+		IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+
+	spin_unlock(&local->ampdu_lock);
 	spin_unlock_bh(&sta->lock);
 
 	/* send an addBA request */
@@ -388,15 +382,16 @@
 	sta->ampdu_mlme.tid_tx[tid] = NULL;
  err_return_queue:
 	if (qn >= 0) {
-		/* We failed, so start queue again right away. */
-		ieee80211_wake_queue_by_reason(hw, hw->queues + qn,
-			IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
 		/* give queue back to pool */
 		spin_lock(&local->queue_stop_reason_lock);
 		local->ampdu_ac_queue[qn] = -1;
 		spin_unlock(&local->queue_stop_reason_lock);
 	}
+	ieee80211_wake_queue_by_reason(
+		&local->hw, ieee80211_ac_from_tid(tid),
+		IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
  err_unlock_sta:
+	spin_unlock(&local->ampdu_lock);
 	spin_unlock_bh(&sta->lock);
  unlock:
 	rcu_read_unlock();
@@ -404,6 +399,45 @@
 }
 EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
 
+/*
+ * splice packets from the STA's pending to the local pending,
+ * requires a call to ieee80211_agg_splice_finish and holding
+ * local->ampdu_lock across both calls.
+ */
+static void ieee80211_agg_splice_packets(struct ieee80211_local *local,
+					 struct sta_info *sta, u16 tid)
+{
+	unsigned long flags;
+	u16 queue = ieee80211_ac_from_tid(tid);
+
+	ieee80211_stop_queue_by_reason(
+		&local->hw, queue,
+		IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+
+	if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) {
+		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+		/* mark queue as pending, it is stopped already */
+		__set_bit(IEEE80211_QUEUE_STOP_REASON_PENDING,
+			  &local->queue_stop_reasons[queue]);
+		/* copy over remaining packets */
+		skb_queue_splice_tail_init(
+			&sta->ampdu_mlme.tid_tx[tid]->pending,
+			&local->pending[queue]);
+		spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+	}
+}
+
+static void ieee80211_agg_splice_finish(struct ieee80211_local *local,
+					struct sta_info *sta, u16 tid)
+{
+	u16 queue = ieee80211_ac_from_tid(tid);
+
+	ieee80211_wake_queue_by_reason(
+		&local->hw, queue,
+		IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+}
+
+/* caller must hold sta->lock */
 static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
 					 struct sta_info *sta, u16 tid)
 {
@@ -411,15 +445,16 @@
 	printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
 #endif
 
-	if (local->hw.ampdu_queues) {
-		/*
-		 * Wake up the A-MPDU queue, we stopped it earlier,
-		 * this will in turn wake the entire AC.
-		 */
-		ieee80211_wake_queue_by_reason(&local->hw,
-			local->hw.queues + sta->tid_to_tx_q[tid],
-			IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
-	}
+	spin_lock(&local->ampdu_lock);
+	ieee80211_agg_splice_packets(local, sta, tid);
+	/*
+	 * NB: we rely on sta->lock being taken in the TX
+	 * processing here when adding to the pending queue,
+	 * otherwise we could only change the state of the
+	 * session to OPERATIONAL _here_.
+	 */
+	ieee80211_agg_splice_finish(local, sta, tid);
+	spin_unlock(&local->ampdu_lock);
 
 	local->ops->ampdu_action(&local->hw, IEEE80211_AMPDU_TX_OPERATIONAL,
 				 &sta->sta, tid, NULL);
@@ -602,22 +637,19 @@
 			WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
 
 	spin_lock_bh(&sta->lock);
+	spin_lock(&local->ampdu_lock);
 
-	if (*state & HT_AGG_STATE_INITIATOR_MSK &&
-	    hw->ampdu_queues) {
-		/*
-		 * Wake up this queue, we stopped it earlier,
-		 * this will in turn wake the entire AC.
-		 */
-		ieee80211_wake_queue_by_reason(hw,
-			hw->queues + sta->tid_to_tx_q[tid],
-			IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
-	}
+	ieee80211_agg_splice_packets(local, sta, tid);
 
 	*state = HT_AGG_STATE_IDLE;
+	/* from now on packets are no longer put onto sta->pending */
 	sta->ampdu_mlme.addba_req_num[tid] = 0;
 	kfree(sta->ampdu_mlme.tid_tx[tid]);
 	sta->ampdu_mlme.tid_tx[tid] = NULL;
+
+	ieee80211_agg_splice_finish(local, sta, tid);
+
+	spin_unlock(&local->ampdu_lock);
 	spin_unlock_bh(&sta->lock);
 
 	rcu_read_unlock();
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6ce62e5..32345b4 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -639,6 +639,14 @@
 	struct sk_buff_head pending[IEEE80211_MAX_QUEUES];
 	struct tasklet_struct tx_pending_tasklet;
 
+	/*
+	 * This lock is used to prevent concurrent A-MPDU
+	 * session start/stop processing, this thus also
+	 * synchronises the ->ampdu_action() callback to
+	 * drivers and limits it to one at a time.
+	 */
+	spinlock_t ampdu_lock;
+
 	/* number of interfaces with corresponding IFF_ flags */
 	atomic_t iff_allmultis, iff_promiscs;
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a7430e9..756284e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -795,6 +795,8 @@
 	skb_queue_head_init(&local->skb_queue);
 	skb_queue_head_init(&local->skb_queue_unreliable);
 
+	spin_lock_init(&local->ampdu_lock);
+
 	return local_to_hw(local);
 }
 EXPORT_SYMBOL(ieee80211_alloc_hw);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 4ba3c54..dd3593c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -239,6 +239,11 @@
 		tid_tx = sta->ampdu_mlme.tid_tx[i];
 		if (tid_tx) {
 			del_timer_sync(&tid_tx->addba_resp_timer);
+			/*
+			 * STA removed while aggregation session being
+			 * started? Bit odd, but purge frames anyway.
+			 */
+			skb_queue_purge(&tid_tx->pending);
 			kfree(tid_tx);
 		}
 	}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5b223b2..18fd5d1 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -73,11 +73,13 @@
  * struct tid_ampdu_tx - TID aggregation information (Tx).
  *
  * @addba_resp_timer: timer for peer's response to addba request
+ * @pending: pending frames queue -- use sta's spinlock to protect
  * @ssn: Starting Sequence Number expected to be aggregated.
  * @dialog_token: dialog token for aggregation session
  */
 struct tid_ampdu_tx {
 	struct timer_list addba_resp_timer;
+	struct sk_buff_head pending;
 	u16 ssn;
 	u8 dialog_token;
 };
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index a0e00c6..906ab78 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -984,9 +984,9 @@
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-
 	int hdrlen, tid;
 	u8 *qc, *state;
+	bool queued = false;
 
 	memset(tx, 0, sizeof(*tx));
 	tx->skb = skb;
@@ -1013,20 +1013,53 @@
 		 */
 	}
 
+	/*
+	 * If this flag is set to true anywhere, and we get here,
+	 * we are doing the needed processing, so remove the flag
+	 * now.
+	 */
+	info->flags &= ~IEEE80211_TX_INTFL_NEED_TXPROCESSING;
+
 	hdr = (struct ieee80211_hdr *) skb->data;
 
 	tx->sta = sta_info_get(local, hdr->addr1);
 
-	if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) {
+	if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
+	    (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) {
 		unsigned long flags;
+		struct tid_ampdu_tx *tid_tx;
+
 		qc = ieee80211_get_qos_ctl(hdr);
 		tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
 
 		spin_lock_irqsave(&tx->sta->lock, flags);
+		/*
+		 * XXX: This spinlock could be fairly expensive, but see the
+		 *	comment in agg-tx.c:ieee80211_agg_tx_operational().
+		 *	One way to solve this would be to do something RCU-like
+		 *	for managing the tid_tx struct and using atomic bitops
+		 *	for the actual state -- by introducing an actual
+		 *	'operational' bit that would be possible. It would
+		 *	require changing ieee80211_agg_tx_operational() to
+		 *	set that bit, and changing the way tid_tx is managed
+		 *	everywhere, including races between that bit and
+		 *	tid_tx going away (tid_tx being added can be easily
+		 *	committed to memory before the 'operational' bit).
+		 */
+		tid_tx = tx->sta->ampdu_mlme.tid_tx[tid];
 		state = &tx->sta->ampdu_mlme.tid_state_tx[tid];
-		if (*state == HT_AGG_STATE_OPERATIONAL)
+		if (*state == HT_AGG_STATE_OPERATIONAL) {
 			info->flags |= IEEE80211_TX_CTL_AMPDU;
+		} else if (*state != HT_AGG_STATE_IDLE) {
+			/* in progress */
+			queued = true;
+			info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
+			__skb_queue_tail(&tid_tx->pending, skb);
+		}
 		spin_unlock_irqrestore(&tx->sta->lock, flags);
+
+		if (unlikely(queued))
+			return TX_QUEUED;
 	}
 
 	if (is_multicast_ether_addr(hdr->addr1)) {
@@ -1077,7 +1110,14 @@
 	}
 	if (unlikely(!dev))
 		return -ENODEV;
-	/* initialises tx with control */
+	/*
+	 * initialises tx with control
+	 *
+	 * return value is safe to ignore here because this function
+	 * can only be invoked for multicast frames
+	 *
+	 * XXX: clean up
+	 */
 	__ieee80211_tx_prepare(tx, skb, dev);
 	dev_put(dev);
 	return 0;
@@ -1188,7 +1228,8 @@
 	return 0;
 }
 
-static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb)
+static void ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
+			 bool txpending)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct sta_info *sta;
@@ -1202,11 +1243,11 @@
 
 	queue = skb_get_queue_mapping(skb);
 
-	WARN_ON(!skb_queue_empty(&local->pending[queue]));
+	WARN_ON(!txpending && !skb_queue_empty(&local->pending[queue]));
 
 	if (unlikely(skb->len < 10)) {
 		dev_kfree_skb(skb);
-		return 0;
+		return;
 	}
 
 	rcu_read_lock();
@@ -1214,10 +1255,13 @@
 	/* initialises tx */
 	res_prepare = __ieee80211_tx_prepare(&tx, skb, dev);
 
-	if (res_prepare == TX_DROP) {
+	if (unlikely(res_prepare == TX_DROP)) {
 		dev_kfree_skb(skb);
 		rcu_read_unlock();
-		return 0;
+		return;
+	} else if (unlikely(res_prepare == TX_QUEUED)) {
+		rcu_read_unlock();
+		return;
 	}
 
 	sta = tx.sta;
@@ -1251,7 +1295,12 @@
 			do {
 				next = skb->next;
 				skb->next = NULL;
-				skb_queue_tail(&local->pending[queue], skb);
+				if (unlikely(txpending))
+					skb_queue_head(&local->pending[queue],
+						       skb);
+				else
+					skb_queue_tail(&local->pending[queue],
+						       skb);
 			} while ((skb = next));
 
 			/*
@@ -1276,7 +1325,7 @@
 	}
  out:
 	rcu_read_unlock();
-	return 0;
+	return;
 
  drop:
 	rcu_read_unlock();
@@ -1287,7 +1336,6 @@
 		dev_kfree_skb(skb);
 		skb = next;
 	}
-	return 0;
 }
 
 /* device xmit handlers */
@@ -1346,7 +1394,6 @@
 		FOUND_SDATA,
 		UNKNOWN_ADDRESS,
 	} monitor_iface = NOT_MONITOR;
-	int ret;
 
 	if (skb->iif)
 		odev = dev_get_by_index(&init_net, skb->iif);
@@ -1360,7 +1407,7 @@
 		       "originating device\n", dev->name);
 #endif
 		dev_kfree_skb(skb);
-		return 0;
+		return NETDEV_TX_OK;
 	}
 
 	if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
@@ -1389,7 +1436,7 @@
 		else
 			if (mesh_nexthop_lookup(skb, osdata)) {
 				dev_put(odev);
-				return 0;
+				return NETDEV_TX_OK;
 			}
 		if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0)
 			IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh,
@@ -1451,7 +1498,7 @@
 	if (ieee80211_skb_resize(osdata->local, skb, headroom, may_encrypt)) {
 		dev_kfree_skb(skb);
 		dev_put(odev);
-		return 0;
+		return NETDEV_TX_OK;
 	}
 
 	if (osdata->vif.type == NL80211_IFTYPE_AP_VLAN)
@@ -1460,10 +1507,11 @@
 				      u.ap);
 	if (likely(monitor_iface != UNKNOWN_ADDRESS))
 		info->control.vif = &osdata->vif;
-	ret = ieee80211_tx(odev, skb);
+
+	ieee80211_tx(odev, skb, false);
 	dev_put(odev);
 
-	return ret;
+	return NETDEV_TX_OK;
 }
 
 int ieee80211_monitor_start_xmit(struct sk_buff *skb,
@@ -1827,6 +1875,54 @@
 		skb_queue_purge(&local->pending[i]);
 }
 
+static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
+				     struct sk_buff *skb)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta;
+	struct ieee80211_hdr *hdr;
+	struct net_device *dev;
+	int ret;
+	bool result = true;
+
+	/* does interface still exist? */
+	dev = dev_get_by_index(&init_net, skb->iif);
+	if (!dev) {
+		dev_kfree_skb(skb);
+		return true;
+	}
+
+	/* validate info->control.vif against skb->iif */
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+		sdata = container_of(sdata->bss,
+				     struct ieee80211_sub_if_data,
+				     u.ap);
+
+	if (unlikely(info->control.vif && info->control.vif != &sdata->vif)) {
+		dev_kfree_skb(skb);
+		result = true;
+		goto out;
+	}
+
+	if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) {
+		ieee80211_tx(dev, skb, true);
+	} else {
+		hdr = (struct ieee80211_hdr *)skb->data;
+		sta = sta_info_get(local, hdr->addr1);
+
+		ret = __ieee80211_tx(local, &skb, sta);
+		if (ret != IEEE80211_TX_OK)
+			result = false;
+	}
+
+ out:
+	dev_put(dev);
+
+	return result;
+}
+
 /*
  * Transmit all pending packets. Called from tasklet, locks master device
  * TX lock so that no new packets can come in.
@@ -1835,9 +1931,8 @@
 {
 	struct ieee80211_local *local = (struct ieee80211_local *)data;
 	struct net_device *dev = local->mdev;
-	struct ieee80211_hdr *hdr;
 	unsigned long flags;
-	int i, ret;
+	int i;
 	bool next;
 
 	rcu_read_lock();
@@ -1868,13 +1963,8 @@
 
 		while (!skb_queue_empty(&local->pending[i])) {
 			struct sk_buff *skb = skb_dequeue(&local->pending[i]);
-			struct sta_info *sta;
 
-			hdr = (struct ieee80211_hdr *)skb->data;
-			sta = sta_info_get(local, hdr->addr1);
-
-			ret = __ieee80211_tx(local, &skb, sta);
-			if (ret != IEEE80211_TX_OK) {
+			if (!ieee80211_tx_pending_skb(local, skb)) {
 				skb_queue_head(&local->pending[i], skb);
 				break;
 			}