mac80211: use RCU for TX aggregation
Currently we allocate some memory for each TX
aggregation session and additionally keep a
state bitmap indicating the state it is in.
By using RCU to protect the pointer, moving
the state into the structure and some locking
trickery we can avoid locking when the TX agg
session is fully operational.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 680bcb7..7bf1f9c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1092,6 +1092,54 @@
return true;
}
+static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
+ struct sk_buff *skb,
+ struct ieee80211_tx_info *info,
+ struct tid_ampdu_tx *tid_tx,
+ int tid)
+{
+ bool queued = false;
+
+ if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
+ info->flags |= IEEE80211_TX_CTL_AMPDU;
+ } else {
+ spin_lock(&tx->sta->lock);
+ /*
+ * Need to re-check now, because we may get here
+ *
+ * 1) in the window during which the setup is actually
+ * already done, but not marked yet because not all
+ * packets are spliced over to the driver pending
+ * queue yet -- if this happened we acquire the lock
+ * either before or after the splice happens, but
+ * need to recheck which of these cases happened.
+ *
+ * 2) during session teardown, if the OPERATIONAL bit
+ * was cleared due to the teardown but the pointer
+ * hasn't been assigned NULL yet (or we loaded it
+ * before it was assigned) -- in this case it may
+ * now be NULL which means we should just let the
+ * packet pass through because splicing the frames
+ * back is already done.
+ */
+ tid_tx = tx->sta->ampdu_mlme.tid_tx[tid];
+
+ if (!tid_tx) {
+ /* do nothing, let packet pass through */
+ } else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
+ info->flags |= IEEE80211_TX_CTL_AMPDU;
+ } else {
+ queued = true;
+ info->control.vif = &tx->sdata->vif;
+ info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
+ __skb_queue_tail(&tid_tx->pending, skb);
+ }
+ spin_unlock(&tx->sta->lock);
+ }
+
+ return queued;
+}
+
/*
* initialises @tx
*/
@@ -1104,8 +1152,7 @@
struct ieee80211_hdr *hdr;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
int hdrlen, tid;
- u8 *qc, *state;
- bool queued = false;
+ u8 *qc;
memset(tx, 0, sizeof(*tx));
tx->skb = skb;
@@ -1157,35 +1204,16 @@
qc = ieee80211_get_qos_ctl(hdr);
tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
- spin_lock(&tx->sta->lock);
- /*
- * XXX: This spinlock could be fairly expensive, but see the
- * comment in agg-tx.c:ieee80211_agg_tx_operational().
- * One way to solve this would be to do something RCU-like
- * for managing the tid_tx struct and using atomic bitops
- * for the actual state -- by introducing an actual
- * 'operational' bit that would be possible. It would
- * require changing ieee80211_agg_tx_operational() to
- * set that bit, and changing the way tid_tx is managed
- * everywhere, including races between that bit and
- * tid_tx going away (tid_tx being added can be easily
- * committed to memory before the 'operational' bit).
- */
- tid_tx = tx->sta->ampdu_mlme.tid_tx[tid];
- state = &tx->sta->ampdu_mlme.tid_state_tx[tid];
- if (*state == HT_AGG_STATE_OPERATIONAL) {
- info->flags |= IEEE80211_TX_CTL_AMPDU;
- } else if (*state != HT_AGG_STATE_IDLE) {
- /* in progress */
- queued = true;
- info->control.vif = &sdata->vif;
- info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
- __skb_queue_tail(&tid_tx->pending, skb);
- }
- spin_unlock(&tx->sta->lock);
+ tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+ if (tid_tx) {
+ bool queued;
- if (unlikely(queued))
- return TX_QUEUED;
+ queued = ieee80211_tx_prep_agg(tx, skb, info,
+ tid_tx, tid);
+
+ if (unlikely(queued))
+ return TX_QUEUED;
+ }
}
if (is_multicast_ether_addr(hdr->addr1)) {