11n Tx aggregation for iwm(4)

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
17 messages Options
Reply | Threaded
Open this post in threaded view
|

11n Tx aggregation for iwm(4)

Stefan Sperling-5
This patch adds support for 11n Tx aggregation to iwm(4).

Please help with testing if you can by running the patch and using wifi
as usual. Nothing should change, except that Tx speed may potentially
improve. If you have time to run before/after performance measurements with
tcpbench or such, that would be nice. But it's not required for testing.

If Tx aggregation is active then netstat will show a non-zero output block ack
agreement counter:

$ netstat -W iwm0 | grep 'output block'
        3 new output block ack agreements
        0 output block ack agreements timed out

It would be great to get at least one test for all the chipsets the driver
supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
The behaviour of the access point also matters a great deal. It won't
hurt to test the same chipset against several different access points.

I have tested this version on 8265 only so far. I've run older revisions
of this patch on 7265 so I'm confident that this chip will work, too.
So far, the APs I have tested against are athn(4) in 11a mode and in 11n
mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5 GHz channels.

diff refs/heads/master refs/heads/txagg
blob - 3a75d07a60a7eb4c66540474e47aeffd7a85250a
blob + 853bdd1290ad509f5fce7b5bf20550f458a2b460
--- sys/dev/pci/if_iwm.c
+++ sys/dev/pci/if_iwm.c
@@ -144,6 +144,8 @@
 #include <net80211/ieee80211_amrr.h>
 #include <net80211/ieee80211_mira.h>
 #include <net80211/ieee80211_radiotap.h>
+#include <net80211/ieee80211_priv.h> /* for SEQ_LT */
+#undef DPRINTF /* defined in ieee80211_priv.h */
 
 #define DEVNAME(_s) ((_s)->sc_dev.dv_xname)
 
@@ -299,7 +301,8 @@ int iwm_nic_rx_mq_init(struct iwm_softc *);
 int iwm_nic_tx_init(struct iwm_softc *);
 int iwm_nic_init(struct iwm_softc *);
 int iwm_enable_ac_txq(struct iwm_softc *, int, int);
-int iwm_enable_txq(struct iwm_softc *, int, int, int);
+int iwm_enable_txq(struct iwm_softc *, int, int, int, int, uint8_t,
+    uint16_t);
 int iwm_post_alive(struct iwm_softc *);
 struct iwm_phy_db_entry *iwm_phy_db_get_section(struct iwm_softc *, uint16_t,
     uint16_t);
@@ -334,12 +337,12 @@ void iwm_ampdu_rx_stop(struct ieee80211com *, struct i
     uint8_t);
 void iwm_sta_rx_agg(struct iwm_softc *, struct ieee80211_node *, uint8_t,
     uint16_t, uint16_t, int);
-#ifdef notyet
+void iwm_sta_tx_agg(struct iwm_softc *, struct ieee80211_node *, uint8_t,
+    uint16_t, uint16_t, int);
 int iwm_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
     uint8_t);
 void iwm_ampdu_tx_stop(struct ieee80211com *, struct ieee80211_node *,
     uint8_t);
-#endif
 void iwm_ba_task(void *);
 
 int iwm_parse_nvm_data(struct iwm_softc *, const uint16_t *,
@@ -372,14 +375,25 @@ int iwm_rxmq_get_signal_strength(struct iwm_softc *, s
 void iwm_rx_rx_phy_cmd(struct iwm_softc *, struct iwm_rx_packet *,
     struct iwm_rx_data *);
 int iwm_get_noise(const struct iwm_statistics_rx_non_phy *);
+void iwm_txq_advance(struct iwm_softc *, struct iwm_tx_ring *, int);
+void iwm_ampdu_tx_done(struct iwm_softc *, struct iwm_cmd_header *,
+    struct iwm_node *, struct iwm_tx_ring *, uint32_t, uint8_t,
+    uint8_t, uint16_t, int, struct iwm_agg_tx_status *);
 int iwm_ccmp_decap(struct iwm_softc *, struct mbuf *,
     struct ieee80211_node *);
 void iwm_rx_frame(struct iwm_softc *, struct mbuf *, int, uint32_t, int, int,
     uint32_t, struct ieee80211_rxinfo *, struct mbuf_list *);
-void iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_rx_packet *,
-    struct iwm_node *, int, int);
+void iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_tx_resp *,
+    struct iwm_node *, int, int, int);
+void iwm_txd_done(struct iwm_softc *, struct iwm_tx_data *);
 void iwm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *,
     struct iwm_rx_data *);
+void iwm_clear_oactive(struct iwm_softc *, struct iwm_tx_ring *);
+void iwm_mira_choose(struct iwm_softc *, struct ieee80211_node *);
+void iwm_ampdu_rate_control(struct iwm_softc *, struct ieee80211_node *,
+    struct iwm_tx_ring *, int, uint16_t, uint16_t);
+void iwm_rx_ba(struct iwm_softc *, struct iwm_rx_packet *,
+    struct iwm_rx_data *);
 void iwm_rx_bmiss(struct iwm_softc *, struct iwm_rx_packet *,
     struct iwm_rx_data *);
 int iwm_binding_cmd(struct iwm_softc *, struct iwm_node *, uint32_t);
@@ -399,6 +413,7 @@ int iwm_send_cmd_pdu_status(struct iwm_softc *, uint32
 void iwm_free_resp(struct iwm_softc *, struct iwm_host_cmd *);
 void iwm_cmd_done(struct iwm_softc *, int, int, int);
 void iwm_update_sched(struct iwm_softc *, int, int, uint8_t, uint16_t);
+void iwm_reset_sched(struct iwm_softc *, int, int, uint8_t);
 const struct iwm_rate *iwm_tx_fill_cmd(struct iwm_softc *, struct iwm_node *,
     struct ieee80211_frame *, struct iwm_tx_cmd *);
 int iwm_tx(struct iwm_softc *, struct mbuf *, struct ieee80211_node *, int);
@@ -1306,17 +1321,17 @@ iwm_alloc_tx_ring(struct iwm_softc *sc, struct iwm_tx_
  * The command is queue 0 (sc->txq[0]), and 4 mgmt/data frame queues
  * are sc->tqx[IWM_DQA_MIN_MGMT_QUEUE + ac], i.e. sc->txq[5:8],
  * in order to provide one queue per EDCA category.
+ * Tx aggregation requires additional queues, one queue per TID for
+ * which aggregation is enabled. We map TID 0-7 to sc->txq[10:17].
  *
- * In non-DQA mode, we use rings 0 through 9 (0-3 are EDCA, 9 is cmd).
+ * In non-DQA mode, we use rings 0 through 9 (0-3 are EDCA, 9 is cmd),
+ * and Tx aggregation is not supported.
  *
- * Tx aggregation will require additional queues (one queue per TID
- * for which aggregation is enabled) but we do not implement this yet.
- *
  * Unfortunately, we cannot tell if DQA will be used until the
  * firmware gets loaded later, so just allocate sufficient rings
  * in order to satisfy both cases.
  */
- if (qid > IWM_CMD_QUEUE)
+ if (qid > IWM_LAST_AGG_TX_QUEUE)
  return 0;
 
  size = IWM_TX_RING_COUNT * sizeof(struct iwm_device_cmd);
@@ -1380,6 +1395,7 @@ iwm_reset_tx_ring(struct iwm_softc *sc, struct iwm_tx_
  bus_dmamap_sync(sc->sc_dmat, ring->desc_dma.map, 0,
     ring->desc_dma.size, BUS_DMASYNC_PREWRITE);
  sc->qfullmsk &= ~(1 << ring->qid);
+ sc->qenablemsk &= ~(1 << ring->qid);
  /* 7000 family NICs are locked while commands are in progress. */
  if (ring->qid == sc->cmdqid && ring->queued > 0) {
  if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000)
@@ -2208,6 +2224,18 @@ const uint8_t iwm_ac_to_tx_fifo[] = {
  IWM_TX_FIFO_VO,
 };
 
+/* Map a TID to an ieee80211_edca_ac category. */
+const uint8_t iwm_tid_to_ac[IWM_MAX_TID_COUNT] = {
+ EDCA_AC_BE,
+ EDCA_AC_BK,
+ EDCA_AC_BK,
+ EDCA_AC_BE,
+ EDCA_AC_VI,
+ EDCA_AC_VI,
+ EDCA_AC_VO,
+ EDCA_AC_VO,
+};
+
 int
 iwm_enable_ac_txq(struct iwm_softc *sc, int qid, int fifo)
 {
@@ -2250,28 +2278,48 @@ iwm_enable_ac_txq(struct iwm_softc *sc, int qid, int f
 }
 
 int
-iwm_enable_txq(struct iwm_softc *sc, int sta_id, int qid, int fifo)
+iwm_enable_txq(struct iwm_softc *sc, int sta_id, int qid, int fifo,
+    int aggregate, uint8_t tid, uint16_t ssn)
 {
+ struct iwm_tx_ring *ring = &sc->txq[qid];
  struct iwm_scd_txq_cfg_cmd cmd;
- int err;
+ int err, idx, scd_bug;
 
  iwm_nic_assert_locked(sc);
 
- IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | 0);
+ /*
+ * If we need to move the SCD write pointer by steps of
+ * 0x40, 0x80 or 0xc0, it gets stuck.
+ * This is really ugly, but this is the easiest way out for
+ * this sad hardware issue.
+ * This bug has been fixed on devices 9000 and up.
+ */
+ scd_bug = !sc->sc_mqrx_supported &&
+ !((ssn - ring->cur) & 0x3f) &&
+ (ssn != ring->cur);
+ if (scd_bug)
+ ssn = (ssn + 1) & 0xfff;
 
+ idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
+ IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | idx);
+ ring->cur = idx;
+ ring->tail = idx;
+
  memset(&cmd, 0, sizeof(cmd));
+ cmd.tid = tid;
  cmd.scd_queue = qid;
  cmd.enable = 1;
  cmd.sta_id = sta_id;
  cmd.tx_fifo = fifo;
- cmd.aggregate = 0;
+ cmd.aggregate = aggregate;
+ cmd.ssn = htole16(ssn);
  cmd.window = IWM_FRAME_LIMIT;
 
- err = iwm_send_cmd_pdu(sc, IWM_SCD_QUEUE_CFG, 0,
-    sizeof(cmd), &cmd);
+ err = iwm_send_cmd_pdu(sc, IWM_SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd);
  if (err)
  return err;
 
+ sc->qenablemsk |= (1 << qid);
  return 0;
 }
 
@@ -2950,8 +2998,12 @@ iwm_sta_rx_agg(struct iwm_softc *sc, struct ieee80211_
  if (start) {
  sc->sc_rx_ba_sessions++;
  ieee80211_addba_req_accept(ic, ni, tid);
- } else if (sc->sc_rx_ba_sessions > 0)
- sc->sc_rx_ba_sessions--;
+ in->ampdu_rx_tid_mask |= (1 << tid);
+ } else {
+ in->ampdu_rx_tid_mask &= ~(1 << tid);
+ if (sc->sc_rx_ba_sessions > 0)
+ sc->sc_rx_ba_sessions--;
+ }
  } else if (start)
  ieee80211_addba_req_refuse(ic, ni, tid);
 
@@ -2959,6 +3011,75 @@ iwm_sta_rx_agg(struct iwm_softc *sc, struct ieee80211_
 }
 
 void
+iwm_sta_tx_agg(struct iwm_softc *sc, struct ieee80211_node *ni, uint8_t tid,
+    uint16_t ssn, uint16_t winsize, int start)
+{
+ struct ieee80211com *ic = &sc->sc_ic;
+ struct iwm_node *in = (void *)ni;
+ int qid = IWM_FIRST_AGG_TX_QUEUE + tid;
+ enum ieee80211_edca_ac ac = iwm_tid_to_ac[tid];
+ int fifo = iwm_ac_to_tx_fifo[ac];
+ int err;
+
+ if (qid > IWM_LAST_AGG_TX_QUEUE || !iwm_nic_lock(sc)) {
+ ieee80211_addba_resp_refuse(ic, ni, tid,
+    IEEE80211_STATUS_UNSPECIFIED);
+ return;
+ }
+
+ if (start) {
+ if ((sc->qenablemsk & (1 << qid)) == 0) {
+ struct iwm_tx_ring *ring = &sc->txq[qid];
+
+ err = iwm_enable_txq(sc, IWM_STATION_ID, qid, fifo,
+    1, tid, ssn);
+ if (err)
+ goto done;
+ /*
+ * If iwm_enable_txq() employed the SCD hardware bug
+ * workaround we must skip the frame with seqnum SSN.
+ */
+ if (IWM_AGG_SSN_TO_TXQ_IDX(ring->cur) !=
+    IWM_AGG_SSN_TO_TXQ_IDX(ssn)) {
+ ssn = (ssn + 1) & 0xfff;
+ KASSERT(IWM_AGG_SSN_TO_TXQ_IDX(ring->cur) ==
+    IWM_AGG_SSN_TO_TXQ_IDX(ssn));
+ ieee80211_output_ba_move_window(ic, ni,
+    tid, ssn);
+ ni->ni_qos_txseqs[tid] = ssn;
+ }
+ }
+ in->ampdu_tx_tid_mask |= (1 << tid);
+ err = iwm_add_sta_cmd(sc, in, 1);
+ if (err) {
+ printf("%s: could not update sta (error %d)\n",
+    DEVNAME(sc), err);
+ in->ampdu_tx_tid_mask &= ~(1 << tid);
+ goto done;
+ }
+ } else {
+ in->ampdu_tx_tid_mask &= ~(1 << tid);
+ err = iwm_add_sta_cmd(sc, in, 1);
+ if (err) {
+ printf("%s: could not update sta (error %d)\n",
+    DEVNAME(sc), err);
+ in->ampdu_tx_tid_mask |= (1 << tid);
+ goto done;
+ }
+ }
+
+done:
+ iwm_nic_unlock(sc);
+ if (start) {
+ if (err)
+ ieee80211_addba_resp_refuse(ic, ni, tid,
+    IEEE80211_STATUS_UNSPECIFIED);
+ else
+ ieee80211_addba_resp_accept(ic, ni, tid);
+ }
+}
+
+void
 iwm_htprot_task(void *arg)
 {
  struct iwm_softc *sc = arg;
@@ -3002,19 +3123,53 @@ iwm_ba_task(void *arg)
  struct ieee80211com *ic = &sc->sc_ic;
  struct ieee80211_node *ni = ic->ic_bss;
  int s = splnet();
+ int tid;
 
- if (sc->sc_flags & IWM_FLAG_SHUTDOWN) {
+ if ((sc->sc_flags & IWM_FLAG_SHUTDOWN) ||
+    ic->ic_state != IEEE80211_S_RUN) {
  refcnt_rele_wake(&sc->task_refs);
  splx(s);
  return;
  }
 
- if (sc->ba_start)
- iwm_sta_rx_agg(sc, ni, sc->ba_tid, sc->ba_ssn,
-    sc->ba_winsize, 1);
- else
- iwm_sta_rx_agg(sc, ni, sc->ba_tid, 0, 0, 0);
+ if (sc->ba_flags & IWM_RX_BA_START) {
+ for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
+ if ((sc->rx_ba_start.tid_mask & (1 << tid)) == 0)
+ continue;
+ iwm_sta_rx_agg(sc, ni, tid, sc->rx_ba_start.ssn[tid],
+    sc->rx_ba_start.winsize[tid], 1);
+ sc->rx_ba_start.tid_mask &= ~(1 << tid);
+ }
+ }
+
+ if (sc->ba_flags & IWM_RX_BA_STOP) {
+ for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
+ if ((sc->rx_ba_stop.tid_mask & (1 << tid)) == 0)
+ continue;
+ iwm_sta_rx_agg(sc, ni, tid, 0, 0, 0);
+ sc->rx_ba_stop.tid_mask &= ~(1 << tid);
+ }
+ }
 
+ if (sc->ba_flags & IWM_TX_BA_START) {
+ for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
+ if ((sc->tx_ba_start.tid_mask & (1 << tid)) == 0)
+ continue;
+ iwm_sta_tx_agg(sc, ni, tid, sc->tx_ba_start.ssn[tid],
+    sc->tx_ba_start.winsize[tid], 1);
+ sc->tx_ba_start.tid_mask &= ~(1 << tid);
+ }
+ }
+
+ if (sc->ba_flags & IWM_TX_BA_STOP) {
+ for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
+ if ((sc->tx_ba_stop.tid_mask & (1 << tid)) == 0)
+ continue;
+ iwm_sta_tx_agg(sc, ni, tid, 0, 0, 0);
+ sc->tx_ba_stop.tid_mask &= ~(1 << tid);
+ }
+ }
+
  refcnt_rele_wake(&sc->task_refs);
  splx(s);
 }
@@ -3029,14 +3184,23 @@ iwm_ampdu_rx_start(struct ieee80211com *ic, struct iee
 {
  struct ieee80211_rx_ba *ba = &ni->ni_rx_ba[tid];
  struct iwm_softc *sc = IC2IFP(ic)->if_softc;
+ struct iwm_node *in = (void *)ni;
 
- if (sc->sc_rx_ba_sessions >= IWM_MAX_RX_BA_SESSIONS)
+ if (tid >= IWM_MAX_TID_COUNT)
+ return EINVAL;
+
+ if (sc->sc_rx_ba_sessions >= IWM_MAX_RX_BA_SESSIONS ||
+    (in->ampdu_rx_tid_mask & (1 << tid)) != 0)
  return ENOSPC;
 
- sc->ba_start = 1;
- sc->ba_tid = tid;
- sc->ba_ssn = htole16(ba->ba_winstart);
- sc->ba_winsize = htole16(ba->ba_winsize);
+ if (sc->rx_ba_start.tid_mask & (1 << tid) ||
+    sc->rx_ba_stop.tid_mask & (1 << tid))
+ return EAGAIN;
+
+ sc->ba_flags |= IWM_RX_BA_START;
+ sc->rx_ba_start.tid_mask |= (1 << tid);
+ sc->rx_ba_start.ssn[tid] = htole16(ba->ba_winstart);
+ sc->rx_ba_start.winsize[tid] = htole16(ba->ba_winsize);
  iwm_add_task(sc, systq, &sc->ba_task);
 
  return EBUSY;
@@ -3051,13 +3215,69 @@ iwm_ampdu_rx_stop(struct ieee80211com *ic, struct ieee
     uint8_t tid)
 {
  struct iwm_softc *sc = IC2IFP(ic)->if_softc;
+ struct iwm_node *in = (void *)ni;
 
- sc->ba_start = 0;
- sc->ba_tid = tid;
+ if (in->ampdu_rx_tid_mask & (1 << tid))  {
+ sc->ba_flags |= IWM_RX_BA_STOP;
+ sc->rx_ba_stop.tid_mask |= (1 << tid);
+ iwm_add_task(sc, systq, &sc->ba_task);
+ }
+}
+
+int
+iwm_ampdu_tx_start(struct ieee80211com *ic, struct ieee80211_node *ni,
+    uint8_t tid)
+{
+ struct iwm_softc *sc = IC2IFP(ic)->if_softc;
+ struct iwm_node *in = (void *)ni;
+ struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
+
+ /* We only implement Tx aggregation with DQA-capable firmware. */
+ if (!isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_DQA_SUPPORT))
+ return ENOTSUP;
+
+ /* Ensure we can map this TID to an aggregation queue. */
+ if (tid >= IWM_MAX_TID_COUNT)
+ return EINVAL;
+
+ /* We only support a fixed Tx aggregation window size, for now. */
+ if (ba->ba_winsize != IWM_FRAME_LIMIT)
+ return ENOTSUP;
+
+ if ((in->ampdu_tx_tid_mask & (1 << tid)) != 0)
+ return ENOSPC;
+
+ if (sc->tx_ba_start.tid_mask & (1 << tid) ||
+    sc->tx_ba_stop.tid_mask & (1 << tid))
+ return EAGAIN;
+
+ sc->ba_flags |= IWM_TX_BA_START;
+ sc->tx_ba_start.tid_mask |= (1 << tid);
+ sc->tx_ba_start.ssn[tid] = htole16(ba->ba_winstart);
+ sc->tx_ba_start.winsize[tid] = htole16(ba->ba_winsize);
  iwm_add_task(sc, systq, &sc->ba_task);
+
+ return EBUSY;
 }
 
 void
+iwm_ampdu_tx_stop(struct ieee80211com *ic, struct ieee80211_node *ni,
+    uint8_t tid)
+{
+ struct iwm_softc *sc = IC2IFP(ic)->if_softc;
+ struct iwm_node *in = (void *)ni;
+
+ if (tid >= IWM_MAX_TID_COUNT)
+ return;
+
+ if (in->ampdu_tx_tid_mask & (1 << tid))  {
+ sc->ba_flags |= IWM_TX_BA_STOP;
+ sc->tx_ba_stop.tid_mask |= (1 << tid);
+ iwm_add_task(sc, systq, &sc->ba_task);
+ }
+}
+
+void
 iwm_set_hw_address_8000(struct iwm_softc *sc, struct iwm_nvm_data *data,
     const uint16_t *mac_override, const uint16_t *nvm_hw)
 {
@@ -4238,13 +4458,178 @@ iwm_rx_mpdu_mq(struct iwm_softc *sc, struct mbuf *m, v
 }
 
 void
-iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
-    struct iwm_node *in, int txmcs, int txrate)
+iwm_txq_advance(struct iwm_softc *sc, struct iwm_tx_ring *ring, int idx)
 {
+ struct iwm_tx_data *txd;
+
+ while (ring->tail != idx) {
+ txd = &ring->data[ring->tail];
+ if (txd->m != NULL) {
+ if (ring->qid < IWM_FIRST_AGG_TX_QUEUE)
+ DPRINTF(("%s: missed Tx completion: tail=%d "
+    "idx=%d\n", __func__, ring->tail, idx));
+ iwm_reset_sched(sc, ring->qid, ring->tail, IWM_STATION_ID);
+ iwm_txd_done(sc, txd);
+ ring->queued--;
+ }
+ ring->tail = (ring->tail + 1) % IWM_TX_RING_COUNT;
+ }
+}
+
+void
+iwm_ampdu_tx_done(struct iwm_softc *sc, struct iwm_cmd_header *cmd_hdr,
+    struct iwm_node *in, struct iwm_tx_ring *txq, uint32_t initial_rate,
+    uint8_t nframes, uint8_t failure_frame, uint16_t ssn, int status,
+    struct iwm_agg_tx_status *agg_status)
+{
  struct ieee80211com *ic = &sc->sc_ic;
+ int tid = cmd_hdr->qid - IWM_FIRST_AGG_TX_QUEUE;
+ struct iwm_tx_data *txdata = &txq->data[cmd_hdr->idx];
  struct ieee80211_node *ni = &in->in_ni;
+ int txfail = (status != IWM_TX_STATUS_SUCCESS &&
+    status != IWM_TX_STATUS_DIRECT_DONE);
+ struct ieee80211_tx_ba *ba;
+
+ sc->sc_tx_timer = 0;
+
+ if (ic->ic_state != IEEE80211_S_RUN)
+ return;
+
+ if (nframes > 1) {
+ int ampdu_id, have_ampdu_id = 0, ampdu_size = 0;
+ int i;
+
+ /* Compute the size of this A-MPDU. */
+ for (i = 0; i < nframes; i++) {
+ uint8_t qid = agg_status[i].qid;
+ uint8_t idx = agg_status[i].idx;
+
+ if (qid != cmd_hdr->qid)
+ continue;
+
+ txdata = &txq->data[idx];
+ if (txdata->m == NULL)
+ continue;
+
+ ampdu_size += txdata->totlen + IEEE80211_CRC_LEN;
+ }
+
+ /*
+ * For each subframe collect Tx status, retries, and Tx rate.
+ * (The Tx rate is the same for all subframes in this batch.)
+ */
+ for (i = 0; i < nframes; i++) {
+ uint8_t qid = agg_status[i].qid;
+ uint8_t idx = agg_status[i].idx;
+ uint16_t txstatus = (le16toh(agg_status[i].status) &
+    IWM_AGG_TX_STATE_STATUS_MSK);
+ uint16_t trycnt = (le16toh(agg_status[i].status) &
+    IWM_AGG_TX_STATE_TRY_CNT_MSK) >>
+    IWM_AGG_TX_STATE_TRY_CNT_POS;
+
+ if (qid != cmd_hdr->qid)
+ continue;
+
+ txdata = &txq->data[idx];
+ if (txdata->m == NULL)
+ continue;
+
+ if (initial_rate & IWM_RATE_MCS_HT_MSK)
+ txdata->ampdu_txmcs = (initial_rate &
+    (IWM_RATE_HT_MCS_RATE_CODE_MSK |
+    IWM_RATE_HT_MCS_NSS_MSK));
+ if (txstatus != IWM_AGG_TX_STATE_TRANSMITTED)
+ txdata->txfail++;
+ if (trycnt > 1)
+ txdata->retries++;
+
+ /*
+ * Assign a common ID to all subframes of this A-MPDU.
+ * This ID will be used during Tx rate control to
+ * infer the ACK status of individual subframes.
+ */
+ if (!have_ampdu_id) {
+ ampdu_id = txdata->in->next_ampdu_id++;
+ have_ampdu_id = 1;
+ }
+ txdata->ampdu_id = ampdu_id;
+
+ /*
+ * We will also need to know the total number of
+ * subframes and the size of this A-MPDU. We store
+ * this redundantly on each subframe because firmware
+ * only reports acknowledged subframes via compressed
+ * block-ack notification. This way we will know what
+ * the total number of subframes and size were even if
+ * just one of these subframes gets acknowledged.
+ */
+ txdata->ampdu_nframes = nframes;
+ txdata->ampdu_size = ampdu_size;
+ }
+ return;
+ }
+
+ if (ni == NULL)
+ return;
+
+ ba = &ni->ni_tx_ba[tid];
+ if (ba->ba_state != IEEE80211_BA_AGREED)
+ return;
+
+ /* This is a final single-frame Tx attempt. */
+ DPRINTFN(3, ("%s: final tx status=0x%x qid=%d queued=%d idx=%d ssn=%u "
+    "bitmap=0x%llx\n", __func__, status, cmd_hdr->qid, txq->queued,
+    cmd_hdr->idx, ssn, ba->ba_bitmap));
+
+ /*
+ * Skip rate control if our Tx rate is fixed.
+ * Don't report frames to MiRA which were sent at a different
+ * Tx rate than ni->ni_txmcs.
+ */
+ if (ic->ic_fixed_mcs == -1 && txdata->txmcs == ni->ni_txmcs) {
+ in->in_mn.frames++;
+ in->in_mn.agglen = 1;
+ in->in_mn.ampdu_size = txdata->totlen + IEEE80211_CRC_LEN;
+ if (failure_frame > 0)
+ in->in_mn.retries++;
+ if (txfail)
+ in->in_mn.txfail++;
+ iwm_mira_choose(sc, ni);
+ }
+
+ if (txfail)
+ ieee80211_tx_compressed_bar(ic, ni, tid, ssn);
+ else if (!SEQ_LT(ssn, ba->ba_winstart)) {
+ /*
+ * Move window forward if SSN lies beyond end of window,
+ * otherwise we can't record the ACK for this frame.
+ * Non-acked frames which left holes in the bitmap near
+ * the beginning of the window must be discarded.
+ */
+ uint16_t s = ssn;
+ while (SEQ_LT(ba->ba_winend, s)) {
+ ieee80211_output_ba_move_window(ic, ni, tid, s);
+ iwm_txq_advance(sc, txq, IWM_AGG_SSN_TO_TXQ_IDX(s));
+ s = (s + 1) & 0xfff;
+ }
+ /* SSN should now be within window; set corresponding bit. */
+ ieee80211_output_ba_record_ack(ic, ni, tid, ssn);
+ }
+
+ /* Move window forward up to the first hole in the bitmap. */
+ ieee80211_output_ba_move_window_to_first_unacked(ic, ni, tid, ssn);
+ iwm_txq_advance(sc, txq, IWM_AGG_SSN_TO_TXQ_IDX(ba->ba_winstart));
+
+ iwm_clear_oactive(sc, txq);
+}
+
+void
+iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_tx_resp *tx_resp,
+    struct iwm_node *in, int txmcs, int txrate, int qid)
+{
+ struct ieee80211com *ic = &sc->sc_ic;
+ struct ieee80211_node *ni = &in->in_ni;
  struct ifnet *ifp = IC2IFP(ic);
- struct iwm_tx_resp *tx_resp = (void *)pkt->data;
  int status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK;
  int txfail;
 
@@ -4277,22 +4662,8 @@ iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_
  in->in_mn.retries += tx_resp->failure_frame;
  if (txfail)
  in->in_mn.txfail += tx_resp->frame_count;
- if (ic->ic_state == IEEE80211_S_RUN) {
- int best_mcs;
-
- ieee80211_mira_choose(&in->in_mn, ic, &in->in_ni);
- /*
- * If MiRA has chosen a new TX rate we must update
- * the firwmare's LQ rate table from process context.
- * ni_txmcs may change again before the task runs so
- * cache the chosen rate in the iwm_node structure.
- */
- best_mcs = ieee80211_mira_get_best_mcs(&in->in_mn);
- if (best_mcs != in->chosen_txmcs) {
- in->chosen_txmcs = best_mcs;
- iwm_setrates(in, 1);
- }
- }
+ if (ic->ic_state == IEEE80211_S_RUN)
+ iwm_mira_choose(sc, ni);
  }
 
  if (txfail)
@@ -4313,49 +4684,91 @@ iwm_txd_done(struct iwm_softc *sc, struct iwm_tx_data
  KASSERT(txd->in);
  ieee80211_release_node(ic, &txd->in->in_ni);
  txd->in = NULL;
+
+ txd->retries = 0;
+ txd->txfail = 0;
+ txd->txmcs = 0;
+ txd->ampdu_txmcs = 0;
+ txd->txrate = 0;
 }
 
 void
 iwm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
     struct iwm_rx_data *data)
 {
- struct ieee80211com *ic = &sc->sc_ic;
- struct ifnet *ifp = IC2IFP(ic);
  struct iwm_cmd_header *cmd_hdr = &pkt->hdr;
  int idx = cmd_hdr->idx;
  int qid = cmd_hdr->qid;
  struct iwm_tx_ring *ring = &sc->txq[qid];
  struct iwm_tx_data *txd;
+ struct iwm_tx_resp *tx_resp = (void *)pkt->data;
+ uint32_t ssn;
+ uint32_t len = iwm_rx_packet_len(pkt);
 
  bus_dmamap_sync(sc->sc_dmat, data->map, 0, IWM_RBUF_SIZE,
     BUS_DMASYNC_POSTREAD);
 
  sc->sc_tx_timer = 0;
 
+ /* Sanity checks. */
+ if (sizeof(*tx_resp) > len)
+ return;
+ if (qid < IWM_FIRST_AGG_TX_QUEUE && tx_resp->frame_count > 1)
+ return;
+ if (qid >= IWM_FIRST_AGG_TX_QUEUE && sizeof(*tx_resp) + sizeof(ssn) +
+    tx_resp->frame_count * sizeof(tx_resp->status) > len)
+ return;
+
+ /*
+ * In the multi-frame case the firmware has just transmitted a bunch
+ * of frames in an A-MPDU. The final Tx status of those frames won't
+ * be known until the peer ACKs subframes with a block ack or firmware
+ * gives up on a particular subframe.
+ * Subframes for which the firmware never sees an ACK will be retried
+ * and will eventually arrive here as a single-frame Tx failure.
+ * So there is nothing to do, for now.
+ */
+ if (tx_resp->frame_count != 1)
+ return;
+
  txd = &ring->data[idx];
  if (txd->m == NULL)
  return;
 
- iwm_rx_tx_cmd_single(sc, pkt, txd->in, txd->txmcs, txd->txrate);
- iwm_txd_done(sc, txd);
+ if (qid >= IWM_FIRST_AGG_TX_QUEUE) {
+ int status;
 
- /*
- * XXX Sometimes we miss Tx completion interrupts.
- * We cannot check Tx success/failure for affected frames; just free
- * the associated mbuf and release the associated node reference.
- */
- while (ring->tail != idx) {
- txd = &ring->data[ring->tail];
- if (txd->m != NULL) {
- DPRINTF(("%s: missed Tx completion: tail=%d idx=%d\n",
-    __func__, ring->tail, idx));
- iwm_txd_done(sc, txd);
- ring->queued--;
- }
- ring->tail = (ring->tail + 1) % IWM_TX_RING_COUNT;
+ memcpy(&ssn, &tx_resp->status + tx_resp->frame_count, sizeof(ssn));
+ ssn = le32toh(ssn) & 0xfff;
+ status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK;
+ iwm_ampdu_tx_done(sc, cmd_hdr, txd->in, ring,
+    le32toh(tx_resp->initial_rate), tx_resp->frame_count,
+    tx_resp->failure_frame, ssn, status, &tx_resp->status);
+ } else {
+ iwm_rx_tx_cmd_single(sc, tx_resp, txd->in, txd->txmcs,
+    txd->txrate, qid);
+ iwm_reset_sched(sc, qid, idx, IWM_STATION_ID);
+ iwm_txd_done(sc, txd);
+ ring->queued--;
+
+ /*
+ * XXX Sometimes we miss Tx completion interrupts.
+ * We cannot check Tx success/failure for affected frames;
+ * just free the associated mbuf and release the associated
+ * node reference.
+ */
+ iwm_txq_advance(sc, ring, idx);
+ iwm_clear_oactive(sc, ring);
  }
+}
 
- if (--ring->queued < IWM_TX_RING_LOMARK) {
+void
+iwm_clear_oactive(struct iwm_softc *sc, struct iwm_tx_ring *ring)
+{
+ struct ieee80211com *ic = &sc->sc_ic;
+ struct ifnet *ifp = IC2IFP(ic);
+
+ if (ring->queued < IWM_TX_RING_LOMARK) {
  sc->qfullmsk &= ~(1 << ring->qid);
  if (sc->qfullmsk == 0 && ifq_is_oactive(&ifp->if_snd)) {
  ifq_clr_oactive(&ifp->if_snd);
@@ -4370,6 +4783,183 @@ iwm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_pack
 }
 
 void
+iwm_mira_choose(struct iwm_softc *sc, struct ieee80211_node *ni)
+{
+ struct ieee80211com *ic = &sc->sc_ic;
+ struct iwm_node *in = (void *)ni;
+ int best_mcs = ieee80211_mira_get_best_mcs(&in->in_mn);
+
+ ieee80211_mira_choose(&in->in_mn, ic, ni);
+
+ /*
+ * Update firmware's LQ retry table if MiRA has chosen a new MCS.
+ *
+ * We only need to do this if the best MCS has changed because
+ * we ask firmware to use a fixed MCS while MiRA is probing a
+ * candidate MCS.
+ * While not probing we ask firmware to retry at lower rates in case
+ * Tx at the newly chosen best MCS ends up failing, and then report
+ * any resulting Tx retries to MiRA in order to trigger probing.
+ */
+ if (best_mcs != ieee80211_mira_get_best_mcs(&in->in_mn)) {
+ in->chosen_txmcs = best_mcs;
+ iwm_setrates(in, 1);
+ }
+}
+
+void
+iwm_ampdu_rate_control(struct iwm_softc *sc, struct ieee80211_node *ni,
+    struct iwm_tx_ring *txq, int tid, uint16_t seq, uint16_t ssn)
+{
+ struct ieee80211com *ic = &sc->sc_ic;
+ struct iwm_node *in = (void *)ni;
+ struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
+ int min_ampdu_id, max_ampdu_id, id;
+ int idx, end_idx;
+
+ /* Determine the min/max IDs we assigned to AMPDUs in this range. */
+ idx = IWM_AGG_SSN_TO_TXQ_IDX(seq);
+ end_idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
+ min_ampdu_id = txq->data[idx].ampdu_id;
+ max_ampdu_id = min_ampdu_id;
+ while (idx != end_idx) {
+ struct iwm_tx_data *txdata = &txq->data[idx];
+
+ if (txdata->m != NULL) {
+ if (min_ampdu_id > txdata->ampdu_id)
+ min_ampdu_id = txdata->ampdu_id;
+ if (max_ampdu_id < txdata->ampdu_id)
+ max_ampdu_id = txdata->ampdu_id;
+ }
+
+ idx = (idx + 1) % IWM_TX_RING_COUNT;
+ }
+
+ /*
+ * Update Tx rate statistics for A-MPDUs before firmware's BA window.
+ */
+ for (id = min_ampdu_id; id <= max_ampdu_id; id++) {
+ int have_ack = 0, bit = 0;
+ idx = IWM_AGG_SSN_TO_TXQ_IDX(seq);
+ end_idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
+ in->in_mn.agglen = 0;
+ in->in_mn.ampdu_size = 0;
+ while (idx != end_idx) {
+ struct iwm_tx_data *txdata = &txq->data[idx];
+ uint16_t s = (seq + bit) & 0xfff;
+ /*
+ * We can assume that this subframe has been ACKed
+ * because ACK failures come as single frames and
+ * before failing an A-MPDU subframe the firmware
+ * sends it as a single frame at least once.
+ *
+ * However, when this A-MPDU was transmitted we
+ * learned how many subframes it contained.
+ * So if firmware isn't reporting all subframes now
+ * we can deduce an ACK failure for missing frames.
+ */
+ if (txdata->m != NULL && txdata->ampdu_id == id &&
+    txdata->ampdu_txmcs == ni->ni_txmcs &&
+    txdata->ampdu_nframes > 0 &&
+    (SEQ_LT(ba->ba_winend, s) ||
+    (ba->ba_bitmap & (1ULL << bit)) == 0)) {
+ have_ack++;
+ in->in_mn.frames = txdata->ampdu_nframes;
+ in->in_mn.agglen = txdata->ampdu_nframes;
+ in->in_mn.ampdu_size = txdata->ampdu_size;
+ if (txdata->retries > 1)
+ in->in_mn.retries++;
+ if (!SEQ_LT(ba->ba_winend, s))
+ ieee80211_output_ba_record_ack(ic, ni,
+    tid, s);
+ }
+
+ idx = (idx + 1) % IWM_TX_RING_COUNT;
+ bit++;
+ }
+
+ if (have_ack > 0) {
+ in->in_mn.txfail = in->in_mn.frames - have_ack;
+ iwm_mira_choose(sc, ni);
+ }
+ }
+}
+
+void
+iwm_rx_ba(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
+    struct iwm_rx_data *data)
+{
+ struct iwm_ba_notif *ban = (void *)pkt->data;
+ struct ieee80211com *ic = &sc->sc_ic;
+ struct ieee80211_node *ni;
+ struct ieee80211_tx_ba *ba;
+ struct iwm_node *in;
+ struct iwm_tx_ring *ring;
+ uint16_t seq, ssn, idx;
+ int qid;
+
+ if (ic->ic_state != IEEE80211_S_RUN)
+ return;
+
+ if (iwm_rx_packet_payload_len(pkt) < sizeof(*ban))
+ return;
+
+ if (ban->sta_id != IWM_STATION_ID ||
+    !IEEE80211_ADDR_EQ(ic->ic_bss->ni_macaddr, ban->sta_addr))
+ return;
+
+ ni = ic->ic_bss;
+ in = (void *)ni;
+
+ qid = le16toh(ban->scd_flow);
+ if (qid < IWM_FIRST_AGG_TX_QUEUE || qid > IWM_LAST_AGG_TX_QUEUE)
+ return;
+
+ /* Protect against a firmware bug where the queue/TID are off. */
+ if (qid != IWM_FIRST_AGG_TX_QUEUE + ban->tid)
+ return;
+
+ ba = &ni->ni_tx_ba[ban->tid];
+ if (ba->ba_state != IEEE80211_BA_AGREED)
+ return;
+
+ ring = &sc->txq[qid];
+ ssn = le16toh(ban->scd_ssn); /* BA window starting sequence number */
+ idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
+
+ /*
+ * The first bit in ban->bitmap corresponds to the sequence number
+ * stored in the sequence control field ban->seq_ctl.
+ * Multiple BA notifications in a row may be using this number, with
+ * additional bits being set in ban->bitmap. It is unclear how the
+ * firmware decides to shift this window forward.
+ */
+ seq = le16toh(ban->seq_ctl) >> IEEE80211_SEQ_SEQ_SHIFT;
+
+ /*
+ * The firmware's new BA window starting sequence number
+ * corresponds to the first hole in ban->scd_ssn, implying
+ * that all frames between 'seq' and 'ssn' have been acked.
+ */
+ ssn = le16toh(ban->scd_ssn);
+
+ /* Skip rate control if our Tx rate is fixed. */
+ if (ic->ic_fixed_mcs == -1)
+ iwm_ampdu_rate_control(sc, ni, ring, ban->tid, seq, ssn);
+
+ /*
+ * SSN corresponds to the first (perhaps not yet transmitted) frame
+ * in firmware's BA window. Firmware is not going to retransmit any
+ * frames before its BA window so mark them all as done.
+ */
+ if (SEQ_LT(ba->ba_winstart, ssn)) {
+ ieee80211_output_ba_move_window(ic, ni, ban->tid, ssn);
+ iwm_txq_advance(sc, ring, IWM_AGG_SSN_TO_TXQ_IDX(ssn));
+ iwm_clear_oactive(sc, ring);
+ }
+}
+
+void
 iwm_rx_bmiss(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
     struct iwm_rx_data *data)
 {
@@ -4638,9 +5228,6 @@ iwm_send_cmd(struct iwm_softc *sc, struct iwm_host_cmd
  }
  }
 
-#if 0
- iwm_update_sched(sc, ring->qid, ring->cur, 0, 0);
-#endif
  /* Kick command ring. */
  ring->queued++;
  ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT;
@@ -4771,7 +5358,6 @@ iwm_cmd_done(struct iwm_softc *sc, int qid, int idx, i
  }
 }
 
-#if 0
 /*
  * necessary only for block ack mode
  */
@@ -4780,32 +5366,49 @@ iwm_update_sched(struct iwm_softc *sc, int qid, int id
     uint16_t len)
 {
  struct iwm_agn_scd_bc_tbl *scd_bc_tbl;
- uint16_t w_val;
+ uint16_t val;
 
  scd_bc_tbl = sc->sched_dma.vaddr;
 
- len += 8; /* magic numbers came naturally from paris */
+ len += IWM_TX_CRC_SIZE + IWM_TX_DELIMITER_SIZE;
  if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_DW_BC_TABLE)
  len = roundup(len, 4) / 4;
 
- w_val = htole16(sta_id << 12 | len);
+ val = htole16(sta_id << 12 | len);
 
+ bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
+    0, sc->sched_dma.size, BUS_DMASYNC_PREWRITE);
+
  /* Update TX scheduler. */
- scd_bc_tbl[qid].tfd_offset[idx] = w_val;
+ scd_bc_tbl[qid].tfd_offset[idx] = val;
+ if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP)
+ scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = val;
+
  bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
-    (char *)(void *)w - (char *)(void *)sc->sched_dma.vaddr,
-    sizeof(uint16_t), BUS_DMASYNC_PREWRITE);
+    0, sc->sched_dma.size, BUS_DMASYNC_POSTWRITE);
+}
 
- /* I really wonder what this is ?!? */
- if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP) {
- scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = w_val;
- bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
-    (char *)(void *)(w + IWM_TFD_QUEUE_SIZE_MAX) -
-    (char *)(void *)sc->sched_dma.vaddr,
-    sizeof (uint16_t), BUS_DMASYNC_PREWRITE);
- }
+void
+iwm_reset_sched(struct iwm_softc *sc, int qid, int idx, uint8_t sta_id)
+{
+ struct iwm_agn_scd_bc_tbl *scd_bc_tbl;
+ uint16_t val;
+
+ scd_bc_tbl = sc->sched_dma.vaddr;
+
+ val = htole16(1 | (sta_id << 12));
+
+ bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
+    0, sc->sched_dma.size, BUS_DMASYNC_PREWRITE);
+
+ /* Update TX scheduler. */
+ scd_bc_tbl[qid].tfd_offset[idx] = val;
+ if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP)
+ scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = val;
+
+ bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
+    0, sc->sched_dma.size, BUS_DMASYNC_POSTWRITE);
 }
-#endif
 
 /*
  * Fill in various bit for management frames, and leave them
@@ -4897,19 +5500,24 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
  uint32_t flags;
  u_int hdrlen;
  bus_dma_segment_t *seg;
- uint8_t tid, type;
+ uint8_t tid, type, subtype;
  int i, totlen, err, pad;
- int hdrlen2, rtsthres = ic->ic_rtsthreshold;
+ int qid, hasqos, rtsthres = ic->ic_rtsthreshold;
 
  wh = mtod(m, struct ieee80211_frame *);
- hdrlen = ieee80211_get_hdrlen(wh);
  type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
+ subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
+ if (type == IEEE80211_FC0_TYPE_CTL)
+ hdrlen = sizeof(struct ieee80211_frame_min);
+ else
+ hdrlen = ieee80211_get_hdrlen(wh);
 
- hdrlen2 = (ieee80211_has_qos(wh)) ?
-    sizeof (struct ieee80211_qosframe) :
-    sizeof (struct ieee80211_frame);
+ hasqos = ieee80211_has_qos(wh);
 
- tid = 0;
+ if (type == IEEE80211_FC0_TYPE_DATA)
+ tid = IWM_TID_NON_QOS;
+ else
+ tid = IWM_MAX_TID_COUNT;
 
  /*
  * Map EDCA categories to Tx data queues.
@@ -4918,14 +5526,31 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
  * need to share Tx queues between stations because we only implement
  * client mode; the firmware's station table contains only one entry
  * which represents our access point.
- *
- * Tx aggregation will require additional queues (one queue per TID
- * for which aggregation is enabled) but we do not implement this yet.
  */
  if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_DQA_SUPPORT))
- ring = &sc->txq[IWM_DQA_MIN_MGMT_QUEUE + ac];
+ qid = IWM_DQA_MIN_MGMT_QUEUE + ac;
  else
- ring = &sc->txq[ac];
+ qid = ac;
+
+ /* If possible, put this frame on an aggregation queue. */
+ if (hasqos) {
+ struct ieee80211_tx_ba *ba;
+ uint16_t qos = ieee80211_get_qos(wh);
+ int qostid = qos & IEEE80211_QOS_TID;
+ int qosac = ieee80211_up_to_ac(ic, qostid);
+
+ ba = &ni->ni_tx_ba[qostid];
+ if (!IEEE80211_IS_MULTICAST(wh->i_addr1) &&
+    type == IEEE80211_FC0_TYPE_DATA &&
+    (in->ampdu_tx_tid_mask & (1 << qostid)) &&
+    ba->ba_state == IEEE80211_BA_AGREED) {
+ qid = IWM_FIRST_AGG_TX_QUEUE + qostid;
+ tid = qostid;
+ ac = qosac;
+ }
+ }
+
+ ring = &sc->txq[qid];
  desc = &ring->desc[ring->cur];
  memset(desc, 0, sizeof(*desc));
  data = &ring->data[ring->cur];
@@ -5004,14 +5629,28 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
  tx->sta_id = IWM_STATION_ID;
 
  if (type == IEEE80211_FC0_TYPE_MGT) {
- uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
-
  if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ ||
     subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ)
  tx->pm_frame_timeout = htole16(3);
  else
  tx->pm_frame_timeout = htole16(2);
  } else {
+ if (type == IEEE80211_FC0_TYPE_CTL &&
+    subtype == IEEE80211_FC0_SUBTYPE_BAR) {
+ struct ieee80211_frame_min *wbar;
+ uint8_t *frm;
+ uint16_t ctl;
+
+ flags |= IWM_TX_CMD_FLG_ACK | IWM_TX_CMD_FLG_BAR;
+ tx->data_retry_limit = IWM_BAR_DFAULT_RETRY_LIMIT;
+
+ wbar = mtod(m, struct ieee80211_frame_min *);
+ frm = (uint8_t *)&wbar[1];
+ memcpy(&ctl, frm, sizeof(ctl));
+ tid = (le16toh(ctl) & IEEE80211_BA_TID_INFO_MASK) >>
+    IEEE80211_BA_TID_INFO_SHIFT;
+ }
+
  tx->pm_frame_timeout = htole16(0);
  }
 
@@ -5058,7 +5697,9 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
  tx->sec_ctl = 0;
  }
 
- flags |= IWM_TX_CMD_FLG_BT_DIS | IWM_TX_CMD_FLG_SEQ_CTL;
+ flags |= IWM_TX_CMD_FLG_BT_DIS;
+ if (!hasqos)
+ flags |= IWM_TX_CMD_FLG_SEQ_CTL;
 
  tx->tx_flags |= htole32(flags);
 
@@ -5085,9 +5726,11 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
  }
  }
  data->m = m;
+ data->totlen = totlen;
  data->in = in;
  data->txmcs = ni->ni_txmcs;
  data->txrate = ni->ni_txrate;
+ data->ampdu_txmcs = ni->ni_txmcs; /* updated upon Tx interrupt */
 
  /* Fill TX descriptor. */
  desc->num_tbs = 2 + data->map->dm_nsegs;
@@ -5118,9 +5761,7 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
     (char *)(void *)desc - (char *)(void *)ring->desc_dma.vaddr,
     sizeof (*desc), BUS_DMASYNC_PREWRITE);
 
-#if 0
  iwm_update_sched(sc, ring->qid, ring->cur, tx->sta_id, le16toh(tx->len));
-#endif
 
  /* Kick TX ring. */
  ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT;
@@ -5336,6 +5977,7 @@ iwm_add_sta_cmd(struct iwm_softc *sc, struct iwm_node
  uint32_t status;
  size_t cmdsize;
  struct ieee80211com *ic = &sc->sc_ic;
+ uint16_t tid_disable_tx = 0xffff;
 
  if (!update && (sc->sc_flags & IWM_FLAG_STA_ACTIVE))
  panic("STA already added");
@@ -5362,7 +6004,7 @@ iwm_add_sta_cmd(struct iwm_softc *sc, struct iwm_node
  else
  qid = IWM_AUX_QUEUE;
  add_sta_cmd.tfd_queue_msk |= htole32(1 << qid);
- } else if (!update) {
+ } else {
  int ac;
  for (ac = 0; ac < EDCA_NUM_AC; ac++) {
  int qid = ac;
@@ -5371,15 +6013,33 @@ iwm_add_sta_cmd(struct iwm_softc *sc, struct iwm_node
  qid += IWM_DQA_MIN_MGMT_QUEUE;
  add_sta_cmd.tfd_queue_msk |= htole32(1 << qid);
  }
- IEEE80211_ADDR_COPY(&add_sta_cmd.addr, in->in_ni.ni_bssid);
  }
+ if (!update) {
+ if (ic->ic_opmode == IEEE80211_M_MONITOR)
+ IEEE80211_ADDR_COPY(&add_sta_cmd.addr,
+    etherbroadcastaddr);
+ else
+ IEEE80211_ADDR_COPY(&add_sta_cmd.addr,
+    in->in_ni.ni_bssid);
+ }
  add_sta_cmd.add_modify = update ? 1 : 0;
  add_sta_cmd.station_flags_msk
     |= htole32(IWM_STA_FLG_FAT_EN_MSK | IWM_STA_FLG_MIMO_EN_MSK);
- add_sta_cmd.tid_disable_tx = htole16(0xffff);
- if (update)
- add_sta_cmd.modify_mask |= (IWM_STA_MODIFY_TID_DISABLE_TX);
+ if (update) {
+ int tid, qid;
+ for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
+ if ((in->ampdu_tx_tid_mask & (1 << tid)) == 0)
+ continue;
 
+ qid = IWM_FIRST_AGG_TX_QUEUE + tid;
+ add_sta_cmd.tfd_queue_msk |= htole32(1 << qid);
+ tid_disable_tx &= ~(1 << tid);
+ add_sta_cmd.modify_mask |= (IWM_STA_MODIFY_QUEUES |
+    IWM_STA_MODIFY_TID_DISABLE_TX);
+ }
+ }
+ add_sta_cmd.tid_disable_tx = htole16(tid_disable_tx);
+
  if (in->in_ni.ni_flags & IEEE80211_NODE_HT) {
  add_sta_cmd.station_flags_msk
     |= htole32(IWM_STA_FLG_MAX_AGG_SIZE_MSK |
@@ -5444,7 +6104,7 @@ iwm_add_aux_sta(struct iwm_softc *sc)
  if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_DQA_SUPPORT)) {
  qid = IWM_DQA_AUX_QUEUE;
  err = iwm_enable_txq(sc, IWM_AUX_STA_ID, qid,
-    IWM_TX_FIFO_MCAST);
+    IWM_TX_FIFO_MCAST, 0, IWM_MAX_TID_COUNT, 0);
  } else {
  qid = IWM_AUX_QUEUE;
  err = iwm_enable_ac_txq(sc, qid, IWM_TX_FIFO_MCAST);
@@ -6582,6 +7242,9 @@ iwm_auth(struct iwm_softc *sc)
 
  splassert(IPL_NET);
 
+ in->ampdu_rx_tid_mask = 0;
+ in->ampdu_tx_tid_mask = 0;
+
  if (ic->ic_opmode == IEEE80211_M_MONITOR)
  sc->sc_phyctxt[0].channel = ic->ic_ibss_chan;
  else
@@ -7156,11 +7819,7 @@ iwm_setrates(struct iwm_node *in, int async)
 
  lqcmd.agg_time_limit = htole16(4000); /* 4ms */
  lqcmd.agg_disable_start_th = 3;
-#ifdef notyet
  lqcmd.agg_frame_cnt_limit = 0x3f;
-#else
- lqcmd.agg_frame_cnt_limit = 1; /* tx agg disabled */
-#endif
 
  cmd.data[0] = &lqcmd;
  iwm_send_cmd(sc, &cmd);
@@ -7892,7 +8551,7 @@ iwm_init_hw(struct iwm_softc *sc)
  else
  qid = IWM_AUX_QUEUE;
  err = iwm_enable_txq(sc, IWM_MONITOR_STA_ID, qid,
-    iwm_ac_to_tx_fifo[EDCA_AC_BE]);
+    iwm_ac_to_tx_fifo[EDCA_AC_BE], 0, IWM_MAX_TID_COUNT, 0);
  if (err) {
  printf("%s: could not enable monitor inject Tx queue "
     "(error %d)\n", DEVNAME(sc), err);
@@ -7906,7 +8565,7 @@ iwm_init_hw(struct iwm_softc *sc)
  else
  qid = ac;
  err = iwm_enable_txq(sc, IWM_STATION_ID, qid,
-    iwm_ac_to_tx_fifo[ac]);
+    iwm_ac_to_tx_fifo[ac], 0, IWM_TID_NON_QOS, 0);
  if (err) {
  printf("%s: could not enable Tx queue %d "
     "(error %d)\n", DEVNAME(sc), ac, err);
@@ -8578,6 +9237,10 @@ iwm_rx_pkt(struct iwm_softc *sc, struct iwm_rx_data *d
  iwm_rx_tx_cmd(sc, pkt, data);
  break;
 
+ case IWM_BA_NOTIF:
+ iwm_rx_ba(sc, pkt, data);
+ break;
+
  case IWM_MISSED_BEACONS_NOTIFICATION:
  iwm_rx_bmiss(sc, pkt, data);
  break;
@@ -8943,9 +9606,9 @@ iwm_intr(void *arg)
  DPRINTF(("driver status:\n"));
  for (i = 0; i < IWM_MAX_QUEUES; i++) {
  struct iwm_tx_ring *ring = &sc->txq[i];
- DPRINTF(("  tx ring %2d: qid=%-2d cur=%-3d "
+ DPRINTF(("  tx ring %2d: qid=%-2d tail=%-3d cur=%-3d "
     "queued=%-3d\n",
-    i, ring->qid, ring->cur, ring->queued));
+    i, ring->qid, ring->tail, ring->cur, ring->queued));
  }
  DPRINTF(("  rx ring: cur=%d\n", sc->rxq.cur));
  DPRINTF(("  802.11 state %s\n",
@@ -9053,9 +9716,9 @@ iwm_intr_msix(void *arg)
  DPRINTF(("driver status:\n"));
  for (i = 0; i < IWM_MAX_QUEUES; i++) {
  struct iwm_tx_ring *ring = &sc->txq[i];
- DPRINTF(("  tx ring %2d: qid=%-2d cur=%-3d "
+ DPRINTF(("  tx ring %2d: qid=%-2d tail=%-3d cur=%-3d "
     "queued=%-3d\n",
-    i, ring->qid, ring->cur, ring->queued));
+    i, ring->qid, ring->tail, ring->cur, ring->queued));
  }
  DPRINTF(("  rx ring: cur=%d\n", sc->rxq.cur));
  DPRINTF(("  802.11 state %s\n",
@@ -9465,6 +10128,7 @@ iwm_attach(struct device *parent, struct device *self,
 
  /* Set device capabilities. */
  ic->ic_caps =
+    IEEE80211_C_QOS | IEEE80211_C_TX_AMPDU | /* A-MPDU */
     IEEE80211_C_WEP | /* WEP */
     IEEE80211_C_RSN | /* WPA/RSN */
     IEEE80211_C_SCANALL | /* device scans all channels at once */
@@ -9529,10 +10193,8 @@ iwm_attach(struct device *parent, struct device *self,
  ic->ic_update_htprot = iwm_update_htprot;
  ic->ic_ampdu_rx_start = iwm_ampdu_rx_start;
  ic->ic_ampdu_rx_stop = iwm_ampdu_rx_stop;
-#ifdef notyet
  ic->ic_ampdu_tx_start = iwm_ampdu_tx_start;
  ic->ic_ampdu_tx_stop = iwm_ampdu_tx_stop;
-#endif
  /*
  * We cannot read the MAC address without loading the
  * firmware from disk. Postpone until mountroot is done.
blob - 201ce69014b9422335a6d698cd4a3cc3f314b2b5
blob + 1e2e4e01e2a98f60221b72fc6e82a1246f7b9cef
--- sys/dev/pci/if_iwmreg.h
+++ sys/dev/pci/if_iwmreg.h
@@ -1837,6 +1837,9 @@ struct iwm_agn_scd_bc_tbl {
  uint16_t tfd_offset[IWM_TFD_QUEUE_BC_SIZE];
 } __packed;
 
+#define IWM_TX_CRC_SIZE 4
+#define IWM_TX_DELIMITER_SIZE 4
+
 /* Maximum number of Tx queues. */
 #define IWM_MAX_QUEUES 31
 
@@ -1875,6 +1878,11 @@ struct iwm_agn_scd_bc_tbl {
 #define IWM_DQA_MIN_DATA_QUEUE 10
 #define IWM_DQA_MAX_DATA_QUEUE 31
 
+/* Reserve 8 DQA Tx queues, from 10 up to 17, for A-MPDU aggregation. */
+#define IWM_MAX_TID_COUNT 8
+#define IWM_FIRST_AGG_TX_QUEUE IWM_DQA_MIN_DATA_QUEUE
+#define IWM_LAST_AGG_TX_QUEUE (IWM_FIRST_AGG_TX_QUEUE + IWM_MAX_TID_COUNT - 1)
+
 /* legacy non-DQA queues; the legacy command queue uses a different number! */
 #define IWM_OFFCHANNEL_QUEUE 8
 #define IWM_CMD_QUEUE 9
@@ -4627,7 +4635,8 @@ struct iwm_lq_cmd {
 /*
  * TID for non QoS frames - to be written in tid_tspec
  */
-#define IWM_TID_NON_QOS IWM_MAX_TID_COUNT
+#define IWM_TID_NON_QOS 0
+#define IWM_TID_MGMT 15
 
 /*
  * Limits on the retransmissions - to be written in {data,rts}_retry_limit
@@ -4898,21 +4907,23 @@ struct iwm_tx_resp {
 /**
  * struct iwm_ba_notif - notifies about reception of BA
  * ( IWM_BA_NOTIF = 0xc5 )
- * @sta_addr_lo32: lower 32 bits of the MAC address
- * @sta_addr_hi16: upper 16 bits of the MAC address
+ * @sta_addr: MAC address
  * @sta_id: Index of recipient (BA-sending) station in fw's station table
  * @tid: tid of the session
- * @seq_ctl: sequence control field from IEEE80211 frame header (it is unclear
- *  which frame this relates to; info or reverse engineering welcome)
+ * @seq_ctl: sequence control field from IEEE80211 frame header (the first
+ * bit in @bitmap corresponds to the sequence number stored here)
  * @bitmap: the bitmap of the BA notification as seen in the air
  * @scd_flow: the tx queue this BA relates to
  * @scd_ssn: the index of the last contiguously sent packet
  * @txed: number of Txed frames in this batch
  * @txed_2_done: number of Acked frames in this batch
+ * @reduced_txp: power reduced according to TPC. This is the actual value and
+ * not a copy from the LQ command. Thus, if not the first rate was used
+ * for Tx-ing then this value will be set to 0 by FW.
+ * @reserved1: reserved
  */
 struct iwm_ba_notif {
- uint32_t sta_addr_lo32;
- uint16_t sta_addr_hi16;
+ uint8_t sta_addr[ETHER_ADDR_LEN];
  uint16_t reserved;
 
  uint8_t sta_id;
@@ -4923,6 +4934,7 @@ struct iwm_ba_notif {
  uint16_t scd_ssn;
  uint8_t txed;
  uint8_t txed_2_done;
+ uint8_t reduced_txp;
  uint16_t reserved1;
 } __packed;
 
blob - 89abe2c1dbdf5ac3ccbf710994380502530ef2a8
blob + 7d9e26bffe0f1658c771bf85768797c23e94e147
--- sys/dev/pci/if_iwmvar.h
+++ sys/dev/pci/if_iwmvar.h
@@ -252,14 +252,26 @@ struct iwm_fw_paging {
 #define IWM_TX_RING_LOMARK 192
 #define IWM_TX_RING_HIMARK 224
 
+/* For aggregation queues, index must be aligned to frame sequence number. */
+#define IWM_AGG_SSN_TO_TXQ_IDX(x) ((x) & (IWM_TX_RING_COUNT - 1))
+
 struct iwm_tx_data {
  bus_dmamap_t map;
  bus_addr_t cmd_paddr;
  bus_addr_t scratch_paddr;
  struct mbuf *m;
  struct iwm_node *in;
+ int totlen;
+ int retries;
+ int txfail;
  int txmcs;
  int txrate;
+
+ /* A-MPDU subframes */
+ int ampdu_id;
+ int ampdu_txmcs;
+ int ampdu_nframes;
+ int ampdu_size;
 };
 
 struct iwm_tx_ring {
@@ -363,6 +375,12 @@ struct iwm_bf_data {
  int last_cqm_event;
 };
 
+struct iwm_ba_param {
+ uint16_t tid_mask;
+ uint16_t ssn[IWM_MAX_TID_COUNT];
+ uint16_t winsize[IWM_MAX_TID_COUNT];
+};
+
 struct iwm_softc {
  struct device sc_dev;
  struct ieee80211com sc_ic;
@@ -381,10 +399,15 @@ struct iwm_softc {
 
  /* Task for firmware BlockAck setup/teardown and its arguments. */
  struct task ba_task;
- int ba_start;
- int ba_tid;
- uint16_t ba_ssn;
- uint16_t ba_winsize;
+ int ba_flags;
+#define IWM_RX_BA_START 0x01
+#define IWM_TX_BA_START 0x02
+#define IWM_RX_BA_STOP 0x04
+#define IWM_TX_BA_STOP 0x08
+ struct iwm_ba_param rx_ba_start;
+ struct iwm_ba_param rx_ba_stop;
+ struct iwm_ba_param tx_ba_start;
+ struct iwm_ba_param tx_ba_stop;
 
  /* Task for HT protection updates. */
  struct task htprot_task;
@@ -407,6 +430,7 @@ struct iwm_softc {
  struct iwm_rx_ring rxq;
  int qfullmsk;
  int cmdqid;
+ int qenablemsk;
 
  int sc_sf_state;
 
@@ -551,6 +575,12 @@ struct iwm_node {
  int chosen_txrate;
  struct ieee80211_mira_node in_mn;
  int chosen_txmcs;
+
+ uint32_t next_ampdu_id;
+
+ /* Currently active Rx/Tx block ack sessions; tracked per TID. */
+ uint8_t ampdu_rx_tid_mask;
+ uint8_t ampdu_tx_tid_mask;
 };
 #define IWM_STATION_ID 0
 #define IWM_AUX_STA_ID 1

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Florian Obser-2
Seems to be working on a X1 gen2 using
iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7260" rev 0x83, msi
against a Unifi AP-SHD.

Before:

bandwidth min/avg/max/std-dev = 7.344/9.077/11.514/0.803 Mbps

after:

bandwidth min/avg/max/std-dev = 12.551/65.407/82.835/14.169 Mbps

--
I'm not entirely sure you are real.

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Landry Breuil-5
In reply to this post by Stefan Sperling-5
On Fri, Jun 26, 2020 at 02:45:53PM +0200, Stefan Sperling wrote:

> This patch adds support for 11n Tx aggregation to iwm(4).
>
> Please help with testing if you can by running the patch and using wifi
> as usual. Nothing should change, except that Tx speed may potentially
> improve. If you have time to run before/after performance measurements with
> tcpbench or such, that would be nice. But it's not required for testing.
>
> If Tx aggregation is active then netstat will show a non-zero output block ack
> agreement counter:
>
> $ netstat -W iwm0 | grep 'output block'
>         3 new output block ack agreements
>         0 output block ack agreements timed out
>
> It would be great to get at least one test for all the chipsets the driver
> supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> The behaviour of the access point also matters a great deal. It won't
> hurt to test the same chipset against several different access points.
>
> I have tested this version on 8265 only so far. I've run older revisions
> of this patch on 7265 so I'm confident that this chip will work, too.
> So far, the APs I have tested against are athn(4) in 11a mode and in 11n
> mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5Ghz channels.

no difference on X1c3 w/
iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7265" rev 0x59, msi
iwm0: hw rev 0x210, fw ver 17.3216344376.0,

using a crappy old fonera as AP, serving as a bridge to gw w/ tcpbench.

bandwidth min/avg/max/std-dev = 22.519/22.704/22.995/0.162 Mbps

same bw both ways it seems.

Landry

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

toc-2
In reply to this post by Stefan Sperling-5

This is from an 8265:

iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x88, msi
iwm0: hw rev 0x230, fw ver 34.0.1,

Associated to an Apple AirPort AP on a 5 GHz channel.

Before:
bandwidth min/avg/max/std-dev = 11.402/17.410/36.190/4.079 Mbps

After:
bandwidth min/avg/max/std-dev = 5.147/25.039/54.066/8.489 Mbps

$ netstat -W iwm0 | grep "output block"
        1 new output block ack agreement
        0 output block ack agreements timed out


Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Johan Huldtgren-2
In reply to this post by Stefan Sperling-5
hello,

On 2020-06-26 14:45, Stefan Sperling wrote:
> It would be great to get at least one test for all the chipsets the driver
> supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> The behaviour of the access point also matters a great deal. It won't
> hurt to test the same chipset against several different access points.

tested on:

iwm0 at pci1 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi

AP is a Ruckus 7363.

$ netstat -W iwm0 | grep "output block"
        6 new output block ack agreements
        0 output block ack agreements timed out

Before:

bandwidth min/avg/max/std-dev = 16.780/18.325/19.939/1.235 Mbps

After:

bandwidth min/avg/max/std-dev = 0.000/15.559/51.631/19.548 Mbps

.jh

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Mike Larkin-2
In reply to this post by Stefan Sperling-5
On Fri, Jun 26, 2020 at 02:45:53PM +0200, Stefan Sperling wrote:

> This patch adds support for 11n Tx aggregation to iwm(4).
>
> Please help with testing if you can by running the patch and using wifi
> as usual. Nothing should change, except that Tx speed may potentially
> improve. If you have time to run before/after performance measurements with
> tcpbench or such, that would be nice. But it's not required for testing.
>
> If Tx aggregation is active then netstat will show a non-zero output block ack
> agreement counter:
>
> $ netstat -W iwm0 | grep 'output block'
>         3 new output block ack agreements
>         0 output block ack agreements timed out
>
> It would be great to get at least one test for all the chipsets the driver
> supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> The behaviour of the access point also matters a great deal. It won't
> hurt to test the same chipset against several different access points.
>
> I have tested this version on 8265 only so far. I've run older revisions
> of this patch on 7265 so I'm confident that this chip will work, too.
> So far, the APs I have tested against are athn(4) in 11a mode and in 11n
> mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5Ghz channels.

I tested this on my T490 Thinkpad:

iwm0 at pci0 dev 20 function 3 "Intel Dual Band Wireless AC 9560" rev 0x30, msix
iwm0: hw rev 0x310, fw ver 34.3125811985.0

It ended up having a heck of a time connecting to anything; most/all
connections ended up timing out or just taking a really long time to complete.

I looked in dmesg, and found a stream of fatal firmware errors and other
errors (see end of this email).

My iwm-firmware was updated before I tried the new kernel:

-innsmouth- ~> pkg_info iwm-firmware
Information for inst:iwm-firmware-20191022p1

Comment:
firmware binary images for iwm(4) driver

Description:
Firmware binary images for use with the iwm(4) driver.

Maintainer: The OpenBSD ports mailing-list <[hidden email]>

WWW: https://wireless.wiki.kernel.org/en/users/Drivers/iwlwifi



I still have the kernel around if you want me to test something else. There
is nothing in this tree except this Txagg diff. LMK if you need any more
info.

OpenBSD 6.7-current (GENERIC.MP) #1: Fri Jun 26 14:01:06 PDT 2020
    [hidden email]:/u/bin/src/OpenBSD/openbsd/sys/arch/amd64/compile/GENERIC.MP
real mem = 51260506112 (48885MB)
avail mem = 49691906048 (47389MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 3.1 @ 0x604f5000 (67 entries)
bios0: vendor LENOVO version "N2IET61W (1.39 )" date 05/16/2019
bios0: LENOVO 20N20046US
acpi0 at bios0: ACPI 6.1
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP SSDT SSDT SSDT SSDT UEFI SSDT HPET APIC MCFG ECDT SSDT SSDT BOOT SLIC SSDT LPIT WSMT SSDT DBGP DBG2 MSDM BATB DMAR NHLT ASF! FPDT UEFI
acpi0: wakeup devices GLAN(S4) XHC_(S3) XDCI(S4) HDAS(S4) RP01(S4) PXSX(S4) RP02(S4) PXSX(S4) RP03(S4) PXSX(S4) RP04(S4) PXSX(S4) RP05(S4) PXSX(S4) RP06(S4) PXSX(S4) [...]
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpihpet0 at acpi0: 23999999 Hz
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 1586.72 MHz, 06-8e-0c
cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu0: 256KB 64b/line 8-way L2 cache
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
cpu0: apic clock running at 24MHz
cpu0: mwait min=64, max=64, C-substates=0.2.1.2.4.1.1.1, IBE
cpu1 at mainbus0: apid 2 (application processor)
cpu1: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 1333.05 MHz, 06-8e-0c
cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu1: 256KB 64b/line 8-way L2 cache
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 4 (application processor)
cpu2: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 1125.81 MHz, 06-8e-0c
cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu2: 256KB 64b/line 8-way L2 cache
cpu2: smt 0, core 2, package 0
cpu3 at mainbus0: apid 6 (application processor)
cpu3: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 1011.36 MHz, 06-8e-0c
cpu3: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu3: 256KB 64b/line 8-way L2 cache
cpu3: smt 0, core 3, package 0
cpu4 at mainbus0: apid 1 (application processor)
cpu4: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 997.68 MHz, 06-8e-0c
cpu4: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu4: 256KB 64b/line 8-way L2 cache
cpu4: smt 1, core 0, package 0
cpu5 at mainbus0: apid 3 (application processor)
cpu5: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 997.69 MHz, 06-8e-0c
cpu5: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu5: 256KB 64b/line 8-way L2 cache
cpu5: smt 1, core 1, package 0
cpu6 at mainbus0: apid 5 (application processor)
cpu6: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 997.68 MHz, 06-8e-0c
cpu6: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu6: 256KB 64b/line 8-way L2 cache
cpu6: smt 1, core 2, package 0
cpu7 at mainbus0: apid 7 (application processor)
cpu7: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 997.68 MHz, 06-8e-0c
cpu7: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu7: 256KB 64b/line 8-way L2 cache
cpu7: smt 1, core 3, package 0
ioapic0 at mainbus0: apid 2 pa 0xfec00000, version 20, 120 pins
acpimcfg0 at acpi0
acpimcfg0: addr 0xe0000000, bus 0-255
acpiec0 at acpi0
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus 1 (RP01)
acpiprt2 at acpi0: bus -1 (RP02)
acpiprt3 at acpi0: bus -1 (RP03)
acpiprt4 at acpi0: bus -1 (RP04)
acpiprt5 at acpi0: bus 2 (RP05)
acpiprt6 at acpi0: bus -1 (RP06)
acpiprt7 at acpi0: bus -1 (RP07)
acpiprt8 at acpi0: bus -1 (RP08)
acpiprt9 at acpi0: bus 60 (RP09)
acpiprt10 at acpi0: bus -1 (RP10)
acpiprt11 at acpi0: bus -1 (RP11)
acpiprt12 at acpi0: bus -1 (RP12)
acpiprt13 at acpi0: bus 61 (RP13)
acpiprt14 at acpi0: bus -1 (RP14)
acpiprt15 at acpi0: bus -1 (RP15)
acpiprt16 at acpi0: bus -1 (RP16)
acpiprt17 at acpi0: bus -1 (RP17)
acpiprt18 at acpi0: bus -1 (RP18)
acpiprt19 at acpi0: bus -1 (RP19)
acpiprt20 at acpi0: bus -1 (RP20)
acpiprt21 at acpi0: bus -1 (RP21)
acpiprt22 at acpi0: bus -1 (RP22)
acpiprt23 at acpi0: bus -1 (RP23)
acpiprt24 at acpi0: bus -1 (RP24)
acpipwrres0 at acpi0: PUBS, resource for XHC_
acpipwrres1 at acpi0: USBC, resource for XDCI
acpipwrres2 at acpi0: PXP_, resource for RP07, PXSX
acpipwrres3 at acpi0: V0PR
acpipwrres4 at acpi0: V1PR
acpipwrres5 at acpi0: V2PR
acpipwrres6 at acpi0: WRST
acpicpu0 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu1 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu2 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu3 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu4 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu5 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu6 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu7 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpitz0 at acpi0: critical temperature is 128 degC
acpipwrres7 at acpi0: PIN_
acpipwrres8 at acpi0: PINP
acpipci0 at acpi0 PCI0: 0x00000010 0x00000011 0x00000000
extent `acpipci0 pcibus' (0x0 - 0xff), flags=0
     0xff - 0xff
extent `acpipci0 pciio' (0x0 - 0xffffffff), flags=0
     0xcf8 - 0xcff
     0x10000 - 0xffffffff
extent `acpipci0 pcimem' (0x0 - 0xffffffffffffffff), flags=0
     0x0 - 0x9ffff
     0xc0000 - 0x697fffff
     0xe0000000 - 0xfc7fffff
     0xfe800000 - 0xffffffffffffffff
acpithinkpad0 at acpi0: version 2.0
acpiac0 at acpi0: AC unit offline
acpibat0 at acpi0: BAT0 model "02DL008" serial   659 type LiP oem "SMP"
"LEN0100" at acpi0 not configured
"INT3403" at acpi0 not configured
"INT3403" at acpi0 not configured
acpicmos0 at acpi0
"INT34BB" at acpi0 not configured
"PNP0C14" at acpi0 not configured
acpibtn0 at acpi0: SLPB
"PNP0C14" at acpi0 not configured
"PNP0C14" at acpi0 not configured
acpibtn1 at acpi0: LID_
"PNP0C14" at acpi0 not configured
"PNP0C14" at acpi0 not configured
"PNP0C14" at acpi0 not configured
"INT3400" at acpi0 not configured
"USBC000" at acpi0 not configured
acpivideo0 at acpi0: GFX0
acpivout0 at acpivideo0: DD1F
cpu0: Enhanced SpeedStep 1586 MHz: speeds: 2101, 2100, 1900, 1800, 1700, 1600, 1500, 1400, 1200, 1100, 1000, 800, 700, 600, 500, 400 MHz
pci0 at mainbus0 bus 0
pchb0 at pci0 dev 0 function 0 "Intel Core 8G Host" rev 0x0c
inteldrm0 at pci0 dev 2 function 0 "Intel UHD Graphics 620" rev 0x02
drm0 at inteldrm0
inteldrm0: msi, COFFEELAKE, gen 9
"Intel Core 6G Thermal" rev 0x0c at pci0 dev 4 function 0 not configured
"Intel Core GMM" rev 0x00 at pci0 dev 8 function 0 not configured
pchtemp0 at pci0 dev 18 function 0 "Intel 300 Series Thermal" rev 0x30
xhci0 at pci0 dev 20 function 0 "Intel 300 Series xHCI" rev 0x30: msi, xHCI 1.10
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 3.00/1.00 addr 1
"Intel 300 Series Shared SRAM" rev 0x30 at pci0 dev 20 function 2 not configured
iwm0 at pci0 dev 20 function 3 "Intel Dual Band Wireless AC 9560" rev 0x30, msix
dwiic0 at pci0 dev 21 function 0 "Intel 300 Series I2C" rev 0x30: apic 2 int 16
iic0 at dwiic0
"Intel 300 Series MEI" rev 0x30 at pci0 dev 22 function 0 not configured
ppb0 at pci0 dev 28 function 0 "Intel 300 Series PCIE" rev 0xf0: msi
pci1 at ppb0 bus 1
rtsx0 at pci1 dev 0 function 0 "Realtek RTS522A Card Reader" rev 0x01: msi
sdmmc0 at rtsx0: 4-bit, dma
ppb1 at pci0 dev 28 function 4 "Intel 300 Series PCIE" rev 0xf0: msi
pci2 at ppb1 bus 2
ppb2 at pci0 dev 29 function 0 "Intel 300 Series PCIE" rev 0xf0: msi
pci3 at ppb2 bus 60
ppb3 at pci0 dev 29 function 4 "Intel 300 Series PCIE" rev 0xf0: msi
pci4 at ppb3 bus 61
nvme0 at pci4 dev 0 function 0 "Samsung SM981/PM981 NVMe" rev 0x00: msix, NVMe 1.3
nvme0: Samsung SSD 970 EVO 2TB, firmware 1B2QEXE7, serial XXXXXXXXXX
scsibus1 at nvme0: 2 targets, initiator 0
sd0 at scsibus1 targ 1 lun 0: <NVMe, Samsung SSD 970, 1B2Q>
sd0: 1907729MB, 512 bytes/sector, 3907029168 sectors
pcib0 at pci0 dev 31 function 0 "Intel 300 Series LPC" rev 0x30
azalia0 at pci0 dev 31 function 3 "Intel 300 Series HD Audio" rev 0x30: msi
azalia0: codecs: Realtek/0x0257, Intel/0x280b, using Realtek/0x0257
audio0 at azalia0
ichiic0 at pci0 dev 31 function 4 "Intel 300 Series SMBus" rev 0x30: apic 2 int 16
iic1 at ichiic0
"Intel 300 Series SPI" rev 0x30 at pci0 dev 31 function 5 not configured
em0 at pci0 dev 31 function 6 "Intel I219-LM" rev 0x30: msi, address XXXXXXXXX
isa0 at pcib0
isadma0 at isa0
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
pms0 at pckbc0 (aux slot)
wsmouse0 at pms0 mux 0
wsmouse1 at pms0 mux 0
pms0: Synaptics clickpad, firmware 10.32, 0x1e2a1 0x940300 0x378f40 0xf00aa3 0x12e800
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
vmm0 at mainbus0: VMX/EPT
efifb at mainbus0 not configured
uhidev0 at uhub0 port 6 configuration 1 interface 0 "ELAN Touchscreen" rev 2.00/57.13 addr 2
uhidev0: iclass 3/0, 68 report ids
ums0 at uhidev0 reportid 1: 1 button, tip
wsmouse2 at ums0 mux 0
uhid0 at uhidev0 reportid 2: input=64, output=0, feature=0
uhid1 at uhidev0 reportid 3: input=0, output=63, feature=0
uhid2 at uhidev0 reportid 4: input=19, output=0, feature=0
uhid3 at uhidev0 reportid 10: input=0, output=0, feature=1
ums1 at uhidev0 reportid 68
ums1: mouse has no X report
vscsi0 at root
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
sd1 at scsibus3 targ 1 lun 0: <OPENBSD, SR CRYPTO, 006>
sd1: 1907728MB, 512 bytes/sector, 3907027553 sectors
root on sd1a (e32608d644f1d59c.a) swap on sd1b dump on sd1b
inteldrm0: 1920x1080, 32bpp
wsdisplay0 at inteldrm0 mux 1: console (std, vt100 emulation), using wskbd0
wsdisplay0: screen 1-5 added (std, vt100 emulation)
iwm0: hw rev 0x310, fw ver 34.3125811985.0, address XXXXXXXXXX
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: device timeout
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: device timeout
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: device timeout
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error
iwm0: fatal firmware error


Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Mike Larkin-2
On Fri, Jun 26, 2020 at 09:01:03PM -0700, Mike Larkin wrote:

> On Fri, Jun 26, 2020 at 02:45:53PM +0200, Stefan Sperling wrote:
> > This patch adds support for 11n Tx aggregation to iwm(4).
> >
> > Please help with testing if you can by running the patch and using wifi
> > as usual. Nothing should change, except that Tx speed may potentially
> > improve. If you have time to run before/after performance measurements with
> > tcpbench or such, that would be nice. But it's not required for testing.
> >
> > If Tx aggregation is active then netstat will show a non-zero output block ack
> > agreement counter:
> >
> > $ netstat -W iwm0 | grep 'output block'
> >         3 new output block ack agreements
> >         0 output block ack agreements timed out
> >
> > It would be great to get at least one test for all the chipsets the driver
> > supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> > The behaviour of the access point also matters a great deal. It won't
> > hurt to test the same chipset against several different access points.
> >
> > I have tested this version on 8265 only so far. I've run older revisions
> > of this patch on 7265 so I'm confident that this chip will work, too.
> > So far, the APs I have tested against are athn(4) in 11a mode and in 11n
> > mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5Ghz channels.
>
> I tested this on my T490 Thinkpad:
>
> iwm0 at pci0 dev 20 function 3 "Intel Dual Band Wireless AC 9560" rev 0x30, msix
> iwm0: hw rev 0x310, fw ver 34.3125811985.0
>
> It ended up having a heck of a time connecting to anything, most/all
> connections ended up timing out or just taking a really long time to complete.
>
> I looked in dmesg, and found a stream of fatal firmware errors and other
> errors (see end of this email).
>
> My iwm-firmware was updated before I tried the new kernel:
>
> -innsmouth- ~> pkg_info iwm-firmware
> Information for inst:iwm-firmware-20191022p1
>
> Comment:
> firmware binary images for iwm(4) driver
>
> Description:
> Firmware binary images for use with the iwm(4) driver.
>
> Maintainer: The OpenBSD ports mailing-list <[hidden email]>
>
> WWW: https://wireless.wiki.kernel.org/en/users/Drivers/iwlwifi
>

PS, I did see 5 new output block ack agreements when I was running the diff,
so apparently at least it is doing ... something?

-ml

>
>
> I still have the kernel around if you want me to test something else. There
> is nothing in this tree except this Txagg diff. LMK if you need any more
> info.
>
> OpenBSD 6.7-current (GENERIC.MP) #1: Fri Jun 26 14:01:06 PDT 2020
>     [hidden email]:/u/bin/src/OpenBSD/openbsd/sys/arch/amd64/compile/GENERIC.MP
> real mem = 51260506112 (48885MB)
> avail mem = 49691906048 (47389MB)
> random: good seed from bootblocks
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 3.1 @ 0x604f5000 (67 entries)
> bios0: vendor LENOVO version "N2IET61W (1.39 )" date 05/16/2019
> bios0: LENOVO 20N20046US
> acpi0 at bios0: ACPI 6.1
> acpi0: sleep states S0 S3 S4 S5
> acpi0: tables DSDT FACP SSDT SSDT SSDT SSDT UEFI SSDT HPET APIC MCFG ECDT SSDT SSDT BOOT SLIC SSDT LPIT WSMT SSDT DBGP DBG2 MSDM BATB DMAR NHLT ASF! FPDT UEFI
> acpi0: wakeup devices GLAN(S4) XHC_(S3) XDCI(S4) HDAS(S4) RP01(S4) PXSX(S4) RP02(S4) PXSX(S4) RP03(S4) PXSX(S4) RP04(S4) PXSX(S4) RP05(S4) PXSX(S4) RP06(S4) PXSX(S4) [...]
> acpitimer0 at acpi0: 3579545 Hz, 24 bits
> acpihpet0 at acpi0: 23999999 Hz
> acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
> cpu0 at mainbus0: apid 0 (boot processor)
> cpu0: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 1586.72 MHz, 06-8e-0c
> cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu0: 256KB 64b/line 8-way L2 cache
> cpu0: smt 0, core 0, package 0
> mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
> cpu0: apic clock running at 24MHz
> cpu0: mwait min=64, max=64, C-substates=0.2.1.2.4.1.1.1, IBE
> cpu1 at mainbus0: apid 2 (application processor)
> cpu1: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 1333.05 MHz, 06-8e-0c
> cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu1: 256KB 64b/line 8-way L2 cache
> cpu1: smt 0, core 1, package 0
> cpu2 at mainbus0: apid 4 (application processor)
> cpu2: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 1125.81 MHz, 06-8e-0c
> cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu2: 256KB 64b/line 8-way L2 cache
> cpu2: smt 0, core 2, package 0
> cpu3 at mainbus0: apid 6 (application processor)
> cpu3: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 1011.36 MHz, 06-8e-0c
> cpu3: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu3: 256KB 64b/line 8-way L2 cache
> cpu3: smt 0, core 3, package 0
> cpu4 at mainbus0: apid 1 (application processor)
> cpu4: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 997.68 MHz, 06-8e-0c
> cpu4: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu4: 256KB 64b/line 8-way L2 cache
> cpu4: smt 1, core 0, package 0
> cpu5 at mainbus0: apid 3 (application processor)
> cpu5: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 997.69 MHz, 06-8e-0c
> cpu5: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu5: 256KB 64b/line 8-way L2 cache
> cpu5: smt 1, core 1, package 0
> cpu6 at mainbus0: apid 5 (application processor)
> cpu6: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 997.68 MHz, 06-8e-0c
> cpu6: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu6: 256KB 64b/line 8-way L2 cache
> cpu6: smt 1, core 2, package 0
> cpu7 at mainbus0: apid 7 (application processor)
> cpu7: Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 997.68 MHz, 06-8e-0c
> cpu7: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu7: 256KB 64b/line 8-way L2 cache
> cpu7: smt 1, core 3, package 0
> ioapic0 at mainbus0: apid 2 pa 0xfec00000, version 20, 120 pins
> acpimcfg0 at acpi0
> acpimcfg0: addr 0xe0000000, bus 0-255
> acpiec0 at acpi0
> acpiprt0 at acpi0: bus 0 (PCI0)
> acpiprt1 at acpi0: bus 1 (RP01)
> acpiprt2 at acpi0: bus -1 (RP02)
> acpiprt3 at acpi0: bus -1 (RP03)
> acpiprt4 at acpi0: bus -1 (RP04)
> acpiprt5 at acpi0: bus 2 (RP05)
> acpiprt6 at acpi0: bus -1 (RP06)
> acpiprt7 at acpi0: bus -1 (RP07)
> acpiprt8 at acpi0: bus -1 (RP08)
> acpiprt9 at acpi0: bus 60 (RP09)
> acpiprt10 at acpi0: bus -1 (RP10)
> acpiprt11 at acpi0: bus -1 (RP11)
> acpiprt12 at acpi0: bus -1 (RP12)
> acpiprt13 at acpi0: bus 61 (RP13)
> acpiprt14 at acpi0: bus -1 (RP14)
> acpiprt15 at acpi0: bus -1 (RP15)
> acpiprt16 at acpi0: bus -1 (RP16)
> acpiprt17 at acpi0: bus -1 (RP17)
> acpiprt18 at acpi0: bus -1 (RP18)
> acpiprt19 at acpi0: bus -1 (RP19)
> acpiprt20 at acpi0: bus -1 (RP20)
> acpiprt21 at acpi0: bus -1 (RP21)
> acpiprt22 at acpi0: bus -1 (RP22)
> acpiprt23 at acpi0: bus -1 (RP23)
> acpiprt24 at acpi0: bus -1 (RP24)
> acpipwrres0 at acpi0: PUBS, resource for XHC_
> acpipwrres1 at acpi0: USBC, resource for XDCI
> acpipwrres2 at acpi0: PXP_, resource for RP07, PXSX
> acpipwrres3 at acpi0: V0PR
> acpipwrres4 at acpi0: V1PR
> acpipwrres5 at acpi0: V2PR
> acpipwrres6 at acpi0: WRST
> acpicpu0 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
> acpicpu1 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
> acpicpu2 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
> acpicpu3 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
> acpicpu4 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
> acpicpu5 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
> acpicpu6 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
> acpicpu7 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
> acpitz0 at acpi0: critical temperature is 128 degC
> acpipwrres7 at acpi0: PIN_
> acpipwrres8 at acpi0: PINP
> acpipci0 at acpi0 PCI0: 0x00000010 0x00000011 0x00000000
> extent `acpipci0 pcibus' (0x0 - 0xff), flags=0
>      0xff - 0xff
> extent `acpipci0 pciio' (0x0 - 0xffffffff), flags=0
>      0xcf8 - 0xcff
>      0x10000 - 0xffffffff
> extent `acpipci0 pcimem' (0x0 - 0xffffffffffffffff), flags=0
>      0x0 - 0x9ffff
>      0xc0000 - 0x697fffff
>      0xe0000000 - 0xfc7fffff
>      0xfe800000 - 0xffffffffffffffff
> acpithinkpad0 at acpi0: version 2.0
> acpiac0 at acpi0: AC unit offline
> acpibat0 at acpi0: BAT0 model "02DL008" serial   659 type LiP oem "SMP"
> "LEN0100" at acpi0 not configured
> "INT3403" at acpi0 not configured
> "INT3403" at acpi0 not configured
> acpicmos0 at acpi0
> "INT34BB" at acpi0 not configured
> "PNP0C14" at acpi0 not configured
> acpibtn0 at acpi0: SLPB
> "PNP0C14" at acpi0 not configured
> "PNP0C14" at acpi0 not configured
> acpibtn1 at acpi0: LID_
> "PNP0C14" at acpi0 not configured
> "PNP0C14" at acpi0 not configured
> "PNP0C14" at acpi0 not configured
> "INT3400" at acpi0 not configured
> "USBC000" at acpi0 not configured
> acpivideo0 at acpi0: GFX0
> acpivout0 at acpivideo0: DD1F
> cpu0: Enhanced SpeedStep 1586 MHz: speeds: 2101, 2100, 1900, 1800, 1700, 1600, 1500, 1400, 1200, 1100, 1000, 800, 700, 600, 500, 400 MHz
> pci0 at mainbus0 bus 0
> pchb0 at pci0 dev 0 function 0 "Intel Core 8G Host" rev 0x0c
> inteldrm0 at pci0 dev 2 function 0 "Intel UHD Graphics 620" rev 0x02
> drm0 at inteldrm0
> inteldrm0: msi, COFFEELAKE, gen 9
> "Intel Core 6G Thermal" rev 0x0c at pci0 dev 4 function 0 not configured
> "Intel Core GMM" rev 0x00 at pci0 dev 8 function 0 not configured
> pchtemp0 at pci0 dev 18 function 0 "Intel 300 Series Thermal" rev 0x30
> xhci0 at pci0 dev 20 function 0 "Intel 300 Series xHCI" rev 0x30: msi, xHCI 1.10
> usb0 at xhci0: USB revision 3.0
> uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 3.00/1.00 addr 1
> "Intel 300 Series Shared SRAM" rev 0x30 at pci0 dev 20 function 2 not configured
> iwm0 at pci0 dev 20 function 3 "Intel Dual Band Wireless AC 9560" rev 0x30, msix
> dwiic0 at pci0 dev 21 function 0 "Intel 300 Series I2C" rev 0x30: apic 2 int 16
> iic0 at dwiic0
> "Intel 300 Series MEI" rev 0x30 at pci0 dev 22 function 0 not configured
> ppb0 at pci0 dev 28 function 0 "Intel 300 Series PCIE" rev 0xf0: msi
> pci1 at ppb0 bus 1
> rtsx0 at pci1 dev 0 function 0 "Realtek RTS522A Card Reader" rev 0x01: msi
> sdmmc0 at rtsx0: 4-bit, dma
> ppb1 at pci0 dev 28 function 4 "Intel 300 Series PCIE" rev 0xf0: msi
> pci2 at ppb1 bus 2
> ppb2 at pci0 dev 29 function 0 "Intel 300 Series PCIE" rev 0xf0: msi
> pci3 at ppb2 bus 60
> ppb3 at pci0 dev 29 function 4 "Intel 300 Series PCIE" rev 0xf0: msi
> pci4 at ppb3 bus 61
> nvme0 at pci4 dev 0 function 0 "Samsung SM981/PM981 NVMe" rev 0x00: msix, NVMe 1.3
> nvme0: Samsung SSD 970 EVO 2TB, firmware 1B2QEXE7, serial XXXXXXXXXX
> scsibus1 at nvme0: 2 targets, initiator 0
> sd0 at scsibus1 targ 1 lun 0: <NVMe, Samsung SSD 970, 1B2Q>
> sd0: 1907729MB, 512 bytes/sector, 3907029168 sectors
> pcib0 at pci0 dev 31 function 0 "Intel 300 Series LPC" rev 0x30
> azalia0 at pci0 dev 31 function 3 "Intel 300 Series HD Audio" rev 0x30: msi
> azalia0: codecs: Realtek/0x0257, Intel/0x280b, using Realtek/0x0257
> audio0 at azalia0
> ichiic0 at pci0 dev 31 function 4 "Intel 300 Series SMBus" rev 0x30: apic 2 int 16
> iic1 at ichiic0
> "Intel 300 Series SPI" rev 0x30 at pci0 dev 31 function 5 not configured
> em0 at pci0 dev 31 function 6 "Intel I219-LM" rev 0x30: msi, address XXXXXXXXX
> isa0 at pcib0
> isadma0 at isa0
> pckbc0 at isa0 port 0x60/5 irq 1 irq 12
> pckbd0 at pckbc0 (kbd slot)
> wskbd0 at pckbd0: console keyboard
> pms0 at pckbc0 (aux slot)
> wsmouse0 at pms0 mux 0
> wsmouse1 at pms0 mux 0
> pms0: Synaptics clickpad, firmware 10.32, 0x1e2a1 0x940300 0x378f40 0xf00aa3 0x12e800
> pcppi0 at isa0 port 0x61
> spkr0 at pcppi0
> vmm0 at mainbus0: VMX/EPT
> efifb at mainbus0 not configured
> uhidev0 at uhub0 port 6 configuration 1 interface 0 "ELAN Touchscreen" rev 2.00/57.13 addr 2
> uhidev0: iclass 3/0, 68 report ids
> ums0 at uhidev0 reportid 1: 1 button, tip
> wsmouse2 at ums0 mux 0
> uhid0 at uhidev0 reportid 2: input=64, output=0, feature=0
> uhid1 at uhidev0 reportid 3: input=0, output=63, feature=0
> uhid2 at uhidev0 reportid 4: input=19, output=0, feature=0
> uhid3 at uhidev0 reportid 10: input=0, output=0, feature=1
> ums1 at uhidev0 reportid 68
> ums1: mouse has no X report
> vscsi0 at root
> scsibus2 at vscsi0: 256 targets
> softraid0 at root
> scsibus3 at softraid0: 256 targets
> sd1 at scsibus3 targ 1 lun 0: <OPENBSD, SR CRYPTO, 006>
> sd1: 1907728MB, 512 bytes/sector, 3907027553 sectors
> root on sd1a (e32608d644f1d59c.a) swap on sd1b dump on sd1b
> inteldrm0: 1920x1080, 32bpp
> wsdisplay0 at inteldrm0 mux 1: console (std, vt100 emulation), using wskbd0
> wsdisplay0: screen 1-5 added (std, vt100 emulation)
> iwm0: hw rev 0x310, fw ver 34.3125811985.0, address XXXXXXXXXX
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: device timeout
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: device timeout
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: device timeout
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
> iwm0: fatal firmware error
>
>

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Landry Breuil-5
In reply to this post by Landry Breuil-5
On Fri, Jun 26, 2020 at 06:14:48PM +0200, Landry Breuil wrote:

> On Fri, Jun 26, 2020 at 02:45:53PM +0200, Stefan Sperling wrote:
> > This patch adds support for 11n Tx aggregation to iwm(4).
> >
> > Please help with testing if you can by running the patch and using wifi
> > as usual. Nothing should change, except that Tx speed may potentially
> > improve. If you have time to run before/after performance measurements with
> > tcpbench or such, that would be nice. But it's not required for testing.
> >
> > If Tx aggregation is active then netstat will show a non-zero output block ack
> > agreement counter:
> >
> > $ netstat -W iwm0 | grep 'output block'
> >         3 new output block ack agreements
> >         0 output block ack agreements timed out
> >
> > It would be great to get at least one test for all the chipsets the driver
> > supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> > The behaviour of the access point also matters a great deal. It won't
> > hurt to test the same chipset against several different access points.
> >
> > I have tested this version on 8265 only so far. I've run older revisions
> > of this patch on 7265 so I'm confident that this chip will work, too.
> > So far, the APs I have tested against are athn(4) in 11a mode and in 11n
> > mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5Ghz channels.
>
> no difference on X1c3 w/
> iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7265" rev 0x59, msi
> iwm0: hw rev 0x210, fw ver 17.3216344376.0,
>
> using a crappy old fonera as AP, serving as a bridge to gw w/ tcpbench.
>
> bandwidth min/avg/max/std-dev = 22.519/22.704/22.995/0.162 Mbps
>
> same bw both ways it seems.

so no change against this old AP, which selects:
        media: IEEE802.11 autoselect (OFDM48 mode 11g)
or sometimes
        media: IEEE802.11 autoselect (OFDM12 mode 11g)
or
        media: IEEE802.11 autoselect (OFDM6 mode 11g)

but if i connect to the ISP's box wifi, which selects:
        media: IEEE802.11 autoselect (HT-MCS8 mode 11n)

the performance is horrible, i have a lot of lag, and tcpbench says:
bandwidth min/avg/max/std-dev = 0.000/1.576/10.069/2.781 Mbps

i have some iwm firmware errors in dmesg.

without the patch, it's about the same:
bandwidth min/avg/max/std-dev = 0.000/1.836/9.846/2.292 Mbps

but no firmware errors afaict.
so dunno if the patch itself changes something, but the perf with the
ISP AP is awful. Can't remember if it was the case before, as I seldom
use it with OpenBSD as a client.

Landry

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Tobias Heider-2
In reply to this post by Stefan Sperling-5
Works for me on a 7260.

[ ID] Interval       Transfer     Bandwidth
[  3]  0.0-10.1 sec   108 MBytes  90.1 Mbits/sec

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Johan Huldtgren-2
In reply to this post by Johan Huldtgren-2
On 2020-06-26 20:11, Johan Huldtgren wrote:

> hello,
>
> On 2020-06-26 14:45, Stefan Sperling wrote:
> > It would be great to get at least one test for all the chipsets the driver
> > supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> > The behaviour of the access point also matters a great deal. It won't
> > hurt to test the same chipset against several different access points.
>
> tested on:
>
> iwm0 at pci1 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
>
> AP is a Ruckus 7363.
>
> $ netstat -W iwm0 | grep "output block"
>         6 new output block ack agreements
>         0 output block ack agreements timed out
>
> Before:
>
> bandwidth min/avg/max/std-dev = 16.780/18.325/19.939/1.235 Mbps
>
> After:
>
> bandwidth min/avg/max/std-dev = 0.000/15.559/51.631/19.548 Mbps

Testing against a slightly different AP (Ruckus 7372):

before patch:

bandwidth min/avg/max/std-dev = 0.092/14.665/22.589/9.992 Mbps

after patch:

bandwidth min/avg/max/std-dev = 7.020/24.596/41.121/11.300 Mbps

This is the reported mode:

media: IEEE802.11 autoselect (HT-MCS13 mode 11n)

.jh

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Tracey Emery
In reply to this post by Stefan Sperling-5
On Fri, Jun 26, 2020 at 02:45:53PM +0200, Stefan Sperling wrote:

> This patch adds support for 11n Tx aggregation to iwm(4).
>
> Please help with testing if you can by running the patch and using wifi
> as usual. Nothing should change, except that Tx speed may potentially
> improve. If you have time to run before/after performance measurements with
> tcpbench or such, that would be nice. But it's not required for testing.
>
> If Tx aggregation is active then netstat will show a non-zero output block ack
> agreement counter:
>
> $ netstat -W iwm0 | grep 'output block'
>         3 new output block ack agreements
>         0 output block ack agreements timed out
>
> It would be great to get at least one test for all the chipsets the driver
> supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> The behaviour of the access point also matters a great deal. It won't
> hurt to test the same chipset against several different access points.
>
> I have tested this version on 8265 only so far. I've run older revisions
> of this patch on 7265 so I'm confident that this chip will work, too.
> So far, the APs I have tested against are athn(4) in 11a mode and in 11n
> mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5Ghz channels.
>

Sure you've got plenty of 8265 tests, but the diff tripled my speed
against my apple airport extreme.

iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi

--

Tracey Emery

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Denis Fondras
On Fri, Jun 26, 2020 at 02:45:53PM +0200, Stefan Sperling wrote:
> This patch adds support for 11n Tx aggregation to iwm(4).

iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7260" rev 0x73, msi

AP is Zyxel USG40W

Before :
bandwidth min/avg/max/std-dev = 9.800/14.000/14.214/0.606 Mbps

After :
bandwidth min/avg/max/std-dev = 8.124/47.270/57.076/8.906 Mbps

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Jesper Wallin
In reply to this post by Stefan Sperling-5
Tested on a "Intel Dual Band Wireless-AC 9260" rev 0x29, msix
(hw rev 0x320, fw ver 34.3125811985.0)

I seem to be getting "iwm0: fatal firmware error" a few seconds after
the 4-way handshake.  I can send a few packets, so it sure connects
and all, but then it fails shortly after.

iwm0: begin active scan
iwm0: INIT -> SCAN
iwm0: end active scan
iwm0: + 70:73:cb:cb:c3:86   40   +45 54M   ess  privacy   rsn  "FRA"
iwm0: SCAN -> AUTH
iwm0: sending auth to 70:73:cb:cb:c3:86 on channel 40 mode 11a
iwm0: AUTH -> ASSOC
iwm0: sending assoc_req to 70:73:cb:cb:c3:86 on channel 40 mode 11a
iwm0: ASSOC -> RUN
iwm0: associated with 70:73:cb:cb:c3:86 ssid "FRA" channel 40 start MCS 0 long preamble short slot time HT enabled
iwm0: missed beacon threshold set to 30 beacons, beacon interval is 100 TU
iwm0: received msg 1/4 of the 4-way handshake from 70:73:cb:cb:c3:86
iwm0: sending msg 2/4 of the 4-way handshake to 70:73:cb:cb:c3:86
iwm0: received msg 3/4 of the 4-way handshake from 70:73:cb:cb:c3:86
iwm0: sending msg 4/4 of the 4-way handshake to 70:73:cb:cb:c3:86
iwm0: sending action to 70:73:cb:cb:c3:86 on channel 40 mode 11n
iwm0: fatal firmware error

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Uwe Werler
In reply to this post by Stefan Sperling-5
Hi Stefan,

for me the patch works in mode 11n:

before (OpenBSD 6.7-current (GENERIC.MP) #304: Fri Jun 26 02:08:50 MDT 2020)
bandwidth min/avg/max/std-dev = 2.354/12.319/15.391/3.850 Mbps

with patch (OpenBSD 6.7-current (GENERIC.MP) #0: Mon Jun 29 09:35:24 GMT 2020)
bandwidth min/avg/max/std-dev = 12.174/31.411/57.746/15.154 Mbps

iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
iwm0: hw rev 0x230, fw ver 34.0.1, address 60:f6:77:bc:3a:04

(mode 11g: bandwidth min/avg/max/std-dev = 0.620/0.844/1.101/0.153 Mbps)

mbk Uwe


On 26 Jun 14:45, Stefan Sperling wrote:

> This patch adds support for 11n Tx aggregation to iwm(4).
>
> Please help with testing if you can by running the patch and using wifi
> as usual. Nothing should change, except that Tx speed may potentially
> improve. If you have time to run before/after performance measurements with
> tcpbench or such, that would be nice. But it's not required for testing.
>
> If Tx aggregation is active then netstat will show a non-zero output block ack
> agreement counter:
>
> $ netstat -W iwm0 | grep 'output block'
>         3 new output block ack agreements
>         0 output block ack agreements timed out
>
> It would be great to get at least one test for all the chipsets the driver
> supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> The behaviour of the access point also matters a great deal. It won't
> hurt to test the same chipset against several different access points.
>
> I have tested this version on 8265 only so far. I've run older revisions
> of this patch on 7265 so I'm confident that this chip will work, too.
> So far, the APs I have tested against are athn(4) in 11a mode and in 11n
> mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5Ghz channels.
>
> diff refs/heads/master refs/heads/txagg
> blob - 3a75d07a60a7eb4c66540474e47aeffd7a85250a
> blob + 853bdd1290ad509f5fce7b5bf20550f458a2b460
> --- sys/dev/pci/if_iwm.c
> +++ sys/dev/pci/if_iwm.c
> @@ -144,6 +144,8 @@
>  #include <net80211/ieee80211_amrr.h>
>  #include <net80211/ieee80211_mira.h>
>  #include <net80211/ieee80211_radiotap.h>
> +#include <net80211/ieee80211_priv.h> /* for SEQ_LT */
> +#undef DPRINTF /* defined in ieee80211_priv.h */
>  
>  #define DEVNAME(_s) ((_s)->sc_dev.dv_xname)
>  
> @@ -299,7 +301,8 @@ int iwm_nic_rx_mq_init(struct iwm_softc *);
>  int iwm_nic_tx_init(struct iwm_softc *);
>  int iwm_nic_init(struct iwm_softc *);
>  int iwm_enable_ac_txq(struct iwm_softc *, int, int);
> -int iwm_enable_txq(struct iwm_softc *, int, int, int);
> +int iwm_enable_txq(struct iwm_softc *, int, int, int, int, uint8_t,
> +    uint16_t);
>  int iwm_post_alive(struct iwm_softc *);
>  struct iwm_phy_db_entry *iwm_phy_db_get_section(struct iwm_softc *, uint16_t,
>      uint16_t);
> @@ -334,12 +337,12 @@ void iwm_ampdu_rx_stop(struct ieee80211com *, struct i
>      uint8_t);
>  void iwm_sta_rx_agg(struct iwm_softc *, struct ieee80211_node *, uint8_t,
>      uint16_t, uint16_t, int);
> -#ifdef notyet
> +void iwm_sta_tx_agg(struct iwm_softc *, struct ieee80211_node *, uint8_t,
> +    uint16_t, uint16_t, int);
>  int iwm_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
>      uint8_t);
>  void iwm_ampdu_tx_stop(struct ieee80211com *, struct ieee80211_node *,
>      uint8_t);
> -#endif
>  void iwm_ba_task(void *);
>  
>  int iwm_parse_nvm_data(struct iwm_softc *, const uint16_t *,
> @@ -372,14 +375,25 @@ int iwm_rxmq_get_signal_strength(struct iwm_softc *, s
>  void iwm_rx_rx_phy_cmd(struct iwm_softc *, struct iwm_rx_packet *,
>      struct iwm_rx_data *);
>  int iwm_get_noise(const struct iwm_statistics_rx_non_phy *);
> +void iwm_txq_advance(struct iwm_softc *, struct iwm_tx_ring *, int);
> +void iwm_ampdu_tx_done(struct iwm_softc *, struct iwm_cmd_header *,
> +    struct iwm_node *, struct iwm_tx_ring *, uint32_t, uint8_t,
> +    uint8_t, uint16_t, int, struct iwm_agg_tx_status *);
>  int iwm_ccmp_decap(struct iwm_softc *, struct mbuf *,
>      struct ieee80211_node *);
>  void iwm_rx_frame(struct iwm_softc *, struct mbuf *, int, uint32_t, int, int,
>      uint32_t, struct ieee80211_rxinfo *, struct mbuf_list *);
> -void iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_rx_packet *,
> -    struct iwm_node *, int, int);
> +void iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_tx_resp *,
> +    struct iwm_node *, int, int, int);
> +void iwm_txd_done(struct iwm_softc *, struct iwm_tx_data *);
>  void iwm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *,
>      struct iwm_rx_data *);
> +void iwm_clear_oactive(struct iwm_softc *, struct iwm_tx_ring *);
> +void iwm_mira_choose(struct iwm_softc *, struct ieee80211_node *);
> +void iwm_ampdu_rate_control(struct iwm_softc *, struct ieee80211_node *,
> +    struct iwm_tx_ring *, int, uint16_t, uint16_t);
> +void iwm_rx_ba(struct iwm_softc *, struct iwm_rx_packet *,
> +    struct iwm_rx_data *);
>  void iwm_rx_bmiss(struct iwm_softc *, struct iwm_rx_packet *,
>      struct iwm_rx_data *);
>  int iwm_binding_cmd(struct iwm_softc *, struct iwm_node *, uint32_t);
> @@ -399,6 +413,7 @@ int iwm_send_cmd_pdu_status(struct iwm_softc *, uint32
>  void iwm_free_resp(struct iwm_softc *, struct iwm_host_cmd *);
>  void iwm_cmd_done(struct iwm_softc *, int, int, int);
>  void iwm_update_sched(struct iwm_softc *, int, int, uint8_t, uint16_t);
> +void iwm_reset_sched(struct iwm_softc *, int, int, uint8_t);
>  const struct iwm_rate *iwm_tx_fill_cmd(struct iwm_softc *, struct iwm_node *,
>      struct ieee80211_frame *, struct iwm_tx_cmd *);
>  int iwm_tx(struct iwm_softc *, struct mbuf *, struct ieee80211_node *, int);
> @@ -1306,17 +1321,17 @@ iwm_alloc_tx_ring(struct iwm_softc *sc, struct iwm_tx_
>   * The command is queue 0 (sc->txq[0]), and 4 mgmt/data frame queues
>   * are sc->tqx[IWM_DQA_MIN_MGMT_QUEUE + ac], i.e. sc->txq[5:8],
>   * in order to provide one queue per EDCA category.
> + * Tx aggregation requires additional queues, one queue per TID for
> + * which aggregation is enabled. We map TID 0-7 to sc->txq[10:17].
>   *
> - * In non-DQA mode, we use rings 0 through 9 (0-3 are EDCA, 9 is cmd).
> + * In non-DQA mode, we use rings 0 through 9 (0-3 are EDCA, 9 is cmd),
> + * and Tx aggregation is not supported.
>   *
> - * Tx aggregation will require additional queues (one queue per TID
> - * for which aggregation is enabled) but we do not implement this yet.
> - *
>   * Unfortunately, we cannot tell if DQA will be used until the
>   * firmware gets loaded later, so just allocate sufficient rings
>   * in order to satisfy both cases.
>   */
> - if (qid > IWM_CMD_QUEUE)
> + if (qid > IWM_LAST_AGG_TX_QUEUE)
>   return 0;
>  
>   size = IWM_TX_RING_COUNT * sizeof(struct iwm_device_cmd);
> @@ -1380,6 +1395,7 @@ iwm_reset_tx_ring(struct iwm_softc *sc, struct iwm_tx_
>   bus_dmamap_sync(sc->sc_dmat, ring->desc_dma.map, 0,
>      ring->desc_dma.size, BUS_DMASYNC_PREWRITE);
>   sc->qfullmsk &= ~(1 << ring->qid);
> + sc->qenablemsk &= ~(1 << ring->qid);
>   /* 7000 family NICs are locked while commands are in progress. */
>   if (ring->qid == sc->cmdqid && ring->queued > 0) {
>   if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000)
> @@ -2208,6 +2224,18 @@ const uint8_t iwm_ac_to_tx_fifo[] = {
>   IWM_TX_FIFO_VO,
>  };
>  
> +/* Map a TID to an ieee80211_edca_ac category. */
> +const uint8_t iwm_tid_to_ac[IWM_MAX_TID_COUNT] = {
> + EDCA_AC_BE,
> + EDCA_AC_BK,
> + EDCA_AC_BK,
> + EDCA_AC_BE,
> + EDCA_AC_VI,
> + EDCA_AC_VI,
> + EDCA_AC_VO,
> + EDCA_AC_VO,
> +};
> +
>  int
>  iwm_enable_ac_txq(struct iwm_softc *sc, int qid, int fifo)
>  {
> @@ -2250,28 +2278,48 @@ iwm_enable_ac_txq(struct iwm_softc *sc, int qid, int f
>  }
>  
>  int
> -iwm_enable_txq(struct iwm_softc *sc, int sta_id, int qid, int fifo)
> +iwm_enable_txq(struct iwm_softc *sc, int sta_id, int qid, int fifo,
> +    int aggregate, uint8_t tid, uint16_t ssn)
>  {
> + struct iwm_tx_ring *ring = &sc->txq[qid];
>   struct iwm_scd_txq_cfg_cmd cmd;
> - int err;
> + int err, idx, scd_bug;
>  
>   iwm_nic_assert_locked(sc);
>  
> - IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | 0);
> + /*
> + * If we need to move the SCD write pointer by steps of
> + * 0x40, 0x80 or 0xc0, it gets stuck.
> + * This is really ugly, but this is the easiest way out for
> + * this sad hardware issue.
> + * This bug has been fixed on devices 9000 and up.
> + */
> + scd_bug = !sc->sc_mqrx_supported &&
> + !((ssn - ring->cur) & 0x3f) &&
> + (ssn != ring->cur);
> + if (scd_bug)
> + ssn = (ssn + 1) & 0xfff;
>  
> + idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
> + IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | idx);
> + ring->cur = idx;
> + ring->tail = idx;
> +
>   memset(&cmd, 0, sizeof(cmd));
> + cmd.tid = tid;
>   cmd.scd_queue = qid;
>   cmd.enable = 1;
>   cmd.sta_id = sta_id;
>   cmd.tx_fifo = fifo;
> - cmd.aggregate = 0;
> + cmd.aggregate = aggregate;
> + cmd.ssn = htole16(ssn);
>   cmd.window = IWM_FRAME_LIMIT;
>  
> - err = iwm_send_cmd_pdu(sc, IWM_SCD_QUEUE_CFG, 0,
> -    sizeof(cmd), &cmd);
> + err = iwm_send_cmd_pdu(sc, IWM_SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd);
>   if (err)
>   return err;
>  
> + sc->qenablemsk |= (1 << qid);
>   return 0;
>  }
>  
> @@ -2950,8 +2998,12 @@ iwm_sta_rx_agg(struct iwm_softc *sc, struct ieee80211_
>   if (start) {
>   sc->sc_rx_ba_sessions++;
>   ieee80211_addba_req_accept(ic, ni, tid);
> - } else if (sc->sc_rx_ba_sessions > 0)
> - sc->sc_rx_ba_sessions--;
> + in->ampdu_rx_tid_mask |= (1 << tid);
> + } else {
> + in->ampdu_rx_tid_mask &= ~(1 << tid);
> + if (sc->sc_rx_ba_sessions > 0)
> + sc->sc_rx_ba_sessions--;
> + }
>   } else if (start)
>   ieee80211_addba_req_refuse(ic, ni, tid);
>  
> @@ -2959,6 +3011,75 @@ iwm_sta_rx_agg(struct iwm_softc *sc, struct ieee80211_
>  }
>  
>  void
> +iwm_sta_tx_agg(struct iwm_softc *sc, struct ieee80211_node *ni, uint8_t tid,
> +    uint16_t ssn, uint16_t winsize, int start)
> +{
> + struct ieee80211com *ic = &sc->sc_ic;
> + struct iwm_node *in = (void *)ni;
> + int qid = IWM_FIRST_AGG_TX_QUEUE + tid;
> + enum ieee80211_edca_ac ac = iwm_tid_to_ac[tid];
> + int fifo = iwm_ac_to_tx_fifo[ac];
> + int err;
> +
> + if (qid > IWM_LAST_AGG_TX_QUEUE || !iwm_nic_lock(sc)) {
> + ieee80211_addba_resp_refuse(ic, ni, tid,
> +    IEEE80211_STATUS_UNSPECIFIED);
> + return;
> + }
> +
> + if (start) {
> + if ((sc->qenablemsk & (1 << qid)) == 0) {
> + struct iwm_tx_ring *ring = &sc->txq[qid];
> +
> + err = iwm_enable_txq(sc, IWM_STATION_ID, qid, fifo,
> +    1, tid, ssn);
> + if (err)
> + goto done;
> + /*
> + * If iwm_enable_txq() employed the SCD hardware bug
> + * workaround we must skip the frame with seqnum SSN.
> + */
> + if (IWM_AGG_SSN_TO_TXQ_IDX(ring->cur) !=
> +    IWM_AGG_SSN_TO_TXQ_IDX(ssn)) {
> + ssn = (ssn + 1) & 0xfff;
> + KASSERT(IWM_AGG_SSN_TO_TXQ_IDX(ring->cur) ==
> +    IWM_AGG_SSN_TO_TXQ_IDX(ssn));
> + ieee80211_output_ba_move_window(ic, ni,
> +    tid, ssn);
> + ni->ni_qos_txseqs[tid] = ssn;
> + }
> + }
> + in->ampdu_tx_tid_mask |= (1 << tid);
> + err = iwm_add_sta_cmd(sc, in, 1);
> + if (err) {
> + printf("%s: could not update sta (error %d)\n",
> +    DEVNAME(sc), err);
> + in->ampdu_tx_tid_mask &= ~(1 << tid);
> + goto done;
> + }
> + } else {
> + in->ampdu_tx_tid_mask &= ~(1 << tid);
> + err = iwm_add_sta_cmd(sc, in, 1);
> + if (err) {
> + printf("%s: could not update sta (error %d)\n",
> +    DEVNAME(sc), err);
> + in->ampdu_tx_tid_mask |= (1 << tid);
> + goto done;
> + }
> + }
> +
> +done:
> + iwm_nic_unlock(sc);
> + if (start) {
> + if (err)
> + ieee80211_addba_resp_refuse(ic, ni, tid,
> +    IEEE80211_STATUS_UNSPECIFIED);
> + else
> + ieee80211_addba_resp_accept(ic, ni, tid);
> + }
> +}
> +
> +void
>  iwm_htprot_task(void *arg)
>  {
>   struct iwm_softc *sc = arg;
> @@ -3002,19 +3123,53 @@ iwm_ba_task(void *arg)
>   struct ieee80211com *ic = &sc->sc_ic;
>   struct ieee80211_node *ni = ic->ic_bss;
>   int s = splnet();
> + int tid;
>  
> - if (sc->sc_flags & IWM_FLAG_SHUTDOWN) {
> + if ((sc->sc_flags & IWM_FLAG_SHUTDOWN) ||
> +    ic->ic_state != IEEE80211_S_RUN) {
>   refcnt_rele_wake(&sc->task_refs);
>   splx(s);
>   return;
>   }
>  
> - if (sc->ba_start)
> - iwm_sta_rx_agg(sc, ni, sc->ba_tid, sc->ba_ssn,
> -    sc->ba_winsize, 1);
> - else
> - iwm_sta_rx_agg(sc, ni, sc->ba_tid, 0, 0, 0);
> + if (sc->ba_flags & IWM_RX_BA_START) {
> + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> + if ((sc->rx_ba_start.tid_mask & (1 << tid)) == 0)
> + continue;
> + iwm_sta_rx_agg(sc, ni, tid, sc->rx_ba_start.ssn[tid],
> +    sc->rx_ba_start.winsize[tid], 1);
> + sc->rx_ba_start.tid_mask &= ~(1 << tid);
> + }
> + }
> +
> + if (sc->ba_flags & IWM_RX_BA_STOP) {
> + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> + if ((sc->rx_ba_stop.tid_mask & (1 << tid)) == 0)
> + continue;
> + iwm_sta_rx_agg(sc, ni, tid, 0, 0, 0);
> + sc->rx_ba_stop.tid_mask &= ~(1 << tid);
> + }
> + }
>  
> + if (sc->ba_flags & IWM_TX_BA_START) {
> + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> + if ((sc->tx_ba_start.tid_mask & (1 << tid)) == 0)
> + continue;
> + iwm_sta_tx_agg(sc, ni, tid, sc->tx_ba_start.ssn[tid],
> +    sc->tx_ba_start.winsize[tid], 1);
> + sc->tx_ba_start.tid_mask &= ~(1 << tid);
> + }
> + }
> +
> + if (sc->ba_flags & IWM_TX_BA_STOP) {
> + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> + if ((sc->tx_ba_stop.tid_mask & (1 << tid)) == 0)
> + continue;
> + iwm_sta_tx_agg(sc, ni, tid, 0, 0, 0);
> + sc->tx_ba_stop.tid_mask &= ~(1 << tid);
> + }
> + }
> +
>   refcnt_rele_wake(&sc->task_refs);
>   splx(s);
>  }
> @@ -3029,14 +3184,23 @@ iwm_ampdu_rx_start(struct ieee80211com *ic, struct iee
>  {
>   struct ieee80211_rx_ba *ba = &ni->ni_rx_ba[tid];
>   struct iwm_softc *sc = IC2IFP(ic)->if_softc;
> + struct iwm_node *in = (void *)ni;
>  
> - if (sc->sc_rx_ba_sessions >= IWM_MAX_RX_BA_SESSIONS)
> + if (tid >= IWM_MAX_TID_COUNT)
> + return EINVAL;
> +
> + if (sc->sc_rx_ba_sessions >= IWM_MAX_RX_BA_SESSIONS ||
> +    (in->ampdu_rx_tid_mask & (1 << tid)) != 0)
>   return ENOSPC;
>  
> - sc->ba_start = 1;
> - sc->ba_tid = tid;
> - sc->ba_ssn = htole16(ba->ba_winstart);
> - sc->ba_winsize = htole16(ba->ba_winsize);
> + if (sc->rx_ba_start.tid_mask & (1 << tid) ||
> +    sc->rx_ba_stop.tid_mask & (1 << tid))
> + return EAGAIN;
> +
> + sc->ba_flags |= IWM_RX_BA_START;
> + sc->rx_ba_start.tid_mask |= (1 << tid);
> + sc->rx_ba_start.ssn[tid] = htole16(ba->ba_winstart);
> + sc->rx_ba_start.winsize[tid] = htole16(ba->ba_winsize);
>   iwm_add_task(sc, systq, &sc->ba_task);
>  
>   return EBUSY;
> @@ -3051,13 +3215,69 @@ iwm_ampdu_rx_stop(struct ieee80211com *ic, struct ieee
>      uint8_t tid)
>  {
>   struct iwm_softc *sc = IC2IFP(ic)->if_softc;
> + struct iwm_node *in = (void *)ni;
>  
> - sc->ba_start = 0;
> - sc->ba_tid = tid;
> + if (in->ampdu_rx_tid_mask & (1 << tid))  {
> + sc->ba_flags |= IWM_RX_BA_STOP;
> + sc->rx_ba_stop.tid_mask |= (1 << tid);
> + iwm_add_task(sc, systq, &sc->ba_task);
> + }
> +}
> +
> +int
> +iwm_ampdu_tx_start(struct ieee80211com *ic, struct ieee80211_node *ni,
> +    uint8_t tid)
> +{
> + struct iwm_softc *sc = IC2IFP(ic)->if_softc;
> + struct iwm_node *in = (void *)ni;
> + struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
> +
> + /* We only implement Tx aggregation with DQA-capable firmware. */
> + if (!isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_DQA_SUPPORT))
> + return ENOTSUP;
> +
> + /* Ensure we can map this TID to an aggregation queue. */
> + if (tid >= IWM_MAX_TID_COUNT)
> + return EINVAL;
> +
> + /* We only support a fixed Tx aggregation window size, for now. */
> + if (ba->ba_winsize != IWM_FRAME_LIMIT)
> + return ENOTSUP;
> +
> + if ((in->ampdu_tx_tid_mask & (1 << tid)) != 0)
> + return ENOSPC;
> +
> + if (sc->tx_ba_start.tid_mask & (1 << tid) ||
> +    sc->tx_ba_stop.tid_mask & (1 << tid))
> + return EAGAIN;
> +
> + sc->ba_flags |= IWM_TX_BA_START;
> + sc->tx_ba_start.tid_mask |= (1 << tid);
> + sc->tx_ba_start.ssn[tid] = htole16(ba->ba_winstart);
> + sc->tx_ba_start.winsize[tid] = htole16(ba->ba_winsize);
>   iwm_add_task(sc, systq, &sc->ba_task);
> +
> + return EBUSY;
>  }
>  
>  void
> +iwm_ampdu_tx_stop(struct ieee80211com *ic, struct ieee80211_node *ni,
> +    uint8_t tid)
> +{
> + struct iwm_softc *sc = IC2IFP(ic)->if_softc;
> + struct iwm_node *in = (void *)ni;
> +
> + if (tid >= IWM_MAX_TID_COUNT)
> + return;
> +
> + if (in->ampdu_tx_tid_mask & (1 << tid))  {
> + sc->ba_flags |= IWM_TX_BA_STOP;
> + sc->tx_ba_stop.tid_mask |= (1 << tid);
> + iwm_add_task(sc, systq, &sc->ba_task);
> + }
> +}
> +
> +void
>  iwm_set_hw_address_8000(struct iwm_softc *sc, struct iwm_nvm_data *data,
>      const uint16_t *mac_override, const uint16_t *nvm_hw)
>  {
> @@ -4238,13 +4458,178 @@ iwm_rx_mpdu_mq(struct iwm_softc *sc, struct mbuf *m, v
>  }
>  
>  void
> -iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
> -    struct iwm_node *in, int txmcs, int txrate)
> +iwm_txq_advance(struct iwm_softc *sc, struct iwm_tx_ring *ring, int idx)
>  {
> + struct iwm_tx_data *txd;
> +
> + while (ring->tail != idx) {
> + txd = &ring->data[ring->tail];
> + if (txd->m != NULL) {
> + if (ring->qid < IWM_FIRST_AGG_TX_QUEUE)
> + DPRINTF(("%s: missed Tx completion: tail=%d "
> +    "idx=%d\n", __func__, ring->tail, idx));
> + iwm_reset_sched(sc, ring->qid, ring->tail, IWM_STATION_ID);
> + iwm_txd_done(sc, txd);
> + ring->queued--;
> + }
> + ring->tail = (ring->tail + 1) % IWM_TX_RING_COUNT;
> + }
> +}
> +
> +void
> +iwm_ampdu_tx_done(struct iwm_softc *sc, struct iwm_cmd_header *cmd_hdr,
> +    struct iwm_node *in, struct iwm_tx_ring *txq, uint32_t initial_rate,
> +    uint8_t nframes, uint8_t failure_frame, uint16_t ssn, int status,
> +    struct iwm_agg_tx_status *agg_status)
> +{
>   struct ieee80211com *ic = &sc->sc_ic;
> + int tid = cmd_hdr->qid - IWM_FIRST_AGG_TX_QUEUE;
> + struct iwm_tx_data *txdata = &txq->data[cmd_hdr->idx];
>   struct ieee80211_node *ni = &in->in_ni;
> + int txfail = (status != IWM_TX_STATUS_SUCCESS &&
> +    status != IWM_TX_STATUS_DIRECT_DONE);
> + struct ieee80211_tx_ba *ba;
> +
> + sc->sc_tx_timer = 0;
> +
> + if (ic->ic_state != IEEE80211_S_RUN)
> + return;
> +
> + if (nframes > 1) {
> + int ampdu_id, have_ampdu_id = 0, ampdu_size = 0;
> + int i;
> +
> + /* Compute the size of this A-MPDU. */
> + for (i = 0; i < nframes; i++) {
> + uint8_t qid = agg_status[i].qid;
> + uint8_t idx = agg_status[i].idx;
> +
> + if (qid != cmd_hdr->qid)
> + continue;
> +
> + txdata = &txq->data[idx];
> + if (txdata->m == NULL)
> + continue;
> +
> + ampdu_size += txdata->totlen + IEEE80211_CRC_LEN;
> + }
> +
> + /*
> + * For each subframe collect Tx status, retries, and Tx rate.
> + * (The Tx rate is the same for all subframes in this batch.)
> + */
> + for (i = 0; i < nframes; i++) {
> + uint8_t qid = agg_status[i].qid;
> + uint8_t idx = agg_status[i].idx;
> + uint16_t txstatus = (le16toh(agg_status[i].status) &
> +    IWM_AGG_TX_STATE_STATUS_MSK);
> + uint16_t trycnt = (le16toh(agg_status[i].status) &
> +    IWM_AGG_TX_STATE_TRY_CNT_MSK) >>
> +    IWM_AGG_TX_STATE_TRY_CNT_POS;
> +
> + if (qid != cmd_hdr->qid)
> + continue;
> +
> + txdata = &txq->data[idx];
> + if (txdata->m == NULL)
> + continue;
> +
> + if (initial_rate & IWM_RATE_MCS_HT_MSK)
> + txdata->ampdu_txmcs = (initial_rate &
> +    (IWM_RATE_HT_MCS_RATE_CODE_MSK |
> +    IWM_RATE_HT_MCS_NSS_MSK));
> + if (txstatus != IWM_AGG_TX_STATE_TRANSMITTED)
> + txdata->txfail++;
> + if (trycnt > 1)
> + txdata->retries++;
> +
> + /*
> + * Assign a common ID to all subframes of this A-MPDU.
> + * This ID will be used during Tx rate control to
> + * infer the ACK status of individual subframes.
> + */
> + if (!have_ampdu_id) {
> + ampdu_id = txdata->in->next_ampdu_id++;
> + have_ampdu_id = 1;
> + }
> + txdata->ampdu_id = ampdu_id;
> +
> + /*
> + * We will also need to know the total number of
> + * subframes and the size of this A-MPDU. We store
> + * this redundantly on each subframe because firmware
> + * only reports acknowledged subframes via compressed
> + * block-ack notification. This way we will know what
> + * the total number of subframes and size were even if
> + * just one of these subframes gets acknowledged.
> + */
> + txdata->ampdu_nframes = nframes;
> + txdata->ampdu_size = ampdu_size;
> + }
> + return;
> + }
> +
> + if (ni == NULL)
> + return;
> +
> + ba = &ni->ni_tx_ba[tid];
> + if (ba->ba_state != IEEE80211_BA_AGREED)
> + return;
> +
> + /* This is a final single-frame Tx attempt. */
> + DPRINTFN(3, ("%s: final tx status=0x%x qid=%d queued=%d idx=%d ssn=%u "
> +    "bitmap=0x%llx\n", __func__, status, desc->qid, txq->queued,
> +    desc->idx, ssn, ba->ba_bitmap));
> +
> + /*
> + * Skip rate control if our Tx rate is fixed.
> + * Don't report frames to MiRA which were sent at a different
> + * Tx rate than ni->ni_txmcs.
> + */
> + if (ic->ic_fixed_mcs == -1 && txdata->txmcs == ni->ni_txmcs) {
> + in->in_mn.frames++;
> + in->in_mn.agglen = 1;
> + in->in_mn.ampdu_size = txdata->totlen + IEEE80211_CRC_LEN;
> + if (failure_frame > 0)
> + in->in_mn.retries++;
> + if (txfail)
> + in->in_mn.txfail++;
> + iwm_mira_choose(sc, ni);
> + }
> +
> + if (txfail)
> + ieee80211_tx_compressed_bar(ic, ni, tid, ssn);
> + else if (!SEQ_LT(ssn, ba->ba_winstart)) {
> + /*
> + * Move window forward if SSN lies beyond end of window,
> + * otherwise we can't record the ACK for this frame.
> + * Non-acked frames which left holes in the bitmap near
> + * the beginning of the window must be discarded.
> + */
> + uint16_t s = ssn;
> + while (SEQ_LT(ba->ba_winend, s)) {
> + ieee80211_output_ba_move_window(ic, ni, tid, s);
> + iwm_txq_advance(sc, txq, IWM_AGG_SSN_TO_TXQ_IDX(s));
> + s = (s + 1) % 0xfff;
> + }
> + /* SSN should now be within window; set corresponding bit. */
> + ieee80211_output_ba_record_ack(ic, ni, tid, ssn);
> + }
> +
> + /* Move window forward up to the first hole in the bitmap. */
> + ieee80211_output_ba_move_window_to_first_unacked(ic, ni, tid, ssn);
> + iwm_txq_advance(sc, txq, IWM_AGG_SSN_TO_TXQ_IDX(ba->ba_winstart));
> +
> + iwm_clear_oactive(sc, txq);
> +}
> +
> +void
> +iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_tx_resp *tx_resp,
> +    struct iwm_node *in, int txmcs, int txrate, int qid)
> +{
> + struct ieee80211com *ic = &sc->sc_ic;
> + struct ieee80211_node *ni = &in->in_ni;
>   struct ifnet *ifp = IC2IFP(ic);
> - struct iwm_tx_resp *tx_resp = (void *)pkt->data;
>   int status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK;
>   int txfail;
>  
> @@ -4277,22 +4662,8 @@ iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_
>   in->in_mn.retries += tx_resp->failure_frame;
>   if (txfail)
>   in->in_mn.txfail += tx_resp->frame_count;
> - if (ic->ic_state == IEEE80211_S_RUN) {
> - int best_mcs;
> -
> - ieee80211_mira_choose(&in->in_mn, ic, &in->in_ni);
> - /*
> - * If MiRA has chosen a new TX rate we must update
> - * the firwmare's LQ rate table from process context.
> - * ni_txmcs may change again before the task runs so
> - * cache the chosen rate in the iwm_node structure.
> - */
> - best_mcs = ieee80211_mira_get_best_mcs(&in->in_mn);
> - if (best_mcs != in->chosen_txmcs) {
> - in->chosen_txmcs = best_mcs;
> - iwm_setrates(in, 1);
> - }
> - }
> + if (ic->ic_state == IEEE80211_S_RUN)
> + iwm_mira_choose(sc, ni);
>   }
>  
>   if (txfail)
> @@ -4313,49 +4684,91 @@ iwm_txd_done(struct iwm_softc *sc, struct iwm_tx_data
>   KASSERT(txd->in);
>   ieee80211_release_node(ic, &txd->in->in_ni);
>   txd->in = NULL;
> +
> + txd->retries = 0;
> + txd->txfail = 0;
> + txd->txmcs = 0;
> + txd->ampdu_txmcs = 0;
> + txd->txrate = 0;
>  }
>  
>  void
>  iwm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
>      struct iwm_rx_data *data)
>  {
> - struct ieee80211com *ic = &sc->sc_ic;
> - struct ifnet *ifp = IC2IFP(ic);
>   struct iwm_cmd_header *cmd_hdr = &pkt->hdr;
>   int idx = cmd_hdr->idx;
>   int qid = cmd_hdr->qid;
>   struct iwm_tx_ring *ring = &sc->txq[qid];
>   struct iwm_tx_data *txd;
> + struct iwm_tx_resp *tx_resp = (void *)pkt->data;
> + uint32_t ssn;
> + uint32_t len = iwm_rx_packet_len(pkt);
>  
>   bus_dmamap_sync(sc->sc_dmat, data->map, 0, IWM_RBUF_SIZE,
>      BUS_DMASYNC_POSTREAD);
>  
>   sc->sc_tx_timer = 0;
>  
> + /* Sanity checks. */
> + if (sizeof(*tx_resp) > len)
> + return;
> + if (qid < IWM_FIRST_AGG_TX_QUEUE && tx_resp->frame_count > 1)
> + return;
> + if (qid >= IWM_FIRST_AGG_TX_QUEUE && sizeof(*tx_resp) + sizeof(ssn) +
> +    tx_resp->frame_count * sizeof(tx_resp->status) > len)
> + return;
> +
> + /*
> + * In the multi-frame case the firmware has just transmitted a bunch
> + * of frames in an A-MPDU. The final Tx status of those frames won't
> + * be known until the peer ACKs subframes with a block ack or firmware
> + * gives up on a particular subframe.
> + * Subframes for which the firmware never sees an ACK will be retried
> + * and will eventually arrive here as a single-frame Tx failure.
> + * So there is nothing to do, for now.
> + */
> + if (tx_resp->frame_count != 1)
> + return;
> +
>   txd = &ring->data[idx];
>   if (txd->m == NULL)
>   return;
>  
> - iwm_rx_tx_cmd_single(sc, pkt, txd->in, txd->txmcs, txd->txrate);
> - iwm_txd_done(sc, txd);
> + if (qid >= IWM_FIRST_AGG_TX_QUEUE) {
> + int status;
>  
> - /*
> - * XXX Sometimes we miss Tx completion interrupts.
> - * We cannot check Tx success/failure for affected frames; just free
> - * the associated mbuf and release the associated node reference.
> - */
> - while (ring->tail != idx) {
> - txd = &ring->data[ring->tail];
> - if (txd->m != NULL) {
> - DPRINTF(("%s: missed Tx completion: tail=%d idx=%d\n",
> -    __func__, ring->tail, idx));
> - iwm_txd_done(sc, txd);
> - ring->queued--;
> - }
> - ring->tail = (ring->tail + 1) % IWM_TX_RING_COUNT;
> + memcpy(&ssn, &tx_resp->status + tx_resp->frame_count, sizeof(ssn));
> + ssn = le32toh(ssn) & 0xfff;
> + status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK;
> + iwm_ampdu_tx_done(sc, cmd_hdr, txd->in, ring,
> +    le32toh(tx_resp->initial_rate), tx_resp->frame_count,
> +    tx_resp->failure_frame, ssn, status, &tx_resp->status);
> + } else {
> + iwm_rx_tx_cmd_single(sc, tx_resp, txd->in, txd->txmcs,
> +    txd->txrate, qid);
> + iwm_reset_sched(sc, qid, idx, IWM_STATION_ID);
> + iwm_txd_done(sc, txd);
> + ring->queued--;
> +
> + /*
> + * XXX Sometimes we miss Tx completion interrupts.
> + * We cannot check Tx success/failure for affected frames;
> + * just free the associated mbuf and release the associated
> + * node reference.
> + */
> + iwm_txq_advance(sc, ring, idx);
> + iwm_clear_oactive(sc, ring);
>   }
> +}
>  
> - if (--ring->queued < IWM_TX_RING_LOMARK) {
> +void
> +iwm_clear_oactive(struct iwm_softc *sc, struct iwm_tx_ring *ring)
> +{
> + struct ieee80211com *ic = &sc->sc_ic;
> + struct ifnet *ifp = IC2IFP(ic);
> +
> + if (ring->queued < IWM_TX_RING_LOMARK) {
>   sc->qfullmsk &= ~(1 << ring->qid);
>   if (sc->qfullmsk == 0 && ifq_is_oactive(&ifp->if_snd)) {
>   ifq_clr_oactive(&ifp->if_snd);
> @@ -4370,6 +4783,183 @@ iwm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_pack
>  }
>  
>  void
> +iwm_mira_choose(struct iwm_softc *sc, struct ieee80211_node *ni)
> +{
> + struct ieee80211com *ic = &sc->sc_ic;
> + struct iwm_node *in = (void *)ni;
> + int best_mcs = ieee80211_mira_get_best_mcs(&in->in_mn);
> +
> + ieee80211_mira_choose(&in->in_mn, ic, ni);
> +
> + /*
> + * Update firmware's LQ retry table if MiRA has chosen a new MCS.
> + *
> + * We only need to do this if the best MCS has changed because
> + * we ask firmware to use a fixed MCS while MiRA is probing a
> + * candidate MCS.
> + * While not probing we ask firmware to retry at lower rates in case
> + * Tx at the newly chosen best MCS ends up failing, and then report
> + * any resulting Tx retries to MiRA in order to trigger probing.
> + */
> + if (best_mcs != ieee80211_mira_get_best_mcs(&in->in_mn)) {
> + in->chosen_txmcs = best_mcs;
> + iwm_setrates(in, 1);
> + }
> +}
> +
> +void
> +iwm_ampdu_rate_control(struct iwm_softc *sc, struct ieee80211_node *ni,
> +    struct iwm_tx_ring *txq, int tid, uint16_t seq, uint16_t ssn)
> +{
> + struct ieee80211com *ic = &sc->sc_ic;
> + struct iwm_node *in = (void *)ni;
> + struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
> + int min_ampdu_id, max_ampdu_id, id;
> + int idx, end_idx;
> +
> + /* Determine the min/max IDs we assigned to AMPDUs in this range. */
> + idx = IWM_AGG_SSN_TO_TXQ_IDX(seq);
> + end_idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
> + min_ampdu_id = txq->data[idx].ampdu_id;
> + max_ampdu_id = min_ampdu_id;
> + while (idx != end_idx) {
> + struct iwm_tx_data *txdata = &txq->data[idx];
> +
> + if (txdata->m != NULL) {
> + if (min_ampdu_id > txdata->ampdu_id)
> + min_ampdu_id = txdata->ampdu_id;
> + if (max_ampdu_id < txdata->ampdu_id)
> + max_ampdu_id = txdata->ampdu_id;
> + }
> +
> + idx = (idx + 1) % IWM_TX_RING_COUNT;
> + }
> +
> + /*
> + * Update Tx rate statistics for A-MPDUs before firmware's BA window.
> + */
> + for (id = min_ampdu_id; id <= max_ampdu_id; id++) {
> + int have_ack = 0, bit = 0;
> + idx = IWM_AGG_SSN_TO_TXQ_IDX(seq);
> + end_idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
> + in->in_mn.agglen = 0;
> + in->in_mn.ampdu_size = 0;
> + while (idx != end_idx) {
> + struct iwm_tx_data *txdata = &txq->data[idx];
> + uint16_t s = (seq + bit) & 0xfff;
> + /*
> + * We can assume that this subframe has been ACKed
> + * because ACK failures come as single frames and
> + * before failing an A-MPDU subframe the firmware
> + * sends it as a single frame at least once.
> + *
> + * However, when this A-MPDU was transmitted we
> + * learned how many subframes it contained.
> + * So if firmware isn't reporting all subframes now
> + * we can deduce an ACK failure for missing frames.
> + */
> + if (txdata->m != NULL && txdata->ampdu_id == id &&
> +    txdata->ampdu_txmcs == ni->ni_txmcs &&
> +    txdata->ampdu_nframes > 0 &&
> +    (SEQ_LT(ba->ba_winend, s) ||
> +    (ba->ba_bitmap & (1 << bit)) == 0)) {
> + have_ack++;
> + in->in_mn.frames = txdata->ampdu_nframes;
> + in->in_mn.agglen = txdata->ampdu_nframes;
> + in->in_mn.ampdu_size = txdata->ampdu_size;
> + if (txdata->retries > 1)
> + in->in_mn.retries++;
> + if (!SEQ_LT(ba->ba_winend, s))
> + ieee80211_output_ba_record_ack(ic, ni,
> +    tid, s);
> + }
> +
> + idx = (idx + 1) % IWM_TX_RING_COUNT;
> + bit++;
> + }
> +
> + if (have_ack > 0) {
> + in->in_mn.txfail = in->in_mn.frames - have_ack;
> + iwm_mira_choose(sc, ni);
> + }
> + }
> +}
> +
> +void
> +iwm_rx_ba(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
> +    struct iwm_rx_data *data)
> +{
> + struct iwm_ba_notif *ban = (void *)pkt->data;
> + struct ieee80211com *ic = &sc->sc_ic;
> + struct ieee80211_node *ni;
> + struct ieee80211_tx_ba *ba;
> + struct iwm_node *in;
> + struct iwm_tx_ring *ring;
> + uint16_t seq, ssn, idx;
> + int qid;
> +
> + if (ic->ic_state != IEEE80211_S_RUN)
> + return;
> +
> + if (iwm_rx_packet_payload_len(pkt) < sizeof(*ban))
> + return;
> +
> + if (ban->sta_id != IWM_STATION_ID ||
> +    !IEEE80211_ADDR_EQ(ic->ic_bss->ni_macaddr, ban->sta_addr))
> + return;
> +
> + ni = ic->ic_bss;
> + in = (void *)ni;
> +
> + qid = le16toh(ban->scd_flow);
> + if (qid < IWM_FIRST_AGG_TX_QUEUE || qid > IWM_LAST_AGG_TX_QUEUE)
> + return;
> +
> + /* Protect against a firmware bug where the queue/TID are off. */
> + if (qid != IWM_FIRST_AGG_TX_QUEUE + ban->tid)
> + return;
> +
> + ba = &ni->ni_tx_ba[ban->tid];
> + if (ba->ba_state != IEEE80211_BA_AGREED)
> + return;
> +
> + ring = &sc->txq[qid];
> + ssn = le16toh(ban->scd_ssn); /* BA window starting sequence number */
> + idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
> +
> + /*
> + * The first bit in ban->bitmap corresponds to the sequence number
> + * stored in the sequence control field ban->seq_ctl.
> + * Multiple BA notifications in a row may be using this number, with
> + * additional bits being set in ban->bitmap. It is unclear how the
> + * firmware decides to shift this window forward.
> + */
> + seq = le16toh(ban->seq_ctl) >> IEEE80211_SEQ_SEQ_SHIFT;
> +
> + /*
> + * The firmware's new BA window starting sequence number
> + * corresponds to the first hole in ban->scd_ssn, implying
> + * that all frames between 'seq' and 'ssn' have been acked.
> + */
> + ssn = le16toh(ban->scd_ssn);
> +
> + /* Skip rate control if our Tx rate is fixed. */
> + if (ic->ic_fixed_mcs != -1)
> + iwm_ampdu_rate_control(sc, ni, ring, ban->tid, seq, ssn);
> +
> + /*
> + * SSN corresponds to the first (perhaps not yet transmitted) frame
> + * in firmware's BA window. Firmware is not going to retransmit any
> + * frames before its BA window so mark them all as done.
> + */
> + if (SEQ_LT(ba->ba_winstart, ssn)) {
> + ieee80211_output_ba_move_window(ic, ni, ban->tid, ssn);
> + iwm_txq_advance(sc, ring, IWM_AGG_SSN_TO_TXQ_IDX(ssn));
> + iwm_clear_oactive(sc, ring);
> + }
> +}
> +
> +void
>  iwm_rx_bmiss(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
>      struct iwm_rx_data *data)
>  {
> @@ -4638,9 +5228,6 @@ iwm_send_cmd(struct iwm_softc *sc, struct iwm_host_cmd
>   }
>   }
>  
> -#if 0
> - iwm_update_sched(sc, ring->qid, ring->cur, 0, 0);
> -#endif
>   /* Kick command ring. */
>   ring->queued++;
>   ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT;
> @@ -4771,7 +5358,6 @@ iwm_cmd_done(struct iwm_softc *sc, int qid, int idx, i
>   }
>  }
>  
> -#if 0
>  /*
>   * necessary only for block ack mode
>   */
> @@ -4780,32 +5366,49 @@ iwm_update_sched(struct iwm_softc *sc, int qid, int id
>      uint16_t len)
>  {
>   struct iwm_agn_scd_bc_tbl *scd_bc_tbl;
> - uint16_t w_val;
> + uint16_t val;
>  
>   scd_bc_tbl = sc->sched_dma.vaddr;
>  
> - len += 8; /* magic numbers came naturally from paris */
> + len += IWM_TX_CRC_SIZE + IWM_TX_DELIMITER_SIZE;
>   if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_DW_BC_TABLE)
>   len = roundup(len, 4) / 4;
>  
> - w_val = htole16(sta_id << 12 | len);
> + val = htole16(sta_id << 12 | len);
>  
> + bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> +    0, sc->sched_dma.size, BUS_DMASYNC_PREWRITE);
> +
>   /* Update TX scheduler. */
> - scd_bc_tbl[qid].tfd_offset[idx] = w_val;
> + scd_bc_tbl[qid].tfd_offset[idx] = val;
> + if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP)
> + scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = val;
> +
>   bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> -    (char *)(void *)w - (char *)(void *)sc->sched_dma.vaddr,
> -    sizeof(uint16_t), BUS_DMASYNC_PREWRITE);
> +    0, sc->sched_dma.size, BUS_DMASYNC_POSTWRITE);
> +}
>  
> - /* I really wonder what this is ?!? */
> - if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP) {
> - scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = w_val;
> - bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> -    (char *)(void *)(w + IWM_TFD_QUEUE_SIZE_MAX) -
> -    (char *)(void *)sc->sched_dma.vaddr,
> -    sizeof (uint16_t), BUS_DMASYNC_PREWRITE);
> - }
> +void
> +iwm_reset_sched(struct iwm_softc *sc, int qid, int idx, uint8_t sta_id)
> +{
> + struct iwm_agn_scd_bc_tbl *scd_bc_tbl;
> + uint16_t val;
> +
> + scd_bc_tbl = sc->sched_dma.vaddr;
> +
> + val = htole16(1 | (sta_id << 12));
> +
> + bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> +    0, sc->sched_dma.size, BUS_DMASYNC_PREWRITE);
> +
> + /* Update TX scheduler. */
> + scd_bc_tbl[qid].tfd_offset[idx] = val;
> + if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP)
> + scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = val;
> +
> + bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> +    0, sc->sched_dma.size, BUS_DMASYNC_POSTWRITE);
>  }
> -#endif
>  
>  /*
>   * Fill in various bit for management frames, and leave them
> @@ -4897,19 +5500,24 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
>   uint32_t flags;
>   u_int hdrlen;
>   bus_dma_segment_t *seg;
> - uint8_t tid, type;
> + uint8_t tid, type, subtype;
>   int i, totlen, err, pad;
> - int hdrlen2, rtsthres = ic->ic_rtsthreshold;
> + int qid, hasqos, rtsthres = ic->ic_rtsthreshold;
>  
>   wh = mtod(m, struct ieee80211_frame *);
> - hdrlen = ieee80211_get_hdrlen(wh);
>   type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
> + subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
> + if (type == IEEE80211_FC0_TYPE_CTL)
> + hdrlen = sizeof(struct ieee80211_frame_min);
> + else
> + hdrlen = ieee80211_get_hdrlen(wh);
>  
> - hdrlen2 = (ieee80211_has_qos(wh)) ?
> -    sizeof (struct ieee80211_qosframe) :
> -    sizeof (struct ieee80211_frame);
> + hasqos = ieee80211_has_qos(wh);
>  
> - tid = 0;
> + if (type == IEEE80211_FC0_TYPE_DATA)
> + tid = IWM_TID_NON_QOS;
> + else
> + tid = IWM_MAX_TID_COUNT;
>  
>   /*
>   * Map EDCA categories to Tx data queues.
> @@ -4918,14 +5526,31 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
>   * need to share Tx queues between stations because we only implement
>   * client mode; the firmware's station table contains only one entry
>   * which represents our access point.
> - *
> - * Tx aggregation will require additional queues (one queue per TID
> - * for which aggregation is enabled) but we do not implement this yet.
>   */
>   if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_DQA_SUPPORT))
> - ring = &sc->txq[IWM_DQA_MIN_MGMT_QUEUE + ac];
> + qid = IWM_DQA_MIN_MGMT_QUEUE + ac;
>   else
> - ring = &sc->txq[ac];
> + qid = ac;
> +
> + /* If possible, put this frame on an aggregation queue. */
> + if (hasqos) {
> + struct ieee80211_tx_ba *ba;
> + uint16_t qos = ieee80211_get_qos(wh);
> + int qostid = qos & IEEE80211_QOS_TID;
> + int qosac = ieee80211_up_to_ac(ic, qostid);
> +
> + ba = &ni->ni_tx_ba[qostid];
> + if (!IEEE80211_IS_MULTICAST(wh->i_addr1) &&
> +    type == IEEE80211_FC0_TYPE_DATA &&
> +    (in->ampdu_tx_tid_mask & (1 << qostid)) &&
> +    ba->ba_state == IEEE80211_BA_AGREED) {
> + qid = IWM_FIRST_AGG_TX_QUEUE + qostid;
> + tid = qostid;
> + ac = qosac;
> + }
> + }
> +
> + ring = &sc->txq[qid];
>   desc = &ring->desc[ring->cur];
>   memset(desc, 0, sizeof(*desc));
>   data = &ring->data[ring->cur];
> @@ -5004,14 +5629,28 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
>   tx->sta_id = IWM_STATION_ID;
>  
>   if (type == IEEE80211_FC0_TYPE_MGT) {
> - uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
> -
>   if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ ||
>      subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ)
>   tx->pm_frame_timeout = htole16(3);
>   else
>   tx->pm_frame_timeout = htole16(2);
>   } else {
> + if (type == IEEE80211_FC0_TYPE_CTL &&
> +    subtype == IEEE80211_FC0_SUBTYPE_BAR) {
> + struct ieee80211_frame_min *wbar;
> + uint8_t *frm;
> + uint16_t ctl;
> +
> + flags |= IWM_TX_CMD_FLG_ACK | IWM_TX_CMD_FLG_BAR;
> + tx->data_retry_limit = IWM_BAR_DFAULT_RETRY_LIMIT;
> +
> + wbar = mtod(m, struct ieee80211_frame_min *);
> + frm = (uint8_t *)&wbar[1];
> + memcpy(&ctl, frm, sizeof(ctl));
> + tid = (le16toh(ctl) & IEEE80211_BA_TID_INFO_MASK) >>
> +    IEEE80211_BA_TID_INFO_SHIFT;
> + }
> +
>   tx->pm_frame_timeout = htole16(0);
>   }
>  
> @@ -5058,7 +5697,9 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
>   tx->sec_ctl = 0;
>   }
>  
> - flags |= IWM_TX_CMD_FLG_BT_DIS | IWM_TX_CMD_FLG_SEQ_CTL;
> + flags |= IWM_TX_CMD_FLG_BT_DIS;
> + if (!hasqos)
> + flags |= IWM_TX_CMD_FLG_SEQ_CTL;
>  
>   tx->tx_flags |= htole32(flags);
>  
> @@ -5085,9 +5726,11 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
>   }
>   }
>   data->m = m;
> + data->totlen = totlen;
>   data->in = in;
>   data->txmcs = ni->ni_txmcs;
>   data->txrate = ni->ni_txrate;
> + data->ampdu_txmcs = ni->ni_txmcs; /* updated upon Tx interrupt */
>  
>   /* Fill TX descriptor. */
>   desc->num_tbs = 2 + data->map->dm_nsegs;
> @@ -5118,9 +5761,7 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
>      (char *)(void *)desc - (char *)(void *)ring->desc_dma.vaddr,
>      sizeof (*desc), BUS_DMASYNC_PREWRITE);
>  
> -#if 0
>   iwm_update_sched(sc, ring->qid, ring->cur, tx->sta_id, le16toh(tx->len));
> -#endif
>  
>   /* Kick TX ring. */
>   ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT;
> @@ -5336,6 +5977,7 @@ iwm_add_sta_cmd(struct iwm_softc *sc, struct iwm_node
>   uint32_t status;
>   size_t cmdsize;
>   struct ieee80211com *ic = &sc->sc_ic;
> + uint16_t tid_disable_tx = 0xffff;
>  
>   if (!update && (sc->sc_flags & IWM_FLAG_STA_ACTIVE))
>   panic("STA already added");
> @@ -5362,7 +6004,7 @@ iwm_add_sta_cmd(struct iwm_softc *sc, struct iwm_node
>   else
>   qid = IWM_AUX_QUEUE;
>   add_sta_cmd.tfd_queue_msk |= htole32(1 << qid);
> - } else if (!update) {
> + } else {
>   int ac;
>   for (ac = 0; ac < EDCA_NUM_AC; ac++) {
>   int qid = ac;
> @@ -5371,15 +6013,33 @@ iwm_add_sta_cmd(struct iwm_softc *sc, struct iwm_node
>   qid += IWM_DQA_MIN_MGMT_QUEUE;
>   add_sta_cmd.tfd_queue_msk |= htole32(1 << qid);
>   }
> - IEEE80211_ADDR_COPY(&add_sta_cmd.addr, in->in_ni.ni_bssid);
>   }
> + if (!update) {
> + if (ic->ic_opmode == IEEE80211_M_MONITOR)
> + IEEE80211_ADDR_COPY(&add_sta_cmd.addr,
> +    etherbroadcastaddr);
> + else
> + IEEE80211_ADDR_COPY(&add_sta_cmd.addr,
> +    in->in_ni.ni_bssid);
> + }
>   add_sta_cmd.add_modify = update ? 1 : 0;
>   add_sta_cmd.station_flags_msk
>      |= htole32(IWM_STA_FLG_FAT_EN_MSK | IWM_STA_FLG_MIMO_EN_MSK);
> - add_sta_cmd.tid_disable_tx = htole16(0xffff);
> - if (update)
> - add_sta_cmd.modify_mask |= (IWM_STA_MODIFY_TID_DISABLE_TX);
> + if (update) {
> + int tid, qid;
> + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> + if ((in->ampdu_tx_tid_mask & (1 << tid)) == 0)
> + continue;
>  
> + qid = IWM_FIRST_AGG_TX_QUEUE + tid;
> + add_sta_cmd.tfd_queue_msk |= htole32(1 << qid);
> + tid_disable_tx &= ~(1 << tid);
> + add_sta_cmd.modify_mask |= (IWM_STA_MODIFY_QUEUES |
> +    IWM_STA_MODIFY_TID_DISABLE_TX);
> + }
> + }
> + add_sta_cmd.tid_disable_tx = htole16(tid_disable_tx);
> +
>   if (in->in_ni.ni_flags & IEEE80211_NODE_HT) {
>   add_sta_cmd.station_flags_msk
>      |= htole32(IWM_STA_FLG_MAX_AGG_SIZE_MSK |
> @@ -5444,7 +6104,7 @@ iwm_add_aux_sta(struct iwm_softc *sc)
>   if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_DQA_SUPPORT)) {
>   qid = IWM_DQA_AUX_QUEUE;
>   err = iwm_enable_txq(sc, IWM_AUX_STA_ID, qid,
> -    IWM_TX_FIFO_MCAST);
> +    IWM_TX_FIFO_MCAST, 0, IWM_MAX_TID_COUNT, 0);
>   } else {
>   qid = IWM_AUX_QUEUE;
>   err = iwm_enable_ac_txq(sc, qid, IWM_TX_FIFO_MCAST);
> @@ -6582,6 +7242,9 @@ iwm_auth(struct iwm_softc *sc)
>  
>   splassert(IPL_NET);
>  
> + in->ampdu_rx_tid_mask = 0;
> + in->ampdu_tx_tid_mask = 0;
> +
>   if (ic->ic_opmode == IEEE80211_M_MONITOR)
>   sc->sc_phyctxt[0].channel = ic->ic_ibss_chan;
>   else
> @@ -7156,11 +7819,7 @@ iwm_setrates(struct iwm_node *in, int async)
>  
>   lqcmd.agg_time_limit = htole16(4000); /* 4ms */
>   lqcmd.agg_disable_start_th = 3;
> -#ifdef notyet
>   lqcmd.agg_frame_cnt_limit = 0x3f;
> -#else
> - lqcmd.agg_frame_cnt_limit = 1; /* tx agg disabled */
> -#endif
>  
>   cmd.data[0] = &lqcmd;
>   iwm_send_cmd(sc, &cmd);
> @@ -7892,7 +8551,7 @@ iwm_init_hw(struct iwm_softc *sc)
>   else
>   qid = IWM_AUX_QUEUE;
>   err = iwm_enable_txq(sc, IWM_MONITOR_STA_ID, qid,
> -    iwm_ac_to_tx_fifo[EDCA_AC_BE]);
> +    iwm_ac_to_tx_fifo[EDCA_AC_BE], 0, IWM_MAX_TID_COUNT, 0);
>   if (err) {
>   printf("%s: could not enable monitor inject Tx queue "
>      "(error %d)\n", DEVNAME(sc), err);
> @@ -7906,7 +8565,7 @@ iwm_init_hw(struct iwm_softc *sc)
>   else
>   qid = ac;
>   err = iwm_enable_txq(sc, IWM_STATION_ID, qid,
> -    iwm_ac_to_tx_fifo[ac]);
> +    iwm_ac_to_tx_fifo[ac], 0, IWM_TID_NON_QOS, 0);
>   if (err) {
>   printf("%s: could not enable Tx queue %d "
>      "(error %d)\n", DEVNAME(sc), ac, err);
> @@ -8578,6 +9237,10 @@ iwm_rx_pkt(struct iwm_softc *sc, struct iwm_rx_data *d
>   iwm_rx_tx_cmd(sc, pkt, data);
>   break;
>  
> + case IWM_BA_NOTIF:
> + iwm_rx_ba(sc, pkt, data);
> + break;
> +
>   case IWM_MISSED_BEACONS_NOTIFICATION:
>   iwm_rx_bmiss(sc, pkt, data);
>   break;
> @@ -8943,9 +9606,9 @@ iwm_intr(void *arg)
>   DPRINTF(("driver status:\n"));
>   for (i = 0; i < IWM_MAX_QUEUES; i++) {
>   struct iwm_tx_ring *ring = &sc->txq[i];
> - DPRINTF(("  tx ring %2d: qid=%-2d cur=%-3d "
> + DPRINTF(("  tx ring %2d: qid=%-2d tail=%-3d cur=%-3d "
>      "queued=%-3d\n",
> -    i, ring->qid, ring->cur, ring->queued));
> +    i, ring->qid, ring->tail, ring->cur, ring->queued));
>   }
>   DPRINTF(("  rx ring: cur=%d\n", sc->rxq.cur));
>   DPRINTF(("  802.11 state %s\n",
> @@ -9053,9 +9716,9 @@ iwm_intr_msix(void *arg)
>   DPRINTF(("driver status:\n"));
>   for (i = 0; i < IWM_MAX_QUEUES; i++) {
>   struct iwm_tx_ring *ring = &sc->txq[i];
> - DPRINTF(("  tx ring %2d: qid=%-2d cur=%-3d "
> + DPRINTF(("  tx ring %2d: qid=%-2d tail=%-3d cur=%-3d "
>      "queued=%-3d\n",
> -    i, ring->qid, ring->cur, ring->queued));
> +    i, ring->qid, ring->tail, ring->cur, ring->queued));
>   }
>   DPRINTF(("  rx ring: cur=%d\n", sc->rxq.cur));
>   DPRINTF(("  802.11 state %s\n",
> @@ -9465,6 +10128,7 @@ iwm_attach(struct device *parent, struct device *self,
>  
>   /* Set device capabilities. */
>   ic->ic_caps =
> +    IEEE80211_C_QOS | IEEE80211_C_TX_AMPDU | /* A-MPDU */
>      IEEE80211_C_WEP | /* WEP */
>      IEEE80211_C_RSN | /* WPA/RSN */
>      IEEE80211_C_SCANALL | /* device scans all channels at once */
> @@ -9529,10 +10193,8 @@ iwm_attach(struct device *parent, struct device *self,
>   ic->ic_update_htprot = iwm_update_htprot;
>   ic->ic_ampdu_rx_start = iwm_ampdu_rx_start;
>   ic->ic_ampdu_rx_stop = iwm_ampdu_rx_stop;
> -#ifdef notyet
>   ic->ic_ampdu_tx_start = iwm_ampdu_tx_start;
>   ic->ic_ampdu_tx_stop = iwm_ampdu_tx_stop;
> -#endif
>   /*
>   * We cannot read the MAC address without loading the
>   * firmware from disk. Postpone until mountroot is done.
> blob - 201ce69014b9422335a6d698cd4a3cc3f314b2b5
> blob + 1e2e4e01e2a98f60221b72fc6e82a1246f7b9cef
> --- sys/dev/pci/if_iwmreg.h
> +++ sys/dev/pci/if_iwmreg.h
> @@ -1837,6 +1837,9 @@ struct iwm_agn_scd_bc_tbl {
>   uint16_t tfd_offset[IWM_TFD_QUEUE_BC_SIZE];
>  } __packed;
>  
> +#define IWM_TX_CRC_SIZE 4
> +#define IWM_TX_DELIMITER_SIZE 4
> +
>  /* Maximum number of Tx queues. */
>  #define IWM_MAX_QUEUES 31
>  
> @@ -1875,6 +1878,11 @@ struct iwm_agn_scd_bc_tbl {
>  #define IWM_DQA_MIN_DATA_QUEUE 10
>  #define IWM_DQA_MAX_DATA_QUEUE 31
>  
> +/* Reserve 8 DQA Tx queues, from 10 up to 17, for A-MPDU aggregation. */
> +#define IWM_MAX_TID_COUNT 8
> +#define IWM_FIRST_AGG_TX_QUEUE IWM_DQA_MIN_DATA_QUEUE
> +#define IWM_LAST_AGG_TX_QUEUE (IWM_FIRST_AGG_TX_QUEUE + IWM_MAX_TID_COUNT - 1)
> +
>  /* legacy non-DQA queues; the legacy command queue uses a different number! */
>  #define IWM_OFFCHANNEL_QUEUE 8
>  #define IWM_CMD_QUEUE 9
> @@ -4627,7 +4635,8 @@ struct iwm_lq_cmd {
>  /*
>   * TID for non QoS frames - to be written in tid_tspec
>   */
> -#define IWM_TID_NON_QOS IWM_MAX_TID_COUNT
> +#define IWM_TID_NON_QOS 0
> +#define IWM_TID_MGMT 15
>  
>  /*
>   * Limits on the retransmissions - to be written in {data,rts}_retry_limit
> @@ -4898,21 +4907,23 @@ struct iwm_tx_resp {
>  /**
>   * struct iwm_ba_notif - notifies about reception of BA
>   * ( IWM_BA_NOTIF = 0xc5 )
> - * @sta_addr_lo32: lower 32 bits of the MAC address
> - * @sta_addr_hi16: upper 16 bits of the MAC address
> + * @sta_addr: MAC address
>   * @sta_id: Index of recipient (BA-sending) station in fw's station table
>   * @tid: tid of the session
> - * @seq_ctl: sequence control field from IEEE80211 frame header (it is unclear
> - *  which frame this relates to; info or reverse engineering welcome)
> + * @seq_ctl: sequence control field from IEEE80211 frame header (the first
> + * bit in @bitmap corresponds to the sequence number stored here)
>   * @bitmap: the bitmap of the BA notification as seen in the air
>   * @scd_flow: the tx queue this BA relates to
>   * @scd_ssn: the index of the last contiguously sent packet
>   * @txed: number of Txed frames in this batch
>   * @txed_2_done: number of Acked frames in this batch
> + * @reduced_txp: power reduced according to TPC. This is the actual value and
> + * not a copy from the LQ command. Thus, if not the first rate was used
> + * for Tx-ing then this value will be set to 0 by FW.
> + * @reserved1: reserved
>   */
>  struct iwm_ba_notif {
> - uint32_t sta_addr_lo32;
> - uint16_t sta_addr_hi16;
> + uint8_t sta_addr[ETHER_ADDR_LEN];
>   uint16_t reserved;
>  
>   uint8_t sta_id;
> @@ -4923,6 +4934,7 @@ struct iwm_ba_notif {
>   uint16_t scd_ssn;
>   uint8_t txed;
>   uint8_t txed_2_done;
> + uint8_t reduced_txp;
>   uint16_t reserved1;
>  } __packed;
>  
> blob - 89abe2c1dbdf5ac3ccbf710994380502530ef2a8
> blob + 7d9e26bffe0f1658c771bf85768797c23e94e147
> --- sys/dev/pci/if_iwmvar.h
> +++ sys/dev/pci/if_iwmvar.h
> @@ -252,14 +252,26 @@ struct iwm_fw_paging {
>  #define IWM_TX_RING_LOMARK 192
>  #define IWM_TX_RING_HIMARK 224
>  
> +/* For aggregation queues, index must be aligned to frame sequence number. */
> +#define IWM_AGG_SSN_TO_TXQ_IDX(x) ((x) & (IWM_TX_RING_COUNT - 1))
> +
>  struct iwm_tx_data {
>   bus_dmamap_t map;
>   bus_addr_t cmd_paddr;
>   bus_addr_t scratch_paddr;
>   struct mbuf *m;
>   struct iwm_node *in;
> + int totlen;
> + int retries;
> + int txfail;
>   int txmcs;
>   int txrate;
> +
> + /* A-MPDU subframes */
> + int ampdu_id;
> + int ampdu_txmcs;
> + int ampdu_nframes;
> + int ampdu_size;
>  };
>  
>  struct iwm_tx_ring {
> @@ -363,6 +375,12 @@ struct iwm_bf_data {
>   int last_cqm_event;
>  };
>  
> +struct iwm_ba_param {
> + uint16_t tid_mask;
> + uint16_t ssn[IWM_MAX_TID_COUNT];
> + uint16_t winsize[IWM_MAX_TID_COUNT];
> +};
> +
>  struct iwm_softc {
>   struct device sc_dev;
>   struct ieee80211com sc_ic;
> @@ -381,10 +399,15 @@ struct iwm_softc {
>  
>   /* Task for firmware BlockAck setup/teardown and its arguments. */
>   struct task ba_task;
> - int ba_start;
> - int ba_tid;
> - uint16_t ba_ssn;
> - uint16_t ba_winsize;
> + int ba_flags;
> +#define IWM_RX_BA_START 0x01
> +#define IWM_TX_BA_START 0x02
> +#define IWM_RX_BA_STOP 0x04
> +#define IWM_TX_BA_STOP 0x08
> + struct iwm_ba_param rx_ba_start;
> + struct iwm_ba_param rx_ba_stop;
> + struct iwm_ba_param tx_ba_start;
> + struct iwm_ba_param tx_ba_stop;
>  
>   /* Task for HT protection updates. */
>   struct task htprot_task;
> @@ -407,6 +430,7 @@ struct iwm_softc {
>   struct iwm_rx_ring rxq;
>   int qfullmsk;
>   int cmdqid;
> + int qenablemsk;
>  
>   int sc_sf_state;
>  
> @@ -551,6 +575,12 @@ struct iwm_node {
>   int chosen_txrate;
>   struct ieee80211_mira_node in_mn;
>   int chosen_txmcs;
> +
> + uint32_t next_ampdu_id;
> +
> + /* Currently active Rx/Tx block ack sessions; tracked per TID. */
> + uint8_t ampdu_rx_tid_mask;
> + uint8_t ampdu_tx_tid_mask;
>  };
>  #define IWM_STATION_ID 0
>  #define IWM_AUX_STA_ID 1
>

--

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Uwe Werler
Woahhh,

I was also trying 5GHz (and ran tcpbench against one of our BSD servers in the DMZ):

469651560 bytes sent over 85.291 seconds
bandwidth min/avg/max/std-dev = 3.475/43.927/87.071/29.809 Mbps


        6 new output block ack agreements
        0 output block ack agreements timed out

(Tomorrow at work I will test against our new APs. My AP at home is a Technicolor MediaAccess TG789vac.)

mbk Uwe

On 29 Jun 09:48, Uwe Werler wrote:

> Hi Stefan,
>
> for me the patch works in mode 11n:
>
> before (OpenBSD 6.7-current (GENERIC.MP) #304: Fri Jun 26 02:08:50 MDT 2020)
> bandwidth min/avg/max/std-dev = 2.354/12.319/15.391/3.850 Mbps
>
> with patch (OpenBSD 6.7-current (GENERIC.MP) #0: Mon Jun 29 09:35:24 GMT 2020)
> bandwidth min/avg/max/std-dev = 12.174/31.411/57.746/15.154 Mbps
>
> iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
> iwm0: hw rev 0x230, fw ver 34.0.1, address 60:f6:77:bc:3a:04
>
> (mode 11g: bandwidth min/avg/max/std-dev = 0.620/0.844/1.101/0.153 Mbps)
>
> mbk Uwe
>
>
> On 26 Jun 14:45, Stefan Sperling wrote:
> > This patch adds support for 11n Tx aggregation to iwm(4).
> >
> > Please help with testing if you can by running the patch and using wifi
> > as usual. Nothing should change, except that Tx speed may potentially
> > improve. If you have time to run before/after performance measurements with
> > tcpbench or such, that would be nice. But it's not required for testing.
> >
> > If Tx aggregation is active then netstat will show a non-zero output block ack
> > agreement counter:
> >
> > $ netstat -W iwm0 | grep 'output block'
> >         3 new output block ack agreements
> >         0 output block ack agreements timed out
> >
> > It would be great to get at least one test for all the chipsets the driver
> > supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> > The behaviour of the access point also matters a great deal. It won't
> > hurt to test the same chipset against several different access points.
> >
> > I have tested this version on 8265 only so far. I've run older revisions
> > of this patch on 7265 so I'm confident that this chip will work, too.
> > So far, the APs I have tested against are athn(4) in 11a mode and in 11n
> > mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5GHz channels.
> >
> > diff refs/heads/master refs/heads/txagg
> > blob - 3a75d07a60a7eb4c66540474e47aeffd7a85250a
> > blob + 853bdd1290ad509f5fce7b5bf20550f458a2b460
> > --- sys/dev/pci/if_iwm.c
> > +++ sys/dev/pci/if_iwm.c
> > @@ -144,6 +144,8 @@
> >  #include <net80211/ieee80211_amrr.h>
> >  #include <net80211/ieee80211_mira.h>
> >  #include <net80211/ieee80211_radiotap.h>
> > +#include <net80211/ieee80211_priv.h> /* for SEQ_LT */
> > +#undef DPRINTF /* defined in ieee80211_priv.h */
> >  
> >  #define DEVNAME(_s) ((_s)->sc_dev.dv_xname)
> >  
> > @@ -299,7 +301,8 @@ int iwm_nic_rx_mq_init(struct iwm_softc *);
> >  int iwm_nic_tx_init(struct iwm_softc *);
> >  int iwm_nic_init(struct iwm_softc *);
> >  int iwm_enable_ac_txq(struct iwm_softc *, int, int);
> > -int iwm_enable_txq(struct iwm_softc *, int, int, int);
> > +int iwm_enable_txq(struct iwm_softc *, int, int, int, int, uint8_t,
> > +    uint16_t);
> >  int iwm_post_alive(struct iwm_softc *);
> >  struct iwm_phy_db_entry *iwm_phy_db_get_section(struct iwm_softc *, uint16_t,
> >      uint16_t);
> > @@ -334,12 +337,12 @@ void iwm_ampdu_rx_stop(struct ieee80211com *, struct i
> >      uint8_t);
> >  void iwm_sta_rx_agg(struct iwm_softc *, struct ieee80211_node *, uint8_t,
> >      uint16_t, uint16_t, int);
> > -#ifdef notyet
> > +void iwm_sta_tx_agg(struct iwm_softc *, struct ieee80211_node *, uint8_t,
> > +    uint16_t, uint16_t, int);
> >  int iwm_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
> >      uint8_t);
> >  void iwm_ampdu_tx_stop(struct ieee80211com *, struct ieee80211_node *,
> >      uint8_t);
> > -#endif
> >  void iwm_ba_task(void *);
> >  
> >  int iwm_parse_nvm_data(struct iwm_softc *, const uint16_t *,
> > @@ -372,14 +375,25 @@ int iwm_rxmq_get_signal_strength(struct iwm_softc *, s
> >  void iwm_rx_rx_phy_cmd(struct iwm_softc *, struct iwm_rx_packet *,
> >      struct iwm_rx_data *);
> >  int iwm_get_noise(const struct iwm_statistics_rx_non_phy *);
> > +void iwm_txq_advance(struct iwm_softc *, struct iwm_tx_ring *, int);
> > +void iwm_ampdu_tx_done(struct iwm_softc *, struct iwm_cmd_header *,
> > +    struct iwm_node *, struct iwm_tx_ring *, uint32_t, uint8_t,
> > +    uint8_t, uint16_t, int, struct iwm_agg_tx_status *);
> >  int iwm_ccmp_decap(struct iwm_softc *, struct mbuf *,
> >      struct ieee80211_node *);
> >  void iwm_rx_frame(struct iwm_softc *, struct mbuf *, int, uint32_t, int, int,
> >      uint32_t, struct ieee80211_rxinfo *, struct mbuf_list *);
> > -void iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_rx_packet *,
> > -    struct iwm_node *, int, int);
> > +void iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_tx_resp *,
> > +    struct iwm_node *, int, int, int);
> > +void iwm_txd_done(struct iwm_softc *, struct iwm_tx_data *);
> >  void iwm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *,
> >      struct iwm_rx_data *);
> > +void iwm_clear_oactive(struct iwm_softc *, struct iwm_tx_ring *);
> > +void iwm_mira_choose(struct iwm_softc *, struct ieee80211_node *);
> > +void iwm_ampdu_rate_control(struct iwm_softc *, struct ieee80211_node *,
> > +    struct iwm_tx_ring *, int, uint16_t, uint16_t);
> > +void iwm_rx_ba(struct iwm_softc *, struct iwm_rx_packet *,
> > +    struct iwm_rx_data *);
> >  void iwm_rx_bmiss(struct iwm_softc *, struct iwm_rx_packet *,
> >      struct iwm_rx_data *);
> >  int iwm_binding_cmd(struct iwm_softc *, struct iwm_node *, uint32_t);
> > @@ -399,6 +413,7 @@ int iwm_send_cmd_pdu_status(struct iwm_softc *, uint32
> >  void iwm_free_resp(struct iwm_softc *, struct iwm_host_cmd *);
> >  void iwm_cmd_done(struct iwm_softc *, int, int, int);
> >  void iwm_update_sched(struct iwm_softc *, int, int, uint8_t, uint16_t);
> > +void iwm_reset_sched(struct iwm_softc *, int, int, uint8_t);
> >  const struct iwm_rate *iwm_tx_fill_cmd(struct iwm_softc *, struct iwm_node *,
> >      struct ieee80211_frame *, struct iwm_tx_cmd *);
> >  int iwm_tx(struct iwm_softc *, struct mbuf *, struct ieee80211_node *, int);
> > @@ -1306,17 +1321,17 @@ iwm_alloc_tx_ring(struct iwm_softc *sc, struct iwm_tx_
> >   * The command is queue 0 (sc->txq[0]), and 4 mgmt/data frame queues
> >   * are sc->tqx[IWM_DQA_MIN_MGMT_QUEUE + ac], i.e. sc->txq[5:8],
> >   * in order to provide one queue per EDCA category.
> > + * Tx aggregation requires additional queues, one queue per TID for
> > + * which aggregation is enabled. We map TID 0-7 to sc->txq[10:17].
> >   *
> > - * In non-DQA mode, we use rings 0 through 9 (0-3 are EDCA, 9 is cmd).
> > + * In non-DQA mode, we use rings 0 through 9 (0-3 are EDCA, 9 is cmd),
> > + * and Tx aggregation is not supported.
> >   *
> > - * Tx aggregation will require additional queues (one queue per TID
> > - * for which aggregation is enabled) but we do not implement this yet.
> > - *
> >   * Unfortunately, we cannot tell if DQA will be used until the
> >   * firmware gets loaded later, so just allocate sufficient rings
> >   * in order to satisfy both cases.
> >   */
> > - if (qid > IWM_CMD_QUEUE)
> > + if (qid > IWM_LAST_AGG_TX_QUEUE)
> >   return 0;
> >  
> >   size = IWM_TX_RING_COUNT * sizeof(struct iwm_device_cmd);
> > @@ -1380,6 +1395,7 @@ iwm_reset_tx_ring(struct iwm_softc *sc, struct iwm_tx_
> >   bus_dmamap_sync(sc->sc_dmat, ring->desc_dma.map, 0,
> >      ring->desc_dma.size, BUS_DMASYNC_PREWRITE);
> >   sc->qfullmsk &= ~(1 << ring->qid);
> > + sc->qenablemsk &= ~(1 << ring->qid);
> >   /* 7000 family NICs are locked while commands are in progress. */
> >   if (ring->qid == sc->cmdqid && ring->queued > 0) {
> >   if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000)
> > @@ -2208,6 +2224,18 @@ const uint8_t iwm_ac_to_tx_fifo[] = {
> >   IWM_TX_FIFO_VO,
> >  };
> >  
> > +/* Map a TID to an ieee80211_edca_ac category. */
> > +const uint8_t iwm_tid_to_ac[IWM_MAX_TID_COUNT] = {
> > + EDCA_AC_BE,
> > + EDCA_AC_BK,
> > + EDCA_AC_BK,
> > + EDCA_AC_BE,
> > + EDCA_AC_VI,
> > + EDCA_AC_VI,
> > + EDCA_AC_VO,
> > + EDCA_AC_VO,
> > +};
> > +
> >  int
> >  iwm_enable_ac_txq(struct iwm_softc *sc, int qid, int fifo)
> >  {
> > @@ -2250,28 +2278,48 @@ iwm_enable_ac_txq(struct iwm_softc *sc, int qid, int f
> >  }
> >  
> >  int
> > -iwm_enable_txq(struct iwm_softc *sc, int sta_id, int qid, int fifo)
> > +iwm_enable_txq(struct iwm_softc *sc, int sta_id, int qid, int fifo,
> > +    int aggregate, uint8_t tid, uint16_t ssn)
> >  {
> > + struct iwm_tx_ring *ring = &sc->txq[qid];
> >   struct iwm_scd_txq_cfg_cmd cmd;
> > - int err;
> > + int err, idx, scd_bug;
> >  
> >   iwm_nic_assert_locked(sc);
> >  
> > - IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | 0);
> > + /*
> > + * If we need to move the SCD write pointer by steps of
> > + * 0x40, 0x80 or 0xc0, it gets stuck.
> > + * This is really ugly, but this is the easiest way out for
> > + * this sad hardware issue.
> > + * This bug has been fixed on devices 9000 and up.
> > + */
> > + scd_bug = !sc->sc_mqrx_supported &&
> > + !((ssn - ring->cur) & 0x3f) &&
> > + (ssn != ring->cur);
> > + if (scd_bug)
> > + ssn = (ssn + 1) & 0xfff;
> >  
> > + idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
> > + IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | idx);
> > + ring->cur = idx;
> > + ring->tail = idx;
> > +
> >   memset(&cmd, 0, sizeof(cmd));
> > + cmd.tid = tid;
> >   cmd.scd_queue = qid;
> >   cmd.enable = 1;
> >   cmd.sta_id = sta_id;
> >   cmd.tx_fifo = fifo;
> > - cmd.aggregate = 0;
> > + cmd.aggregate = aggregate;
> > + cmd.ssn = htole16(ssn);
> >   cmd.window = IWM_FRAME_LIMIT;
> >  
> > - err = iwm_send_cmd_pdu(sc, IWM_SCD_QUEUE_CFG, 0,
> > -    sizeof(cmd), &cmd);
> > + err = iwm_send_cmd_pdu(sc, IWM_SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd);
> >   if (err)
> >   return err;
> >  
> > + sc->qenablemsk |= (1 << qid);
> >   return 0;
> >  }
> >  
> > @@ -2950,8 +2998,12 @@ iwm_sta_rx_agg(struct iwm_softc *sc, struct ieee80211_
> >   if (start) {
> >   sc->sc_rx_ba_sessions++;
> >   ieee80211_addba_req_accept(ic, ni, tid);
> > - } else if (sc->sc_rx_ba_sessions > 0)
> > - sc->sc_rx_ba_sessions--;
> > + in->ampdu_rx_tid_mask |= (1 << tid);
> > + } else {
> > + in->ampdu_rx_tid_mask &= ~(1 << tid);
> > + if (sc->sc_rx_ba_sessions > 0)
> > + sc->sc_rx_ba_sessions--;
> > + }
> >   } else if (start)
> >   ieee80211_addba_req_refuse(ic, ni, tid);
> >  
> > @@ -2959,6 +3011,75 @@ iwm_sta_rx_agg(struct iwm_softc *sc, struct ieee80211_
> >  }
> >  
> >  void
> > +iwm_sta_tx_agg(struct iwm_softc *sc, struct ieee80211_node *ni, uint8_t tid,
> > +    uint16_t ssn, uint16_t winsize, int start)
> > +{
> > + struct ieee80211com *ic = &sc->sc_ic;
> > + struct iwm_node *in = (void *)ni;
> > + int qid = IWM_FIRST_AGG_TX_QUEUE + tid;
> > + enum ieee80211_edca_ac ac = iwm_tid_to_ac[tid];
> > + int fifo = iwm_ac_to_tx_fifo[ac];
> > + int err;
> > +
> > + if (qid > IWM_LAST_AGG_TX_QUEUE || !iwm_nic_lock(sc)) {
> > + ieee80211_addba_resp_refuse(ic, ni, tid,
> > +    IEEE80211_STATUS_UNSPECIFIED);
> > + return;
> > + }
> > +
> > + if (start) {
> > + if ((sc->qenablemsk & (1 << qid)) == 0) {
> > + struct iwm_tx_ring *ring = &sc->txq[qid];
> > +
> > + err = iwm_enable_txq(sc, IWM_STATION_ID, qid, fifo,
> > +    1, tid, ssn);
> > + if (err)
> > + goto done;
> > + /*
> > + * If iwm_enable_txq() employed the SCD hardware bug
> > + * workaround we must skip the frame with seqnum SSN.
> > + */
> > + if (IWM_AGG_SSN_TO_TXQ_IDX(ring->cur) !=
> > +    IWM_AGG_SSN_TO_TXQ_IDX(ssn)) {
> > + ssn = (ssn + 1) & 0xfff;
> > + KASSERT(IWM_AGG_SSN_TO_TXQ_IDX(ring->cur) ==
> > +    IWM_AGG_SSN_TO_TXQ_IDX(ssn));
> > + ieee80211_output_ba_move_window(ic, ni,
> > +    tid, ssn);
> > + ni->ni_qos_txseqs[tid] = ssn;
> > + }
> > + }
> > + in->ampdu_tx_tid_mask |= (1 << tid);
> > + err = iwm_add_sta_cmd(sc, in, 1);
> > + if (err) {
> > + printf("%s: could not update sta (error %d)\n",
> > +    DEVNAME(sc), err);
> > + in->ampdu_tx_tid_mask &= ~(1 << tid);
> > + goto done;
> > + }
> > + } else {
> > + in->ampdu_tx_tid_mask &= ~(1 << tid);
> > + err = iwm_add_sta_cmd(sc, in, 1);
> > + if (err) {
> > + printf("%s: could not update sta (error %d)\n",
> > +    DEVNAME(sc), err);
> > + in->ampdu_tx_tid_mask |= (1 << tid);
> > + goto done;
> > + }
> > + }
> > +
> > +done:
> > + iwm_nic_unlock(sc);
> > + if (start) {
> > + if (err)
> > + ieee80211_addba_resp_refuse(ic, ni, tid,
> > +    IEEE80211_STATUS_UNSPECIFIED);
> > + else
> > + ieee80211_addba_resp_accept(ic, ni, tid);
> > + }
> > +}
> > +
> > +void
> >  iwm_htprot_task(void *arg)
> >  {
> >   struct iwm_softc *sc = arg;
> > @@ -3002,19 +3123,53 @@ iwm_ba_task(void *arg)
> >   struct ieee80211com *ic = &sc->sc_ic;
> >   struct ieee80211_node *ni = ic->ic_bss;
> >   int s = splnet();
> > + int tid;
> >  
> > - if (sc->sc_flags & IWM_FLAG_SHUTDOWN) {
> > + if ((sc->sc_flags & IWM_FLAG_SHUTDOWN) ||
> > +    ic->ic_state != IEEE80211_S_RUN) {
> >   refcnt_rele_wake(&sc->task_refs);
> >   splx(s);
> >   return;
> >   }
> >  
> > - if (sc->ba_start)
> > - iwm_sta_rx_agg(sc, ni, sc->ba_tid, sc->ba_ssn,
> > -    sc->ba_winsize, 1);
> > - else
> > - iwm_sta_rx_agg(sc, ni, sc->ba_tid, 0, 0, 0);
> > + if (sc->ba_flags & IWM_RX_BA_START) {
> > + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> > + if ((sc->rx_ba_start.tid_mask & (1 << tid)) == 0)
> > + continue;
> > + iwm_sta_rx_agg(sc, ni, tid, sc->rx_ba_start.ssn[tid],
> > +    sc->rx_ba_start.winsize[tid], 1);
> > + sc->rx_ba_start.tid_mask &= ~(1 << tid);
> > + }
> > + }
> > +
> > + if (sc->ba_flags & IWM_RX_BA_STOP) {
> > + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> > + if ((sc->rx_ba_stop.tid_mask & (1 << tid)) == 0)
> > + continue;
> > + iwm_sta_rx_agg(sc, ni, tid, 0, 0, 0);
> > + sc->rx_ba_stop.tid_mask &= ~(1 << tid);
> > + }
> > + }
> >  
> > + if (sc->ba_flags & IWM_TX_BA_START) {
> > + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> > + if ((sc->tx_ba_start.tid_mask & (1 << tid)) == 0)
> > + continue;
> > + iwm_sta_tx_agg(sc, ni, tid, sc->tx_ba_start.ssn[tid],
> > +    sc->tx_ba_start.winsize[tid], 1);
> > + sc->tx_ba_start.tid_mask &= ~(1 << tid);
> > + }
> > + }
> > +
> > + if (sc->ba_flags & IWM_TX_BA_STOP) {
> > + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> > + if ((sc->tx_ba_stop.tid_mask & (1 << tid)) == 0)
> > + continue;
> > + iwm_sta_tx_agg(sc, ni, tid, 0, 0, 0);
> > + sc->tx_ba_stop.tid_mask &= ~(1 << tid);
> > + }
> > + }
> > +
> >   refcnt_rele_wake(&sc->task_refs);
> >   splx(s);
> >  }
> > @@ -3029,14 +3184,23 @@ iwm_ampdu_rx_start(struct ieee80211com *ic, struct iee
> >  {
> >   struct ieee80211_rx_ba *ba = &ni->ni_rx_ba[tid];
> >   struct iwm_softc *sc = IC2IFP(ic)->if_softc;
> > + struct iwm_node *in = (void *)ni;
> >  
> > - if (sc->sc_rx_ba_sessions >= IWM_MAX_RX_BA_SESSIONS)
> > + if (tid >= IWM_MAX_TID_COUNT)
> > + return EINVAL;
> > +
> > + if (sc->sc_rx_ba_sessions >= IWM_MAX_RX_BA_SESSIONS ||
> > +    (in->ampdu_rx_tid_mask & (1 << tid)) != 0)
> >   return ENOSPC;
> >  
> > - sc->ba_start = 1;
> > - sc->ba_tid = tid;
> > - sc->ba_ssn = htole16(ba->ba_winstart);
> > - sc->ba_winsize = htole16(ba->ba_winsize);
> > + if (sc->rx_ba_start.tid_mask & (1 << tid) ||
> > +    sc->rx_ba_stop.tid_mask & (1 << tid))
> > + return EAGAIN;
> > +
> > + sc->ba_flags |= IWM_RX_BA_START;
> > + sc->rx_ba_start.tid_mask |= (1 << tid);
> > + sc->rx_ba_start.ssn[tid] = htole16(ba->ba_winstart);
> > + sc->rx_ba_start.winsize[tid] = htole16(ba->ba_winsize);
> >   iwm_add_task(sc, systq, &sc->ba_task);
> >  
> >   return EBUSY;
> > @@ -3051,13 +3215,69 @@ iwm_ampdu_rx_stop(struct ieee80211com *ic, struct ieee
> >      uint8_t tid)
> >  {
> >   struct iwm_softc *sc = IC2IFP(ic)->if_softc;
> > + struct iwm_node *in = (void *)ni;
> >  
> > - sc->ba_start = 0;
> > - sc->ba_tid = tid;
> > + if (in->ampdu_rx_tid_mask & (1 << tid))  {
> > + sc->ba_flags |= IWM_RX_BA_STOP;
> > + sc->rx_ba_stop.tid_mask |= (1 << tid);
> > + iwm_add_task(sc, systq, &sc->ba_task);
> > + }
> > +}
> > +
> > +int
> > +iwm_ampdu_tx_start(struct ieee80211com *ic, struct ieee80211_node *ni,
> > +    uint8_t tid)
> > +{
> > + struct iwm_softc *sc = IC2IFP(ic)->if_softc;
> > + struct iwm_node *in = (void *)ni;
> > + struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
> > +
> > + /* We only implement Tx aggregation with DQA-capable firmware. */
> > + if (!isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_DQA_SUPPORT))
> > + return ENOTSUP;
> > +
> > + /* Ensure we can map this TID to an aggregation queue. */
> > + if (tid >= IWM_MAX_TID_COUNT)
> > + return EINVAL;
> > +
> > + /* We only support a fixed Tx aggregation window size, for now. */
> > + if (ba->ba_winsize != IWM_FRAME_LIMIT)
> > + return ENOTSUP;
> > +
> > + if ((in->ampdu_tx_tid_mask & (1 << tid)) != 0)
> > + return ENOSPC;
> > +
> > + if (sc->tx_ba_start.tid_mask & (1 << tid) ||
> > +    sc->tx_ba_stop.tid_mask & (1 << tid))
> > + return EAGAIN;
> > +
> > + sc->ba_flags |= IWM_TX_BA_START;
> > + sc->tx_ba_start.tid_mask |= (1 << tid);
> > + sc->tx_ba_start.ssn[tid] = htole16(ba->ba_winstart);
> > + sc->tx_ba_start.winsize[tid] = htole16(ba->ba_winsize);
> >   iwm_add_task(sc, systq, &sc->ba_task);
> > +
> > + return EBUSY;
> >  }
> >  
> >  void
> > +iwm_ampdu_tx_stop(struct ieee80211com *ic, struct ieee80211_node *ni,
> > +    uint8_t tid)
> > +{
> > + struct iwm_softc *sc = IC2IFP(ic)->if_softc;
> > + struct iwm_node *in = (void *)ni;
> > +
> > + if (tid >= IWM_MAX_TID_COUNT)
> > + return;
> > +
> > + if (in->ampdu_tx_tid_mask & (1 << tid))  {
> > + sc->ba_flags |= IWM_TX_BA_STOP;
> > + sc->tx_ba_stop.tid_mask |= (1 << tid);
> > + iwm_add_task(sc, systq, &sc->ba_task);
> > + }
> > +}
> > +
> > +void
> >  iwm_set_hw_address_8000(struct iwm_softc *sc, struct iwm_nvm_data *data,
> >      const uint16_t *mac_override, const uint16_t *nvm_hw)
> >  {
> > @@ -4238,13 +4458,178 @@ iwm_rx_mpdu_mq(struct iwm_softc *sc, struct mbuf *m, v
> >  }
> >  
> >  void
> > -iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
> > -    struct iwm_node *in, int txmcs, int txrate)
> > +iwm_txq_advance(struct iwm_softc *sc, struct iwm_tx_ring *ring, int idx)
> >  {
> > + struct iwm_tx_data *txd;
> > +
> > + while (ring->tail != idx) {
> > + txd = &ring->data[ring->tail];
> > + if (txd->m != NULL) {
> > + if (ring->qid < IWM_FIRST_AGG_TX_QUEUE)
> > + DPRINTF(("%s: missed Tx completion: tail=%d "
> > +    "idx=%d\n", __func__, ring->tail, idx));
> > + iwm_reset_sched(sc, ring->qid, ring->tail, IWM_STATION_ID);
> > + iwm_txd_done(sc, txd);
> > + ring->queued--;
> > + }
> > + ring->tail = (ring->tail + 1) % IWM_TX_RING_COUNT;
> > + }
> > +}
> > +
> > +void
> > +iwm_ampdu_tx_done(struct iwm_softc *sc, struct iwm_cmd_header *cmd_hdr,
> > +    struct iwm_node *in, struct iwm_tx_ring *txq, uint32_t initial_rate,
> > +    uint8_t nframes, uint8_t failure_frame, uint16_t ssn, int status,
> > +    struct iwm_agg_tx_status *agg_status)
> > +{
> >   struct ieee80211com *ic = &sc->sc_ic;
> > + int tid = cmd_hdr->qid - IWM_FIRST_AGG_TX_QUEUE;
> > + struct iwm_tx_data *txdata = &txq->data[cmd_hdr->idx];
> >   struct ieee80211_node *ni = &in->in_ni;
> > + int txfail = (status != IWM_TX_STATUS_SUCCESS &&
> > +    status != IWM_TX_STATUS_DIRECT_DONE);
> > + struct ieee80211_tx_ba *ba;
> > +
> > + sc->sc_tx_timer = 0;
> > +
> > + if (ic->ic_state != IEEE80211_S_RUN)
> > + return;
> > +
> > + if (nframes > 1) {
> > + int ampdu_id, have_ampdu_id = 0, ampdu_size = 0;
> > + int i;
> > +
> > + /* Compute the size of this A-MPDU. */
> > + for (i = 0; i < nframes; i++) {
> > + uint8_t qid = agg_status[i].qid;
> > + uint8_t idx = agg_status[i].idx;
> > +
> > + if (qid != cmd_hdr->qid)
> > + continue;
> > +
> > + txdata = &txq->data[idx];
> > + if (txdata->m == NULL)
> > + continue;
> > +
> > + ampdu_size += txdata->totlen + IEEE80211_CRC_LEN;
> > + }
> > +
> > + /*
> > + * For each subframe collect Tx status, retries, and Tx rate.
> > + * (The Tx rate is the same for all subframes in this batch.)
> > + */
> > + for (i = 0; i < nframes; i++) {
> > + uint8_t qid = agg_status[i].qid;
> > + uint8_t idx = agg_status[i].idx;
> > + uint16_t txstatus = (le16toh(agg_status[i].status) &
> > +    IWM_AGG_TX_STATE_STATUS_MSK);
> > + uint16_t trycnt = (le16toh(agg_status[i].status) &
> > +    IWM_AGG_TX_STATE_TRY_CNT_MSK) >>
> > +    IWM_AGG_TX_STATE_TRY_CNT_POS;
> > +
> > + if (qid != cmd_hdr->qid)
> > + continue;
> > +
> > + txdata = &txq->data[idx];
> > + if (txdata->m == NULL)
> > + continue;
> > +
> > + if (initial_rate & IWM_RATE_MCS_HT_MSK)
> > + txdata->ampdu_txmcs = (initial_rate &
> > +    (IWM_RATE_HT_MCS_RATE_CODE_MSK |
> > +    IWM_RATE_HT_MCS_NSS_MSK));
> > + if (txstatus != IWM_AGG_TX_STATE_TRANSMITTED)
> > + txdata->txfail++;
> > + if (trycnt > 1)
> > + txdata->retries++;
> > +
> > + /*
> > + * Assign a common ID to all subframes of this A-MPDU.
> > + * This ID will be used during Tx rate control to
> > + * infer the ACK status of individual subframes.
> > + */
> > + if (!have_ampdu_id) {
> > + ampdu_id = txdata->in->next_ampdu_id++;
> > + have_ampdu_id = 1;
> > + }
> > + txdata->ampdu_id = ampdu_id;
> > +
> > + /*
> > + * We will also need to know the total number of
> > + * subframes and the size of this A-MPDU. We store
> > + * this redundantly on each subframe because firmware
> > + * only reports acknowledged subframes via compressed
> > + * block-ack notification. This way we will know what
> > + * the total number of subframes and size were even if
> > + * just one of these subframes gets acknowledged.
> > + */
> > + txdata->ampdu_nframes = nframes;
> > + txdata->ampdu_size = ampdu_size;
> > + }
> > + return;
> > + }
> > +
> > + if (ni == NULL)
> > + return;
> > +
> > + ba = &ni->ni_tx_ba[tid];
> > + if (ba->ba_state != IEEE80211_BA_AGREED)
> > + return;
> > +
> > + /* This is a final single-frame Tx attempt. */
> > + DPRINTFN(3, ("%s: final tx status=0x%x qid=%d queued=%d idx=%d ssn=%u "
> > +    "bitmap=0x%llx\n", __func__, status, desc->qid, txq->queued,
> > +    desc->idx, ssn, ba->ba_bitmap));
> > +
> > + /*
> > + * Skip rate control if our Tx rate is fixed.
> > + * Don't report frames to MiRA which were sent at a different
> > + * Tx rate than ni->ni_txmcs.
> > + */
> > + if (ic->ic_fixed_mcs == -1 && txdata->txmcs == ni->ni_txmcs) {
> > + in->in_mn.frames++;
> > + in->in_mn.agglen = 1;
> > + in->in_mn.ampdu_size = txdata->totlen + IEEE80211_CRC_LEN;
> > + if (failure_frame > 0)
> > + in->in_mn.retries++;
> > + if (txfail)
> > + in->in_mn.txfail++;
> > + iwm_mira_choose(sc, ni);
> > + }
> > +
> > + if (txfail)
> > + ieee80211_tx_compressed_bar(ic, ni, tid, ssn);
> > + else if (!SEQ_LT(ssn, ba->ba_winstart)) {
> > + /*
> > + * Move window forward if SSN lies beyond end of window,
> > + * otherwise we can't record the ACK for this frame.
> > + * Non-acked frames which left holes in the bitmap near
> > + * the beginning of the window must be discarded.
> > + */
> > + uint16_t s = ssn;
> > + while (SEQ_LT(ba->ba_winend, s)) {
> > + ieee80211_output_ba_move_window(ic, ni, tid, s);
> > + iwm_txq_advance(sc, txq, IWM_AGG_SSN_TO_TXQ_IDX(s));
> > + s = (s + 1) % 0xfff;
> > + }
> > + /* SSN should now be within window; set corresponding bit. */
> > + ieee80211_output_ba_record_ack(ic, ni, tid, ssn);
> > + }
> > +
> > + /* Move window forward up to the first hole in the bitmap. */
> > + ieee80211_output_ba_move_window_to_first_unacked(ic, ni, tid, ssn);
> > + iwm_txq_advance(sc, txq, IWM_AGG_SSN_TO_TXQ_IDX(ba->ba_winstart));
> > +
> > + iwm_clear_oactive(sc, txq);
> > +}
> > +
> > +void
> > +iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_tx_resp *tx_resp,
> > +    struct iwm_node *in, int txmcs, int txrate, int qid)
> > +{
> > + struct ieee80211com *ic = &sc->sc_ic;
> > + struct ieee80211_node *ni = &in->in_ni;
> >   struct ifnet *ifp = IC2IFP(ic);
> > - struct iwm_tx_resp *tx_resp = (void *)pkt->data;
> >   int status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK;
> >   int txfail;
> >  
> > @@ -4277,22 +4662,8 @@ iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_
> >   in->in_mn.retries += tx_resp->failure_frame;
> >   if (txfail)
> >   in->in_mn.txfail += tx_resp->frame_count;
> > - if (ic->ic_state == IEEE80211_S_RUN) {
> > - int best_mcs;
> > -
> > - ieee80211_mira_choose(&in->in_mn, ic, &in->in_ni);
> > - /*
> > - * If MiRA has chosen a new TX rate we must update
> > - * the firwmare's LQ rate table from process context.
> > - * ni_txmcs may change again before the task runs so
> > - * cache the chosen rate in the iwm_node structure.
> > - */
> > - best_mcs = ieee80211_mira_get_best_mcs(&in->in_mn);
> > - if (best_mcs != in->chosen_txmcs) {
> > - in->chosen_txmcs = best_mcs;
> > - iwm_setrates(in, 1);
> > - }
> > - }
> > + if (ic->ic_state == IEEE80211_S_RUN)
> > + iwm_mira_choose(sc, ni);
> >   }
> >  
> >   if (txfail)
> > @@ -4313,49 +4684,91 @@ iwm_txd_done(struct iwm_softc *sc, struct iwm_tx_data
> >   KASSERT(txd->in);
> >   ieee80211_release_node(ic, &txd->in->in_ni);
> >   txd->in = NULL;
> > +
> > + txd->retries = 0;
> > + txd->txfail = 0;
> > + txd->txmcs = 0;
> > + txd->ampdu_txmcs = 0;
> > + txd->txrate = 0;
> >  }
> >  
> >  void
> >  iwm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
> >      struct iwm_rx_data *data)
> >  {
> > - struct ieee80211com *ic = &sc->sc_ic;
> > - struct ifnet *ifp = IC2IFP(ic);
> >   struct iwm_cmd_header *cmd_hdr = &pkt->hdr;
> >   int idx = cmd_hdr->idx;
> >   int qid = cmd_hdr->qid;
> >   struct iwm_tx_ring *ring = &sc->txq[qid];
> >   struct iwm_tx_data *txd;
> > + struct iwm_tx_resp *tx_resp = (void *)pkt->data;
> > + uint32_t ssn;
> > + uint32_t len = iwm_rx_packet_len(pkt);
> >  
> >   bus_dmamap_sync(sc->sc_dmat, data->map, 0, IWM_RBUF_SIZE,
> >      BUS_DMASYNC_POSTREAD);
> >  
> >   sc->sc_tx_timer = 0;
> >  
> > + /* Sanity checks. */
> > + if (sizeof(*tx_resp) > len)
> > + return;
> > + if (qid < IWM_FIRST_AGG_TX_QUEUE && tx_resp->frame_count > 1)
> > + return;
> > + if (qid >= IWM_FIRST_AGG_TX_QUEUE && sizeof(*tx_resp) + sizeof(ssn) +
> > +    tx_resp->frame_count * sizeof(tx_resp->status) > len)
> > + return;
> > +
> > + /*
> > + * In the multi-frame case the firmware has just transmitted a bunch
> > + * of frames in an A-MPDU. The final Tx status of those frames won't
> > + * be known until the peer ACKs subframes with a block ack or firmware
> > + * gives up on a particular subframe.
> > + * Subframes for which the firmware never sees an ACK will be retried
> > + * and will eventually arrive here as a single-frame Tx failure.
> > + * So there is nothing to do, for now.
> > + */
> > + if (tx_resp->frame_count != 1)
> > + return;
> > +
> >   txd = &ring->data[idx];
> >   if (txd->m == NULL)
> >   return;
> >  
> > - iwm_rx_tx_cmd_single(sc, pkt, txd->in, txd->txmcs, txd->txrate);
> > - iwm_txd_done(sc, txd);
> > + if (qid >= IWM_FIRST_AGG_TX_QUEUE) {
> > + int status;
> >  
> > - /*
> > - * XXX Sometimes we miss Tx completion interrupts.
> > - * We cannot check Tx success/failure for affected frames; just free
> > - * the associated mbuf and release the associated node reference.
> > - */
> > - while (ring->tail != idx) {
> > - txd = &ring->data[ring->tail];
> > - if (txd->m != NULL) {
> > - DPRINTF(("%s: missed Tx completion: tail=%d idx=%d\n",
> > -    __func__, ring->tail, idx));
> > - iwm_txd_done(sc, txd);
> > - ring->queued--;
> > - }
> > - ring->tail = (ring->tail + 1) % IWM_TX_RING_COUNT;
> > + memcpy(&ssn, &tx_resp->status + tx_resp->frame_count, sizeof(ssn));
> > + ssn = le32toh(ssn) & 0xfff;
> > + status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK;
> > + iwm_ampdu_tx_done(sc, cmd_hdr, txd->in, ring,
> > +    le32toh(tx_resp->initial_rate), tx_resp->frame_count,
> > +    tx_resp->failure_frame, ssn, status, &tx_resp->status);
> > + } else {
> > + iwm_rx_tx_cmd_single(sc, tx_resp, txd->in, txd->txmcs,
> > +    txd->txrate, qid);
> > + iwm_reset_sched(sc, qid, idx, IWM_STATION_ID);
> > + iwm_txd_done(sc, txd);
> > + ring->queued--;
> > +
> > + /*
> > + * XXX Sometimes we miss Tx completion interrupts.
> > + * We cannot check Tx success/failure for affected frames;
> > + * just free the associated mbuf and release the associated
> > + * node reference.
> > + */
> > + iwm_txq_advance(sc, ring, idx);
> > + iwm_clear_oactive(sc, ring);
> >   }
> > +}
> >  
> > - if (--ring->queued < IWM_TX_RING_LOMARK) {
> > +void
> > +iwm_clear_oactive(struct iwm_softc *sc, struct iwm_tx_ring *ring)
> > +{
> > + struct ieee80211com *ic = &sc->sc_ic;
> > + struct ifnet *ifp = IC2IFP(ic);
> > +
> > + if (ring->queued < IWM_TX_RING_LOMARK) {
> >   sc->qfullmsk &= ~(1 << ring->qid);
> >   if (sc->qfullmsk == 0 && ifq_is_oactive(&ifp->if_snd)) {
> >   ifq_clr_oactive(&ifp->if_snd);
> > @@ -4370,6 +4783,183 @@ iwm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_pack
> >  }
> >  
> >  void
> > +iwm_mira_choose(struct iwm_softc *sc, struct ieee80211_node *ni)
> > +{
> > + struct ieee80211com *ic = &sc->sc_ic;
> > + struct iwm_node *in = (void *)ni;
> > + int best_mcs = ieee80211_mira_get_best_mcs(&in->in_mn);
> > +
> > + ieee80211_mira_choose(&in->in_mn, ic, ni);
> > +
> > + /*
> > + * Update firmware's LQ retry table if MiRA has chosen a new MCS.
> > + *
> > + * We only need to do this if the best MCS has changed because
> > + * we ask firmware to use a fixed MCS while MiRA is probing a
> > + * candidate MCS.
> > + * While not probing we ask firmware to retry at lower rates in case
> > + * Tx at the newly chosen best MCS ends up failing, and then report
> > + * any resulting Tx retries to MiRA in order to trigger probing.
> > + */
> > + if (best_mcs != ieee80211_mira_get_best_mcs(&in->in_mn)) {
> > + in->chosen_txmcs = best_mcs;
> > + iwm_setrates(in, 1);
> > + }
> > +}
> > +
> > +void
> > +iwm_ampdu_rate_control(struct iwm_softc *sc, struct ieee80211_node *ni,
> > +    struct iwm_tx_ring *txq, int tid, uint16_t seq, uint16_t ssn)
> > +{
> > + struct ieee80211com *ic = &sc->sc_ic;
> > + struct iwm_node *in = (void *)ni;
> > + struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
> > + int min_ampdu_id, max_ampdu_id, id;
> > + int idx, end_idx;
> > +
> > + /* Determine the min/max IDs we assigned to AMPDUs in this range. */
> > + idx = IWM_AGG_SSN_TO_TXQ_IDX(seq);
> > + end_idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
> > + min_ampdu_id = txq->data[idx].ampdu_id;
> > + max_ampdu_id = min_ampdu_id;
> > + while (idx != end_idx) {
> > + struct iwm_tx_data *txdata = &txq->data[idx];
> > +
> > + if (txdata->m != NULL) {
> > + if (min_ampdu_id > txdata->ampdu_id)
> > + min_ampdu_id = txdata->ampdu_id;
> > + if (max_ampdu_id < txdata->ampdu_id)
> > + max_ampdu_id = txdata->ampdu_id;
> > + }
> > +
> > + idx = (idx + 1) % IWM_TX_RING_COUNT;
> > + }
> > +
> > + /*
> > + * Update Tx rate statistics for A-MPDUs before firmware's BA window.
> > + */
> > + for (id = min_ampdu_id; id <= max_ampdu_id; id++) {
> > + int have_ack = 0, bit = 0;
> > + idx = IWM_AGG_SSN_TO_TXQ_IDX(seq);
> > + end_idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
> > + in->in_mn.agglen = 0;
> > + in->in_mn.ampdu_size = 0;
> > + while (idx != end_idx) {
> > + struct iwm_tx_data *txdata = &txq->data[idx];
> > + uint16_t s = (seq + bit) & 0xfff;
> > + /*
> > + * We can assume that this subframe has been ACKed
> > + * because ACK failures come as single frames and
> > + * before failing an A-MPDU subframe the firmware
> > + * sends it as a single frame at least once.
> > + *
> > + * However, when this A-MPDU was transmitted we
> > + * learned how many subframes it contained.
> > + * So if firmware isn't reporting all subframes now
> > + * we can deduce an ACK failure for missing frames.
> > + */
> > + if (txdata->m != NULL && txdata->ampdu_id == id &&
> > +    txdata->ampdu_txmcs == ni->ni_txmcs &&
> > +    txdata->ampdu_nframes > 0 &&
> > +    (SEQ_LT(ba->ba_winend, s) ||
> > +    (ba->ba_bitmap & (1 << bit)) == 0)) {
> > + have_ack++;
> > + in->in_mn.frames = txdata->ampdu_nframes;
> > + in->in_mn.agglen = txdata->ampdu_nframes;
> > + in->in_mn.ampdu_size = txdata->ampdu_size;
> > + if (txdata->retries > 1)
> > + in->in_mn.retries++;
> > + if (!SEQ_LT(ba->ba_winend, s))
> > + ieee80211_output_ba_record_ack(ic, ni,
> > +    tid, s);
> > + }
> > +
> > + idx = (idx + 1) % IWM_TX_RING_COUNT;
> > + bit++;
> > + }
> > +
> > + if (have_ack > 0) {
> > + in->in_mn.txfail = in->in_mn.frames - have_ack;
> > + iwm_mira_choose(sc, ni);
> > + }
> > + }
> > +}
> > +
> > +void
> > +iwm_rx_ba(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
> > +    struct iwm_rx_data *data)
> > +{
> > + struct iwm_ba_notif *ban = (void *)pkt->data;
> > + struct ieee80211com *ic = &sc->sc_ic;
> > + struct ieee80211_node *ni;
> > + struct ieee80211_tx_ba *ba;
> > + struct iwm_node *in;
> > + struct iwm_tx_ring *ring;
> > + uint16_t seq, ssn, idx;
> > + int qid;
> > +
> > + if (ic->ic_state != IEEE80211_S_RUN)
> > + return;
> > +
> > + if (iwm_rx_packet_payload_len(pkt) < sizeof(*ban))
> > + return;
> > +
> > + if (ban->sta_id != IWM_STATION_ID ||
> > +    !IEEE80211_ADDR_EQ(ic->ic_bss->ni_macaddr, ban->sta_addr))
> > + return;
> > +
> > + ni = ic->ic_bss;
> > + in = (void *)ni;
> > +
> > + qid = le16toh(ban->scd_flow);
> > + if (qid < IWM_FIRST_AGG_TX_QUEUE || qid > IWM_LAST_AGG_TX_QUEUE)
> > + return;
> > +
> > + /* Protect against a firmware bug where the queue/TID are off. */
> > + if (qid != IWM_FIRST_AGG_TX_QUEUE + ban->tid)
> > + return;
> > +
> > + ba = &ni->ni_tx_ba[ban->tid];
> > + if (ba->ba_state != IEEE80211_BA_AGREED)
> > + return;
> > +
> > + ring = &sc->txq[qid];
> > + ssn = le16toh(ban->scd_ssn); /* BA window starting sequence number */
> > + idx = IWM_AGG_SSN_TO_TXQ_IDX(ssn);
> > +
> > + /*
> > + * The first bit in ban->bitmap corresponds to the sequence number
> > + * stored in the sequence control field ban->seq_ctl.
> > + * Multiple BA notifications in a row may be using this number, with
> > + * additional bits being set in ban->bitmap. It is unclear how the
> > + * firmware decides to shift this window forward.
> > + */
> > + seq = le16toh(ban->seq_ctl) >> IEEE80211_SEQ_SEQ_SHIFT;
> > +
> > + /*
> > + * The firmware's new BA window starting sequence number,
> > + * ban->scd_ssn, corresponds to the first hole in the bitmap, implying
> > + * that all frames between 'seq' and 'ssn' have been acked.
> > + */
> > + ssn = le16toh(ban->scd_ssn);
> > +
> > + /* Skip rate control if our Tx rate is fixed. */
> > + if (ic->ic_fixed_mcs != -1)
> > + iwm_ampdu_rate_control(sc, ni, ring, ban->tid, seq, ssn);
> > +
> > + /*
> > + * SSN corresponds to the first (perhaps not yet transmitted) frame
> > + * in firmware's BA window. Firmware is not going to retransmit any
> > + * frames before its BA window so mark them all as done.
> > + */
> > + if (SEQ_LT(ba->ba_winstart, ssn)) {
> > + ieee80211_output_ba_move_window(ic, ni, ban->tid, ssn);
> > + iwm_txq_advance(sc, ring, IWM_AGG_SSN_TO_TXQ_IDX(ssn));
> > + iwm_clear_oactive(sc, ring);
> > + }
> > +}
> > +
> > +void
> >  iwm_rx_bmiss(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
> >      struct iwm_rx_data *data)
> >  {
> > @@ -4638,9 +5228,6 @@ iwm_send_cmd(struct iwm_softc *sc, struct iwm_host_cmd
> >   }
> >   }
> >  
> > -#if 0
> > - iwm_update_sched(sc, ring->qid, ring->cur, 0, 0);
> > -#endif
> >   /* Kick command ring. */
> >   ring->queued++;
> >   ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT;
> > @@ -4771,7 +5358,6 @@ iwm_cmd_done(struct iwm_softc *sc, int qid, int idx, i
> >   }
> >  }
> >  
> > -#if 0
> >  /*
> >   * necessary only for block ack mode
> >   */
> > @@ -4780,32 +5366,49 @@ iwm_update_sched(struct iwm_softc *sc, int qid, int id
> >      uint16_t len)
> >  {
> >   struct iwm_agn_scd_bc_tbl *scd_bc_tbl;
> > - uint16_t w_val;
> > + uint16_t val;
> >  
> >   scd_bc_tbl = sc->sched_dma.vaddr;
> >  
> > - len += 8; /* magic numbers came naturally from paris */
> > + len += IWM_TX_CRC_SIZE + IWM_TX_DELIMITER_SIZE;
> >   if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_DW_BC_TABLE)
> >   len = roundup(len, 4) / 4;
> >  
> > - w_val = htole16(sta_id << 12 | len);
> > + val = htole16(sta_id << 12 | len);
> >  
> > + bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> > +    0, sc->sched_dma.size, BUS_DMASYNC_PREWRITE);
> > +
> >   /* Update TX scheduler. */
> > - scd_bc_tbl[qid].tfd_offset[idx] = w_val;
> > + scd_bc_tbl[qid].tfd_offset[idx] = val;
> > + if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP)
> > + scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = val;
> > +
> >   bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> > -    (char *)(void *)w - (char *)(void *)sc->sched_dma.vaddr,
> > -    sizeof(uint16_t), BUS_DMASYNC_PREWRITE);
> > +    0, sc->sched_dma.size, BUS_DMASYNC_POSTWRITE);
> > +}
> >  
> > - /* I really wonder what this is ?!? */
> > - if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP) {
> > - scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = w_val;
> > - bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> > -    (char *)(void *)(w + IWM_TFD_QUEUE_SIZE_MAX) -
> > -    (char *)(void *)sc->sched_dma.vaddr,
> > -    sizeof (uint16_t), BUS_DMASYNC_PREWRITE);
> > - }
> > +void
> > +iwm_reset_sched(struct iwm_softc *sc, int qid, int idx, uint8_t sta_id)
> > +{
> > + struct iwm_agn_scd_bc_tbl *scd_bc_tbl;
> > + uint16_t val;
> > +
> > + scd_bc_tbl = sc->sched_dma.vaddr;
> > +
> > + val = htole16(1 | (sta_id << 12));
> > +
> > + bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> > +    0, sc->sched_dma.size, BUS_DMASYNC_PREWRITE);
> > +
> > + /* Update TX scheduler. */
> > + scd_bc_tbl[qid].tfd_offset[idx] = val;
> > + if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP)
> > + scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = val;
> > +
> > + bus_dmamap_sync(sc->sc_dmat, sc->sched_dma.map,
> > +    0, sc->sched_dma.size, BUS_DMASYNC_POSTWRITE);
> >  }
> > -#endif
> >  
> >  /*
> >   * Fill in various bit for management frames, and leave them
> > @@ -4897,19 +5500,24 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
> >   uint32_t flags;
> >   u_int hdrlen;
> >   bus_dma_segment_t *seg;
> > - uint8_t tid, type;
> > + uint8_t tid, type, subtype;
> >   int i, totlen, err, pad;
> > - int hdrlen2, rtsthres = ic->ic_rtsthreshold;
> > + int qid, hasqos, rtsthres = ic->ic_rtsthreshold;
> >  
> >   wh = mtod(m, struct ieee80211_frame *);
> > - hdrlen = ieee80211_get_hdrlen(wh);
> >   type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
> > + subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
> > + if (type == IEEE80211_FC0_TYPE_CTL)
> > + hdrlen = sizeof(struct ieee80211_frame_min);
> > + else
> > + hdrlen = ieee80211_get_hdrlen(wh);
> >  
> > - hdrlen2 = (ieee80211_has_qos(wh)) ?
> > -    sizeof (struct ieee80211_qosframe) :
> > -    sizeof (struct ieee80211_frame);
> > + hasqos = ieee80211_has_qos(wh);
> >  
> > - tid = 0;
> > + if (type == IEEE80211_FC0_TYPE_DATA)
> > + tid = IWM_TID_NON_QOS;
> > + else
> > + tid = IWM_MAX_TID_COUNT;
> >  
> >   /*
> >   * Map EDCA categories to Tx data queues.
> > @@ -4918,14 +5526,31 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
> >   * need to share Tx queues between stations because we only implement
> >   * client mode; the firmware's station table contains only one entry
> >   * which represents our access point.
> > - *
> > - * Tx aggregation will require additional queues (one queue per TID
> > - * for which aggregation is enabled) but we do not implement this yet.
> >   */
> >   if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_DQA_SUPPORT))
> > - ring = &sc->txq[IWM_DQA_MIN_MGMT_QUEUE + ac];
> > + qid = IWM_DQA_MIN_MGMT_QUEUE + ac;
> >   else
> > - ring = &sc->txq[ac];
> > + qid = ac;
> > +
> > + /* If possible, put this frame on an aggregation queue. */
> > + if (hasqos) {
> > + struct ieee80211_tx_ba *ba;
> > + uint16_t qos = ieee80211_get_qos(wh);
> > + int qostid = qos & IEEE80211_QOS_TID;
> > + int qosac = ieee80211_up_to_ac(ic, qostid);
> > +
> > + ba = &ni->ni_tx_ba[qostid];
> > + if (!IEEE80211_IS_MULTICAST(wh->i_addr1) &&
> > +    type == IEEE80211_FC0_TYPE_DATA &&
> > +    (in->ampdu_tx_tid_mask & (1 << qostid)) &&
> > +    ba->ba_state == IEEE80211_BA_AGREED) {
> > + qid = IWM_FIRST_AGG_TX_QUEUE + qostid;
> > + tid = qostid;
> > + ac = qosac;
> > + }
> > + }
> > +
> > + ring = &sc->txq[qid];
> >   desc = &ring->desc[ring->cur];
> >   memset(desc, 0, sizeof(*desc));
> >   data = &ring->data[ring->cur];
> > @@ -5004,14 +5629,28 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
> >   tx->sta_id = IWM_STATION_ID;
> >  
> >   if (type == IEEE80211_FC0_TYPE_MGT) {
> > - uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
> > -
> >   if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ ||
> >      subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ)
> >   tx->pm_frame_timeout = htole16(3);
> >   else
> >   tx->pm_frame_timeout = htole16(2);
> >   } else {
> > + if (type == IEEE80211_FC0_TYPE_CTL &&
> > +    subtype == IEEE80211_FC0_SUBTYPE_BAR) {
> > + struct ieee80211_frame_min *wbar;
> > + uint8_t *frm;
> > + uint16_t ctl;
> > +
> > + flags |= IWM_TX_CMD_FLG_ACK | IWM_TX_CMD_FLG_BAR;
> > + tx->data_retry_limit = IWM_BAR_DFAULT_RETRY_LIMIT;
> > +
> > + wbar = mtod(m, struct ieee80211_frame_min *);
> > + frm = (uint8_t *)&wbar[1];
> > + memcpy(&ctl, frm, sizeof(ctl));
> > + tid = (le16toh(ctl) & IEEE80211_BA_TID_INFO_MASK) >>
> > +    IEEE80211_BA_TID_INFO_SHIFT;
> > + }
> > +
> >   tx->pm_frame_timeout = htole16(0);
> >   }
> >  
> > @@ -5058,7 +5697,9 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
> >   tx->sec_ctl = 0;
> >   }
> >  
> > - flags |= IWM_TX_CMD_FLG_BT_DIS | IWM_TX_CMD_FLG_SEQ_CTL;
> > + flags |= IWM_TX_CMD_FLG_BT_DIS;
> > + if (!hasqos)
> > + flags |= IWM_TX_CMD_FLG_SEQ_CTL;
> >  
> >   tx->tx_flags |= htole32(flags);
> >  
> > @@ -5085,9 +5726,11 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
> >   }
> >   }
> >   data->m = m;
> > + data->totlen = totlen;
> >   data->in = in;
> >   data->txmcs = ni->ni_txmcs;
> >   data->txrate = ni->ni_txrate;
> > + data->ampdu_txmcs = ni->ni_txmcs; /* updated upon Tx interrupt */
> >  
> >   /* Fill TX descriptor. */
> >   desc->num_tbs = 2 + data->map->dm_nsegs;
> > @@ -5118,9 +5761,7 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
> >      (char *)(void *)desc - (char *)(void *)ring->desc_dma.vaddr,
> >      sizeof (*desc), BUS_DMASYNC_PREWRITE);
> >  
> > -#if 0
> >   iwm_update_sched(sc, ring->qid, ring->cur, tx->sta_id, le16toh(tx->len));
> > -#endif
> >  
> >   /* Kick TX ring. */
> >   ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT;
> > @@ -5336,6 +5977,7 @@ iwm_add_sta_cmd(struct iwm_softc *sc, struct iwm_node
> >   uint32_t status;
> >   size_t cmdsize;
> >   struct ieee80211com *ic = &sc->sc_ic;
> > + uint16_t tid_disable_tx = 0xffff;
> >  
> >   if (!update && (sc->sc_flags & IWM_FLAG_STA_ACTIVE))
> >   panic("STA already added");
> > @@ -5362,7 +6004,7 @@ iwm_add_sta_cmd(struct iwm_softc *sc, struct iwm_node
> >   else
> >   qid = IWM_AUX_QUEUE;
> >   add_sta_cmd.tfd_queue_msk |= htole32(1 << qid);
> > - } else if (!update) {
> > + } else {
> >   int ac;
> >   for (ac = 0; ac < EDCA_NUM_AC; ac++) {
> >   int qid = ac;
> > @@ -5371,15 +6013,33 @@ iwm_add_sta_cmd(struct iwm_softc *sc, struct iwm_node
> >   qid += IWM_DQA_MIN_MGMT_QUEUE;
> >   add_sta_cmd.tfd_queue_msk |= htole32(1 << qid);
> >   }
> > - IEEE80211_ADDR_COPY(&add_sta_cmd.addr, in->in_ni.ni_bssid);
> >   }
> > + if (!update) {
> > + if (ic->ic_opmode == IEEE80211_M_MONITOR)
> > + IEEE80211_ADDR_COPY(&add_sta_cmd.addr,
> > +    etherbroadcastaddr);
> > + else
> > + IEEE80211_ADDR_COPY(&add_sta_cmd.addr,
> > +    in->in_ni.ni_bssid);
> > + }
> >   add_sta_cmd.add_modify = update ? 1 : 0;
> >   add_sta_cmd.station_flags_msk
> >      |= htole32(IWM_STA_FLG_FAT_EN_MSK | IWM_STA_FLG_MIMO_EN_MSK);
> > - add_sta_cmd.tid_disable_tx = htole16(0xffff);
> > - if (update)
> > - add_sta_cmd.modify_mask |= (IWM_STA_MODIFY_TID_DISABLE_TX);
> > + if (update) {
> > + int tid, qid;
> > + for (tid = 0; tid < IWM_MAX_TID_COUNT; tid++) {
> > + if ((in->ampdu_tx_tid_mask & (1 << tid)) == 0)
> > + continue;
> >  
> > + qid = IWM_FIRST_AGG_TX_QUEUE + tid;
> > + add_sta_cmd.tfd_queue_msk |= htole32(1 << qid);
> > + tid_disable_tx &= ~(1 << tid);
> > + add_sta_cmd.modify_mask |= (IWM_STA_MODIFY_QUEUES |
> > +    IWM_STA_MODIFY_TID_DISABLE_TX);
> > + }
> > + }
> > + add_sta_cmd.tid_disable_tx = htole16(tid_disable_tx);
> > +
> >   if (in->in_ni.ni_flags & IEEE80211_NODE_HT) {
> >   add_sta_cmd.station_flags_msk
> >      |= htole32(IWM_STA_FLG_MAX_AGG_SIZE_MSK |
> > @@ -5444,7 +6104,7 @@ iwm_add_aux_sta(struct iwm_softc *sc)
> >   if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_DQA_SUPPORT)) {
> >   qid = IWM_DQA_AUX_QUEUE;
> >   err = iwm_enable_txq(sc, IWM_AUX_STA_ID, qid,
> > -    IWM_TX_FIFO_MCAST);
> > +    IWM_TX_FIFO_MCAST, 0, IWM_MAX_TID_COUNT, 0);
> >   } else {
> >   qid = IWM_AUX_QUEUE;
> >   err = iwm_enable_ac_txq(sc, qid, IWM_TX_FIFO_MCAST);
> > @@ -6582,6 +7242,9 @@ iwm_auth(struct iwm_softc *sc)
> >  
> >   splassert(IPL_NET);
> >  
> > + in->ampdu_rx_tid_mask = 0;
> > + in->ampdu_tx_tid_mask = 0;
> > +
> >   if (ic->ic_opmode == IEEE80211_M_MONITOR)
> >   sc->sc_phyctxt[0].channel = ic->ic_ibss_chan;
> >   else
> > @@ -7156,11 +7819,7 @@ iwm_setrates(struct iwm_node *in, int async)
> >  
> >   lqcmd.agg_time_limit = htole16(4000); /* 4ms */
> >   lqcmd.agg_disable_start_th = 3;
> > -#ifdef notyet
> >   lqcmd.agg_frame_cnt_limit = 0x3f;
> > -#else
> > - lqcmd.agg_frame_cnt_limit = 1; /* tx agg disabled */
> > -#endif
> >  
> >   cmd.data[0] = &lqcmd;
> >   iwm_send_cmd(sc, &cmd);
> > @@ -7892,7 +8551,7 @@ iwm_init_hw(struct iwm_softc *sc)
> >   else
> >   qid = IWM_AUX_QUEUE;
> >   err = iwm_enable_txq(sc, IWM_MONITOR_STA_ID, qid,
> > -    iwm_ac_to_tx_fifo[EDCA_AC_BE]);
> > +    iwm_ac_to_tx_fifo[EDCA_AC_BE], 0, IWM_MAX_TID_COUNT, 0);
> >   if (err) {
> >   printf("%s: could not enable monitor inject Tx queue "
> >      "(error %d)\n", DEVNAME(sc), err);
> > @@ -7906,7 +8565,7 @@ iwm_init_hw(struct iwm_softc *sc)
> >   else
> >   qid = ac;
> >   err = iwm_enable_txq(sc, IWM_STATION_ID, qid,
> > -    iwm_ac_to_tx_fifo[ac]);
> > +    iwm_ac_to_tx_fifo[ac], 0, IWM_TID_NON_QOS, 0);
> >   if (err) {
> >   printf("%s: could not enable Tx queue %d "
> >      "(error %d)\n", DEVNAME(sc), ac, err);
> > @@ -8578,6 +9237,10 @@ iwm_rx_pkt(struct iwm_softc *sc, struct iwm_rx_data *d
> >   iwm_rx_tx_cmd(sc, pkt, data);
> >   break;
> >  
> > + case IWM_BA_NOTIF:
> > + iwm_rx_ba(sc, pkt, data);
> > + break;
> > +
> >   case IWM_MISSED_BEACONS_NOTIFICATION:
> >   iwm_rx_bmiss(sc, pkt, data);
> >   break;
> > @@ -8943,9 +9606,9 @@ iwm_intr(void *arg)
> >   DPRINTF(("driver status:\n"));
> >   for (i = 0; i < IWM_MAX_QUEUES; i++) {
> >   struct iwm_tx_ring *ring = &sc->txq[i];
> > - DPRINTF(("  tx ring %2d: qid=%-2d cur=%-3d "
> > + DPRINTF(("  tx ring %2d: qid=%-2d tail=%-3d cur=%-3d "
> >      "queued=%-3d\n",
> > -    i, ring->qid, ring->cur, ring->queued));
> > +    i, ring->qid, ring->tail, ring->cur, ring->queued));
> >   }
> >   DPRINTF(("  rx ring: cur=%d\n", sc->rxq.cur));
> >   DPRINTF(("  802.11 state %s\n",
> > @@ -9053,9 +9716,9 @@ iwm_intr_msix(void *arg)
> >   DPRINTF(("driver status:\n"));
> >   for (i = 0; i < IWM_MAX_QUEUES; i++) {
> >   struct iwm_tx_ring *ring = &sc->txq[i];
> > - DPRINTF(("  tx ring %2d: qid=%-2d cur=%-3d "
> > + DPRINTF(("  tx ring %2d: qid=%-2d tail=%-3d cur=%-3d "
> >      "queued=%-3d\n",
> > -    i, ring->qid, ring->cur, ring->queued));
> > +    i, ring->qid, ring->tail, ring->cur, ring->queued));
> >   }
> >   DPRINTF(("  rx ring: cur=%d\n", sc->rxq.cur));
> >   DPRINTF(("  802.11 state %s\n",
> > @@ -9465,6 +10128,7 @@ iwm_attach(struct device *parent, struct device *self,
> >  
> >   /* Set device capabilities. */
> >   ic->ic_caps =
> > +    IEEE80211_C_QOS | IEEE80211_C_TX_AMPDU | /* A-MPDU */
> >      IEEE80211_C_WEP | /* WEP */
> >      IEEE80211_C_RSN | /* WPA/RSN */
> >      IEEE80211_C_SCANALL | /* device scans all channels at once */
> > @@ -9529,10 +10193,8 @@ iwm_attach(struct device *parent, struct device *self,
> >   ic->ic_update_htprot = iwm_update_htprot;
> >   ic->ic_ampdu_rx_start = iwm_ampdu_rx_start;
> >   ic->ic_ampdu_rx_stop = iwm_ampdu_rx_stop;
> > -#ifdef notyet
> >   ic->ic_ampdu_tx_start = iwm_ampdu_tx_start;
> >   ic->ic_ampdu_tx_stop = iwm_ampdu_tx_stop;
> > -#endif
> >   /*
> >   * We cannot read the MAC address without loading the
> >   * firmware from disk. Postpone until mountroot is done.
> > blob - 201ce69014b9422335a6d698cd4a3cc3f314b2b5
> > blob + 1e2e4e01e2a98f60221b72fc6e82a1246f7b9cef
> > --- sys/dev/pci/if_iwmreg.h
> > +++ sys/dev/pci/if_iwmreg.h
> > @@ -1837,6 +1837,9 @@ struct iwm_agn_scd_bc_tbl {
> >   uint16_t tfd_offset[IWM_TFD_QUEUE_BC_SIZE];
> >  } __packed;
> >  
> > +#define IWM_TX_CRC_SIZE 4
> > +#define IWM_TX_DELIMITER_SIZE 4
> > +
> >  /* Maximum number of Tx queues. */
> >  #define IWM_MAX_QUEUES 31
> >  
> > @@ -1875,6 +1878,11 @@ struct iwm_agn_scd_bc_tbl {
> >  #define IWM_DQA_MIN_DATA_QUEUE 10
> >  #define IWM_DQA_MAX_DATA_QUEUE 31
> >  
> > +/* Reserve 8 DQA Tx queues, from 10 up to 17, for A-MPDU aggregation. */
> > +#define IWM_MAX_TID_COUNT 8
> > +#define IWM_FIRST_AGG_TX_QUEUE IWM_DQA_MIN_DATA_QUEUE
> > +#define IWM_LAST_AGG_TX_QUEUE (IWM_FIRST_AGG_TX_QUEUE + IWM_MAX_TID_COUNT - 1)
> > +
> >  /* legacy non-DQA queues; the legacy command queue uses a different number! */
> >  #define IWM_OFFCHANNEL_QUEUE 8
> >  #define IWM_CMD_QUEUE 9
> > @@ -4627,7 +4635,8 @@ struct iwm_lq_cmd {
> >  /*
> >   * TID for non QoS frames - to be written in tid_tspec
> >   */
> > -#define IWM_TID_NON_QOS IWM_MAX_TID_COUNT
> > +#define IWM_TID_NON_QOS 0
> > +#define IWM_TID_MGMT 15
> >  
> >  /*
> >   * Limits on the retransmissions - to be written in {data,rts}_retry_limit
> > @@ -4898,21 +4907,23 @@ struct iwm_tx_resp {
> >  /**
> >   * struct iwm_ba_notif - notifies about reception of BA
> >   * ( IWM_BA_NOTIF = 0xc5 )
> > - * @sta_addr_lo32: lower 32 bits of the MAC address
> > - * @sta_addr_hi16: upper 16 bits of the MAC address
> > + * @sta_addr: MAC address
> >   * @sta_id: Index of recipient (BA-sending) station in fw's station table
> >   * @tid: tid of the session
> > - * @seq_ctl: sequence control field from IEEE80211 frame header (it is unclear
> > - *  which frame this relates to; info or reverse engineering welcome)
> > + * @seq_ctl: sequence control field from IEEE80211 frame header (the first
> > + * bit in @bitmap corresponds to the sequence number stored here)
> >   * @bitmap: the bitmap of the BA notification as seen in the air
> >   * @scd_flow: the tx queue this BA relates to
> >   * @scd_ssn: the index of the last contiguously sent packet
> >   * @txed: number of Txed frames in this batch
> >   * @txed_2_done: number of Acked frames in this batch
> > + * @reduced_txp: power reduced according to TPC. This is the actual value and
> > + * not a copy from the LQ command. Thus, if not the first rate was used
> > + * for Tx-ing then this value will be set to 0 by FW.
> > + * @reserved1: reserved
> >   */
> >  struct iwm_ba_notif {
> > - uint32_t sta_addr_lo32;
> > - uint16_t sta_addr_hi16;
> > + uint8_t sta_addr[ETHER_ADDR_LEN];
> >   uint16_t reserved;
> >  
> >   uint8_t sta_id;
> > @@ -4923,6 +4934,7 @@ struct iwm_ba_notif {
> >   uint16_t scd_ssn;
> >   uint8_t txed;
> >   uint8_t txed_2_done;
> > + uint8_t reduced_txp;
> >   uint16_t reserved1;
> >  } __packed;
> >  
> > blob - 89abe2c1dbdf5ac3ccbf710994380502530ef2a8
> > blob + 7d9e26bffe0f1658c771bf85768797c23e94e147
> > --- sys/dev/pci/if_iwmvar.h
> > +++ sys/dev/pci/if_iwmvar.h
> > @@ -252,14 +252,26 @@ struct iwm_fw_paging {
> >  #define IWM_TX_RING_LOMARK 192
> >  #define IWM_TX_RING_HIMARK 224
> >  
> > +/* For aggregation queues, index must be aligned to frame sequence number. */
> > +#define IWM_AGG_SSN_TO_TXQ_IDX(x) ((x) & (IWM_TX_RING_COUNT - 1))
> > +
> >  struct iwm_tx_data {
> >   bus_dmamap_t map;
> >   bus_addr_t cmd_paddr;
> >   bus_addr_t scratch_paddr;
> >   struct mbuf *m;
> >   struct iwm_node *in;
> > + int totlen;
> > + int retries;
> > + int txfail;
> >   int txmcs;
> >   int txrate;
> > +
> > + /* A-MPDU subframes */
> > + int ampdu_id;
> > + int ampdu_txmcs;
> > + int ampdu_nframes;
> > + int ampdu_size;
> >  };
> >  
> >  struct iwm_tx_ring {
> > @@ -363,6 +375,12 @@ struct iwm_bf_data {
> >   int last_cqm_event;
> >  };
> >  
> > +struct iwm_ba_param {
> > + uint16_t tid_mask;
> > + uint16_t ssn[IWM_MAX_TID_COUNT];
> > + uint16_t winsize[IWM_MAX_TID_COUNT];
> > +};
> > +
> >  struct iwm_softc {
> >   struct device sc_dev;
> >   struct ieee80211com sc_ic;
> > @@ -381,10 +399,15 @@ struct iwm_softc {
> >  
> >   /* Task for firmware BlockAck setup/teardown and its arguments. */
> >   struct task ba_task;
> > - int ba_start;
> > - int ba_tid;
> > - uint16_t ba_ssn;
> > - uint16_t ba_winsize;
> > + int ba_flags;
> > +#define IWM_RX_BA_START 0x01
> > +#define IWM_TX_BA_START 0x02
> > +#define IWM_RX_BA_STOP 0x04
> > +#define IWM_TX_BA_STOP 0x08
> > + struct iwm_ba_param rx_ba_start;
> > + struct iwm_ba_param rx_ba_stop;
> > + struct iwm_ba_param tx_ba_start;
> > + struct iwm_ba_param tx_ba_stop;
> >  
> >   /* Task for HT protection updates. */
> >   struct task htprot_task;
> > @@ -407,6 +430,7 @@ struct iwm_softc {
> >   struct iwm_rx_ring rxq;
> >   int qfullmsk;
> >   int cmdqid;
> > + int qenablemsk;
> >  
> >   int sc_sf_state;
> >  
> > @@ -551,6 +575,12 @@ struct iwm_node {
> >   int chosen_txrate;
> >   struct ieee80211_mira_node in_mn;
> >   int chosen_txmcs;
> > +
> > + uint32_t next_ampdu_id;
> > +
> > + /* Currently active Rx/Tx block ack sessions; tracked per TID. */
> > + uint8_t ampdu_rx_tid_mask;
> > + uint8_t ampdu_tx_tid_mask;
> >  };
> >  #define IWM_STATION_ID 0
> >  #define IWM_AUX_STA_ID 1
> >
>
> --
>

--

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Sebastian Benoit-3
In reply to this post by Stefan Sperling-5
Stefan Sperling([hidden email]) on 2020.06.26 14:45:53 +0200:

> This patch adds support for 11n Tx aggregation to iwm(4).
>
> Please help with testing if you can by running the patch and using wifi
> as usual. Nothing should change, except that Tx speed may potentially
> improve. If you have time to run before/after performance measurements with
> tcpbench or such, that would be nice. But it's not required for testing.
>
> If Tx aggregation is active then netstat will show a non-zero output block ack
> agreement counter:
>
> $ netstat -W iwm0 | grep 'output block'
>         3 new output block ack agreements
>         0 output block ack agreements timed out

iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
iwm0: hw rev 0x230, fw ver 34.0.1, address 9c:da:3e:6f:51:a4

With intel firmware updated: intel-firmware-20200520v0->20200609v0: ok

With the patch I get a speedup from ca. 25 MBit/s to ca. 35 MBit/s.

However, I do not see any 'new output block ack agreements'.

        0 new output block ack agreements
        0 output block ack agreements timed out

This is with two TP-Link APs (Archer A7 or something like that).

/Benno

Reply | Threaded
Open this post in threaded view
|

Re: 11n Tx aggregation for iwm(4)

Sebastian Benoit-3
Sebastian Benoit([hidden email]) on 2020.06.29 16:18:03 +0200:

> Stefan Sperling([hidden email]) on 2020.06.26 14:45:53 +0200:
> > This patch adds support for 11n Tx aggregation to iwm(4).
> >
> > Please help with testing if you can by running the patch and using wifi
> > as usual. Nothing should change, except that Tx speed may potentially
> > improve. If you have time to run before/after performance measurements with
> > tcpbench or such, that would be nice. But it's not required for testing.
> >
> > If Tx aggregation is active then netstat will show a non-zero output block ack
> > agreement counter:
> >
> > $ netstat -W iwm0 | grep 'output block'
> >         3 new output block ack agreements
> >         0 output block ack agreements timed out
>
> iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
> iwm0: hw rev 0x230, fw ver 34.0.1, address 9c:da:3e:6f:51:a4
>
> With intel firmware updated: intel-firmware-20200520v0->20200609v0: ok

Heh, I meant to say with "iwm-firmware-20191022p1" as before, but copied the
wrong line and then wrote the correct thing for it :)