net80211: skip input block ack window gaps faster

classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

net80211: skip input block ack window gaps faster

Stefan Sperling-5
While measuring Tx performance at a fixed Tx rate with iwm(4) I observed
unexpected dips in throughput measured by tcpbench. These dips coincided
with one or more gap timeouts shown in 'netstat -W iwm0', such as:
        77 input block ack window gaps timed out
Which means lost frames on the receive side were stalling subsequent frames
and thus slowing tcpbench down.

I decided to disable the gap timeout entirely to see what would happen if
those missing frames were immediately skipped rather than waiting for them.
The result was stable throughput according to tcpbench.

I then wrote the patch below which keeps the gap timeout intact (it is needed
in case the peer stops sending anything) but skips missing frames at the head
of the Rx block window once a certain amount of frames have queued up. This
heuristic avoids having to wait for the timeout to fire in order to get
frames flowing again if we lose one or more frames during Rx traffic bursts.

I have picked a threshold of 16 outstanding frames based on local testing.
I have no idea if this is a good threshold for everyone. It would help to
get some feedback from tests in other RF environments and other types of
access points. Any regressions?

diff e27fc20afa168944a7605737ac45330f21645404 /usr/src
blob - 098aa9bce19481ce09676ce3c4fc0040f14c9b93
file + sys/net80211/ieee80211_input.c
--- sys/net80211/ieee80211_input.c
+++ sys/net80211/ieee80211_input.c
@@ -67,6 +67,7 @@ void ieee80211_input_ba(struct ieee80211com *, struct
     struct mbuf_list *);
 void ieee80211_input_ba_flush(struct ieee80211com *, struct ieee80211_node *,
     struct ieee80211_rx_ba *, struct mbuf_list *);
+int ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *);
 void ieee80211_input_ba_gap_timeout(void *arg);
 void ieee80211_ba_move_window(struct ieee80211com *,
     struct ieee80211_node *, u_int8_t, u_int16_t, struct mbuf_list *);
@@ -837,10 +838,24 @@ ieee80211_input_ba(struct ieee80211com *ic, struct mbu
  rxi->rxi_flags |= IEEE80211_RXI_AMPDU_DONE;
  ba->ba_buf[idx].rxi = *rxi;
 
- if (ba->ba_buf[ba->ba_head].m == NULL)
- timeout_add_msec(&ba->ba_gap_to, IEEE80211_BA_GAP_TIMEOUT);
- else if (timeout_pending(&ba->ba_gap_to))
- timeout_del(&ba->ba_gap_to);
+ if (ba->ba_buf[ba->ba_head].m == NULL) {
+ if (ba->ba_gapwait < IEEE80211_BA_MAX_GAPWAIT) {
+ if (ba->ba_gapwait == 0)
+ timeout_add_msec(&ba->ba_gap_to,
+    IEEE80211_BA_GAP_TIMEOUT);
+ ba->ba_gapwait++;
+ } else {
+ int skipped = ieee80211_input_ba_gap_skip(ba);
+ ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
+ ba->ba_gapwait = 0;
+ if (timeout_pending(&ba->ba_gap_to))
+ timeout_del(&ba->ba_gap_to);
+ }
+ } else {
+ ba->ba_gapwait = 0;
+ if (timeout_pending(&ba->ba_gap_to))
+ timeout_del(&ba->ba_gap_to);
+ }
 
  ieee80211_input_ba_flush(ic, ni, ba, ml);
 }
@@ -902,6 +917,23 @@ ieee80211_input_ba_flush(struct ieee80211com *ic, stru
  ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
 }
 
+int
+ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *ba)
+{
+ int skipped = 0;
+
+ while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
+ /* move window forward */
+ ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
+ ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
+ skipped++;
+ }
+ if (skipped > 0)
+ ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
+
+ return skipped;
+}
+
 /*
  * Forcibly move the BA window forward to remove a leading gap which has
  * been causing frames to linger in the reordering buffer for too long.
@@ -921,17 +953,8 @@ ieee80211_input_ba_gap_timeout(void *arg)
 
  s = splnet();
 
- skipped = 0;
- while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
- /* move window forward */
- ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
- ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
- skipped++;
- ic->ic_stats.is_ht_rx_ba_frame_lost++;
- }
- if (skipped > 0)
- ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
-
+ skipped = ieee80211_input_ba_gap_skip(ba);
+ ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
  ieee80211_input_ba_flush(ic, ni, ba, &ml);
  if_input(&ic->ic_if, &ml);
 
@@ -2716,6 +2739,7 @@ ieee80211_recv_addba_req(struct ieee80211com *ic, stru
  ba->ba_token = token;
  timeout_set(&ba->ba_to, ieee80211_rx_ba_timeout, ba);
  timeout_set(&ba->ba_gap_to, ieee80211_input_ba_gap_timeout, ba);
+ ba->ba_gapwait = 0;
  ba->ba_winsize = bufsz;
  if (ba->ba_winsize == 0 || ba->ba_winsize > IEEE80211_BA_MAX_WINSZ)
  ba->ba_winsize = IEEE80211_BA_MAX_WINSZ;
@@ -2956,6 +2980,7 @@ ieee80211_recv_delba(struct ieee80211com *ic, struct m
  /* stop Block Ack inactivity timer */
  timeout_del(&ba->ba_to);
  timeout_del(&ba->ba_gap_to);
+ ba->ba_gapwait = 0;
 
  if (ba->ba_buf != NULL) {
  /* free all MSDUs stored in reordering buffer */
blob - 4256a8add05c825d9cd25404822b1e147d597325
file + sys/net80211/ieee80211_node.h
--- sys/net80211/ieee80211_node.h
+++ sys/net80211/ieee80211_node.h
@@ -226,6 +226,15 @@ struct ieee80211_rx_ba {
  u_int16_t ba_head;
  struct timeout ba_gap_to;
 #define IEEE80211_BA_GAP_TIMEOUT 300 /* msec */
+
+ /*
+ * Counter for frames forced to wait in the reordering buffer
+ * due to a leading gap caused by one or more missing frames.
+ */
+ int ba_gapwait;
+ /* Missing frames will be skipped once this many frames are waiting. */
+#define IEEE80211_BA_MAX_GAPWAIT 16
+
  /* Counter for consecutive frames which missed the BA window. */
  int ba_winmiss;
  /* Sequence number of previous frame which missed the BA window. */

Reply | Threaded
Open this post in threaded view
|

Re: net80211: skip input block ack window gaps faster

Stefan Sperling-5
On Fri, Jul 17, 2020 at 03:59:38PM +0200, Stefan Sperling wrote:

> While measuring Tx performance at a fixed Tx rate with iwm(4) I observed
> unexpected dips in throughput measured by tcpbench. These dips coincided
> with one or more gap timeouts shown in 'netstat -W iwm0', such as:
> 77 input block ack window gaps timed out
> Which means lost frames on the receive side were stalling subsequent frames
> and thus slowing tcpbench down.
>
> I decided to disable the gap timeout entirely to see what would happen if
> those missing frames were immediately skipped rather than waiting for them.
> The result was stable throughput according to tcpbench.
>
> I then wrote the patch below which keeps the gap timeout intact (it is needed
> in case the peer stops sending anything) but skips missing frames at the head
> of the Rx block window once a certain amount of frames have queued up. This
> heuristic avoids having to wait for the timeout to fire in order to get
> frames flowing again if we lose one or more frames during Rx traffic bursts.
>
> I have picked a threshold of 16 outstanding frames based on local testing.
> I have no idea if this is a good threshold for everyone. It would help to
> get some feedback from tests in other RF environments and other types of
> access points. Any regressions?

Next version.

One problem with the previous patch was that it effectively limited the
size of the BA window to the arbitrarily chosen limit of 16. We should not
drop frames which arrive out of order but still fall within the BA window.

With this version, we allow the entire block ack window (usually 64 frames)
to fill up beyond the missing frame at the head, and only then bypass the
gap timeout handler and skip over the missing frame directly. I can still
trigger this shortcut with tcpbench, and still see the timeout run sometimes.
Direct skip should be faster than having to wait for the timeout to run,
and missing just one out of 64 frames is a common case in my testing.

Also, I am not quite sure if calling if_input() from a timeout is such a
good idea. Any opinions about that? This patch still lets the gap timeout
handler clear the leading gap but avoids flushing buffered frames there.
The peer will now need to send another frame to flush the buffer, but now
if_input() will be called from network interrupt context only. Which is
probably a good thing?

This code still seems to recover well enough from occasional packet loss,
which is what this is all about. If you are on a really bad link, none
of this will help anyway.

diff refs/heads/master refs/heads/ba-gap
blob - 098aa9bce19481ce09676ce3c4fc0040f14c9b93
blob + 4f41b568311bf29e131a3f4802e0a238ba940fe0
--- sys/net80211/ieee80211_input.c
+++ sys/net80211/ieee80211_input.c
@@ -67,6 +67,7 @@ void ieee80211_input_ba(struct ieee80211com *, struct
     struct mbuf_list *);
 void ieee80211_input_ba_flush(struct ieee80211com *, struct ieee80211_node *,
     struct ieee80211_rx_ba *, struct mbuf_list *);
+int ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *);
 void ieee80211_input_ba_gap_timeout(void *arg);
 void ieee80211_ba_move_window(struct ieee80211com *,
     struct ieee80211_node *, u_int8_t, u_int16_t, struct mbuf_list *);
@@ -837,10 +838,29 @@ ieee80211_input_ba(struct ieee80211com *ic, struct mbu
  rxi->rxi_flags |= IEEE80211_RXI_AMPDU_DONE;
  ba->ba_buf[idx].rxi = *rxi;
 
- if (ba->ba_buf[ba->ba_head].m == NULL)
- timeout_add_msec(&ba->ba_gap_to, IEEE80211_BA_GAP_TIMEOUT);
- else if (timeout_pending(&ba->ba_gap_to))
- timeout_del(&ba->ba_gap_to);
+ if (ba->ba_buf[ba->ba_head].m == NULL) {
+ if (ba->ba_gapwait < (ba->ba_winsize - 1)) {
+ if (ba->ba_gapwait == 0) {
+ timeout_add_msec(&ba->ba_gap_to,
+    IEEE80211_BA_GAP_TIMEOUT);
+ }
+ ba->ba_gapwait++;
+ } else {
+ /*
+ * A full BA window worth of frames is now waiting.
+ * Skip the missing frame at the head of the window.
+ */
+ int skipped = ieee80211_input_ba_gap_skip(ba);
+ ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
+ ba->ba_gapwait = 0;
+ if (timeout_pending(&ba->ba_gap_to))
+ timeout_del(&ba->ba_gap_to);
+ }
+ } else {
+ ba->ba_gapwait = 0;
+ if (timeout_pending(&ba->ba_gap_to))
+ timeout_del(&ba->ba_gap_to);
+ }
 
  ieee80211_input_ba_flush(ic, ni, ba, ml);
 }
@@ -908,10 +928,26 @@ ieee80211_input_ba_flush(struct ieee80211com *ic, stru
  * A leading gap will occur if a particular A-MPDU subframe never arrives
  * or if a bug in the sender causes sequence numbers to jump forward by > 1.
  */
+int
+ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *ba)
+{
+ int skipped = 0;
+
+ while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
+ /* move window forward */
+ ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
+ ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
+ skipped++;
+ }
+ if (skipped > 0)
+ ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
+
+ return skipped;
+}
+
 void
 ieee80211_input_ba_gap_timeout(void *arg)
 {
- struct mbuf_list ml = MBUF_LIST_INITIALIZER();
  struct ieee80211_rx_ba *ba = arg;
  struct ieee80211_node *ni = ba->ba_ni;
  struct ieee80211com *ic = ni->ni_ic;
@@ -921,20 +957,9 @@ ieee80211_input_ba_gap_timeout(void *arg)
 
  s = splnet();
 
- skipped = 0;
- while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
- /* move window forward */
- ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
- ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
- skipped++;
- ic->ic_stats.is_ht_rx_ba_frame_lost++;
- }
- if (skipped > 0)
- ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
+ skipped = ieee80211_input_ba_gap_skip(ba);
+ ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
 
- ieee80211_input_ba_flush(ic, ni, ba, &ml);
- if_input(&ic->ic_if, &ml);
-
  splx(s);
 }
 
@@ -2716,6 +2741,7 @@ ieee80211_recv_addba_req(struct ieee80211com *ic, stru
  ba->ba_token = token;
  timeout_set(&ba->ba_to, ieee80211_rx_ba_timeout, ba);
  timeout_set(&ba->ba_gap_to, ieee80211_input_ba_gap_timeout, ba);
+ ba->ba_gapwait = 0;
  ba->ba_winsize = bufsz;
  if (ba->ba_winsize == 0 || ba->ba_winsize > IEEE80211_BA_MAX_WINSZ)
  ba->ba_winsize = IEEE80211_BA_MAX_WINSZ;
@@ -2956,6 +2982,7 @@ ieee80211_recv_delba(struct ieee80211com *ic, struct m
  /* stop Block Ack inactivity timer */
  timeout_del(&ba->ba_to);
  timeout_del(&ba->ba_gap_to);
+ ba->ba_gapwait = 0;
 
  if (ba->ba_buf != NULL) {
  /* free all MSDUs stored in reordering buffer */
blob - 4256a8add05c825d9cd25404822b1e147d597325
blob + 17d982850c16aab271f362a660025a2c0331a499
--- sys/net80211/ieee80211_node.h
+++ sys/net80211/ieee80211_node.h
@@ -226,6 +226,13 @@ struct ieee80211_rx_ba {
  u_int16_t ba_head;
  struct timeout ba_gap_to;
 #define IEEE80211_BA_GAP_TIMEOUT 300 /* msec */
+
+ /*
+ * Counter for frames forced to wait in the reordering buffer
+ * due to a leading gap caused by one or more missing frames.
+ */
+ int ba_gapwait;
+
  /* Counter for consecutive frames which missed the BA window. */
  int ba_winmiss;
  /* Sequence number of previous frame which missed the BA window. */

Reply | Threaded
Open this post in threaded view
|

Re: net80211: skip input block ack window gaps faster

Uwe Werler
> Next version.
>
> One problem with the previous patch was that it effectively limited the
> size of the BA window to the arbitrarily chosen limit of 16. We should not
> drop frames which arrive out of order but still fall within the BA window.
>
> With this version, we allow the entire block ack window (usually 64 frames)
> to fill up beyond the missing frame at the head, and only then bypass the
> gap timeout handler and skip over the missing frame directly. I can still
> trigger this shortcut with tcpbench, and still see the timeout run sometimes.
> Direct skip should be faster than having to wait for the timeout to run,
> and missing just one out of 64 frames is a common case in my testing.
>
> Also, I am not quite sure if calling if_input() from a timeout is such a
> good idea. Any opinions about that? This patch still lets the gap timeout
> handler clear the leading gap but avoids flushing buffered frames there.
> The peer will now need to send another frame to flush the buffer, but now
> if_input() will be called from network interrupt context only. Which is
> probably a good thing?
>
> This code still seems to recover well enough from occasional packet loss,
> which is what this is all about. If you are on a really bad link, none
> of this will help anyway.
>
> diff refs/heads/master refs/heads/ba-gap
> blob - 098aa9bce19481ce09676ce3c4fc0040f14c9b93
> blob + 4f41b568311bf29e131a3f4802e0a238ba940fe0
> --- sys/net80211/ieee80211_input.c
> +++ sys/net80211/ieee80211_input.c
> @@ -67,6 +67,7 @@ void ieee80211_input_ba(struct ieee80211com *, struct
>      struct mbuf_list *);
>  void ieee80211_input_ba_flush(struct ieee80211com *, struct ieee80211_node *,
>      struct ieee80211_rx_ba *, struct mbuf_list *);
> +int ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *);
>  void ieee80211_input_ba_gap_timeout(void *arg);
>  void ieee80211_ba_move_window(struct ieee80211com *,
>      struct ieee80211_node *, u_int8_t, u_int16_t, struct mbuf_list *);
> @@ -837,10 +838,29 @@ ieee80211_input_ba(struct ieee80211com *ic, struct mbu
>   rxi->rxi_flags |= IEEE80211_RXI_AMPDU_DONE;
>   ba->ba_buf[idx].rxi = *rxi;
>  
> - if (ba->ba_buf[ba->ba_head].m == NULL)
> - timeout_add_msec(&ba->ba_gap_to, IEEE80211_BA_GAP_TIMEOUT);
> - else if (timeout_pending(&ba->ba_gap_to))
> - timeout_del(&ba->ba_gap_to);
> + if (ba->ba_buf[ba->ba_head].m == NULL) {
> + if (ba->ba_gapwait < (ba->ba_winsize - 1)) {
> + if (ba->ba_gapwait == 0) {
> + timeout_add_msec(&ba->ba_gap_to,
> +    IEEE80211_BA_GAP_TIMEOUT);
> + }
> + ba->ba_gapwait++;
> + } else {
> + /*
> + * A full BA window worth of frames is now waiting.
> + * Skip the missing frame at the head of the window.
> + */
> + int skipped = ieee80211_input_ba_gap_skip(ba);
> + ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
> + ba->ba_gapwait = 0;
> + if (timeout_pending(&ba->ba_gap_to))
> + timeout_del(&ba->ba_gap_to);
> + }
> + } else {
> + ba->ba_gapwait = 0;
> + if (timeout_pending(&ba->ba_gap_to))
> + timeout_del(&ba->ba_gap_to);
> + }
>  
>   ieee80211_input_ba_flush(ic, ni, ba, ml);
>  }
> @@ -908,10 +928,26 @@ ieee80211_input_ba_flush(struct ieee80211com *ic, stru
>   * A leading gap will occur if a particular A-MPDU subframe never arrives
>   * or if a bug in the sender causes sequence numbers to jump forward by > 1.
>   */
> +int
> +ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *ba)
> +{
> + int skipped = 0;
> +
> + while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
> + /* move window forward */
> + ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
> + ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
> + skipped++;
> + }
> + if (skipped > 0)
> + ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
> +
> + return skipped;
> +}
> +
>  void
>  ieee80211_input_ba_gap_timeout(void *arg)
>  {
> - struct mbuf_list ml = MBUF_LIST_INITIALIZER();
>   struct ieee80211_rx_ba *ba = arg;
>   struct ieee80211_node *ni = ba->ba_ni;
>   struct ieee80211com *ic = ni->ni_ic;
> @@ -921,20 +957,9 @@ ieee80211_input_ba_gap_timeout(void *arg)
>  
>   s = splnet();
>  
> - skipped = 0;
> - while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
> - /* move window forward */
> - ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
> - ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
> - skipped++;
> - ic->ic_stats.is_ht_rx_ba_frame_lost++;
> - }
> - if (skipped > 0)
> - ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
> + skipped = ieee80211_input_ba_gap_skip(ba);
> + ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
>  
> - ieee80211_input_ba_flush(ic, ni, ba, &ml);
> - if_input(&ic->ic_if, &ml);
> -
>   splx(s);
>  }
>  
> @@ -2716,6 +2741,7 @@ ieee80211_recv_addba_req(struct ieee80211com *ic, stru
>   ba->ba_token = token;
>   timeout_set(&ba->ba_to, ieee80211_rx_ba_timeout, ba);
>   timeout_set(&ba->ba_gap_to, ieee80211_input_ba_gap_timeout, ba);
> + ba->ba_gapwait = 0;
>   ba->ba_winsize = bufsz;
>   if (ba->ba_winsize == 0 || ba->ba_winsize > IEEE80211_BA_MAX_WINSZ)
>   ba->ba_winsize = IEEE80211_BA_MAX_WINSZ;
> @@ -2956,6 +2982,7 @@ ieee80211_recv_delba(struct ieee80211com *ic, struct m
>   /* stop Block Ack inactivity timer */
>   timeout_del(&ba->ba_to);
>   timeout_del(&ba->ba_gap_to);
> + ba->ba_gapwait = 0;
>  
>   if (ba->ba_buf != NULL) {
>   /* free all MSDUs stored in reordering buffer */
> blob - 4256a8add05c825d9cd25404822b1e147d597325
> blob + 17d982850c16aab271f362a660025a2c0331a499
> --- sys/net80211/ieee80211_node.h
> +++ sys/net80211/ieee80211_node.h
> @@ -226,6 +226,13 @@ struct ieee80211_rx_ba {
>   u_int16_t ba_head;
>   struct timeout ba_gap_to;
>  #define IEEE80211_BA_GAP_TIMEOUT 300 /* msec */
> +
> + /*
> + * Counter for frames forced to wait in the reordering buffer
> + * due to a leading gap caused by one or more missing frames.
> + */
> + int ba_gapwait;
> +
>   /* Counter for consecutive frames which missed the BA window. */
>   int ba_winmiss;
>   /* Sequence number of previous frame which missed the BA window. */
>

Hi Stefan,

just tested:

tcpbench: bandwidth min/avg/max/std-dev = 1.093/46.274/58.174/17.566 Mbps


ieee80211 on iwm0:
        0 input packets with bad version
        1 input packet too short
        0 input packets from wrong bssid
        0 input packet duplicates discarded
        0 input packets with wrong direction
        4 input multicast echo packets discarded
        0 input packets from unassociated station discarded
        2 input encrypted packets without wep/wpa config discarded
        1 input unencrypted packet with wep/wpa config discarded
        0 input wep/wpa packets processing failed
        0 input packet decapsulations failed
        0 input management packets discarded
        0 input control packets discarded
        0 input packets with truncated rate set
        0 input packets with missing elements
        0 input packets with elements too big
        0 input packets with elements too small
        0 input packets with invalid channel
        0 input packets with mismatched channel
        0 node allocations failed
        0 input packets with mismatched ssid
        0 input packets with unsupported auth algorithm
        0 input authentications failed
        0 input associations from wrong bssid
        0 input associations without authentication
        0 input associations with mismatched capabilities
        0 input associations without matching rates
        0 input associations with bad rsn ie
        0 input deauthentication packets
        0 input disassociation packets
        0 input packets with unknown subtype
        0 input packets failed for lack of mbufs
        0 input decryptions failed on crc
        0 input ahdemo management packets discarded
        0 input packets with bad auth request
        3 input eapol-key packets
        0 input eapol-key packets with bad mic
        0 input eapol-key packets replayed
        0 input packets with bad tkip mic
        0 input tkip mic failure notifications
        0 input packets on unauthenticated port
        0 output packets failed for lack of mbufs
        0 output packets failed for no nodes
        0 output packets of unknown management type
        0 output packets on unauthenticated port
        1 active scan started
        0 passive scans started
        0 nodes timed out
        0 failures with no memory for crypto ctx
        0 ccmp decryption errors
        58 ccmp replayed frames
        0 cmac icv errors
        0 cmac replayed frames
        0 tkip icv errors
        0 tkip replays
        0 pbac errors
        0 HT negotiation failures because peer does not support MCS 0-7
        0 HT negotiation failures because we do not support basic MCS set
        0 HT negotiation failures because peer uses bad crypto
        0 HT protection changes
        1 new input block ack agreement
        1 new output block ack agreement
        0 input frames below block ack window start
        9 input frames above block ack window end
        9 input block ack window slides
        0 input block ack window jumps
        0 duplicate input block ack frames
        24 expected input block ack frames never arrived
        11 input block ack window gaps timed out
        0 input block ack agreements timed out
        0 output block ack agreements timed out


iwm0: flags=808843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST,AUTOCONF4> mtu 1500
        lladdr 60:f6:77:bc:3a:04
        index 1 priority 4 llprio 3
        groups: wlan egress
        media: IEEE802.11 autoselect mode 11n (HT-MCS4 mode 11n)
        status: active
        ieee80211: join HIDDEN chan 100 bssid e2:b9:e5:dd:b4:0b 76% wpakey wpaprotos wpa2 wpaakms psk wpaciphers ccmp wpagroupcipher ccmp
        inet 192.168.1.220 netmask 0xffffff00 broadcast 192.168.1.255


OpenBSD 6.7-current (GENERIC.MP) #7: Fri Jul 17 16:55:31 GMT 2020
    [hidden email]:/usr/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 17018589184 (16230MB)
avail mem = 16487751680 (15723MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 3.0 @ 0xeab60 (103 entries)
bios0: vendor Dell Inc. version "1.16.1" date 10/03/2019
bios0: Dell Inc. Latitude 7480
acpi0 at bios0: ACPI 5.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP APIC FPDT FIDT MCFG HPET SSDT SSDT HPET SSDT UEFI SSDT LPIT SSDT SSDT SSDT SSDT DBGP DBG2 SSDT MSDM SLIC DMAR TPM2 ASF! BGRT
acpi0: wakeup devices RP09(S4) PXSX(S4) RP10(S4) PXSX(S4) RP11(S4) PXSX(S4) RP12(S4) PXSX(S4) RP13(S4) PXSX(S4) RP01(S4) PXSX(S4) RP02(S4) PXSX(S4) RP03(S4) PXSX(S4) [...]
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Core(TM) i5-7200U CPU @ 2.50GHz, 2295.59 MHz, 06-8e-09
cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,TSXFA,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES,MELTDOWN
cpu0: 256KB 64b/line 8-way L2 cache
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
cpu0: apic clock running at 24MHz
cpu0: mwait min=64, max=64, C-substates=0.2.1.2.4.1.1.1, IBE
cpu1 at mainbus0: apid 2 (application processor)
cpu1: Intel(R) Core(TM) i5-7200U CPU @ 2.50GHz, 2294.66 MHz, 06-8e-09
cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,TSXFA,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES,MELTDOWN
cpu1: 256KB 64b/line 8-way L2 cache
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 1 (application processor)
cpu2: Intel(R) Core(TM) i5-7200U CPU @ 2.50GHz, 2294.66 MHz, 06-8e-09
cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,TSXFA,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES,MELTDOWN
cpu2: 256KB 64b/line 8-way L2 cache
cpu2: smt 1, core 0, package 0
cpu3 at mainbus0: apid 3 (application processor)
cpu3: Intel(R) Core(TM) i5-7200U CPU @ 2.50GHz, 2294.66 MHz, 06-8e-09
cpu3: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,TSXFA,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES,MELTDOWN
cpu3: 256KB 64b/line 8-way L2 cache
cpu3: smt 1, core 1, package 0
ioapic0 at mainbus0: apid 2 pa 0xfec00000, version 20, 120 pins
acpimcfg0 at acpi0
acpimcfg0: addr 0xf0000000, bus 0-127
acpihpet0 at acpi0: 23999999 Hz
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus -1 (PEG0)
acpiprt2 at acpi0: bus -1 (PEG1)
acpiprt3 at acpi0: bus -1 (PEG2)
acpiprt4 at acpi0: bus -1 (RP09)
acpiprt5 at acpi0: bus -1 (RP10)
acpiprt6 at acpi0: bus -1 (RP11)
acpiprt7 at acpi0: bus -1 (RP12)
acpiprt8 at acpi0: bus -1 (RP13)
acpiprt9 at acpi0: bus 1 (RP01)
acpiprt10 at acpi0: bus -1 (RP02)
acpiprt11 at acpi0: bus 2 (RP03)
acpiprt12 at acpi0: bus -1 (RP04)
acpiprt13 at acpi0: bus 3 (RP05)
acpiprt14 at acpi0: bus -1 (RP06)
acpiprt15 at acpi0: bus -1 (RP07)
acpiprt16 at acpi0: bus -1 (RP08)
acpiprt17 at acpi0: bus -1 (RP17)
acpiprt18 at acpi0: bus -1 (RP18)
acpiprt19 at acpi0: bus -1 (RP19)
acpiprt20 at acpi0: bus -1 (RP20)
acpiprt21 at acpi0: bus -1 (RP21)
acpiprt22 at acpi0: bus -1 (RP22)
acpiprt23 at acpi0: bus -1 (RP23)
acpiprt24 at acpi0: bus -1 (RP24)
acpiprt25 at acpi0: bus -1 (RP14)
acpiprt26 at acpi0: bus -1 (RP15)
acpiprt27 at acpi0: bus -1 (RP16)
acpiec0 at acpi0
acpiec at acpi0 not configured
acpicpu0 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu1 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu2 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu3 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpipwrres0 at acpi0: WRST
acpipwrres1 at acpi0: WRST
acpipwrres2 at acpi0: WRST
acpipwrres3 at acpi0: WRST
acpipwrres4 at acpi0: WRST
acpipwrres5 at acpi0: WRST
acpipwrres6 at acpi0: WRST
acpipwrres7 at acpi0: WRST
acpipwrres8 at acpi0: WRST
acpipwrres9 at acpi0: WRST
acpipwrres10 at acpi0: WRST
acpipwrres11 at acpi0: WRST
acpipwrres12 at acpi0: WRST
acpipwrres13 at acpi0: WRST
acpipwrres14 at acpi0: WRST
acpipwrres15 at acpi0: WRST
acpipwrres16 at acpi0: WRST
acpipwrres17 at acpi0: WRST
acpipwrres18 at acpi0: WRST
acpipwrres19 at acpi0: WRST
acpitz0 at acpi0: critical temperature is 107 degC
acpipci0 at acpi0 PCI0: 0x00000010 0x00000011 0x00000000
extent `acpipci0 pcibus' (0x0 - 0xff), flags=0
     0x7f - 0xff
extent `acpipci0 pciio' (0x0 - 0xffffffff), flags=0
     0xcf8 - 0xcff
     0x10000 - 0xffffffff
extent `acpipci0 pcimem' (0x0 - 0xffffffffffffffff), flags=0
     0x0 - 0x9ffff
     0xc0000 - 0x7f7fffff
     0xf0000000 - 0xfcffffff
     0xfe800000 - 0xffffffffffffffff
acpicmos0 at acpi0
"INT3403" at acpi0 not configured
"INT3403" at acpi0 not configured
"DLL07A0" at acpi0 not configured
"INT3446" at acpi0 not configured
"INT3403" at acpi0 not configured
"PNP0C14" at acpi0 not configured
"INT33A1" at acpi0 not configured
"MSFT0101" at acpi0 not configured
"PNP0C14" at acpi0 not configured
acpibtn0 at acpi0: LID0
acpibtn1 at acpi0: PBTN
acpibtn2 at acpi0: SBTN
acpiac0 at acpi0: AC unit online
acpibat0 at acpi0: BAT0 model "DELL PGFX464" serial 3640 type LION oem "SMP"
acpihid0 at acpi0: HIDD, 5 button array
"INT3400" at acpi0 not configured
acpivideo0 at acpi0: GFX0
acpivout0 at acpivideo0: LCD_
cpu0: using VERW MDS workaround (except on vmm entry)
cpu0: Enhanced SpeedStep 2295 MHz: speeds: 2701, 2700, 2400, 2300, 2200, 2000, 1900, 1700, 1500, 1400, 1200, 1100, 900, 700, 600, 400 MHz
pci0 at mainbus0 bus 0
pchb0 at pci0 dev 0 function 0 "Intel Core 7G Host" rev 0x02
inteldrm0 at pci0 dev 2 function 0 "Intel HD Graphics 620" rev 0x02
drm0 at inteldrm0
inteldrm0: msi, KABYLAKE, gen 9
"Intel Core 6G Thermal" rev 0x02 at pci0 dev 4 function 0 not configured
xhci0 at pci0 dev 20 function 0 "Intel 100 Series xHCI" rev 0x21: msi, xHCI 1.0
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 3.00/1.00 addr 1
pchtemp0 at pci0 dev 20 function 2 "Intel 100 Series Thermal" rev 0x21
dwiic0 at pci0 dev 21 function 0 "Intel 100 Series I2C" rev 0x21: apic 2 int 16
iic0 at dwiic0
dwiic1 at pci0 dev 21 function 1 "Intel 100 Series I2C" rev 0x21: apic 2 int 17
iic1 at dwiic1
ihidev0 at iic1 addr 0x2c irq 51, vendor 0x44e product 0x120b, DLL07A0
ihidev0: 7 report ids
ims0 at ihidev0 reportid 1: 3 buttons, Z and W dir
wsmouse0 at ims0 mux 0
ikbd0 at ihidev0 reportid 2: 8 variable keys, 6 key codes
wskbd0 at ikbd0 mux 1
hid at ihidev0 reportid 3 not configured
hid at ihidev0 reportid 4 not configured
hid at ihidev0 reportid 5 not configured
hid at ihidev0 reportid 6 not configured
hid at ihidev0 reportid 7 not configured
"Intel 100 Series I2C" rev 0x21 at pci0 dev 21 function 2 not configured
"Intel 100 Series MEI" rev 0x21 at pci0 dev 22 function 0 not configured
ahci0 at pci0 dev 23 function 0 "Intel 100 Series AHCI" rev 0x21: msi, AHCI 1.3.1
ahci0: port 2: 6.0Gb/s
scsibus1 at ahci0: 32 targets
sd0 at scsibus1 targ 2 lun 0: <ATA, Micron 1100 SATA, M0D> naa.500a075119d7587b
sd0: 244198MB, 512 bytes/sector, 500118192 sectors, thin
ppb0 at pci0 dev 28 function 0 "Intel 100 Series PCIE" rev 0xf1: msi
pci1 at ppb0 bus 1
rtsx0 at pci1 dev 0 function 0 "Realtek RTS525A Card Reader" rev 0x01: msi
sdmmc0 at rtsx0: 4-bit, dma
ppb1 at pci0 dev 28 function 2 "Intel 100 Series PCIE" rev 0xf1: msi
pci2 at ppb1 bus 2
iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
ppb2 at pci0 dev 28 function 4 "Intel 100 Series PCIE" rev 0xf1: msi
pci3 at ppb2 bus 3
pcib0 at pci0 dev 31 function 0 "Intel 200 Series LPC" rev 0x21
"Intel 100 Series PMC" rev 0x21 at pci0 dev 31 function 2 not configured
azalia0 at pci0 dev 31 function 3 "Intel 200 Series HD Audio" rev 0x21: msi
azalia0: codecs: Realtek/0x0256, Intel/0x280b, using Realtek/0x0256
audio0 at azalia0
ichiic0 at pci0 dev 31 function 4 "Intel 100 Series SMBus" rev 0x21: apic 2 int 16
iic2 at ichiic0
spdmem0 at iic2 addr 0x50: 8GB DDR4 SDRAM PC4-19200 SO-DIMM
spdmem1 at iic2 addr 0x52: 8GB DDR4 SDRAM PC4-19200 SO-DIMM
em0 at pci0 dev 31 function 6 "Intel I219-LM" rev 0x21: msi, address a4:4c:c8:7e:52:d3
isa0 at pcib0
isadma0 at isa0
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd1 at pckbd0: console keyboard
pms0 at pckbc0 (aux slot)
wsmouse1 at pms0 mux 0
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
vmm0 at mainbus0: VMX/EPT
efifb at mainbus0 not configured
uhidev0 at uhub0 port 2 configuration 1 interface 0 "Logitech USB-PS/2 Optical Mouse" rev 2.00/20.00 addr 2
uhidev0: iclass 3/1
ums0 at uhidev0: 3 buttons, Z dir
wsmouse2 at ums0 mux 0
uvideo0 at uhub0 port 5 configuration 1 interface 0 "CN0K49W1LOG007B8BMGNA01 Integrated_Webcam_HD" rev 2.00/75.24 addr 3
video0 at uvideo0
ugen0 at uhub0 port 7 "Intel Bluetooth" rev 2.00/0.10 addr 4
ugen1 at uhub0 port 10 "Broadcom Corp 5880" rev 1.10/1.01 addr 5
vscsi0 at root
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
sd1 at scsibus3 targ 1 lun 0: <OPENBSD, SR CRYPTO, 006>
sd1: 244190MB, 512 bytes/sector, 500101898 sectors
root on sd1a (42a289e5051cfa2f.a) swap on sd1b dump on sd1b
inteldrm0: 1920x1080, 32bpp
wsdisplay0 at inteldrm0 mux 1: console (std, vt100 emulation), using wskbd1
wskbd0: connecting to wsdisplay0
wsdisplay0: screen 1-5 added (std, vt100 emulation)
iwm0: hw rev 0x230, fw ver 34.0.1, address 60:f6:77:bc:3a:04

Please tell me what else I can test.

--

With kind regards / Með bestu kveðju / Mit freundlichen Grüßen

Uwe Werler

Reply | Threaded
Open this post in threaded view
|

Re: net80211: skip input block ack window gaps faster

Martin Pieuchot
In reply to this post by Stefan Sperling-5
On 17/07/20(Fri) 18:15, Stefan Sperling wrote:

> On Fri, Jul 17, 2020 at 03:59:38PM +0200, Stefan Sperling wrote:
> > While measuring Tx performance at a fixed Tx rate with iwm(4) I observed
> > unexpected dips in throughput measured by tcpbench. These dips coincided
> > with one or more gap timeouts shown in 'netstat -W iwm0', such as:
> > 77 input block ack window gaps timed out
> > Which means lost frames on the receive side were stalling subsequent frames
> > and thus slowing tcpbench down.
> >
> > I decided to disable the gap timeout entirely to see what would happen if
> > those missing frames were immediately skipped rather than waiting for them.
> > The result was stable throughput according to tcpbench.
> >
> > I then wrote the patch below which keeps the gap timeout intact (it is needed
> > in case the peer stops sending anything) but skips missing frames at the head
> > of the Rx block window once a certain amount of frames have queued up. This
> > heuristic avoids having to wait for the timeout to fire in order to get
> > frames flowing again if we lose one or more frames during Rx traffic bursts.
> >
> > I have picked a threshold of 16 outstanding frames based on local testing.
> > I have no idea if this is a good threshold for everyone. It would help to
> > get some feedback from tests in other RF environments and other types of
> > access points. Any regressions?
>
> Next version.
>
> One problem with the previous patch was that it effectively limited the
> size of the BA window to the arbitrarily chosen limit of 16. We should not
> drop frames which arrive out of order but still fall within the BA window.
>
> With this version, we allow the entire block ack window (usually 64 frames)
> to fill up beyond the missing frame at the head, and only then bypass the
> gap timeout handler and skip over the missing frame directly. I can still
> trigger this shortcut with tcpbench, and still see the timeout run sometimes.
> Direct skip should be faster than having to wait for the timeout to run,
> and missing just one out of 64 frames is a common case in my testing.
>
> Also, I am not quite sure if calling if_input() from a timeout is such a
> good idea. Any opinions about that? This patch still lets the gap timeout
> handler clear the leading gap but avoids flushing buffered frames there.
> The peer will now need to send another frame to flush the buffer, but now
> if_input() will be called from network interrupt context only. Which is
> probably a good thing?

if_input() can be called in any context.  Using a timeout means you need
some extra logic to free the queue.  It might also add to the latency.

> This code still seems to recover well enough from occasional packet loss,
> which is what this is all about. If you are on a really bad link, none
> of this will help anyway.
>
> diff refs/heads/master refs/heads/ba-gap
> blob - 098aa9bce19481ce09676ce3c4fc0040f14c9b93
> blob + 4f41b568311bf29e131a3f4802e0a238ba940fe0
> --- sys/net80211/ieee80211_input.c
> +++ sys/net80211/ieee80211_input.c
> @@ -67,6 +67,7 @@ void ieee80211_input_ba(struct ieee80211com *, struct
>      struct mbuf_list *);
>  void ieee80211_input_ba_flush(struct ieee80211com *, struct ieee80211_node *,
>      struct ieee80211_rx_ba *, struct mbuf_list *);
> +int ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *);
>  void ieee80211_input_ba_gap_timeout(void *arg);
>  void ieee80211_ba_move_window(struct ieee80211com *,
>      struct ieee80211_node *, u_int8_t, u_int16_t, struct mbuf_list *);
> @@ -837,10 +838,29 @@ ieee80211_input_ba(struct ieee80211com *ic, struct mbu
>   rxi->rxi_flags |= IEEE80211_RXI_AMPDU_DONE;
>   ba->ba_buf[idx].rxi = *rxi;
>  
> - if (ba->ba_buf[ba->ba_head].m == NULL)
> - timeout_add_msec(&ba->ba_gap_to, IEEE80211_BA_GAP_TIMEOUT);
> - else if (timeout_pending(&ba->ba_gap_to))
> - timeout_del(&ba->ba_gap_to);
> + if (ba->ba_buf[ba->ba_head].m == NULL) {
> + if (ba->ba_gapwait < (ba->ba_winsize - 1)) {
> + if (ba->ba_gapwait == 0) {
> + timeout_add_msec(&ba->ba_gap_to,
> +    IEEE80211_BA_GAP_TIMEOUT);
> + }
> + ba->ba_gapwait++;
> + } else {
> + /*
> + * A full BA window worth of frames is now waiting.
> + * Skip the missing frame at the head of the window.
> + */
> + int skipped = ieee80211_input_ba_gap_skip(ba);
> + ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
> + ba->ba_gapwait = 0;
> + if (timeout_pending(&ba->ba_gap_to))
> + timeout_del(&ba->ba_gap_to);
> + }
> + } else {
> + ba->ba_gapwait = 0;
> + if (timeout_pending(&ba->ba_gap_to))
> + timeout_del(&ba->ba_gap_to);
> + }
>  
>   ieee80211_input_ba_flush(ic, ni, ba, ml);
>  }
> @@ -908,10 +928,26 @@ ieee80211_input_ba_flush(struct ieee80211com *ic, stru
>   * A leading gap will occur if a particular A-MPDU subframe never arrives
>   * or if a bug in the sender causes sequence numbers to jump forward by > 1.
>   */
> +int
> +ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *ba)
> +{
> + int skipped = 0;
> +
> + while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
> + /* move window forward */
> + ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
> + ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
> + skipped++;
> + }
> + if (skipped > 0)
> + ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
> +
> + return skipped;
> +}
> +
>  void
>  ieee80211_input_ba_gap_timeout(void *arg)
>  {
> - struct mbuf_list ml = MBUF_LIST_INITIALIZER();
>   struct ieee80211_rx_ba *ba = arg;
>   struct ieee80211_node *ni = ba->ba_ni;
>   struct ieee80211com *ic = ni->ni_ic;
> @@ -921,20 +957,9 @@ ieee80211_input_ba_gap_timeout(void *arg)
>  
>   s = splnet();
>  
> - skipped = 0;
> - while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
> - /* move window forward */
> - ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
> - ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
> - skipped++;
> - ic->ic_stats.is_ht_rx_ba_frame_lost++;
> - }
> - if (skipped > 0)
> - ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
> + skipped = ieee80211_input_ba_gap_skip(ba);
> + ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
>  
> - ieee80211_input_ba_flush(ic, ni, ba, &ml);
> - if_input(&ic->ic_if, &ml);
> -
>   splx(s);
>  }
>  
> @@ -2716,6 +2741,7 @@ ieee80211_recv_addba_req(struct ieee80211com *ic, stru
>   ba->ba_token = token;
>   timeout_set(&ba->ba_to, ieee80211_rx_ba_timeout, ba);
>   timeout_set(&ba->ba_gap_to, ieee80211_input_ba_gap_timeout, ba);
> + ba->ba_gapwait = 0;
>   ba->ba_winsize = bufsz;
>   if (ba->ba_winsize == 0 || ba->ba_winsize > IEEE80211_BA_MAX_WINSZ)
>   ba->ba_winsize = IEEE80211_BA_MAX_WINSZ;
> @@ -2956,6 +2982,7 @@ ieee80211_recv_delba(struct ieee80211com *ic, struct m
>   /* stop Block Ack inactivity timer */
>   timeout_del(&ba->ba_to);
>   timeout_del(&ba->ba_gap_to);
> + ba->ba_gapwait = 0;
>  
>   if (ba->ba_buf != NULL) {
>   /* free all MSDUs stored in reordering buffer */
> blob - 4256a8add05c825d9cd25404822b1e147d597325
> blob + 17d982850c16aab271f362a660025a2c0331a499
> --- sys/net80211/ieee80211_node.h
> +++ sys/net80211/ieee80211_node.h
> @@ -226,6 +226,13 @@ struct ieee80211_rx_ba {
>   u_int16_t ba_head;
>   struct timeout ba_gap_to;
>  #define IEEE80211_BA_GAP_TIMEOUT 300 /* msec */
> +
> + /*
> + * Counter for frames forced to wait in the reordering buffer
> + * due to a leading gap caused by one or more missing frames.
> + */
> + int ba_gapwait;
> +
>   /* Counter for consecutive frames which missed the BA window. */
>   int ba_winmiss;
>   /* Sequence number of previous frame which missed the BA window. */
>