umb: aggregate packets on tx

classic Classic list List threaded Threaded
5 messages Options
Reply | Threaded
Open this post in threaded view
|

umb: aggregate packets on tx

Gerhard Roth-2
The current umb(4) implementation needs one USB transfer for every packet
that is sent. With the following patch, we can now aggregate several
packets from the ifq into one single USB transfer.

This may speed up the tx path. And even if it doesn't, at least it
reduces the number of transfers required.


Gerhard


Index: sys/dev/usb/if_umb.c
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_umb.c,v
retrieving revision 1.8
diff -u -p -u -p -r1.8 if_umb.c
--- sys/dev/usb/if_umb.c 25 Nov 2016 12:43:26 -0000 1.8
+++ sys/dev/usb/if_umb.c 12 Dec 2016 13:38:34 -0000
@@ -156,7 +156,7 @@ int umb_decode_connect_info(struct umb
 int umb_decode_ip_configuration(struct umb_softc *, void *, int);
 void umb_rx(struct umb_softc *);
 void umb_rxeof(struct usbd_xfer *, void *, usbd_status);
-int umb_encap(struct umb_softc *, struct mbuf *);
+int umb_encap(struct umb_softc *);
 void umb_txeof(struct usbd_xfer *, void *, usbd_status);
 void umb_decap(struct umb_softc *, struct usbd_xfer *);
 
@@ -299,6 +299,7 @@ umb_attach(struct device *parent, struct
 
  sc->sc_udev = uaa->device;
  sc->sc_ctrl_ifaceno = uaa->ifaceno;
+ ml_init(&sc->sc_tx_ml);
 
  /*
  * Some MBIM hardware does not provide the mandatory CDC Union
@@ -583,8 +584,25 @@ umb_ncm_setup(struct umb_softc *sc)
     UGETW(np.wLength) == sizeof (np)) {
  sc->sc_rx_bufsz = UGETDW(np.dwNtbInMaxSize);
  sc->sc_tx_bufsz = UGETDW(np.dwNtbOutMaxSize);
- } else
+ sc->sc_maxdgram = UGETW(np.wNtbOutMaxDatagrams);
+ sc->sc_align = UGETW(np.wNdpOutAlignment);
+ sc->sc_ndp_div = UGETW(np.wNdpOutDivisor);
+ sc->sc_ndp_remainder = UGETW(np.wNdpOutPayloadRemainder);
+ /* Validate values */
+ if (!powerof2(sc->sc_align) || sc->sc_align == 0 ||
+    sc->sc_align >= sc->sc_tx_bufsz)
+ sc->sc_align = sizeof (uint32_t);
+ if (!powerof2(sc->sc_ndp_div) || sc->sc_ndp_div == 0 ||
+    sc->sc_ndp_div >= sc->sc_tx_bufsz)
+ sc->sc_ndp_div = sizeof (uint32_t);
+ if (sc->sc_ndp_remainder >= sc->sc_ndp_div)
+ sc->sc_ndp_remainder = 0;
+ } else {
  sc->sc_rx_bufsz = sc->sc_tx_bufsz = 8 * 1024;
+ sc->sc_maxdgram = 0;
+ sc->sc_align = sc->sc_ndp_div = sizeof (uint32_t);
+ sc->sc_ndp_remainder = 0;
+ }
 }
 
 int
@@ -593,12 +611,12 @@ umb_alloc_xfers(struct umb_softc *sc)
  if (!sc->sc_rx_xfer) {
  if ((sc->sc_rx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL)
  sc->sc_rx_buf = usbd_alloc_buffer(sc->sc_rx_xfer,
-    sc->sc_rx_bufsz + MBIM_HDR32_LEN);
+    sc->sc_rx_bufsz);
  }
  if (!sc->sc_tx_xfer) {
  if ((sc->sc_tx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL)
  sc->sc_tx_buf = usbd_alloc_buffer(sc->sc_tx_xfer,
-    sc->sc_tx_bufsz + MBIM_HDR16_LEN);
+    sc->sc_tx_bufsz);
  }
  return (sc->sc_rx_buf && sc->sc_tx_buf) ? 1 : 0;
 }
@@ -617,10 +635,7 @@ umb_free_xfers(struct umb_softc *sc)
  sc->sc_tx_xfer = NULL;
  sc->sc_tx_buf = NULL;
  }
- if (sc->sc_tx_m) {
- m_freem(sc->sc_tx_m);
- sc->sc_tx_m = NULL;
- }
+ ml_purge(&sc->sc_tx_ml);
 }
 
 int
@@ -792,35 +807,91 @@ umb_input(struct ifnet *ifp, struct mbuf
  return 1;
 }
 
+static inline int
+umb_align(size_t bufsz, int offs, int alignment, int remainder)
+{
+ size_t m = alignment - 1;
+ int align;
+
+ align = (((size_t)offs + m) & ~m) - alignment + remainder;
+ if (align < offs)
+ align += alignment;
+ if (align > bufsz)
+ align = bufsz;
+ return align - offs;
+}
+
+static inline int
+umb_padding(void *buf, size_t bufsz, int offs, int alignment, int remainder)
+{
+ int nb;
+
+ nb = umb_align(bufsz, offs, alignment, remainder);
+ if (nb > 0)
+ memset(buf + offs, 0, nb);
+ return nb;
+}
+
 void
 umb_start(struct ifnet *ifp)
 {
  struct umb_softc *sc = ifp->if_softc;
- struct mbuf *m_head = NULL;
+ struct mbuf *m = NULL;
+ int ndgram = 0;
+ int offs, plen, len, mlen;
+ int maxalign;
 
  if (usbd_is_dying(sc->sc_udev) ||
     !(ifp->if_flags & IFF_RUNNING) ||
     ifq_is_oactive(&ifp->if_snd))
  return;
 
- m_head = ifq_deq_begin(&ifp->if_snd);
- if (m_head == NULL)
- return;
+ KASSERT(ml_empty(&sc->sc_tx_ml));
 
- if (!umb_encap(sc, m_head)) {
- ifq_deq_rollback(&ifp->if_snd, m_head);
- ifq_set_oactive(&ifp->if_snd);
- return;
- }
- ifq_deq_commit(&ifp->if_snd, m_head);
+ offs = sizeof (struct ncm_header16);
+ offs += umb_align(sc->sc_tx_bufsz, offs, sc->sc_align, 0);
+
+ /*
+ * Note that 'struct ncm_pointer16' already includes space for the
+ * terminating zero pointer.
+ */
+ offs += sizeof (struct ncm_pointer16);
+ plen = sizeof (struct ncm_pointer16_dgram);
+ maxalign = (sc->sc_ndp_div - 1) + sc->sc_ndp_remainder;
+ len = 0;
+ while (1) {
+ m = ifq_deq_begin(&ifp->if_snd);
+ if (m == NULL)
+ break;
+
+ /*
+ * Check if mbuf plus required NCM pointer still fits into
+ * xfer buffers. Assume maximal padding.
+ */
+ plen += sizeof (struct ncm_pointer16_dgram);
+ mlen = maxalign +  m->m_pkthdr.len;
+ if ((sc->sc_maxdgram != 0 && ndgram >= sc->sc_maxdgram) ||
+    (offs + plen + len + mlen > sc->sc_tx_bufsz)) {
+ ifq_deq_rollback(&ifp->if_snd, m);
+ break;
+ }
+ ifq_deq_commit(&ifp->if_snd, m);
+
+ ndgram++;
+ len += mlen;
+ ml_enqueue(&sc->sc_tx_ml, m);
 
 #if NBPFILTER > 0
- if (ifp->if_bpf)
- bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
+ if (ifp->if_bpf)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 #endif
-
- ifq_set_oactive(&ifp->if_snd);
- ifp->if_timer = (2 * umb_xfer_tout) / 1000;
+ }
+ if (ml_empty(&sc->sc_tx_ml))
+ return;
+ if (umb_encap(sc)) {
+ ifq_set_oactive(&ifp->if_snd);
+ ifp->if_timer = (2 * umb_xfer_tout) / 1000;
+ }
 }
 
 void
@@ -1222,20 +1293,6 @@ umb_getinfobuf(void *in, int inlen, uint
 }
 
 static inline int
-umb_padding(void *data, int len, size_t sz)
-{
- char *p = data;
- int np = 0;
-
- while (len < sz && (len % 4) != 0) {
- *p++ = '\0';
- len++;
- np++;
- }
- return np;
-}
-
-static inline int
 umb_addstr(void *buf, size_t bufsz, int *offs, void *str, int slen,
     uint32_t *offsmember, uint32_t *sizemember)
 {
@@ -1247,7 +1304,7 @@ umb_addstr(void *buf, size_t bufsz, int
  *offsmember = htole32((uint32_t)*offs);
  memcpy(buf + *offs, str, slen);
  *offs += slen;
- *offs += umb_padding(buf, *offs, bufsz);
+ *offs += umb_padding(buf, bufsz, *offs, sizeof (uint32_t), 0);
  } else
  *offsmember = htole32(0);
  return 1;
@@ -1706,46 +1763,71 @@ umb_rxeof(struct usbd_xfer *xfer, void *
 }
 
 int
-umb_encap(struct umb_softc *sc, struct mbuf *m)
+umb_encap(struct umb_softc *sc)
 {
  struct ncm_header16 *hdr;
  struct ncm_pointer16 *ptr;
+ struct ncm_pointer16_dgram *dgram;
+ int offs, poffs;
+ struct mbuf_list tmpml = MBUF_LIST_INITIALIZER();
+ struct mbuf *m;
  usbd_status  err;
- int len;
-
- KASSERT(sc->sc_tx_m == NULL);
 
+ /* All size constraints have been validated by the caller! */
  hdr = sc->sc_tx_buf;
- ptr = (struct ncm_pointer16 *)(hdr + 1);
-
  USETDW(hdr->dwSignature, NCM_HDR16_SIG);
  USETW(hdr->wHeaderLength, sizeof (*hdr));
+ USETW(hdr->wBlockLength, 0);
  USETW(hdr->wSequence, sc->sc_tx_seq);
  sc->sc_tx_seq++;
- USETW(hdr->wNdpIndex, sizeof (*hdr));
+ offs = sizeof (*hdr);
+ offs += umb_padding(sc->sc_tx_buf, sc->sc_tx_bufsz, offs,
+    sc->sc_align, 0);
+ USETW(hdr->wNdpIndex, offs);
 
- len = m->m_pkthdr.len;
+ poffs = offs;
+ ptr = (struct ncm_pointer16 *)(sc->sc_tx_buf + offs);
  USETDW(ptr->dwSignature, MBIM_NCM_NTH16_SIG(umb_session_id));
- USETW(ptr->wLength, sizeof (*ptr));
  USETW(ptr->wNextNdpIndex, 0);
- USETW(ptr->dgram[0].wDatagramIndex, MBIM_HDR16_LEN);
- USETW(ptr->dgram[0].wDatagramLen, len);
- USETW(ptr->dgram[1].wDatagramIndex, 0);
- USETW(ptr->dgram[1].wDatagramLen, 0);
-
- m_copydata(m, 0, len, (caddr_t)(ptr + 1));
- sc->sc_tx_m = m;
- len += MBIM_HDR16_LEN;
- USETW(hdr->wBlockLength, len);
-
- DPRINTFN(3, "%s: encap %d bytes\n", DEVNAM(sc), len);
- DDUMPN(5, sc->sc_tx_buf, len);
- usbd_setup_xfer(sc->sc_tx_xfer, sc->sc_tx_pipe, sc, sc->sc_tx_buf, len,
+ dgram = &ptr->dgram[0];
+ offs = (caddr_t)dgram - (caddr_t)sc->sc_tx_buf;
+
+ /* Leave space for dgram pointers */
+ while ((m = ml_dequeue(&sc->sc_tx_ml)) != NULL) {
+ offs += sizeof (*dgram);
+ ml_enqueue(&tmpml, m);
+ }
+ offs += sizeof (*dgram); /* one more to terminate pointer list */
+ USETW(ptr->wLength, offs - poffs);
+
+ /* Encap mbufs */
+ while ((m = ml_dequeue(&tmpml)) != NULL) {
+ offs += umb_padding(sc->sc_tx_buf, sc->sc_tx_bufsz, offs,
+    sc->sc_ndp_div, sc->sc_ndp_remainder);
+ USETW(dgram->wDatagramIndex, offs);
+ USETW(dgram->wDatagramLen, m->m_pkthdr.len);
+ dgram++;
+ m_copydata(m, 0, m->m_pkthdr.len, sc->sc_tx_buf + offs);
+ offs += m->m_pkthdr.len;
+ ml_enqueue(&sc->sc_tx_ml, m);
+ }
+
+ /* Terminating pointer */
+ USETW(dgram->wDatagramIndex, 0);
+ USETW(dgram->wDatagramLen, 0);
+ USETW(hdr->wBlockLength, offs);
+
+ DPRINTFN(3, "%s: encap %d bytes\n", DEVNAM(sc), offs);
+ DDUMPN(5, sc->sc_tx_buf, offs);
+ KASSERT(offs <= sc->sc_tx_bufsz);
+
+ usbd_setup_xfer(sc->sc_tx_xfer, sc->sc_tx_pipe, sc, sc->sc_tx_buf, offs,
     USBD_FORCE_SHORT_XFER | USBD_NO_COPY, umb_xfer_tout, umb_txeof);
  err = usbd_transfer(sc->sc_tx_xfer);
  if (err != USBD_IN_PROGRESS) {
  DPRINTF("%s: start tx error: %s\n", DEVNAM(sc),
     usbd_errstr(err));
+ ml_purge(&sc->sc_tx_ml);
  return 0;
  }
  return 1;
@@ -1759,12 +1841,10 @@ umb_txeof(struct usbd_xfer *xfer, void *
  int s;
 
  s = splnet();
+ ml_purge(&sc->sc_tx_ml);
  ifq_clr_oactive(&ifp->if_snd);
  ifp->if_timer = 0;
 
- m_freem(sc->sc_tx_m);
- sc->sc_tx_m = NULL;
-
  if (status != USBD_NORMAL_COMPLETION) {
  if (status != USBD_NOT_STARTED && status != USBD_CANCELLED) {
  ifp->if_oerrors++;
@@ -1814,10 +1894,7 @@ umb_decap(struct umb_softc *sc, struct u
  hlen = UGETW(hdr16->wHeaderLength);
  if (len < hlen)
  goto toosmall;
- if (len > sc->sc_rx_bufsz) {
- DPRINTF("%s: packet too large (%d)\n", DEVNAM(sc), len);
- goto fail;
- }
+
  switch (hsig) {
  case NCM_HDR16_SIG:
  blen = UGETW(hdr16->wBlockLength);
@@ -1843,7 +1920,7 @@ umb_decap(struct umb_softc *sc, struct u
     DEVNAM(sc), hsig);
  goto fail;
  }
- if (len < blen) {
+ if (blen != 0 && len < blen) {
  DPRINTF("%s: bad NTB len (%d) for %d bytes of data\n",
     DEVNAM(sc), blen, len);
  goto fail;
Index: sys/dev/usb/if_umb.h
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_umb.h,v
retrieving revision 1.3
diff -u -p -u -p -r1.3 if_umb.h
--- sys/dev/usb/if_umb.h 25 Nov 2016 12:43:26 -0000 1.3
+++ sys/dev/usb/if_umb.h 12 Dec 2016 13:38:34 -0000
@@ -339,6 +339,11 @@ struct umb_softc {
  int sc_maxpktlen;
  int sc_maxsessions;
 
+ int sc_maxdgram;
+ int sc_align;
+ int sc_ndp_div;
+ int sc_ndp_remainder;
+
 #define UMBFLG_FCC_AUTH_REQUIRED 0x0001
  uint32_t sc_flags;
  int sc_cid;
@@ -368,7 +373,7 @@ struct umb_softc {
  void *sc_tx_buf;
  int sc_tx_bufsz;
  struct usbd_pipe *sc_tx_pipe;
- struct mbuf *sc_tx_m;
+ struct mbuf_list sc_tx_ml;
  uint32_t sc_tx_seq;
 
  uint32_t sc_tid;
Index: sys/dev/usb/mbim.h
===================================================================
RCS file: /cvs/src/sys/dev/usb/mbim.h,v
retrieving revision 1.3
diff -u -p -u -p -r1.3 mbim.h
--- sys/dev/usb/mbim.h 25 Nov 2016 12:43:26 -0000 1.3
+++ sys/dev/usb/mbim.h 12 Dec 2016 13:38:34 -0000
@@ -612,25 +612,20 @@ struct ncm_ntb_parameters {
 #define NCM_FORMAT_NTB16 0x0001
 #define NCM_FORMAT_NTB32 0x0002
  uDWord dwNtbInMaxSize;
- uWord wNtbInDivisor;
- uWord wNtbInPayloadRemainder;
- uWord wNtbInAlignment;
+ uWord wNdpInDivisor;
+ uWord wNdpInPayloadRemainder;
+ uWord wNdpInAlignment;
  uWord wReserved1;
  uDWord dwNtbOutMaxSize;
- uWord wNtbOutDivisor;
- uWord wNtbOutPayloadRemainder;
- uWord wNtbOutAlignment;
+ uWord wNdpOutDivisor;
+ uWord wNdpOutPayloadRemainder;
+ uWord wNdpOutAlignment;
  uWord wNtbOutMaxDatagrams;
 } __packed;
 
 /*
  * NCM Encoding
  */
-#define MBIM_HDR16_LEN \
- (sizeof (struct ncm_header16) + sizeof (struct ncm_pointer16))
-#define MBIM_HDR32_LEN \
- (sizeof (struct ncm_header32) + sizeof (struct ncm_pointer32))
-
 struct ncm_header16 {
 #define NCM_HDR16_SIG 0x484d434e
  uDWord dwSignature;
@@ -668,7 +663,7 @@ struct ncm_pointer16 {
  uWord wNextNdpIndex;
 
  /* Minimum is two datagrams, but can be more */
- struct ncm_pointer16_dgram dgram[2];
+ struct ncm_pointer16_dgram dgram[1];
 } __packed;
 
 struct ncm_pointer32_dgram {
@@ -689,7 +684,7 @@ struct ncm_pointer32 {
  uDWord dwReserved12;
 
  /* Minimum is two datagrams, but can be more */
- struct ncm_pointer32_dgram dgram[2];
+ struct ncm_pointer32_dgram dgram[1];
 } __packed;
 
 #endif /* _KERNEL */

Reply | Threaded
Open this post in threaded view
|

Re: umb: aggregate packets on tx

Bryan Vyhmeister-3
On Mon, Dec 12, 2016 at 02:50:50PM +0100, Gerhard Roth wrote:
> The current umb(4) implementation needs one USB transfer for every packet
> that is sent. With the following patch, we can now aggregate several
> packets from the ifq into one single USB transfer.
>
> This may speed up the tx path. And even if it doesn't, at least it
> reduces the number of transfers required.

I applied this diff this morning and everything has been working
smoothly so far. There are no obvious regressions that I have noticed.
This is on an X1 Carbon (4th Gen) with an EM7455 just like the X260 has.

Bryan

Reply | Threaded
Open this post in threaded view
|

Re: umb: aggregate packets on tx

Gerhard Roth-2
In reply to this post by Gerhard Roth-2
On Mon, 12 Dec 2016 14:50:50 +0100 Gerhard Roth <[hidden email]> wrote:

> The current umb(4) implementation needs one USB transfer for every packet
> that is sent. With the following patch, we can now aggregate several
> packets from the ifq into one single USB transfer.
>
> This may speed up the tx path. And even if it doesn't, at least it
> reduces the number of transfers required.
>
>
> Gerhard
>

Ping.

Anyone willing to ok this?

(Patch below updated to match current).


Gerhard


Index: sys/dev/usb/if_umb.c
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_umb.c,v
retrieving revision 1.9
diff -u -p -u -p -r1.9 if_umb.c
--- sys/dev/usb/if_umb.c 22 Jan 2017 10:17:39 -0000 1.9
+++ sys/dev/usb/if_umb.c 20 Feb 2017 07:44:40 -0000
@@ -156,7 +156,7 @@ int umb_decode_connect_info(struct umb
 int umb_decode_ip_configuration(struct umb_softc *, void *, int);
 void umb_rx(struct umb_softc *);
 void umb_rxeof(struct usbd_xfer *, void *, usbd_status);
-int umb_encap(struct umb_softc *, struct mbuf *);
+int umb_encap(struct umb_softc *);
 void umb_txeof(struct usbd_xfer *, void *, usbd_status);
 void umb_decap(struct umb_softc *, struct usbd_xfer *);
 
@@ -299,6 +299,7 @@ umb_attach(struct device *parent, struct
 
  sc->sc_udev = uaa->device;
  sc->sc_ctrl_ifaceno = uaa->ifaceno;
+ ml_init(&sc->sc_tx_ml);
 
  /*
  * Some MBIM hardware does not provide the mandatory CDC Union
@@ -583,8 +584,25 @@ umb_ncm_setup(struct umb_softc *sc)
     UGETW(np.wLength) == sizeof (np)) {
  sc->sc_rx_bufsz = UGETDW(np.dwNtbInMaxSize);
  sc->sc_tx_bufsz = UGETDW(np.dwNtbOutMaxSize);
- } else
+ sc->sc_maxdgram = UGETW(np.wNtbOutMaxDatagrams);
+ sc->sc_align = UGETW(np.wNdpOutAlignment);
+ sc->sc_ndp_div = UGETW(np.wNdpOutDivisor);
+ sc->sc_ndp_remainder = UGETW(np.wNdpOutPayloadRemainder);
+ /* Validate values */
+ if (!powerof2(sc->sc_align) || sc->sc_align == 0 ||
+    sc->sc_align >= sc->sc_tx_bufsz)
+ sc->sc_align = sizeof (uint32_t);
+ if (!powerof2(sc->sc_ndp_div) || sc->sc_ndp_div == 0 ||
+    sc->sc_ndp_div >= sc->sc_tx_bufsz)
+ sc->sc_ndp_div = sizeof (uint32_t);
+ if (sc->sc_ndp_remainder >= sc->sc_ndp_div)
+ sc->sc_ndp_remainder = 0;
+ } else {
  sc->sc_rx_bufsz = sc->sc_tx_bufsz = 8 * 1024;
+ sc->sc_maxdgram = 0;
+ sc->sc_align = sc->sc_ndp_div = sizeof (uint32_t);
+ sc->sc_ndp_remainder = 0;
+ }
 }
 
 int
@@ -593,12 +611,12 @@ umb_alloc_xfers(struct umb_softc *sc)
  if (!sc->sc_rx_xfer) {
  if ((sc->sc_rx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL)
  sc->sc_rx_buf = usbd_alloc_buffer(sc->sc_rx_xfer,
-    sc->sc_rx_bufsz + MBIM_HDR32_LEN);
+    sc->sc_rx_bufsz);
  }
  if (!sc->sc_tx_xfer) {
  if ((sc->sc_tx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL)
  sc->sc_tx_buf = usbd_alloc_buffer(sc->sc_tx_xfer,
-    sc->sc_tx_bufsz + MBIM_HDR16_LEN);
+    sc->sc_tx_bufsz);
  }
  return (sc->sc_rx_buf && sc->sc_tx_buf) ? 1 : 0;
 }
@@ -617,10 +635,7 @@ umb_free_xfers(struct umb_softc *sc)
  sc->sc_tx_xfer = NULL;
  sc->sc_tx_buf = NULL;
  }
- if (sc->sc_tx_m) {
- m_freem(sc->sc_tx_m);
- sc->sc_tx_m = NULL;
- }
+ ml_purge(&sc->sc_tx_ml);
 }
 
 int
@@ -792,35 +807,91 @@ umb_input(struct ifnet *ifp, struct mbuf
  return 1;
 }
 
+static inline int
+umb_align(size_t bufsz, int offs, int alignment, int remainder)
+{
+ size_t m = alignment - 1;
+ int align;
+
+ align = (((size_t)offs + m) & ~m) - alignment + remainder;
+ if (align < offs)
+ align += alignment;
+ if (align > bufsz)
+ align = bufsz;
+ return align - offs;
+}
+
+static inline int
+umb_padding(void *buf, size_t bufsz, int offs, int alignment, int remainder)
+{
+ int nb;
+
+ nb = umb_align(bufsz, offs, alignment, remainder);
+ if (nb > 0)
+ memset(buf + offs, 0, nb);
+ return nb;
+}
+
 void
 umb_start(struct ifnet *ifp)
 {
  struct umb_softc *sc = ifp->if_softc;
- struct mbuf *m_head = NULL;
+ struct mbuf *m = NULL;
+ int ndgram = 0;
+ int offs, plen, len, mlen;
+ int maxalign;
 
  if (usbd_is_dying(sc->sc_udev) ||
     !(ifp->if_flags & IFF_RUNNING) ||
     ifq_is_oactive(&ifp->if_snd))
  return;
 
- m_head = ifq_deq_begin(&ifp->if_snd);
- if (m_head == NULL)
- return;
+ KASSERT(ml_empty(&sc->sc_tx_ml));
 
- if (!umb_encap(sc, m_head)) {
- ifq_deq_rollback(&ifp->if_snd, m_head);
- ifq_set_oactive(&ifp->if_snd);
- return;
- }
- ifq_deq_commit(&ifp->if_snd, m_head);
+ offs = sizeof (struct ncm_header16);
+ offs += umb_align(sc->sc_tx_bufsz, offs, sc->sc_align, 0);
+
+ /*
+ * Note that 'struct ncm_pointer16' already includes space for the
+ * terminating zero pointer.
+ */
+ offs += sizeof (struct ncm_pointer16);
+ plen = sizeof (struct ncm_pointer16_dgram);
+ maxalign = (sc->sc_ndp_div - 1) + sc->sc_ndp_remainder;
+ len = 0;
+ while (1) {
+ m = ifq_deq_begin(&ifp->if_snd);
+ if (m == NULL)
+ break;
+
+ /*
+ * Check if mbuf plus required NCM pointer still fits into
+ * xfer buffers. Assume maximal padding.
+ */
+ plen += sizeof (struct ncm_pointer16_dgram);
+ mlen = maxalign +  m->m_pkthdr.len;
+ if ((sc->sc_maxdgram != 0 && ndgram >= sc->sc_maxdgram) ||
+    (offs + plen + len + mlen > sc->sc_tx_bufsz)) {
+ ifq_deq_rollback(&ifp->if_snd, m);
+ break;
+ }
+ ifq_deq_commit(&ifp->if_snd, m);
+
+ ndgram++;
+ len += mlen;
+ ml_enqueue(&sc->sc_tx_ml, m);
 
 #if NBPFILTER > 0
- if (ifp->if_bpf)
- bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
+ if (ifp->if_bpf)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 #endif
-
- ifq_set_oactive(&ifp->if_snd);
- ifp->if_timer = (2 * umb_xfer_tout) / 1000;
+ }
+ if (ml_empty(&sc->sc_tx_ml))
+ return;
+ if (umb_encap(sc)) {
+ ifq_set_oactive(&ifp->if_snd);
+ ifp->if_timer = (2 * umb_xfer_tout) / 1000;
+ }
 }
 
 void
@@ -1222,20 +1293,6 @@ umb_getinfobuf(void *in, int inlen, uint
 }
 
 static inline int
-umb_padding(void *data, int len, size_t sz)
-{
- char *p = data;
- int np = 0;
-
- while (len < sz && (len % 4) != 0) {
- *p++ = '\0';
- len++;
- np++;
- }
- return np;
-}
-
-static inline int
 umb_addstr(void *buf, size_t bufsz, int *offs, void *str, int slen,
     uint32_t *offsmember, uint32_t *sizemember)
 {
@@ -1247,7 +1304,7 @@ umb_addstr(void *buf, size_t bufsz, int
  *offsmember = htole32((uint32_t)*offs);
  memcpy(buf + *offs, str, slen);
  *offs += slen;
- *offs += umb_padding(buf, *offs, bufsz);
+ *offs += umb_padding(buf, bufsz, *offs, sizeof (uint32_t), 0);
  } else
  *offsmember = htole32(0);
  return 1;
@@ -1706,46 +1763,71 @@ umb_rxeof(struct usbd_xfer *xfer, void *
 }
 
 int
-umb_encap(struct umb_softc *sc, struct mbuf *m)
+umb_encap(struct umb_softc *sc)
 {
  struct ncm_header16 *hdr;
  struct ncm_pointer16 *ptr;
+ struct ncm_pointer16_dgram *dgram;
+ int offs, poffs;
+ struct mbuf_list tmpml = MBUF_LIST_INITIALIZER();
+ struct mbuf *m;
  usbd_status  err;
- int len;
-
- KASSERT(sc->sc_tx_m == NULL);
 
+ /* All size constraints have been validated by the caller! */
  hdr = sc->sc_tx_buf;
- ptr = (struct ncm_pointer16 *)(hdr + 1);
-
  USETDW(hdr->dwSignature, NCM_HDR16_SIG);
  USETW(hdr->wHeaderLength, sizeof (*hdr));
+ USETW(hdr->wBlockLength, 0);
  USETW(hdr->wSequence, sc->sc_tx_seq);
  sc->sc_tx_seq++;
- USETW(hdr->wNdpIndex, sizeof (*hdr));
+ offs = sizeof (*hdr);
+ offs += umb_padding(sc->sc_tx_buf, sc->sc_tx_bufsz, offs,
+    sc->sc_align, 0);
+ USETW(hdr->wNdpIndex, offs);
 
- len = m->m_pkthdr.len;
+ poffs = offs;
+ ptr = (struct ncm_pointer16 *)(sc->sc_tx_buf + offs);
  USETDW(ptr->dwSignature, MBIM_NCM_NTH16_SIG(umb_session_id));
- USETW(ptr->wLength, sizeof (*ptr));
  USETW(ptr->wNextNdpIndex, 0);
- USETW(ptr->dgram[0].wDatagramIndex, MBIM_HDR16_LEN);
- USETW(ptr->dgram[0].wDatagramLen, len);
- USETW(ptr->dgram[1].wDatagramIndex, 0);
- USETW(ptr->dgram[1].wDatagramLen, 0);
-
- m_copydata(m, 0, len, (caddr_t)(ptr + 1));
- sc->sc_tx_m = m;
- len += MBIM_HDR16_LEN;
- USETW(hdr->wBlockLength, len);
-
- DPRINTFN(3, "%s: encap %d bytes\n", DEVNAM(sc), len);
- DDUMPN(5, sc->sc_tx_buf, len);
- usbd_setup_xfer(sc->sc_tx_xfer, sc->sc_tx_pipe, sc, sc->sc_tx_buf, len,
+ dgram = &ptr->dgram[0];
+ offs = (caddr_t)dgram - (caddr_t)sc->sc_tx_buf;
+
+ /* Leave space for dgram pointers */
+ while ((m = ml_dequeue(&sc->sc_tx_ml)) != NULL) {
+ offs += sizeof (*dgram);
+ ml_enqueue(&tmpml, m);
+ }
+ offs += sizeof (*dgram); /* one more to terminate pointer list */
+ USETW(ptr->wLength, offs - poffs);
+
+ /* Encap mbufs */
+ while ((m = ml_dequeue(&tmpml)) != NULL) {
+ offs += umb_padding(sc->sc_tx_buf, sc->sc_tx_bufsz, offs,
+    sc->sc_ndp_div, sc->sc_ndp_remainder);
+ USETW(dgram->wDatagramIndex, offs);
+ USETW(dgram->wDatagramLen, m->m_pkthdr.len);
+ dgram++;
+ m_copydata(m, 0, m->m_pkthdr.len, sc->sc_tx_buf + offs);
+ offs += m->m_pkthdr.len;
+ ml_enqueue(&sc->sc_tx_ml, m);
+ }
+
+ /* Terminating pointer */
+ USETW(dgram->wDatagramIndex, 0);
+ USETW(dgram->wDatagramLen, 0);
+ USETW(hdr->wBlockLength, offs);
+
+ DPRINTFN(3, "%s: encap %d bytes\n", DEVNAM(sc), offs);
+ DDUMPN(5, sc->sc_tx_buf, offs);
+ KASSERT(offs <= sc->sc_tx_bufsz);
+
+ usbd_setup_xfer(sc->sc_tx_xfer, sc->sc_tx_pipe, sc, sc->sc_tx_buf, offs,
     USBD_FORCE_SHORT_XFER | USBD_NO_COPY, umb_xfer_tout, umb_txeof);
  err = usbd_transfer(sc->sc_tx_xfer);
  if (err != USBD_IN_PROGRESS) {
  DPRINTF("%s: start tx error: %s\n", DEVNAM(sc),
     usbd_errstr(err));
+ ml_purge(&sc->sc_tx_ml);
  return 0;
  }
  return 1;
@@ -1759,12 +1841,10 @@ umb_txeof(struct usbd_xfer *xfer, void *
  int s;
 
  s = splnet();
+ ml_purge(&sc->sc_tx_ml);
  ifq_clr_oactive(&ifp->if_snd);
  ifp->if_timer = 0;
 
- m_freem(sc->sc_tx_m);
- sc->sc_tx_m = NULL;
-
  if (status != USBD_NORMAL_COMPLETION) {
  if (status != USBD_NOT_STARTED && status != USBD_CANCELLED) {
  ifp->if_oerrors++;
@@ -1813,10 +1893,7 @@ umb_decap(struct umb_softc *sc, struct u
  hlen = UGETW(hdr16->wHeaderLength);
  if (len < hlen)
  goto toosmall;
- if (len > sc->sc_rx_bufsz) {
- DPRINTF("%s: packet too large (%d)\n", DEVNAM(sc), len);
- goto fail;
- }
+
  switch (hsig) {
  case NCM_HDR16_SIG:
  blen = UGETW(hdr16->wBlockLength);
@@ -1842,7 +1919,7 @@ umb_decap(struct umb_softc *sc, struct u
     DEVNAM(sc), hsig);
  goto fail;
  }
- if (len < blen) {
+ if (blen != 0 && len < blen) {
  DPRINTF("%s: bad NTB len (%d) for %d bytes of data\n",
     DEVNAM(sc), blen, len);
  goto fail;
Index: sys/dev/usb/if_umb.h
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_umb.h,v
retrieving revision 1.3
diff -u -p -u -p -r1.3 if_umb.h
--- sys/dev/usb/if_umb.h 25 Nov 2016 12:43:26 -0000 1.3
+++ sys/dev/usb/if_umb.h 12 Dec 2016 13:43:10 -0000
@@ -339,6 +339,11 @@ struct umb_softc {
  int sc_maxpktlen;
  int sc_maxsessions;
 
+ int sc_maxdgram;
+ int sc_align;
+ int sc_ndp_div;
+ int sc_ndp_remainder;
+
 #define UMBFLG_FCC_AUTH_REQUIRED 0x0001
  uint32_t sc_flags;
  int sc_cid;
@@ -368,7 +373,7 @@ struct umb_softc {
  void *sc_tx_buf;
  int sc_tx_bufsz;
  struct usbd_pipe *sc_tx_pipe;
- struct mbuf *sc_tx_m;
+ struct mbuf_list sc_tx_ml;
  uint32_t sc_tx_seq;
 
  uint32_t sc_tid;
Index: sys/dev/usb/mbim.h
===================================================================
RCS file: /cvs/src/sys/dev/usb/mbim.h,v
retrieving revision 1.3
diff -u -p -u -p -r1.3 mbim.h
--- sys/dev/usb/mbim.h 25 Nov 2016 12:43:26 -0000 1.3
+++ sys/dev/usb/mbim.h 12 Dec 2016 13:43:10 -0000
@@ -612,25 +612,20 @@ struct ncm_ntb_parameters {
 #define NCM_FORMAT_NTB16 0x0001
 #define NCM_FORMAT_NTB32 0x0002
  uDWord dwNtbInMaxSize;
- uWord wNtbInDivisor;
- uWord wNtbInPayloadRemainder;
- uWord wNtbInAlignment;
+ uWord wNdpInDivisor;
+ uWord wNdpInPayloadRemainder;
+ uWord wNdpInAlignment;
  uWord wReserved1;
  uDWord dwNtbOutMaxSize;
- uWord wNtbOutDivisor;
- uWord wNtbOutPayloadRemainder;
- uWord wNtbOutAlignment;
+ uWord wNdpOutDivisor;
+ uWord wNdpOutPayloadRemainder;
+ uWord wNdpOutAlignment;
  uWord wNtbOutMaxDatagrams;
 } __packed;
 
 /*
  * NCM Encoding
  */
-#define MBIM_HDR16_LEN \
- (sizeof (struct ncm_header16) + sizeof (struct ncm_pointer16))
-#define MBIM_HDR32_LEN \
- (sizeof (struct ncm_header32) + sizeof (struct ncm_pointer32))
-
 struct ncm_header16 {
 #define NCM_HDR16_SIG 0x484d434e
  uDWord dwSignature;
@@ -668,7 +663,7 @@ struct ncm_pointer16 {
  uWord wNextNdpIndex;
 
  /* Minimum is two datagrams, but can be more */
- struct ncm_pointer16_dgram dgram[2];
+ struct ncm_pointer16_dgram dgram[1];
 } __packed;
 
 struct ncm_pointer32_dgram {
@@ -689,7 +684,7 @@ struct ncm_pointer32 {
  uDWord dwReserved12;
 
  /* Minimum is two datagrams, but can be more */
- struct ncm_pointer32_dgram dgram[2];
+ struct ncm_pointer32_dgram dgram[1];
 } __packed;
 
 #endif /* _KERNEL */

Reply | Threaded
Open this post in threaded view
|

Re: umb: aggregate packets on tx

Alexander Bluhm
On Mon, Feb 20, 2017 at 04:35:10PM +0100, Gerhard Roth wrote:
> On Mon, 12 Dec 2016 14:50:50 +0100 Gerhard Roth <[hidden email]> wrote:
> > The current umb(4) implementation needs one USB transfer for every packet
> > that is sent. With the following patch, we can now aggregate several
> > packets from the ifq into one single USB transfer.
> >
> > This may speed up the tx path. And even if it doesn't, at least it
> > reduces the number of transfers required.

I am running with this for two days now.  Works fine.

umb0 at uhub0 port 4 configuration 1 interface 6 "Lenovo H5321 gw" rev 2.00/0.00 addr 2

Diff looks reasonable.  OK bluhm@

Reply | Threaded
Open this post in threaded view
|

Re: umb: aggregate packets on tx

Stefan Sperling-5
On Sat, Apr 15, 2017 at 11:21:41PM +0200, Alexander Bluhm wrote:

> On Mon, Feb 20, 2017 at 04:35:10PM +0100, Gerhard Roth wrote:
> > On Mon, 12 Dec 2016 14:50:50 +0100 Gerhard Roth <[hidden email]> wrote:
> > > The current umb(4) implementation needs one USB transfer for every packet
> > > that is sent. With the following patch, we can now aggregate several
> > > packets from the ifq into one single USB transfer.
> > >
> > > This may speed up the tx path. And even if it doesn't, at least it
> > > reduces the number of transfers required.
>
> I am running with this for two days now.  Works fine.
>
> umb0 at uhub0 port 4 configuration 1 interface 6 "Lenovo H5321 gw" rev 2.00/0.00 addr 2
>
> Diff looks reasonable.  OK bluhm@

Yes, the diff makes sense. OK by me as well.

Sent over
umb0 at uhub0 port 4 configuration 1 interface 0 "Sierra Wireless Inc. Sierra Wireless EM7345 4G LTE" rev 2.00/17.29 addr 7
with patch applied.

Thanks!