Improve ure(4) performance

classic Classic list List threaded Threaded
13 messages Options
Reply | Threaded
Open this post in threaded view
|

Improve ure(4) performance

Kevin Lo
Hi,

Jonathon Fletcher has been helping get the better performance out of ure(4).
I ran the tcpbench with ure (RTL8156) for 5 minutes:

71538432760 bytes sent over 300.999 seconds
bandwidth min/avg/max/std-dev = 12.071/1901.448/1947.873/135.283 Mbps

A big thanks to Jonathon for his hard work.

ok?

Index: sys/dev/usb/if_ure.c
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_ure.c,v
retrieving revision 1.16
diff -u -p -u -p -r1.16 if_ure.c
--- sys/dev/usb/if_ure.c 10 Jul 2020 13:26:41 -0000 1.16
+++ sys/dev/usb/if_ure.c 21 Jul 2020 05:37:45 -0000
@@ -117,11 +117,13 @@ void ure_miibus_writereg(struct device
 void ure_lock_mii(struct ure_softc *);
 void ure_unlock_mii(struct ure_softc *);
 
-int ure_encap(struct ure_softc *, struct mbuf *);
+int ure_encap_txpkt(struct mbuf *, char *, uint32_t);
+int ure_encap_xfer(struct ifnet *, struct ure_softc *, struct ure_chain *);
 void ure_rxeof(struct usbd_xfer *, void *, usbd_status);
 void ure_txeof(struct usbd_xfer *, void *, usbd_status);
-int ure_rx_list_init(struct ure_softc *);
-int ure_tx_list_init(struct ure_softc *);
+int ure_xfer_list_init(struct ure_softc *, struct ure_chain *,
+    uint32_t, int);
+void ure_xfer_list_free(struct ure_softc *, struct ure_chain *, int);
 
 void ure_tick_task(void *);
 void ure_tick(void *);
@@ -625,12 +627,36 @@ void
 ure_watchdog(struct ifnet *ifp)
 {
  struct ure_softc *sc = ifp->if_softc;
+ struct ure_chain *c;
+ usbd_status err;
+ int i, s;
+
+ ifp->if_timer = 0;
 
  if (usbd_is_dying(sc->ure_udev))
  return;
 
+ if ((ifp->if_flags & (IFF_RUNNING|IFF_UP)) != (IFF_RUNNING|IFF_UP))
+ return;
+
+ sc = ifp->if_softc;
+ s = splnet();
+
  ifp->if_oerrors++;
- printf("%s: watchdog timeout\n", sc->ure_dev.dv_xname);
+ DPRINTF(("watchdog timeout\n"));
+
+ for (i = 0; i < URE_TX_LIST_CNT; i++) {
+ c = &sc->ure_cdata.ure_tx_chain[i];
+ if (ml_len(&c->uc_mbuf_list) > 0) {
+ usbd_get_xfer_status(c->uc_xfer, NULL, NULL, NULL,
+    &err);
+ ure_txeof(c->uc_xfer, c, err);
+ }
+ }
+
+ if (ifq_is_oactive(&ifp->if_snd))
+ ifq_restart(&ifp->if_snd);
+ splx(s);
 }
 
 void
@@ -653,18 +679,26 @@ ure_init(void *xsc)
  else
  ure_rtl8153_nic_reset(sc);
 
- if (ure_rx_list_init(sc) == ENOBUFS) {
+ if (ure_xfer_list_init(sc, sc->ure_cdata.ure_rx_chain,
+ sc->ure_rxbufsz, URE_RX_LIST_CNT) == ENOBUFS) {
  printf("%s: rx list init failed\n", sc->ure_dev.dv_xname);
  splx(s);
  return;
  }
 
- if (ure_tx_list_init(sc) == ENOBUFS) {
+ if (ure_xfer_list_init(sc, sc->ure_cdata.ure_tx_chain,
+ sc->ure_txbufsz, URE_TX_LIST_CNT) == ENOBUFS) {
  printf("%s: tx list init failed\n", sc->ure_dev.dv_xname);
  splx(s);
  return;
  }
 
+ /* Initialize the SLIST we are using for the multiple tx buffers */
+ SLIST_INIT(&sc->ure_cdata.ure_tx_free);
+ for (i = 0; i < URE_TX_LIST_CNT; i++)
+ SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free,
+ &sc->ure_cdata.ure_tx_chain[i], ure_list);
+
  /* Set MAC address. */
  ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_CONFIG);
  ure_write_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA | URE_BYTE_EN_SIX_BYTES,
@@ -739,9 +773,9 @@ ure_init(void *xsc)
 
  /* Start up the receive pipe. */
  for (i = 0; i < URE_RX_LIST_CNT; i++) {
- c = &sc->ure_cdata.rx_chain[i];
+ c = &sc->ure_cdata.ure_rx_chain[i];
  usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_RX],
-    c, c->uc_buf, sc->ure_rxbufsz,
+    c, c->uc_buf, c->uc_bufmax,
     USBD_SHORT_XFER_OK | USBD_NO_COPY,
     USBD_NO_TIMEOUT, ure_rxeof);
  usbd_transfer(c->uc_xfer);
@@ -763,34 +797,83 @@ void
 ure_start(struct ifnet *ifp)
 {
  struct ure_softc *sc = ifp->if_softc;
- struct mbuf *m_head = NULL;
-
- if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd) ||
-    !(sc->ure_flags & URE_FLAG_LINK))
+ struct ure_cdata *cd = &sc->ure_cdata;
+ struct ure_chain *c;
+ struct mbuf *m = NULL;
+ uint32_t new_buflen;
+ int s, mlen;
+
+ if (!(sc->ure_flags & URE_FLAG_LINK) ||
+ (ifp->if_flags & (IFF_RUNNING|IFF_UP)) !=
+    (IFF_RUNNING|IFF_UP)) {
  return;
+ }
+
+ s = splnet();
 
- for (;;) {
- if (sc->ure_cdata.tx_cnt == sc->ure_tx_list_cnt) {
- ifq_set_oactive(&ifp->if_snd);
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ while (c != NULL) {
+ m = NULL;
+ mlen = ifq_hdatalen(&ifp->if_snd);
+ if (mlen <= 0)
  break;
+
+ /*
+                 * If packet larger than remaining space, send buffer and
+ * continue.
+ */
+ new_buflen = roundup(c->uc_buflen, URE_TX_BUF_ALIGN);
+ if (new_buflen + sizeof(struct ure_txpkt) + mlen >=
+    c->uc_bufmax) {
+ SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
+ if (ure_encap_xfer(ifp, sc, c)) {
+ SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
+    ure_list);
+ break;
+ }
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ continue;
  }
 
- m_head = ifq_dequeue(&ifp->if_snd);
- if (m_head == NULL)
+ m = ifq_dequeue(&ifp->if_snd);
+ if (m == NULL)
  break;
 
- if (ure_encap(sc, m_head)) {
- m_freem(m_head);
- ifq_set_oactive(&ifp->if_snd);
+ /* Discard packet larger than buffer. */
+ if (mlen + sizeof(struct ure_txpkt) >= c->uc_bufmax) {
+ m_freem(m);
+ continue;
+ }
+
+ /* Append packet to current buffer. */
+ mlen = ure_encap_txpkt(m, c->uc_buf + new_buflen,
+    c->uc_bufmax - new_buflen);
+ if (mlen <= 0) {
+ m_freem(m);
  break;
  }
+ ml_enqueue(&c->uc_mbuf_list, m);
+ c->uc_buflen = new_buflen + mlen;
+ }
 
-#if NBPFILTER > 0
- if (ifp->if_bpf)
- bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
-#endif
- ifp->if_timer = 5;
+ if (c != NULL) {
+ /* Send current buffer unless empty */
+ if (c->uc_buflen > 0 && ml_len(&c->uc_mbuf_list) > 0) {
+ SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
+ if (ure_encap_xfer(ifp, sc, c)) {
+ SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
+    ure_list);
+ }
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ }
  }
+
+ if (c == NULL)
+ ifq_set_oactive(&ifp->if_snd);
+
+ ifp->if_timer = 5;
+
+ splx(s);
 }
 
 void
@@ -810,9 +893,9 @@ ure_tick(void *xsc)
 void
 ure_stop(struct ure_softc *sc)
 {
- usbd_status err;
+ struct ure_cdata *cd;
  struct ifnet *ifp;
- int i;
+ usbd_status err;
 
  ure_reset(sc);
 
@@ -844,25 +927,54 @@ ure_stop(struct ure_softc *sc)
  sc->ure_ep[URE_ENDPT_TX] = NULL;
  }
 
- for (i = 0; i < URE_RX_LIST_CNT; i++) {
- if (sc->ure_cdata.rx_chain[i].uc_mbuf != NULL) {
- m_freem(sc->ure_cdata.rx_chain[i].uc_mbuf);
- sc->ure_cdata.rx_chain[i].uc_mbuf = NULL;
- }
- if (sc->ure_cdata.rx_chain[i].uc_xfer != NULL) {
- usbd_free_xfer(sc->ure_cdata.rx_chain[i].uc_xfer);
- sc->ure_cdata.rx_chain[i].uc_xfer = NULL;
+ cd = &sc->ure_cdata;
+ ure_xfer_list_free(sc, cd->ure_rx_chain, URE_RX_LIST_CNT);
+ ure_xfer_list_free(sc, cd->ure_tx_chain, URE_TX_LIST_CNT);
+}
+
+int
+ure_xfer_list_init(struct ure_softc *sc, struct ure_chain *ch,
+    uint32_t bufsize, int listlen)
+{
+ struct ure_chain *c;
+ int i;
+
+ for (i = 0; i < listlen; i++) {
+ c = &ch[i];
+ c->uc_sc = sc;
+ c->uc_idx = i;
+ c->uc_buflen = 0;
+ c->uc_bufmax = bufsize;
+ ml_init(&c->uc_mbuf_list);
+ if (c->uc_xfer == NULL) {
+ c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
+ if (c->uc_xfer == NULL)
+ return (ENOBUFS);
+ c->uc_buf = usbd_alloc_buffer(c->uc_xfer, c->uc_bufmax);
+ if (c->uc_buf == NULL) {
+ usbd_free_xfer(c->uc_xfer);
+ c->uc_xfer = NULL;
+ return (ENOBUFS);
+ }
  }
  }
 
- for (i = 0; i < sc->ure_tx_list_cnt; i++) {
- if (sc->ure_cdata.tx_chain[i].uc_mbuf != NULL) {
- m_freem(sc->ure_cdata.tx_chain[i].uc_mbuf);
- sc->ure_cdata.tx_chain[i].uc_mbuf = NULL;
- }
- if (sc->ure_cdata.tx_chain[i].uc_xfer != NULL) {
- usbd_free_xfer(sc->ure_cdata.tx_chain[i].uc_xfer);
- sc->ure_cdata.tx_chain[i].uc_xfer = NULL;
+ return (0);
+}
+
+void
+ure_xfer_list_free(struct ure_softc *sc, struct ure_chain *ch, int listlen)
+{
+ int i;
+
+ for (i = 0; i < listlen; i++) {
+ if (ch[i].uc_buf != NULL) {
+ ch[i].uc_buf = NULL;
+ }
+ ml_purge(&ch[i].uc_mbuf_list);
+ if (ch[i].uc_xfer != NULL) {
+ usbd_free_xfer(ch[i].uc_xfer);
+ ch[i].uc_xfer = NULL;
  }
  }
 }
@@ -1458,20 +1570,12 @@ ure_attach(struct device *parent, struct
  }
  }
 
- switch (uaa->product) {
- case USB_PRODUCT_REALTEK_RTL8152:
- sc->ure_flags |= URE_FLAG_8152;
- sc->ure_rxbufsz = URE_8152_RXBUFSZ;
- sc->ure_tx_list_cnt = 1;
- break;
- case USB_PRODUCT_REALTEK_RTL8156:
- sc->ure_flags |= URE_FLAG_8156;
- sc->ure_rxbufsz = URE_8153_RXBUFSZ;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
- break;
- default:
- sc->ure_rxbufsz = URE_8153_RXBUFSZ;
- sc->ure_tx_list_cnt = 1;
+ if (uaa->product == USB_PRODUCT_REALTEK_RTL8152) {
+ sc->ure_rxbufsz = URE_8152_BUFSZ;
+ sc->ure_txbufsz = URE_8152_BUFSZ;
+ } else {
+ sc->ure_txbufsz = URE_8153_BUFSZ;
+ sc->ure_rxbufsz = URE_8153_BUFSZ;
  }
 
  s = splnet();
@@ -1482,10 +1586,12 @@ ure_attach(struct device *parent, struct
  ver = ure_read_2(sc, URE_PLA_TCR1, URE_MCU_TYPE_PLA) & URE_VERSION_MASK;
  switch (ver) {
  case 0x4c00:
+ sc->ure_flags = URE_FLAG_8152;
  sc->ure_chip |= URE_CHIP_VER_4C00;
  printf("RTL8152 (0x4c00)");
  break;
  case 0x4c10:
+ sc->ure_flags = URE_FLAG_8152;
  sc->ure_chip |= URE_CHIP_VER_4C10;
  printf("RTL8152 (0x4c10)");
  break;
@@ -1507,18 +1613,18 @@ ure_attach(struct device *parent, struct
  break;
  case 0x6000:
  sc->ure_flags = URE_FLAG_8153B;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
  printf("RTL8153B (0x6000)");
  break;
  case 0x6010:
  sc->ure_flags = URE_FLAG_8153B;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
  printf("RTL8153B (0x6010)");
  break;
  case 0x7020:
+ sc->ure_flags = URE_FLAG_8156;
  printf("RTL8156 (0x7020)");
  break;
  case 0x7030:
+ sc->ure_flags = URE_FLAG_8156;
  printf("RTL8156 (0x7030)");
  break;
  default:
@@ -1721,7 +1827,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
  goto done;
  }
 
- buf += roundup(pktlen, 8);
+ buf += roundup(pktlen, URE_RX_BUF_ALIGN);
 
  memcpy(&rxhdr, buf, sizeof(rxhdr));
  total_len -= sizeof(rxhdr);
@@ -1734,7 +1840,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
  goto done;
  }
 
- total_len -= roundup(pktlen, 8);
+ total_len -= roundup(pktlen, URE_RX_BUF_ALIGN);
  buf += sizeof(rxhdr);
 
  m = m_devget(buf, pktlen, ETHER_ALIGN);
@@ -1799,18 +1905,27 @@ ure_txeof(struct usbd_xfer *xfer, void *
  if (usbd_is_dying(sc->ure_udev))
  return;
 
- DPRINTFN(2, ("tx completion\n"));
+ if (status != USBD_NORMAL_COMPLETION)
+ DPRINTF(("%s: %s uc_idx=%u : %s\n", sc->ure_dev.dv_xname,
+ __func__, c->uc_idx, usbd_errstr(status)));
 
  s = splnet();
- sc->ure_cdata.tx_cnt--;
+
+ ml_purge(&c->uc_mbuf_list);
+ c->uc_buflen = 0;
+
+ SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free, c, ure_list);
+
  if (status != USBD_NORMAL_COMPLETION) {
  if (status == USBD_NOT_STARTED || status == USBD_CANCELLED) {
  splx(s);
  return;
  }
+
  ifp->if_oerrors++;
  printf("%s: usb error on tx: %s\n", sc->ure_dev.dv_xname,
     usbd_errstr(status));
+
  if (status == USBD_STALLED)
  usbd_clear_endpoint_stall_async(
     sc->ure_ep[URE_ENDPT_TX]);
@@ -1819,83 +1934,18 @@ ure_txeof(struct usbd_xfer *xfer, void *
  }
 
  ifp->if_timer = 0;
- ifq_clr_oactive(&ifp->if_snd);
-
- m_freem(c->uc_mbuf);
- c->uc_mbuf = NULL;
-
- if (!ifq_empty(&ifp->if_snd))
- ure_start(ifp);
-
- splx(s);
-}
-
-int
-ure_tx_list_init(struct ure_softc *sc)
-{
- struct ure_cdata *cd;
- struct ure_chain *c;
- int i;
-
- cd = &sc->ure_cdata;
- for (i = 0; i < sc->ure_tx_list_cnt; i++) {
- c = &cd->tx_chain[i];
- c->uc_sc = sc;
- c->uc_idx = i;
- c->uc_mbuf = NULL;
- if (c->uc_xfer == NULL) {
- c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
- if (c->uc_xfer == NULL)
- return ENOBUFS;
- c->uc_buf = usbd_alloc_buffer(c->uc_xfer, URE_TXBUFSZ);
- if (c->uc_buf == NULL) {
- usbd_free_xfer(c->uc_xfer);
- return ENOBUFS;
- }
- }
- }
-
- cd->tx_prod = cd->tx_cnt = 0;
-
- return (0);
+ if (ifq_is_oactive(&ifp->if_snd))
+ ifq_restart(&ifp->if_snd);
 }
 
 int
-ure_rx_list_init(struct ure_softc *sc)
+ure_encap_txpkt(struct mbuf *m, char *buf, uint32_t maxlen)
 {
- struct ure_cdata *cd;
- struct ure_chain *c;
- int i;
-
- cd = &sc->ure_cdata;
- for (i = 0; i < URE_RX_LIST_CNT; i++) {
- c = &cd->rx_chain[i];
- c->uc_sc = sc;
- c->uc_idx = i;
- c->uc_mbuf = NULL;
- if (c->uc_xfer == NULL) {
- c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
- if (c->uc_xfer == NULL)
- return ENOBUFS;
- c->uc_buf = usbd_alloc_buffer(c->uc_xfer,
-    sc->ure_rxbufsz);
- if (c->uc_buf == NULL) {
- usbd_free_xfer(c->uc_xfer);
- return ENOBUFS;
- }
- }
- }
-
- return (0);
-}
-
-int
-ure_encap(struct ure_softc *sc, struct mbuf *m)
-{
- struct ure_chain *c;
- usbd_status err;
  struct ure_txpkt txhdr;
- uint32_t frm_len = 0, cflags = 0;
+ uint32_t len = sizeof(txhdr), cflags = 0;
+
+ if (len + m->m_pkthdr.len > maxlen)
+ return (-1);
 
  if ((m->m_pkthdr.csum_flags &
     (M_IPV4_CSUM_OUT | M_TCP_CSUM_OUT | M_UDP_CSUM_OUT)) != 0) {
@@ -1911,35 +1961,41 @@ ure_encap(struct ure_softc *sc, struct m
  cflags |= swap16(m->m_pkthdr.ether_vtag | URE_TXPKT_VLAN_TAG);
 #endif
 
- c = &sc->ure_cdata.tx_chain[sc->ure_cdata.tx_prod];
-
- /* header */
  txhdr.ure_pktlen = htole32(m->m_pkthdr.len | URE_TXPKT_TX_FS |
     URE_TXPKT_TX_LS);
  txhdr.ure_vlan = htole32(cflags);
- memcpy(c->uc_buf, &txhdr, sizeof(txhdr));
- frm_len = sizeof(txhdr);
+ memcpy(buf, &txhdr, len);
 
- /* packet */
- m_copydata(m, 0, m->m_pkthdr.len, c->uc_buf + frm_len);
- frm_len += m->m_pkthdr.len;
+ m_copydata(m, 0, m->m_pkthdr.len, buf + len);
+ len += m->m_pkthdr.len;
 
- c->uc_mbuf = m;
+ return (len);
+}
+
+int
+ure_encap_xfer(struct ifnet *ifp, struct ure_softc *sc, struct ure_chain *c)
+{
+ usbd_status err;
 
- DPRINTFN(2, ("tx %d bytes\n", frm_len));
  usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_TX], c, c->uc_buf,
-    frm_len, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000, ure_txeof);
+    c->uc_buflen, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000,
+    ure_txeof);
 
  err = usbd_transfer(c->uc_xfer);
- if (err != 0 && err != USBD_IN_PROGRESS) {
- c->uc_mbuf = NULL;
+ if (err != USBD_IN_PROGRESS) {
+ ml_purge(&c->uc_mbuf_list);
+ c->uc_buflen = 0;
  ure_stop(sc);
  return (EIO);
  }
 
- sc->ure_cdata.tx_cnt++;
- sc->ure_cdata.tx_prod = (sc->ure_cdata.tx_prod + 1) %
-    sc->ure_tx_list_cnt;
+#if NBPFILTER > 0
+ struct mbuf *m;
+
+ if (ifp->if_bpf)
+ MBUF_LIST_FOREACH(&c->uc_mbuf_list, m)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
 
  return (0);
 }
Index: sys/dev/usb/if_urereg.h
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_urereg.h,v
retrieving revision 1.8
diff -u -p -u -p -r1.8 if_urereg.h
--- sys/dev/usb/if_urereg.h 7 Dec 2019 08:45:28 -0000 1.8
+++ sys/dev/usb/if_urereg.h 21 Jul 2020 05:37:45 -0000
@@ -494,28 +494,29 @@ struct ure_txpkt {
 #define URE_ENDPT_TX 1
 #define URE_ENDPT_MAX 2
 
-#define URE_TX_LIST_CNT 8
+#define URE_TX_LIST_CNT 1
 #define URE_RX_LIST_CNT 1
-#define URE_RX_BUF_ALIGN sizeof(uint64_t)
+#define URE_TX_BUF_ALIGN 4
+#define URE_RX_BUF_ALIGN 8
 
-#define URE_TXBUFSZ 16384
-#define URE_8152_RXBUFSZ 16384
-#define URE_8153_RXBUFSZ 32768
+#define URE_8152_BUFSZ 16384
+#define URE_8153_BUFSZ 32768
 
 struct ure_chain {
  struct ure_softc *uc_sc;
  struct usbd_xfer *uc_xfer;
  char *uc_buf;
- struct mbuf *uc_mbuf;
- int uc_accum;
- int uc_idx;
+ uint32_t uc_buflen;
+ uint32_t uc_bufmax;
+ struct mbuf_list uc_mbuf_list;
+ SLIST_ENTRY(ure_chain)  ure_list;
+ uint8_t uc_idx;
 };
 
 struct ure_cdata {
- struct ure_chain tx_chain[URE_TX_LIST_CNT];
- struct ure_chain rx_chain[URE_RX_LIST_CNT];
- int tx_prod;
- int tx_cnt;
+ struct ure_chain ure_rx_chain[URE_RX_LIST_CNT];
+ struct ure_chain ure_tx_chain[URE_TX_LIST_CNT];
+ SLIST_HEAD(ure_list_head, ure_chain)    ure_tx_free;
 };
 
 struct ure_softc {
@@ -541,7 +542,7 @@ struct ure_softc {
 
  struct timeval ure_rx_notice;
  int ure_rxbufsz;
- int ure_tx_list_cnt;
+ int ure_txbufsz;
 
  int ure_phyno;
 
 

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Mikolaj Kucharski-3
Hi Kevin, Jonathon,

On Tue, Jul 21, 2020 at 02:38:55PM +0800, Kevin Lo wrote:
> Hi,
>
> Jonathon Fletcher has been helping get the better performance out of ure(4).
> I ran the tcpbench with ure (RTL8156) for 5 minutes:
>
> 71538432760 bytes sent over 300.999 seconds
> bandwidth min/avg/max/std-dev = 12.071/1901.448/1947.873/135.283 Mbps
>
> A big thanks to Jonathon for his hard work.

I've tested this on amd64 with following ure(4) card:

ure0 at uhub2 port 2 configuration 1 interface 0 "Realtek USB 10/100/1000 LAN" rev 2.10/30.00 addr 4
ure0: RTL8153 (0x5c30), address 00:e0:4c:04:09:7d

I could see speedtest-cli improvement from around 150 Mbit/s before the
diff, to 245 Mbit/s after the diff.

Big thank you for this patch. Will run it for next couple of days.
Before the diff I faced following errors with ure(4):

usbd_start_next: error=5
ure0: watchdog timeout
ure0: usb errors on rx: IOERROR

and unplug/re-plug dance was needed to bring back the network. That
usually happend during Google Meet video call, so I need few days to see
how this diff goes against web video calls.

> ok?
>
> Index: sys/dev/usb/if_ure.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/usb/if_ure.c,v
> retrieving revision 1.16
> diff -u -p -u -p -r1.16 if_ure.c
> --- sys/dev/usb/if_ure.c 10 Jul 2020 13:26:41 -0000 1.16
> +++ sys/dev/usb/if_ure.c 21 Jul 2020 05:37:45 -0000
> @@ -117,11 +117,13 @@ void ure_miibus_writereg(struct device
>  void ure_lock_mii(struct ure_softc *);
>  void ure_unlock_mii(struct ure_softc *);
>  
> -int ure_encap(struct ure_softc *, struct mbuf *);
> +int ure_encap_txpkt(struct mbuf *, char *, uint32_t);
> +int ure_encap_xfer(struct ifnet *, struct ure_softc *, struct ure_chain *);
>  void ure_rxeof(struct usbd_xfer *, void *, usbd_status);
>  void ure_txeof(struct usbd_xfer *, void *, usbd_status);
> -int ure_rx_list_init(struct ure_softc *);
> -int ure_tx_list_init(struct ure_softc *);
> +int ure_xfer_list_init(struct ure_softc *, struct ure_chain *,
> +    uint32_t, int);
> +void ure_xfer_list_free(struct ure_softc *, struct ure_chain *, int);
>  
>  void ure_tick_task(void *);
>  void ure_tick(void *);
> @@ -625,12 +627,36 @@ void
>  ure_watchdog(struct ifnet *ifp)
>  {
>   struct ure_softc *sc = ifp->if_softc;
> + struct ure_chain *c;
> + usbd_status err;
> + int i, s;
> +
> + ifp->if_timer = 0;
>  
>   if (usbd_is_dying(sc->ure_udev))
>   return;
>  
> + if ((ifp->if_flags & (IFF_RUNNING|IFF_UP)) != (IFF_RUNNING|IFF_UP))
> + return;
> +
> + sc = ifp->if_softc;
> + s = splnet();
> +
>   ifp->if_oerrors++;
> - printf("%s: watchdog timeout\n", sc->ure_dev.dv_xname);
> + DPRINTF(("watchdog timeout\n"));
> +
> + for (i = 0; i < URE_TX_LIST_CNT; i++) {
> + c = &sc->ure_cdata.ure_tx_chain[i];
> + if (ml_len(&c->uc_mbuf_list) > 0) {
> + usbd_get_xfer_status(c->uc_xfer, NULL, NULL, NULL,
> +    &err);
> + ure_txeof(c->uc_xfer, c, err);
> + }
> + }
> +
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
> + splx(s);
>  }
>  
>  void
> @@ -653,18 +679,26 @@ ure_init(void *xsc)
>   else
>   ure_rtl8153_nic_reset(sc);
>  
> - if (ure_rx_list_init(sc) == ENOBUFS) {
> + if (ure_xfer_list_init(sc, sc->ure_cdata.ure_rx_chain,
> + sc->ure_rxbufsz, URE_RX_LIST_CNT) == ENOBUFS) {
>   printf("%s: rx list init failed\n", sc->ure_dev.dv_xname);
>   splx(s);
>   return;
>   }
>  
> - if (ure_tx_list_init(sc) == ENOBUFS) {
> + if (ure_xfer_list_init(sc, sc->ure_cdata.ure_tx_chain,
> + sc->ure_txbufsz, URE_TX_LIST_CNT) == ENOBUFS) {
>   printf("%s: tx list init failed\n", sc->ure_dev.dv_xname);
>   splx(s);
>   return;
>   }
>  
> + /* Initialize the SLIST we are using for the multiple tx buffers */
> + SLIST_INIT(&sc->ure_cdata.ure_tx_free);
> + for (i = 0; i < URE_TX_LIST_CNT; i++)
> + SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free,
> + &sc->ure_cdata.ure_tx_chain[i], ure_list);
> +
>   /* Set MAC address. */
>   ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_CONFIG);
>   ure_write_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA | URE_BYTE_EN_SIX_BYTES,
> @@ -739,9 +773,9 @@ ure_init(void *xsc)
>  
>   /* Start up the receive pipe. */
>   for (i = 0; i < URE_RX_LIST_CNT; i++) {
> - c = &sc->ure_cdata.rx_chain[i];
> + c = &sc->ure_cdata.ure_rx_chain[i];
>   usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_RX],
> -    c, c->uc_buf, sc->ure_rxbufsz,
> +    c, c->uc_buf, c->uc_bufmax,
>      USBD_SHORT_XFER_OK | USBD_NO_COPY,
>      USBD_NO_TIMEOUT, ure_rxeof);
>   usbd_transfer(c->uc_xfer);
> @@ -763,34 +797,83 @@ void
>  ure_start(struct ifnet *ifp)
>  {
>   struct ure_softc *sc = ifp->if_softc;
> - struct mbuf *m_head = NULL;
> -
> - if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd) ||
> -    !(sc->ure_flags & URE_FLAG_LINK))
> + struct ure_cdata *cd = &sc->ure_cdata;
> + struct ure_chain *c;
> + struct mbuf *m = NULL;
> + uint32_t new_buflen;
> + int s, mlen;
> +
> + if (!(sc->ure_flags & URE_FLAG_LINK) ||
> + (ifp->if_flags & (IFF_RUNNING|IFF_UP)) !=
> +    (IFF_RUNNING|IFF_UP)) {
>   return;
> + }
> +
> + s = splnet();
>  
> - for (;;) {
> - if (sc->ure_cdata.tx_cnt == sc->ure_tx_list_cnt) {
> - ifq_set_oactive(&ifp->if_snd);
> + c = SLIST_FIRST(&cd->ure_tx_free);
> + while (c != NULL) {
> + m = NULL;
> + mlen = ifq_hdatalen(&ifp->if_snd);
> + if (mlen <= 0)
>   break;
> +
> + /*
> +                 * If packet larger than remaining space, send buffer and
> + * continue.
> + */
> + new_buflen = roundup(c->uc_buflen, URE_TX_BUF_ALIGN);
> + if (new_buflen + sizeof(struct ure_txpkt) + mlen >=
> +    c->uc_bufmax) {
> + SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
> + if (ure_encap_xfer(ifp, sc, c)) {
> + SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
> +    ure_list);
> + break;
> + }
> + c = SLIST_FIRST(&cd->ure_tx_free);
> + continue;
>   }
>  
> - m_head = ifq_dequeue(&ifp->if_snd);
> - if (m_head == NULL)
> + m = ifq_dequeue(&ifp->if_snd);
> + if (m == NULL)
>   break;
>  
> - if (ure_encap(sc, m_head)) {
> - m_freem(m_head);
> - ifq_set_oactive(&ifp->if_snd);
> + /* Discard packet larger than buffer. */
> + if (mlen + sizeof(struct ure_txpkt) >= c->uc_bufmax) {
> + m_freem(m);
> + continue;
> + }
> +
> + /* Append packet to current buffer. */
> + mlen = ure_encap_txpkt(m, c->uc_buf + new_buflen,
> +    c->uc_bufmax - new_buflen);
> + if (mlen <= 0) {
> + m_freem(m);
>   break;
>   }
> + ml_enqueue(&c->uc_mbuf_list, m);
> + c->uc_buflen = new_buflen + mlen;
> + }
>  
> -#if NBPFILTER > 0
> - if (ifp->if_bpf)
> - bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
> -#endif
> - ifp->if_timer = 5;
> + if (c != NULL) {
> + /* Send current buffer unless empty */
> + if (c->uc_buflen > 0 && ml_len(&c->uc_mbuf_list) > 0) {
> + SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
> + if (ure_encap_xfer(ifp, sc, c)) {
> + SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
> +    ure_list);
> + }
> + c = SLIST_FIRST(&cd->ure_tx_free);
> + }
>   }
> +
> + if (c == NULL)
> + ifq_set_oactive(&ifp->if_snd);
> +
> + ifp->if_timer = 5;
> +
> + splx(s);
>  }
>  
>  void
> @@ -810,9 +893,9 @@ ure_tick(void *xsc)
>  void
>  ure_stop(struct ure_softc *sc)
>  {
> - usbd_status err;
> + struct ure_cdata *cd;
>   struct ifnet *ifp;
> - int i;
> + usbd_status err;
>  
>   ure_reset(sc);
>  
> @@ -844,25 +927,54 @@ ure_stop(struct ure_softc *sc)
>   sc->ure_ep[URE_ENDPT_TX] = NULL;
>   }
>  
> - for (i = 0; i < URE_RX_LIST_CNT; i++) {
> - if (sc->ure_cdata.rx_chain[i].uc_mbuf != NULL) {
> - m_freem(sc->ure_cdata.rx_chain[i].uc_mbuf);
> - sc->ure_cdata.rx_chain[i].uc_mbuf = NULL;
> - }
> - if (sc->ure_cdata.rx_chain[i].uc_xfer != NULL) {
> - usbd_free_xfer(sc->ure_cdata.rx_chain[i].uc_xfer);
> - sc->ure_cdata.rx_chain[i].uc_xfer = NULL;
> + cd = &sc->ure_cdata;
> + ure_xfer_list_free(sc, cd->ure_rx_chain, URE_RX_LIST_CNT);
> + ure_xfer_list_free(sc, cd->ure_tx_chain, URE_TX_LIST_CNT);
> +}
> +
> +int
> +ure_xfer_list_init(struct ure_softc *sc, struct ure_chain *ch,
> +    uint32_t bufsize, int listlen)
> +{
> + struct ure_chain *c;
> + int i;
> +
> + for (i = 0; i < listlen; i++) {
> + c = &ch[i];
> + c->uc_sc = sc;
> + c->uc_idx = i;
> + c->uc_buflen = 0;
> + c->uc_bufmax = bufsize;
> + ml_init(&c->uc_mbuf_list);
> + if (c->uc_xfer == NULL) {
> + c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> + if (c->uc_xfer == NULL)
> + return (ENOBUFS);
> + c->uc_buf = usbd_alloc_buffer(c->uc_xfer, c->uc_bufmax);
> + if (c->uc_buf == NULL) {
> + usbd_free_xfer(c->uc_xfer);
> + c->uc_xfer = NULL;
> + return (ENOBUFS);
> + }
>   }
>   }
>  
> - for (i = 0; i < sc->ure_tx_list_cnt; i++) {
> - if (sc->ure_cdata.tx_chain[i].uc_mbuf != NULL) {
> - m_freem(sc->ure_cdata.tx_chain[i].uc_mbuf);
> - sc->ure_cdata.tx_chain[i].uc_mbuf = NULL;
> - }
> - if (sc->ure_cdata.tx_chain[i].uc_xfer != NULL) {
> - usbd_free_xfer(sc->ure_cdata.tx_chain[i].uc_xfer);
> - sc->ure_cdata.tx_chain[i].uc_xfer = NULL;
> + return (0);
> +}
> +
> +void
> +ure_xfer_list_free(struct ure_softc *sc, struct ure_chain *ch, int listlen)
> +{
> + int i;
> +
> + for (i = 0; i < listlen; i++) {
> + if (ch[i].uc_buf != NULL) {
> + ch[i].uc_buf = NULL;
> + }
> + ml_purge(&ch[i].uc_mbuf_list);
> + if (ch[i].uc_xfer != NULL) {
> + usbd_free_xfer(ch[i].uc_xfer);
> + ch[i].uc_xfer = NULL;
>   }
>   }
>  }
> @@ -1458,20 +1570,12 @@ ure_attach(struct device *parent, struct
>   }
>   }
>  
> - switch (uaa->product) {
> - case USB_PRODUCT_REALTEK_RTL8152:
> - sc->ure_flags |= URE_FLAG_8152;
> - sc->ure_rxbufsz = URE_8152_RXBUFSZ;
> - sc->ure_tx_list_cnt = 1;
> - break;
> - case USB_PRODUCT_REALTEK_RTL8156:
> - sc->ure_flags |= URE_FLAG_8156;
> - sc->ure_rxbufsz = URE_8153_RXBUFSZ;
> - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
> - break;
> - default:
> - sc->ure_rxbufsz = URE_8153_RXBUFSZ;
> - sc->ure_tx_list_cnt = 1;
> + if (uaa->product == USB_PRODUCT_REALTEK_RTL8152) {
> + sc->ure_rxbufsz = URE_8152_BUFSZ;
> + sc->ure_txbufsz = URE_8152_BUFSZ;
> + } else {
> + sc->ure_txbufsz = URE_8153_BUFSZ;
> + sc->ure_rxbufsz = URE_8153_BUFSZ;
>   }
>  
>   s = splnet();
> @@ -1482,10 +1586,12 @@ ure_attach(struct device *parent, struct
>   ver = ure_read_2(sc, URE_PLA_TCR1, URE_MCU_TYPE_PLA) & URE_VERSION_MASK;
>   switch (ver) {
>   case 0x4c00:
> + sc->ure_flags = URE_FLAG_8152;
>   sc->ure_chip |= URE_CHIP_VER_4C00;
>   printf("RTL8152 (0x4c00)");
>   break;
>   case 0x4c10:
> + sc->ure_flags = URE_FLAG_8152;
>   sc->ure_chip |= URE_CHIP_VER_4C10;
>   printf("RTL8152 (0x4c10)");
>   break;
> @@ -1507,18 +1613,18 @@ ure_attach(struct device *parent, struct
>   break;
>   case 0x6000:
>   sc->ure_flags = URE_FLAG_8153B;
> - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
>   printf("RTL8153B (0x6000)");
>   break;
>   case 0x6010:
>   sc->ure_flags = URE_FLAG_8153B;
> - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
>   printf("RTL8153B (0x6010)");
>   break;
>   case 0x7020:
> + sc->ure_flags = URE_FLAG_8156;
>   printf("RTL8156 (0x7020)");
>   break;
>   case 0x7030:
> + sc->ure_flags = URE_FLAG_8156;
>   printf("RTL8156 (0x7030)");
>   break;
>   default:
> @@ -1721,7 +1827,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
>   goto done;
>   }
>  
> - buf += roundup(pktlen, 8);
> + buf += roundup(pktlen, URE_RX_BUF_ALIGN);
>  
>   memcpy(&rxhdr, buf, sizeof(rxhdr));
>   total_len -= sizeof(rxhdr);
> @@ -1734,7 +1840,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
>   goto done;
>   }
>  
> - total_len -= roundup(pktlen, 8);
> + total_len -= roundup(pktlen, URE_RX_BUF_ALIGN);
>   buf += sizeof(rxhdr);
>  
>   m = m_devget(buf, pktlen, ETHER_ALIGN);
> @@ -1799,18 +1905,27 @@ ure_txeof(struct usbd_xfer *xfer, void *
>   if (usbd_is_dying(sc->ure_udev))
>   return;
>  
> - DPRINTFN(2, ("tx completion\n"));
> + if (status != USBD_NORMAL_COMPLETION)
> + DPRINTF(("%s: %s uc_idx=%u : %s\n", sc->ure_dev.dv_xname,
> + __func__, c->uc_idx, usbd_errstr(status)));
>  
>   s = splnet();
> - sc->ure_cdata.tx_cnt--;
> +
> + ml_purge(&c->uc_mbuf_list);
> + c->uc_buflen = 0;
> +
> + SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free, c, ure_list);
> +
>   if (status != USBD_NORMAL_COMPLETION) {
>   if (status == USBD_NOT_STARTED || status == USBD_CANCELLED) {
>   splx(s);
>   return;
>   }
> +
>   ifp->if_oerrors++;
>   printf("%s: usb error on tx: %s\n", sc->ure_dev.dv_xname,
>      usbd_errstr(status));
> +
>   if (status == USBD_STALLED)
>   usbd_clear_endpoint_stall_async(
>      sc->ure_ep[URE_ENDPT_TX]);
> @@ -1819,83 +1934,18 @@ ure_txeof(struct usbd_xfer *xfer, void *
>   }
>  
>   ifp->if_timer = 0;
> - ifq_clr_oactive(&ifp->if_snd);
> -
> - m_freem(c->uc_mbuf);
> - c->uc_mbuf = NULL;
> -
> - if (!ifq_empty(&ifp->if_snd))
> - ure_start(ifp);
> -
> - splx(s);
> -}
> -
> -int
> -ure_tx_list_init(struct ure_softc *sc)
> -{
> - struct ure_cdata *cd;
> - struct ure_chain *c;
> - int i;
> -
> - cd = &sc->ure_cdata;
> - for (i = 0; i < sc->ure_tx_list_cnt; i++) {
> - c = &cd->tx_chain[i];
> - c->uc_sc = sc;
> - c->uc_idx = i;
> - c->uc_mbuf = NULL;
> - if (c->uc_xfer == NULL) {
> - c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> - if (c->uc_xfer == NULL)
> - return ENOBUFS;
> - c->uc_buf = usbd_alloc_buffer(c->uc_xfer, URE_TXBUFSZ);
> - if (c->uc_buf == NULL) {
> - usbd_free_xfer(c->uc_xfer);
> - return ENOBUFS;
> - }
> - }
> - }
> -
> - cd->tx_prod = cd->tx_cnt = 0;
> -
> - return (0);
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
>  }
>  
>  int
> -ure_rx_list_init(struct ure_softc *sc)
> +ure_encap_txpkt(struct mbuf *m, char *buf, uint32_t maxlen)
>  {
> - struct ure_cdata *cd;
> - struct ure_chain *c;
> - int i;
> -
> - cd = &sc->ure_cdata;
> - for (i = 0; i < URE_RX_LIST_CNT; i++) {
> - c = &cd->rx_chain[i];
> - c->uc_sc = sc;
> - c->uc_idx = i;
> - c->uc_mbuf = NULL;
> - if (c->uc_xfer == NULL) {
> - c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> - if (c->uc_xfer == NULL)
> - return ENOBUFS;
> - c->uc_buf = usbd_alloc_buffer(c->uc_xfer,
> -    sc->ure_rxbufsz);
> - if (c->uc_buf == NULL) {
> - usbd_free_xfer(c->uc_xfer);
> - return ENOBUFS;
> - }
> - }
> - }
> -
> - return (0);
> -}
> -
> -int
> -ure_encap(struct ure_softc *sc, struct mbuf *m)
> -{
> - struct ure_chain *c;
> - usbd_status err;
>   struct ure_txpkt txhdr;
> - uint32_t frm_len = 0, cflags = 0;
> + uint32_t len = sizeof(txhdr), cflags = 0;
> +
> + if (len + m->m_pkthdr.len > maxlen)
> + return (-1);
>  
>   if ((m->m_pkthdr.csum_flags &
>      (M_IPV4_CSUM_OUT | M_TCP_CSUM_OUT | M_UDP_CSUM_OUT)) != 0) {
> @@ -1911,35 +1961,41 @@ ure_encap(struct ure_softc *sc, struct m
>   cflags |= swap16(m->m_pkthdr.ether_vtag | URE_TXPKT_VLAN_TAG);
>  #endif
>  
> - c = &sc->ure_cdata.tx_chain[sc->ure_cdata.tx_prod];
> -
> - /* header */
>   txhdr.ure_pktlen = htole32(m->m_pkthdr.len | URE_TXPKT_TX_FS |
>      URE_TXPKT_TX_LS);
>   txhdr.ure_vlan = htole32(cflags);
> - memcpy(c->uc_buf, &txhdr, sizeof(txhdr));
> - frm_len = sizeof(txhdr);
> + memcpy(buf, &txhdr, len);
>  
> - /* packet */
> - m_copydata(m, 0, m->m_pkthdr.len, c->uc_buf + frm_len);
> - frm_len += m->m_pkthdr.len;
> + m_copydata(m, 0, m->m_pkthdr.len, buf + len);
> + len += m->m_pkthdr.len;
>  
> - c->uc_mbuf = m;
> + return (len);
> +}
> +
> +int
> +ure_encap_xfer(struct ifnet *ifp, struct ure_softc *sc, struct ure_chain *c)
> +{
> + usbd_status err;
>  
> - DPRINTFN(2, ("tx %d bytes\n", frm_len));
>   usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_TX], c, c->uc_buf,
> -    frm_len, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000, ure_txeof);
> +    c->uc_buflen, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000,
> +    ure_txeof);
>  
>   err = usbd_transfer(c->uc_xfer);
> - if (err != 0 && err != USBD_IN_PROGRESS) {
> - c->uc_mbuf = NULL;
> + if (err != USBD_IN_PROGRESS) {
> + ml_purge(&c->uc_mbuf_list);
> + c->uc_buflen = 0;
>   ure_stop(sc);
>   return (EIO);
>   }
>  
> - sc->ure_cdata.tx_cnt++;
> - sc->ure_cdata.tx_prod = (sc->ure_cdata.tx_prod + 1) %
> -    sc->ure_tx_list_cnt;
> +#if NBPFILTER > 0
> + struct mbuf *m;
> +
> + if (ifp->if_bpf)
> + MBUF_LIST_FOREACH(&c->uc_mbuf_list, m)
> + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> +#endif
>  
>   return (0);
>  }
> Index: sys/dev/usb/if_urereg.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/usb/if_urereg.h,v
> retrieving revision 1.8
> diff -u -p -u -p -r1.8 if_urereg.h
> --- sys/dev/usb/if_urereg.h 7 Dec 2019 08:45:28 -0000 1.8
> +++ sys/dev/usb/if_urereg.h 21 Jul 2020 05:37:45 -0000
> @@ -494,28 +494,29 @@ struct ure_txpkt {
>  #define URE_ENDPT_TX 1
>  #define URE_ENDPT_MAX 2
>  
> -#define URE_TX_LIST_CNT 8
> +#define URE_TX_LIST_CNT 1
>  #define URE_RX_LIST_CNT 1
> -#define URE_RX_BUF_ALIGN sizeof(uint64_t)
> +#define URE_TX_BUF_ALIGN 4
> +#define URE_RX_BUF_ALIGN 8
>  
> -#define URE_TXBUFSZ 16384
> -#define URE_8152_RXBUFSZ 16384
> -#define URE_8153_RXBUFSZ 32768
> +#define URE_8152_BUFSZ 16384
> +#define URE_8153_BUFSZ 32768
>  
>  struct ure_chain {
>   struct ure_softc *uc_sc;
>   struct usbd_xfer *uc_xfer;
>   char *uc_buf;
> - struct mbuf *uc_mbuf;
> - int uc_accum;
> - int uc_idx;
> + uint32_t uc_buflen;
> + uint32_t uc_bufmax;
> + struct mbuf_list uc_mbuf_list;
> + SLIST_ENTRY(ure_chain)  ure_list;
> + uint8_t uc_idx;
>  };
>  
>  struct ure_cdata {
> - struct ure_chain tx_chain[URE_TX_LIST_CNT];
> - struct ure_chain rx_chain[URE_RX_LIST_CNT];
> - int tx_prod;
> - int tx_cnt;
> + struct ure_chain ure_rx_chain[URE_RX_LIST_CNT];
> + struct ure_chain ure_tx_chain[URE_TX_LIST_CNT];
> + SLIST_HEAD(ure_list_head, ure_chain)    ure_tx_free;
>  };
>  
>  struct ure_softc {
> @@ -541,7 +542,7 @@ struct ure_softc {
>  
>   struct timeval ure_rx_notice;
>   int ure_rxbufsz;
> - int ure_tx_list_cnt;
> + int ure_txbufsz;
>  
>   int ure_phyno;
>  
>  
>

--
Regards,
 Mikolaj

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Jonathon Fletcher-3


> On Jul 26, 2020, at 1:24 PM, Mikolaj Kucharski <[hidden email]> wrote:
>
> Hi Kevin, Jonathon,
>
> On Tue, Jul 21, 2020 at 02:38:55PM +0800, Kevin Lo wrote:
>> Hi,
>>
>> Jonathon Fletcher has been helping get the better performance out of ure(4).
>> I ran the tcpbench with ure (RTL8156) for 5 minutes:
>>
>> 71538432760 bytes sent over 300.999 seconds
>> bandwidth min/avg/max/std-dev = 12.071/1901.448/1947.873/135.283 Mbps
>>
>> A big thanks to Jonathon for his hard work.
>
> I've tested this on amd64 with following ure(4) card:
>
> ure0 at uhub2 port 2 configuration 1 interface 0 "Realtek USB 10/100/1000 LAN" rev 2.10/30.00 addr 4
> ure0: RTL8153 (0x5c30), address 00:e0:4c:04:09:7d
>
> I could see speedtest-cli improvement from around 150 Mbit/s before the
> diff, to 245 Mbit/s after the diff.
>
> Big thank you for this patch. Will run it for next couple of days.
> Before the diff I faced following errors with ure(4):
>
> usbd_start_next: error=5
> ure0: watchdog timeout
> ure0: usb errors on rx: IOERROR
>
> and unplug/re-plug dance was needed to bring back the network. That
> usually happend during Google Meet video call, so I need few days to see
> how this diff goes against web video calls.

Mikolaj,

Thank you for testing my patch.

I am very interested to know what xhci (or ehci) hardware you have.

I have the following:

xhci0 at pci0 dev 20 function 0 "Intel 9 Series xHCI" rev 0x03: msi, xHCI 1.0
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 3.00/1.00 addr 1
ure0 at uhub0 port 14 configuration 1 interface 0 "Realtek USB 10/100/1G/2.5G LAN" rev 3.20/30.00 addr 3
ure0: RTL8156 (0x7030), address 00:e0:4c:ab:64:5a

My ure0 is https://www.amazon.com/gp/product/B07Z8S6PN4 <https://www.amazon.com/gp/product/B07Z8S6PN4>

I do not get any watchdog errors with this.

Kevin has been testing my patch and giving me good feedback. He has seen some watchdog errors with an RTL8153B also.

I am starting to suspect that I have the tx xfer size wrong (too big) for the 8153 / 8153B devices. I am also trying to eliminate the XHCI hardware as a cause.

Please could you post your dmesg? At a minimum it would help me a lot to see your usb hardware.

Thanks,
Jonathon

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Mikolaj Kucharski-3
On Sun, Jul 26, 2020 at 06:47:32PM -0700, Jonathon Fletcher wrote:

> Mikolaj,
>
> Thank you for testing my patch.
>
> I am very interested to know what xhci (or ehci) hardware you have.
>
> I have the following:
>
> xhci0 at pci0 dev 20 function 0 "Intel 9 Series xHCI" rev 0x03: msi, xHCI 1.0
> usb0 at xhci0: USB revision 3.0
> uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 3.00/1.00 addr 1
> ure0 at uhub0 port 14 configuration 1 interface 0 "Realtek USB 10/100/1G/2.5G LAN" rev 3.20/30.00 addr 3
> ure0: RTL8156 (0x7030), address 00:e0:4c:ab:64:5a
>
> My ure0 is https://www.amazon.com/gp/product/B07Z8S6PN4 <https://www.amazon.com/gp/product/B07Z8S6PN4>
>
> I do not get any watchdog errors with this.
>
> Kevin has been testing my patch and giving me good feedback. He has seen some watchdog errors with an RTL8153B also.
>
> I am starting to suspect that I have the tx xfer size wrong (too big) for the 8153 / 8153B devices. I am also trying to eliminate the XHCI hardware as a cause.
>
> Please could you post your dmesg? At a minimum it would help me a lot to see your usb hardware.
>

Sure, no problem. I'm testing your patch with:

https://www.amazon.co.uk/gp/product/B081YGDBG7/


OpenBSD 6.7-current (GENERIC.MP) #20: Sun Jul 26 19:49:23 UTC 2020
    [hidden email]:/home/mkucharski/openbsd/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 8302006272 (7917MB)
avail mem = 8035315712 (7663MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 3.0 @ 0x7f114000 (64 entries)
bios0: vendor HUAWEI version "2.00" date 11/07/2017
bios0: HUAWEI HUAWEI MateBook X
acpi0 at bios0: ACPI 5.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP UEFI UEFI ECDT SSDT MSDM SSDT SSDT SSDT ASPT BOOT HPET APIC MCFG SSDT WSMT SSDT DBGP DBG2 SSDT SSDT DMAR NHLT FPDT BGRT
acpi0: wakeup devices GLAN(S4) XHC_(S3) XDCI(S4) HDAS(S4) RP01(S4) PXSX(S4) RP02(S4) PXSX(S4) RP03(S4) PXSX(S4) RP04(S4) PXSX(S4) RP05(S4) PXSX(S4) RP06(S4) PXSX(S4) [...]
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpiec0 at acpi0
acpihpet0 at acpi0: 23999999 Hz
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Core(TM) i7-7500U CPU @ 2.70GHz, 2595.04 MHz, 06-8e-09
cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,TSXFA,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES,MELTDOWN
cpu0: 256KB 64b/line 8-way L2 cache
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
cpu0: apic clock running at 24MHz
cpu0: mwait min=64, max=64, C-substates=0.2.1.2.4.1.1.1, IBE
cpu1 at mainbus0: apid 2 (application processor)
cpu1: Intel(R) Core(TM) i7-7500U CPU @ 2.70GHz, 2593.97 MHz, 06-8e-09
cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,TSXFA,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES,MELTDOWN
cpu1: 256KB 64b/line 8-way L2 cache
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 1 (application processor)
cpu2: Intel(R) Core(TM) i7-7500U CPU @ 2.70GHz, 2593.96 MHz, 06-8e-09
cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,TSXFA,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES,MELTDOWN
cpu2: 256KB 64b/line 8-way L2 cache
cpu2: smt 1, core 0, package 0
cpu3 at mainbus0: apid 3 (application processor)
cpu3: Intel(R) Core(TM) i7-7500U CPU @ 2.70GHz, 2593.97 MHz, 06-8e-09
cpu3: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,MD_CLEAR,TSXFA,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES,MELTDOWN
cpu3: 256KB 64b/line 8-way L2 cache
cpu3: smt 1, core 1, package 0
ioapic0 at mainbus0: apid 2 pa 0xfec00000, version 20, 120 pins
acpimcfg0 at acpi0
acpimcfg0: addr 0xe0000000, bus 0-255
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus -1 (RP01)
acpiprt2 at acpi0: bus -1 (RP02)
acpiprt3 at acpi0: bus -1 (RP03)
acpiprt4 at acpi0: bus -1 (RP04)
acpiprt5 at acpi0: bus -1 (RP05)
acpiprt6 at acpi0: bus -1 (RP06)
acpiprt7 at acpi0: bus 1 (RP07)
acpiprt8 at acpi0: bus -1 (RP08)
acpiprt9 at acpi0: bus 2 (RP09)
acpiprt10 at acpi0: bus -1 (RP10)
acpiprt11 at acpi0: bus -1 (RP11)
acpiprt12 at acpi0: bus -1 (RP12)
acpiprt13 at acpi0: bus -1 (RP13)
acpiprt14 at acpi0: bus -1 (RP14)
acpiprt15 at acpi0: bus -1 (RP15)
acpiprt16 at acpi0: bus -1 (RP16)
acpiprt17 at acpi0: bus -1 (RP17)
acpiprt18 at acpi0: bus -1 (RP18)
acpiprt19 at acpi0: bus -1 (RP19)
acpiprt20 at acpi0: bus -1 (RP20)
acpiprt21 at acpi0: bus -1 (RP21)
acpiprt22 at acpi0: bus -1 (RP22)
acpiprt23 at acpi0: bus -1 (RP23)
acpiprt24 at acpi0: bus -1 (RP24)
acpicpu0 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu1 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu2 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpicpu3 at acpi0: C3(200@1034 mwait.1@0x60), C2(200@151 mwait.1@0x33), C1(1000@1 mwait.1), PSS
acpipwrres0 at acpi0: WRST
acpipwrres1 at acpi0: WRST
acpipwrres2 at acpi0: WRST
acpipwrres3 at acpi0: WRST
acpipwrres4 at acpi0: WRST
acpipwrres5 at acpi0: WRST
acpipwrres6 at acpi0: WRST
acpipwrres7 at acpi0: WRST
acpipwrres8 at acpi0: WRST
acpipwrres9 at acpi0: WRST
acpipwrres10 at acpi0: WRST
acpipwrres11 at acpi0: WRST
acpipwrres12 at acpi0: WRST
acpipwrres13 at acpi0: WRST
acpipwrres14 at acpi0: WRST
acpipwrres15 at acpi0: WRST
acpipwrres16 at acpi0: WRST
acpipwrres17 at acpi0: WRST
acpipwrres18 at acpi0: WRST
acpipwrres19 at acpi0: WRST
acpitz0 at acpi0: critical temperature is 98 degC
acpipci0 at acpi0 PCI0: 0x00000000 0x00000011 0x00000001
extent `acpipci0 pcibus' (0x0 - 0xff), flags=0
     0xff - 0xff
extent `acpipci0 pciio' (0x0 - 0xffffffff), flags=0
     0xcf8 - 0xcff
     0x10000 - 0xffffffff
extent `acpipci0 pcimem' (0x0 - 0xffffffffffffffff), flags=0
     0x0 - 0x9ffff
     0x100000 - 0x8cffffff
     0xe0000000 - 0xfcffffff
     0xfe800000 - 0xffffffffffffffff
acpicmos0 at acpi0
acpials0 at acpi0: ALSD
"INT3403" at acpi0 not configured
"INT3403" at acpi0 not configured
"INT3403" at acpi0 not configured
"INT3403" at acpi0 not configured
"INT3403" at acpi0 not configured
"INT344B" at acpi0 not configured
"ELAN2201" at acpi0 not configured
acpiac0 at acpi0: AC unit online
acpibat0 at acpi0: BAT0 model "BASE-BAT" serial 123456789 type Li oem "Kollur"
"GXFP3288" at acpi0 not configured
"INT0E0C" at acpi0 not configured
"PNP0C14" at acpi0 not configured
"INT33A1" at acpi0 not configured
acpibtn0 at acpi0: LID_
acpibtn1 at acpi0: PWRB
"INT3400" at acpi0 not configured
"PNP0C14" at acpi0 not configured
"WDT0001" at acpi0 not configured
"PNP0C14" at acpi0 not configured
acpivideo0 at acpi0: GFX0
acpivout0 at acpivideo0: DD1F
cpu0: using VERW MDS workaround (except on vmm entry)
cpu0: Enhanced SpeedStep 2595 MHz: speeds: 2701, 2700, 2600, 2500, 2400, 2200, 2000, 1800, 1600, 1500, 1300, 1100, 800, 700, 600, 400 MHz
pci0 at mainbus0 bus 0
pchb0 at pci0 dev 0 function 0 "Intel Core 7G Host" rev 0x02
inteldrm0 at pci0 dev 2 function 0 "Intel HD Graphics 620" rev 0x02
drm0 at inteldrm0
inteldrm0: msi, KABYLAKE, gen 9
"Intel Core 6G Thermal" rev 0x02 at pci0 dev 4 function 0 not configured
xhci0 at pci0 dev 20 function 0 "Intel 100 Series xHCI" rev 0x21: msi, xHCI 1.0
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 3.00/1.00 addr 1
pchtemp0 at pci0 dev 20 function 2 "Intel 100 Series Thermal" rev 0x21
dwiic0 at pci0 dev 21 function 0 "Intel 100 Series I2C" rev 0x21: apic 2 int 16
iic0 at dwiic0
dwiic1 at pci0 dev 21 function 1 "Intel 100 Series I2C" rev 0x21: apic 2 int 17
iic1 at dwiic1
ihidev0 at iic1 addr 0x15 irq 109, vendor 0x4f3 product 0x3056, ELAN2201
ihidev0: 93 report ids
imt0 at ihidev0: clickpad, 5 contacts
wsmouse0 at imt0 mux 0
ims0 at ihidev0 reportid 1: 2 buttons, Z and W dir
wsmouse1 at ims0 mux 0
hid at ihidev0 reportid 5 not configured
hid at ihidev0 reportid 6 not configured
hid at ihidev0 reportid 7 not configured
hid at ihidev0 reportid 11 not configured
hid at ihidev0 reportid 12 not configured
hid at ihidev0 reportid 13 not configured
ims1 at ihidev0 reportid 93
ims1: mouse has no X report
"Intel 100 Series MEI" rev 0x21 at pci0 dev 22 function 0 not configured
ppb0 at pci0 dev 28 function 0 "Intel 100 Series PCIE" rev 0xf1: msi
pci1 at ppb0 bus 1
nvme0 at pci1 dev 0 function 0 "Lite-On CB1 NVMe" rev 0x01: msix, NVMe 1.2
nvme0: LITEON CB1-SD512, firmware C39640E, serial 00253810000100000000
scsibus1 at nvme0: 2 targets, initiator 0
sd0 at scsibus1 targ 1 lun 0: <NVMe, LITEON CB1-SD512, C396>
sd0: 488386MB, 512 bytes/sector, 1000215216 sectors
ppb1 at pci0 dev 29 function 0 "Intel 100 Series PCIE" rev 0xf1: msi
pci2 at ppb1 bus 2
iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
"Intel 100 Series UART" rev 0x21 at pci0 dev 30 function 0 not configured
"Intel 100 Series SPI" rev 0x21 at pci0 dev 30 function 3 not configured
pcib0 at pci0 dev 31 function 0 "Intel 200 Series LPC" rev 0x21
"Intel 100 Series PMC" rev 0x21 at pci0 dev 31 function 2 not configured
azalia0 at pci0 dev 31 function 3 "Intel 200 Series HD Audio" rev 0x21: msi
azalia0: codecs: Realtek ALC298, Intel/0x280b, using Realtek ALC298
audio0 at azalia0
ichiic0 at pci0 dev 31 function 4 "Intel 100 Series SMBus" rev 0x21: apic 2 int 16
iic2 at ichiic0
isa0 at pcib0
isadma0 at isa0
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
vmm0 at mainbus0: VMX/EPT
efifb at mainbus0 not configured
uhub1 at uhub0 port 3 configuration 1 interface 0 "VIA Labs, Inc. USB2.0 Hub" rev 2.10/3.d3 addr 2
uhub2 at uhub1 port 3 configuration 1 interface 0 "Terminus Technology USB 2.0 Hub" rev 2.00/1.00 addr 3
ure0 at uhub2 port 2 configuration 1 interface 0 "Realtek USB 10/100/1000 LAN" rev 2.10/30.00 addr 4
ure0: RTL8153 (0x5c30), address 00:e0:4c:04:09:7d
rgephy0 at ure0 phy 0: RTL8251 PHY, rev. 0
umass0 at uhub1 port 4 configuration 1 interface 0 "Generic USB3.0 Card Reader" rev 2.10/15.38 addr 5
umass0: using SCSI over Bulk-Only
scsibus2 at umass0: 2 targets, initiator 0
sd1 at scsibus2 targ 1 lun 0: <Generic, MassStorageClass, 1538> removable
sd2 at scsibus2 targ 1 lun 1: <Generic, MassStorageClass, 1538> removable
ugen0 at uhub1 port 5 "VIA Labs, Inc. USB Billboard Device" rev 2.01/0.01 addr 6
uvideo0 at uhub0 port 7 configuration 1 interface 0 "Azurewave Huawei Web Camera - HD" rev 2.00/17.06 addr 7
video0 at uvideo0
vscsi0 at root
scsibus3 at vscsi0: 256 targets
softraid0 at root
scsibus4 at softraid0: 256 targets
sd3 at scsibus4 targ 1 lun 0: <OPENBSD, SR CRYPTO, 006>
sd3: 476097MB, 512 bytes/sector, 975047777 sectors
root on sd3a (c9b24daac091e260.a) swap on sd3b dump on sd3b
inteldrm0: 2160x1440, 32bpp
wsdisplay0 at inteldrm0 mux 1: console (std, vt100 emulation), using wskbd0
wsdisplay0: screen 1-5 added (std, vt100 emulation)
iwm0: hw rev 0x230, fw ver 34.0.1, address 38:37:8b:ab:89:3c

--
Regards,
 Mikolaj

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Jonathon Fletcher-3
On Sun, Jul 26, 2020 at 10:12 PM Mikolaj Kucharski
<[hidden email]> wrote:

>
> On Sun, Jul 26, 2020 at 06:47:32PM -0700, Jonathon Fletcher wrote:
> > Mikolaj,
> >
> > Thank you for testing my patch.
> >
> > I am very interested to know what xhci (or ehci) hardware you have.
> >
> > I have the following:
> >
> > xhci0 at pci0 dev 20 function 0 "Intel 9 Series xHCI" rev 0x03: msi, xHCI 1.0
> > usb0 at xhci0: USB revision 3.0
> > uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 3.00/1.00 addr 1
> > ure0 at uhub0 port 14 configuration 1 interface 0 "Realtek USB 10/100/1G/2.5G LAN" rev 3.20/30.00 addr 3
> > ure0: RTL8156 (0x7030), address 00:e0:4c:ab:64:5a
> >
> > My ure0 is https://www.amazon.com/gp/product/B07Z8S6PN4 <https://www.amazon.com/gp/product/B07Z8S6PN4>
> >
> > I do not get any watchdog errors with this.
> >
> > Kevin has been testing my patch and giving me good feedback. He has seen some watchdog errors with an RTL8153B also.
> >
> > I am starting to suspect that I have the tx xfer size wrong (too big) for the 8153 / 8153B devices. I am also trying to eliminate the XHCI hardware as a cause.
> >
> > Please could you post your dmesg? At a minimum it would help me a lot to see your usb hardware.
> >
>
> Sure, no problem. I'm testing your patch with:
>
> https://www.amazon.co.uk/gp/product/B081YGDBG7/
>

[snip]

> xhci0 at pci0 dev 20 function 0 "Intel 100 Series xHCI" rev 0x21: msi, xHCI 1.0
> usb0 at xhci0: USB revision 3.0
> uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 3.00/1.00 addr 1
> uhub1 at uhub0 port 3 configuration 1 interface 0 "VIA Labs, Inc. USB2.0 Hub" rev 2.10/3.d3 addr 2
> uhub2 at uhub1 port 3 configuration 1 interface 0 "Terminus Technology USB 2.0 Hub" rev 2.00/1.00 addr 3
> ure0 at uhub2 port 2 configuration 1 interface 0 "Realtek USB 10/100/1000 LAN" rev 2.10/30.00 addr 4
> ure0: RTL8153 (0x5c30), address 00:e0:4c:04:09:7d
> rgephy0 at ure0 phy 0: RTL8251 PHY, rev. 0

Thanks Mikolaj,

I believe that the intermittent watchdog messages on RTL8153 variants
are now resolved. The previous code defined 32kb buffer sizes for both
tx and rx for RTL8153. The rx buffer size does vary by device type but
the tx buffer size should be 16kb for all types.

The patch improves tx performance primarily by packing multiple
packets into the outgoing xfer buffer if / when this is possible. This
makes it possible to use the entire xfer buffer in a single tx xfer.
If there are enough packets in the outbound queue and the xfer buffer
is defined as larger than the device buffer then the device could be
sent a tx xfer too large for it - eventually resulting in the watchdog
routine getting called.

I have attached an updated patch - which includes Kevin's changes from
his earlier email - that also corrects the tx buffer sizes for RTL8153
/ RTL8153B devices.

Jonathon

Index: sys/dev/usb/if_urereg.h
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_urereg.h,v
retrieving revision 1.8
diff -u -p -u -r1.8 if_urereg.h
--- sys/dev/usb/if_urereg.h 7 Dec 2019 08:45:28 -0000 1.8
+++ sys/dev/usb/if_urereg.h 27 Jul 2020 17:10:09 -0000
@@ -494,28 +494,32 @@ struct ure_txpkt {
 #define URE_ENDPT_TX 1
 #define URE_ENDPT_MAX 2

-#define URE_TX_LIST_CNT 8
+#define URE_TX_LIST_CNT 1
 #define URE_RX_LIST_CNT 1
-#define URE_RX_BUF_ALIGN sizeof(uint64_t)
+#define URE_TX_BUF_ALIGN 4
+#define URE_RX_BUF_ALIGN 8

-#define URE_TXBUFSZ 16384
-#define URE_8152_RXBUFSZ 16384
-#define URE_8153_RXBUFSZ 32768
+#define URE_8152_TX_BUFSZ 16384
+#define URE_8152_RX_BUFSZ 16384
+
+#define URE_8153_TX_BUFSZ 16384
+#define URE_8153_RX_BUFSZ 32768

 struct ure_chain {
  struct ure_softc *uc_sc;
  struct usbd_xfer *uc_xfer;
  char *uc_buf;
- struct mbuf *uc_mbuf;
- int uc_accum;
- int uc_idx;
+ uint32_t uc_buflen;
+ uint32_t uc_bufmax;
+ struct mbuf_list uc_mbuf_list;
+ SLIST_ENTRY(ure_chain)  ure_list;
+ uint8_t uc_idx;
 };

 struct ure_cdata {
- struct ure_chain tx_chain[URE_TX_LIST_CNT];
- struct ure_chain rx_chain[URE_RX_LIST_CNT];
- int tx_prod;
- int tx_cnt;
+ struct ure_chain ure_rx_chain[URE_RX_LIST_CNT];
+ struct ure_chain ure_tx_chain[URE_TX_LIST_CNT];
+ SLIST_HEAD(ure_list_head, ure_chain)    ure_tx_free;
 };

 struct ure_softc {
@@ -541,7 +545,7 @@ struct ure_softc {

  struct timeval ure_rx_notice;
  int ure_rxbufsz;
- int ure_tx_list_cnt;
+ int ure_txbufsz;

  int ure_phyno;

Index: sys/dev/usb/if_ure.c
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_ure.c,v
retrieving revision 1.16
diff -u -p -u -r1.16 if_ure.c
--- sys/dev/usb/if_ure.c 10 Jul 2020 13:26:41 -0000 1.16
+++ sys/dev/usb/if_ure.c 27 Jul 2020 17:10:39 -0000
@@ -117,11 +117,13 @@ void ure_miibus_writereg(struct device
 void ure_lock_mii(struct ure_softc *);
 void ure_unlock_mii(struct ure_softc *);

-int ure_encap(struct ure_softc *, struct mbuf *);
+int ure_encap_txpkt(struct mbuf *, char *, uint32_t);
+int ure_encap_xfer(struct ifnet *, struct ure_softc *, struct ure_chain *);
 void ure_rxeof(struct usbd_xfer *, void *, usbd_status);
 void ure_txeof(struct usbd_xfer *, void *, usbd_status);
-int ure_rx_list_init(struct ure_softc *);
-int ure_tx_list_init(struct ure_softc *);
+int ure_xfer_list_init(struct ure_softc *, struct ure_chain *,
+    uint32_t, int);
+void ure_xfer_list_free(struct ure_softc *, struct ure_chain *, int);

 void ure_tick_task(void *);
 void ure_tick(void *);
@@ -625,12 +627,36 @@ void
 ure_watchdog(struct ifnet *ifp)
 {
  struct ure_softc *sc = ifp->if_softc;
+ struct ure_chain *c;
+ usbd_status err;
+ int i, s;
+
+ ifp->if_timer = 0;

  if (usbd_is_dying(sc->ure_udev))
  return;

+ if ((ifp->if_flags & (IFF_RUNNING|IFF_UP)) != (IFF_RUNNING|IFF_UP))
+ return;
+
+ sc = ifp->if_softc;
+ s = splnet();
+
  ifp->if_oerrors++;
- printf("%s: watchdog timeout\n", sc->ure_dev.dv_xname);
+ DPRINTF(("watchdog timeout\n"));
+
+ for (i = 0; i < URE_TX_LIST_CNT; i++) {
+ c = &sc->ure_cdata.ure_tx_chain[i];
+ if (ml_len(&c->uc_mbuf_list) > 0) {
+ usbd_get_xfer_status(c->uc_xfer, NULL, NULL, NULL,
+    &err);
+ ure_txeof(c->uc_xfer, c, err);
+ }
+ }
+
+ if (ifq_is_oactive(&ifp->if_snd))
+ ifq_restart(&ifp->if_snd);
+ splx(s);
 }

 void
@@ -653,18 +679,26 @@ ure_init(void *xsc)
  else
  ure_rtl8153_nic_reset(sc);

- if (ure_rx_list_init(sc) == ENOBUFS) {
+ if (ure_xfer_list_init(sc, sc->ure_cdata.ure_rx_chain,
+ sc->ure_rxbufsz, URE_RX_LIST_CNT) == ENOBUFS) {
  printf("%s: rx list init failed\n", sc->ure_dev.dv_xname);
  splx(s);
  return;
  }

- if (ure_tx_list_init(sc) == ENOBUFS) {
+ if (ure_xfer_list_init(sc, sc->ure_cdata.ure_tx_chain,
+ sc->ure_txbufsz, URE_TX_LIST_CNT) == ENOBUFS) {
  printf("%s: tx list init failed\n", sc->ure_dev.dv_xname);
  splx(s);
  return;
  }

+ /* Initialize the SLIST we are using for the multiple tx buffers */
+ SLIST_INIT(&sc->ure_cdata.ure_tx_free);
+ for (i = 0; i < URE_TX_LIST_CNT; i++)
+ SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free,
+ &sc->ure_cdata.ure_tx_chain[i], ure_list);
+
  /* Set MAC address. */
  ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_CONFIG);
  ure_write_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA | URE_BYTE_EN_SIX_BYTES,
@@ -739,9 +773,9 @@ ure_init(void *xsc)

  /* Start up the receive pipe. */
  for (i = 0; i < URE_RX_LIST_CNT; i++) {
- c = &sc->ure_cdata.rx_chain[i];
+ c = &sc->ure_cdata.ure_rx_chain[i];
  usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_RX],
-    c, c->uc_buf, sc->ure_rxbufsz,
+    c, c->uc_buf, c->uc_bufmax,
     USBD_SHORT_XFER_OK | USBD_NO_COPY,
     USBD_NO_TIMEOUT, ure_rxeof);
  usbd_transfer(c->uc_xfer);
@@ -763,34 +797,83 @@ void
 ure_start(struct ifnet *ifp)
 {
  struct ure_softc *sc = ifp->if_softc;
- struct mbuf *m_head = NULL;
-
- if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd) ||
-    !(sc->ure_flags & URE_FLAG_LINK))
+ struct ure_cdata *cd = &sc->ure_cdata;
+ struct ure_chain *c;
+ struct mbuf *m = NULL;
+ uint32_t new_buflen;
+ int s, mlen;
+
+ if (!(sc->ure_flags & URE_FLAG_LINK) ||
+ (ifp->if_flags & (IFF_RUNNING|IFF_UP)) !=
+    (IFF_RUNNING|IFF_UP)) {
  return;
+ }
+
+ s = splnet();

- for (;;) {
- if (sc->ure_cdata.tx_cnt == sc->ure_tx_list_cnt) {
- ifq_set_oactive(&ifp->if_snd);
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ while (c != NULL) {
+ m = NULL;
+ mlen = ifq_hdatalen(&ifp->if_snd);
+ if (mlen <= 0)
  break;
+
+ /*
+                 * If packet larger than remaining space, send buffer and
+ * continue.
+ */
+ new_buflen = roundup(c->uc_buflen, URE_TX_BUF_ALIGN);
+ if (new_buflen + sizeof(struct ure_txpkt) + mlen >=
+    c->uc_bufmax) {
+ SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
+ if (ure_encap_xfer(ifp, sc, c)) {
+ SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
+    ure_list);
+ break;
+ }
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ continue;
  }

- m_head = ifq_dequeue(&ifp->if_snd);
- if (m_head == NULL)
+ m = ifq_dequeue(&ifp->if_snd);
+ if (m == NULL)
  break;

- if (ure_encap(sc, m_head)) {
- m_freem(m_head);
- ifq_set_oactive(&ifp->if_snd);
+ /* Discard packet larger than buffer. */
+ if (mlen + sizeof(struct ure_txpkt) >= c->uc_bufmax) {
+ m_freem(m);
+ continue;
+ }
+
+ /* Append packet to current buffer. */
+ mlen = ure_encap_txpkt(m, c->uc_buf + new_buflen,
+    c->uc_bufmax - new_buflen);
+ if (mlen <= 0) {
+ m_freem(m);
  break;
  }
+ ml_enqueue(&c->uc_mbuf_list, m);
+ c->uc_buflen = new_buflen + mlen;
+ }

-#if NBPFILTER > 0
- if (ifp->if_bpf)
- bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
-#endif
- ifp->if_timer = 5;
+ if (c != NULL) {
+ /* Send current buffer unless empty */
+ if (c->uc_buflen > 0 && ml_len(&c->uc_mbuf_list) > 0) {
+ SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
+ if (ure_encap_xfer(ifp, sc, c)) {
+ SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
+    ure_list);
+ }
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ }
  }
+
+ if (c == NULL)
+ ifq_set_oactive(&ifp->if_snd);
+
+ ifp->if_timer = 5;
+
+ splx(s);
 }

 void
@@ -810,9 +893,9 @@ ure_tick(void *xsc)
 void
 ure_stop(struct ure_softc *sc)
 {
- usbd_status err;
+ struct ure_cdata *cd;
  struct ifnet *ifp;
- int i;
+ usbd_status err;

  ure_reset(sc);

@@ -844,25 +927,54 @@ ure_stop(struct ure_softc *sc)
  sc->ure_ep[URE_ENDPT_TX] = NULL;
  }

- for (i = 0; i < URE_RX_LIST_CNT; i++) {
- if (sc->ure_cdata.rx_chain[i].uc_mbuf != NULL) {
- m_freem(sc->ure_cdata.rx_chain[i].uc_mbuf);
- sc->ure_cdata.rx_chain[i].uc_mbuf = NULL;
- }
- if (sc->ure_cdata.rx_chain[i].uc_xfer != NULL) {
- usbd_free_xfer(sc->ure_cdata.rx_chain[i].uc_xfer);
- sc->ure_cdata.rx_chain[i].uc_xfer = NULL;
+ cd = &sc->ure_cdata;
+ ure_xfer_list_free(sc, cd->ure_rx_chain, URE_RX_LIST_CNT);
+ ure_xfer_list_free(sc, cd->ure_tx_chain, URE_TX_LIST_CNT);
+}
+
+int
+ure_xfer_list_init(struct ure_softc *sc, struct ure_chain *ch,
+    uint32_t bufsize, int listlen)
+{
+ struct ure_chain *c;
+ int i;
+
+ for (i = 0; i < listlen; i++) {
+ c = &ch[i];
+ c->uc_sc = sc;
+ c->uc_idx = i;
+ c->uc_buflen = 0;
+ c->uc_bufmax = bufsize;
+ ml_init(&c->uc_mbuf_list);
+ if (c->uc_xfer == NULL) {
+ c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
+ if (c->uc_xfer == NULL)
+ return (ENOBUFS);
+ c->uc_buf = usbd_alloc_buffer(c->uc_xfer, c->uc_bufmax);
+ if (c->uc_buf == NULL) {
+ usbd_free_xfer(c->uc_xfer);
+ c->uc_xfer = NULL;
+ return (ENOBUFS);
+ }
  }
  }

- for (i = 0; i < sc->ure_tx_list_cnt; i++) {
- if (sc->ure_cdata.tx_chain[i].uc_mbuf != NULL) {
- m_freem(sc->ure_cdata.tx_chain[i].uc_mbuf);
- sc->ure_cdata.tx_chain[i].uc_mbuf = NULL;
- }
- if (sc->ure_cdata.tx_chain[i].uc_xfer != NULL) {
- usbd_free_xfer(sc->ure_cdata.tx_chain[i].uc_xfer);
- sc->ure_cdata.tx_chain[i].uc_xfer = NULL;
+ return (0);
+}
+
+void
+ure_xfer_list_free(struct ure_softc *sc, struct ure_chain *ch, int listlen)
+{
+ int i;
+
+ for (i = 0; i < listlen; i++) {
+ if (ch[i].uc_buf != NULL) {
+ ch[i].uc_buf = NULL;
+ }
+ ml_purge(&ch[i].uc_mbuf_list);
+ if (ch[i].uc_xfer != NULL) {
+ usbd_free_xfer(ch[i].uc_xfer);
+ ch[i].uc_xfer = NULL;
  }
  }
 }
@@ -1458,20 +1570,12 @@ ure_attach(struct device *parent, struct
  }
  }

- switch (uaa->product) {
- case USB_PRODUCT_REALTEK_RTL8152:
- sc->ure_flags |= URE_FLAG_8152;
- sc->ure_rxbufsz = URE_8152_RXBUFSZ;
- sc->ure_tx_list_cnt = 1;
- break;
- case USB_PRODUCT_REALTEK_RTL8156:
- sc->ure_flags |= URE_FLAG_8156;
- sc->ure_rxbufsz = URE_8153_RXBUFSZ;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
- break;
- default:
- sc->ure_rxbufsz = URE_8153_RXBUFSZ;
- sc->ure_tx_list_cnt = 1;
+ if (uaa->product == USB_PRODUCT_REALTEK_RTL8152) {
+ sc->ure_txbufsz = URE_8152_TX_BUFSZ;
+ sc->ure_rxbufsz = URE_8152_RX_BUFSZ;
+ } else {
+ sc->ure_txbufsz = URE_8153_TX_BUFSZ;
+ sc->ure_rxbufsz = URE_8153_RX_BUFSZ;
  }

  s = splnet();
@@ -1482,10 +1586,12 @@ ure_attach(struct device *parent, struct
  ver = ure_read_2(sc, URE_PLA_TCR1, URE_MCU_TYPE_PLA) & URE_VERSION_MASK;
  switch (ver) {
  case 0x4c00:
+ sc->ure_flags = URE_FLAG_8152;
  sc->ure_chip |= URE_CHIP_VER_4C00;
  printf("RTL8152 (0x4c00)");
  break;
  case 0x4c10:
+ sc->ure_flags = URE_FLAG_8152;
  sc->ure_chip |= URE_CHIP_VER_4C10;
  printf("RTL8152 (0x4c10)");
  break;
@@ -1507,18 +1613,18 @@ ure_attach(struct device *parent, struct
  break;
  case 0x6000:
  sc->ure_flags = URE_FLAG_8153B;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
  printf("RTL8153B (0x6000)");
  break;
  case 0x6010:
  sc->ure_flags = URE_FLAG_8153B;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
  printf("RTL8153B (0x6010)");
  break;
  case 0x7020:
+ sc->ure_flags = URE_FLAG_8156;
  printf("RTL8156 (0x7020)");
  break;
  case 0x7030:
+ sc->ure_flags = URE_FLAG_8156;
  printf("RTL8156 (0x7030)");
  break;
  default:
@@ -1721,7 +1827,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
  goto done;
  }

- buf += roundup(pktlen, 8);
+ buf += roundup(pktlen, URE_RX_BUF_ALIGN);

  memcpy(&rxhdr, buf, sizeof(rxhdr));
  total_len -= sizeof(rxhdr);
@@ -1734,7 +1840,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
  goto done;
  }

- total_len -= roundup(pktlen, 8);
+ total_len -= roundup(pktlen, URE_RX_BUF_ALIGN);
  buf += sizeof(rxhdr);

  m = m_devget(buf, pktlen, ETHER_ALIGN);
@@ -1799,18 +1905,27 @@ ure_txeof(struct usbd_xfer *xfer, void *
  if (usbd_is_dying(sc->ure_udev))
  return;

- DPRINTFN(2, ("tx completion\n"));
+ if (status != USBD_NORMAL_COMPLETION)
+ DPRINTF(("%s: %s uc_idx=%u : %s\n", sc->ure_dev.dv_xname,
+ __func__, c->uc_idx, usbd_errstr(status)));

  s = splnet();
- sc->ure_cdata.tx_cnt--;
+
+ ml_purge(&c->uc_mbuf_list);
+ c->uc_buflen = 0;
+
+ SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free, c, ure_list);
+
  if (status != USBD_NORMAL_COMPLETION) {
  if (status == USBD_NOT_STARTED || status == USBD_CANCELLED) {
  splx(s);
  return;
  }
+
  ifp->if_oerrors++;
  printf("%s: usb error on tx: %s\n", sc->ure_dev.dv_xname,
     usbd_errstr(status));
+
  if (status == USBD_STALLED)
  usbd_clear_endpoint_stall_async(
     sc->ure_ep[URE_ENDPT_TX]);
@@ -1819,83 +1934,18 @@ ure_txeof(struct usbd_xfer *xfer, void *
  }

  ifp->if_timer = 0;
- ifq_clr_oactive(&ifp->if_snd);
-
- m_freem(c->uc_mbuf);
- c->uc_mbuf = NULL;
-
- if (!ifq_empty(&ifp->if_snd))
- ure_start(ifp);
-
- splx(s);
-}
-
-int
-ure_tx_list_init(struct ure_softc *sc)
-{
- struct ure_cdata *cd;
- struct ure_chain *c;
- int i;
-
- cd = &sc->ure_cdata;
- for (i = 0; i < sc->ure_tx_list_cnt; i++) {
- c = &cd->tx_chain[i];
- c->uc_sc = sc;
- c->uc_idx = i;
- c->uc_mbuf = NULL;
- if (c->uc_xfer == NULL) {
- c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
- if (c->uc_xfer == NULL)
- return ENOBUFS;
- c->uc_buf = usbd_alloc_buffer(c->uc_xfer, URE_TXBUFSZ);
- if (c->uc_buf == NULL) {
- usbd_free_xfer(c->uc_xfer);
- return ENOBUFS;
- }
- }
- }
-
- cd->tx_prod = cd->tx_cnt = 0;
-
- return (0);
+ if (ifq_is_oactive(&ifp->if_snd))
+ ifq_restart(&ifp->if_snd);
 }

 int
-ure_rx_list_init(struct ure_softc *sc)
+ure_encap_txpkt(struct mbuf *m, char *buf, uint32_t maxlen)
 {
- struct ure_cdata *cd;
- struct ure_chain *c;
- int i;
-
- cd = &sc->ure_cdata;
- for (i = 0; i < URE_RX_LIST_CNT; i++) {
- c = &cd->rx_chain[i];
- c->uc_sc = sc;
- c->uc_idx = i;
- c->uc_mbuf = NULL;
- if (c->uc_xfer == NULL) {
- c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
- if (c->uc_xfer == NULL)
- return ENOBUFS;
- c->uc_buf = usbd_alloc_buffer(c->uc_xfer,
-    sc->ure_rxbufsz);
- if (c->uc_buf == NULL) {
- usbd_free_xfer(c->uc_xfer);
- return ENOBUFS;
- }
- }
- }
-
- return (0);
-}
-
-int
-ure_encap(struct ure_softc *sc, struct mbuf *m)
-{
- struct ure_chain *c;
- usbd_status err;
  struct ure_txpkt txhdr;
- uint32_t frm_len = 0, cflags = 0;
+ uint32_t len = sizeof(txhdr), cflags = 0;
+
+ if (len + m->m_pkthdr.len > maxlen)
+ return (-1);

  if ((m->m_pkthdr.csum_flags &
     (M_IPV4_CSUM_OUT | M_TCP_CSUM_OUT | M_UDP_CSUM_OUT)) != 0) {
@@ -1911,35 +1961,41 @@ ure_encap(struct ure_softc *sc, struct m
  cflags |= swap16(m->m_pkthdr.ether_vtag | URE_TXPKT_VLAN_TAG);
 #endif

- c = &sc->ure_cdata.tx_chain[sc->ure_cdata.tx_prod];
-
- /* header */
  txhdr.ure_pktlen = htole32(m->m_pkthdr.len | URE_TXPKT_TX_FS |
     URE_TXPKT_TX_LS);
  txhdr.ure_vlan = htole32(cflags);
- memcpy(c->uc_buf, &txhdr, sizeof(txhdr));
- frm_len = sizeof(txhdr);
+ memcpy(buf, &txhdr, len);

- /* packet */
- m_copydata(m, 0, m->m_pkthdr.len, c->uc_buf + frm_len);
- frm_len += m->m_pkthdr.len;
+ m_copydata(m, 0, m->m_pkthdr.len, buf + len);
+ len += m->m_pkthdr.len;

- c->uc_mbuf = m;
+ return (len);
+}
+
+int
+ure_encap_xfer(struct ifnet *ifp, struct ure_softc *sc, struct ure_chain *c)
+{
+ usbd_status err;

- DPRINTFN(2, ("tx %d bytes\n", frm_len));
  usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_TX], c, c->uc_buf,
-    frm_len, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000, ure_txeof);
+    c->uc_buflen, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000,
+    ure_txeof);

  err = usbd_transfer(c->uc_xfer);
- if (err != 0 && err != USBD_IN_PROGRESS) {
- c->uc_mbuf = NULL;
+ if (err != USBD_IN_PROGRESS) {
+ ml_purge(&c->uc_mbuf_list);
+ c->uc_buflen = 0;
  ure_stop(sc);
  return (EIO);
  }

- sc->ure_cdata.tx_cnt++;
- sc->ure_cdata.tx_prod = (sc->ure_cdata.tx_prod + 1) %
-    sc->ure_tx_list_cnt;
+#if NBPFILTER > 0
+ struct mbuf *m;
+
+ if (ifp->if_bpf)
+ MBUF_LIST_FOREACH(&c->uc_mbuf_list, m)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif

  return (0);
 }

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Mikolaj Kucharski-3
On Mon, Jul 27, 2020 at 11:24:55AM -0700, Jonathon Fletcher wrote:
>
> I have attached an updated patch - which includes Kevin's changes from
> his earlier email - that also corrects the tx buffer sizes for RTL8153
> / RTL8153B devices.
>

Unfortunately below diff fails to apply.

2 out of 2 hunks failed
15 out of 15 hunks failed

I've tried on fresh cvs version of if_urereg.h and if_ure.c

I usually, for convenience take the diffs from marc.info:

https://marc.info/?l=openbsd-tech&m=159587449018248&q=mbox

>
> Index: sys/dev/usb/if_urereg.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/usb/if_urereg.h,v
> retrieving revision 1.8
> diff -u -p -u -r1.8 if_urereg.h
> --- sys/dev/usb/if_urereg.h 7 Dec 2019 08:45:28 -0000 1.8
> +++ sys/dev/usb/if_urereg.h 27 Jul 2020 17:10:09 -0000
> @@ -494,28 +494,32 @@ struct ure_txpkt {
>  #define URE_ENDPT_TX 1
>  #define URE_ENDPT_MAX 2
>
> -#define URE_TX_LIST_CNT 8
> +#define URE_TX_LIST_CNT 1
>  #define URE_RX_LIST_CNT 1
> -#define URE_RX_BUF_ALIGN sizeof(uint64_t)
> +#define URE_TX_BUF_ALIGN 4
> +#define URE_RX_BUF_ALIGN 8
>
> -#define URE_TXBUFSZ 16384
> -#define URE_8152_RXBUFSZ 16384
> -#define URE_8153_RXBUFSZ 32768
> +#define URE_8152_TX_BUFSZ 16384
> +#define URE_8152_RX_BUFSZ 16384
> +
> +#define URE_8153_TX_BUFSZ 16384
> +#define URE_8153_RX_BUFSZ 32768
>
>  struct ure_chain {
>   struct ure_softc *uc_sc;
>   struct usbd_xfer *uc_xfer;
>   char *uc_buf;
> - struct mbuf *uc_mbuf;
> - int uc_accum;
> - int uc_idx;
> + uint32_t uc_buflen;
> + uint32_t uc_bufmax;
> + struct mbuf_list uc_mbuf_list;
> + SLIST_ENTRY(ure_chain)  ure_list;
> + uint8_t uc_idx;
>  };
>
>  struct ure_cdata {
> - struct ure_chain tx_chain[URE_TX_LIST_CNT];
> - struct ure_chain rx_chain[URE_RX_LIST_CNT];
> - int tx_prod;
> - int tx_cnt;
> + struct ure_chain ure_rx_chain[URE_RX_LIST_CNT];
> + struct ure_chain ure_tx_chain[URE_TX_LIST_CNT];
> + SLIST_HEAD(ure_list_head, ure_chain)    ure_tx_free;
>  };
>
>  struct ure_softc {
> @@ -541,7 +545,7 @@ struct ure_softc {
>
>   struct timeval ure_rx_notice;
>   int ure_rxbufsz;
> - int ure_tx_list_cnt;
> + int ure_txbufsz;
>
>   int ure_phyno;
>
> Index: sys/dev/usb/if_ure.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/usb/if_ure.c,v
> retrieving revision 1.16
> diff -u -p -u -r1.16 if_ure.c
> --- sys/dev/usb/if_ure.c 10 Jul 2020 13:26:41 -0000 1.16
> +++ sys/dev/usb/if_ure.c 27 Jul 2020 17:10:39 -0000
> @@ -117,11 +117,13 @@ void ure_miibus_writereg(struct device
>  void ure_lock_mii(struct ure_softc *);
>  void ure_unlock_mii(struct ure_softc *);
>
> -int ure_encap(struct ure_softc *, struct mbuf *);
> +int ure_encap_txpkt(struct mbuf *, char *, uint32_t);
> +int ure_encap_xfer(struct ifnet *, struct ure_softc *, struct ure_chain *);
>  void ure_rxeof(struct usbd_xfer *, void *, usbd_status);
>  void ure_txeof(struct usbd_xfer *, void *, usbd_status);
> -int ure_rx_list_init(struct ure_softc *);
> -int ure_tx_list_init(struct ure_softc *);
> +int ure_xfer_list_init(struct ure_softc *, struct ure_chain *,
> +    uint32_t, int);
> +void ure_xfer_list_free(struct ure_softc *, struct ure_chain *, int);
>
>  void ure_tick_task(void *);
>  void ure_tick(void *);
> @@ -625,12 +627,36 @@ void
>  ure_watchdog(struct ifnet *ifp)
>  {
>   struct ure_softc *sc = ifp->if_softc;
> + struct ure_chain *c;
> + usbd_status err;
> + int i, s;
> +
> + ifp->if_timer = 0;
>
>   if (usbd_is_dying(sc->ure_udev))
>   return;
>
> + if ((ifp->if_flags & (IFF_RUNNING|IFF_UP)) != (IFF_RUNNING|IFF_UP))
> + return;
> +
> + sc = ifp->if_softc;
> + s = splnet();
> +
>   ifp->if_oerrors++;
> - printf("%s: watchdog timeout\n", sc->ure_dev.dv_xname);
> + DPRINTF(("watchdog timeout\n"));
> +
> + for (i = 0; i < URE_TX_LIST_CNT; i++) {
> + c = &sc->ure_cdata.ure_tx_chain[i];
> + if (ml_len(&c->uc_mbuf_list) > 0) {
> + usbd_get_xfer_status(c->uc_xfer, NULL, NULL, NULL,
> +    &err);
> + ure_txeof(c->uc_xfer, c, err);
> + }
> + }
> +
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
> + splx(s);
>  }
>
>  void
> @@ -653,18 +679,26 @@ ure_init(void *xsc)
>   else
>   ure_rtl8153_nic_reset(sc);
>
> - if (ure_rx_list_init(sc) == ENOBUFS) {
> + if (ure_xfer_list_init(sc, sc->ure_cdata.ure_rx_chain,
> + sc->ure_rxbufsz, URE_RX_LIST_CNT) == ENOBUFS) {
>   printf("%s: rx list init failed\n", sc->ure_dev.dv_xname);
>   splx(s);
>   return;
>   }
>
> - if (ure_tx_list_init(sc) == ENOBUFS) {
> + if (ure_xfer_list_init(sc, sc->ure_cdata.ure_tx_chain,
> + sc->ure_txbufsz, URE_TX_LIST_CNT) == ENOBUFS) {
>   printf("%s: tx list init failed\n", sc->ure_dev.dv_xname);
>   splx(s);
>   return;
>   }
>
> + /* Initialize the SLIST we are using for the multiple tx buffers */
> + SLIST_INIT(&sc->ure_cdata.ure_tx_free);
> + for (i = 0; i < URE_TX_LIST_CNT; i++)
> + SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free,
> + &sc->ure_cdata.ure_tx_chain[i], ure_list);
> +
>   /* Set MAC address. */
>   ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_CONFIG);
>   ure_write_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA | URE_BYTE_EN_SIX_BYTES,
> @@ -739,9 +773,9 @@ ure_init(void *xsc)
>
>   /* Start up the receive pipe. */
>   for (i = 0; i < URE_RX_LIST_CNT; i++) {
> - c = &sc->ure_cdata.rx_chain[i];
> + c = &sc->ure_cdata.ure_rx_chain[i];
>   usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_RX],
> -    c, c->uc_buf, sc->ure_rxbufsz,
> +    c, c->uc_buf, c->uc_bufmax,
>      USBD_SHORT_XFER_OK | USBD_NO_COPY,
>      USBD_NO_TIMEOUT, ure_rxeof);
>   usbd_transfer(c->uc_xfer);
> @@ -763,34 +797,83 @@ void
>  ure_start(struct ifnet *ifp)
>  {
>   struct ure_softc *sc = ifp->if_softc;
> - struct mbuf *m_head = NULL;
> -
> - if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd) ||
> -    !(sc->ure_flags & URE_FLAG_LINK))
> + struct ure_cdata *cd = &sc->ure_cdata;
> + struct ure_chain *c;
> + struct mbuf *m = NULL;
> + uint32_t new_buflen;
> + int s, mlen;
> +
> + if (!(sc->ure_flags & URE_FLAG_LINK) ||
> + (ifp->if_flags & (IFF_RUNNING|IFF_UP)) !=
> +    (IFF_RUNNING|IFF_UP)) {
>   return;
> + }
> +
> + s = splnet();
>
> - for (;;) {
> - if (sc->ure_cdata.tx_cnt == sc->ure_tx_list_cnt) {
> - ifq_set_oactive(&ifp->if_snd);
> + c = SLIST_FIRST(&cd->ure_tx_free);
> + while (c != NULL) {
> + m = NULL;
> + mlen = ifq_hdatalen(&ifp->if_snd);
> + if (mlen <= 0)
>   break;
> +
> + /*
> +                 * If packet larger than remaining space, send buffer and
> + * continue.
> + */
> + new_buflen = roundup(c->uc_buflen, URE_TX_BUF_ALIGN);
> + if (new_buflen + sizeof(struct ure_txpkt) + mlen >=
> +    c->uc_bufmax) {
> + SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
> + if (ure_encap_xfer(ifp, sc, c)) {
> + SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
> +    ure_list);
> + break;
> + }
> + c = SLIST_FIRST(&cd->ure_tx_free);
> + continue;
>   }
>
> - m_head = ifq_dequeue(&ifp->if_snd);
> - if (m_head == NULL)
> + m = ifq_dequeue(&ifp->if_snd);
> + if (m == NULL)
>   break;
>
> - if (ure_encap(sc, m_head)) {
> - m_freem(m_head);
> - ifq_set_oactive(&ifp->if_snd);
> + /* Discard packet larger than buffer. */
> + if (mlen + sizeof(struct ure_txpkt) >= c->uc_bufmax) {
> + m_freem(m);
> + continue;
> + }
> +
> + /* Append packet to current buffer. */
> + mlen = ure_encap_txpkt(m, c->uc_buf + new_buflen,
> +    c->uc_bufmax - new_buflen);
> + if (mlen <= 0) {
> + m_freem(m);
>   break;
>   }
> + ml_enqueue(&c->uc_mbuf_list, m);
> + c->uc_buflen = new_buflen + mlen;
> + }
>
> -#if NBPFILTER > 0
> - if (ifp->if_bpf)
> - bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
> -#endif
> - ifp->if_timer = 5;
> + if (c != NULL) {
> + /* Send current buffer unless empty */
> + if (c->uc_buflen > 0 && ml_len(&c->uc_mbuf_list) > 0) {
> + SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
> + if (ure_encap_xfer(ifp, sc, c)) {
> + SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
> +    ure_list);
> + }
> + c = SLIST_FIRST(&cd->ure_tx_free);
> + }
>   }
> +
> + if (c == NULL)
> + ifq_set_oactive(&ifp->if_snd);
> +
> + ifp->if_timer = 5;
> +
> + splx(s);
>  }
>
>  void
> @@ -810,9 +893,9 @@ ure_tick(void *xsc)
>  void
>  ure_stop(struct ure_softc *sc)
>  {
> - usbd_status err;
> + struct ure_cdata *cd;
>   struct ifnet *ifp;
> - int i;
> + usbd_status err;
>
>   ure_reset(sc);
>
> @@ -844,25 +927,54 @@ ure_stop(struct ure_softc *sc)
>   sc->ure_ep[URE_ENDPT_TX] = NULL;
>   }
>
> - for (i = 0; i < URE_RX_LIST_CNT; i++) {
> - if (sc->ure_cdata.rx_chain[i].uc_mbuf != NULL) {
> - m_freem(sc->ure_cdata.rx_chain[i].uc_mbuf);
> - sc->ure_cdata.rx_chain[i].uc_mbuf = NULL;
> - }
> - if (sc->ure_cdata.rx_chain[i].uc_xfer != NULL) {
> - usbd_free_xfer(sc->ure_cdata.rx_chain[i].uc_xfer);
> - sc->ure_cdata.rx_chain[i].uc_xfer = NULL;
> + cd = &sc->ure_cdata;
> + ure_xfer_list_free(sc, cd->ure_rx_chain, URE_RX_LIST_CNT);
> + ure_xfer_list_free(sc, cd->ure_tx_chain, URE_TX_LIST_CNT);
> +}
> +
> +int
> +ure_xfer_list_init(struct ure_softc *sc, struct ure_chain *ch,
> +    uint32_t bufsize, int listlen)
> +{
> + struct ure_chain *c;
> + int i;
> +
> + for (i = 0; i < listlen; i++) {
> + c = &ch[i];
> + c->uc_sc = sc;
> + c->uc_idx = i;
> + c->uc_buflen = 0;
> + c->uc_bufmax = bufsize;
> + ml_init(&c->uc_mbuf_list);
> + if (c->uc_xfer == NULL) {
> + c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> + if (c->uc_xfer == NULL)
> + return (ENOBUFS);
> + c->uc_buf = usbd_alloc_buffer(c->uc_xfer, c->uc_bufmax);
> + if (c->uc_buf == NULL) {
> + usbd_free_xfer(c->uc_xfer);
> + c->uc_xfer = NULL;
> + return (ENOBUFS);
> + }
>   }
>   }
>
> - for (i = 0; i < sc->ure_tx_list_cnt; i++) {
> - if (sc->ure_cdata.tx_chain[i].uc_mbuf != NULL) {
> - m_freem(sc->ure_cdata.tx_chain[i].uc_mbuf);
> - sc->ure_cdata.tx_chain[i].uc_mbuf = NULL;
> - }
> - if (sc->ure_cdata.tx_chain[i].uc_xfer != NULL) {
> - usbd_free_xfer(sc->ure_cdata.tx_chain[i].uc_xfer);
> - sc->ure_cdata.tx_chain[i].uc_xfer = NULL;
> + return (0);
> +}
> +
> +void
> +ure_xfer_list_free(struct ure_softc *sc, struct ure_chain *ch, int listlen)
> +{
> + int i;
> +
> + for (i = 0; i < listlen; i++) {
> + if (ch[i].uc_buf != NULL) {
> + ch[i].uc_buf = NULL;
> + }
> + ml_purge(&ch[i].uc_mbuf_list);
> + if (ch[i].uc_xfer != NULL) {
> + usbd_free_xfer(ch[i].uc_xfer);
> + ch[i].uc_xfer = NULL;
>   }
>   }
>  }
> @@ -1458,20 +1570,12 @@ ure_attach(struct device *parent, struct
>   }
>   }
>
> - switch (uaa->product) {
> - case USB_PRODUCT_REALTEK_RTL8152:
> - sc->ure_flags |= URE_FLAG_8152;
> - sc->ure_rxbufsz = URE_8152_RXBUFSZ;
> - sc->ure_tx_list_cnt = 1;
> - break;
> - case USB_PRODUCT_REALTEK_RTL8156:
> - sc->ure_flags |= URE_FLAG_8156;
> - sc->ure_rxbufsz = URE_8153_RXBUFSZ;
> - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
> - break;
> - default:
> - sc->ure_rxbufsz = URE_8153_RXBUFSZ;
> - sc->ure_tx_list_cnt = 1;
> + if (uaa->product == USB_PRODUCT_REALTEK_RTL8152) {
> + sc->ure_txbufsz = URE_8152_TX_BUFSZ;
> + sc->ure_rxbufsz = URE_8152_RX_BUFSZ;
> + } else {
> + sc->ure_txbufsz = URE_8153_TX_BUFSZ;
> + sc->ure_rxbufsz = URE_8153_RX_BUFSZ;
>   }
>
>   s = splnet();
> @@ -1482,10 +1586,12 @@ ure_attach(struct device *parent, struct
>   ver = ure_read_2(sc, URE_PLA_TCR1, URE_MCU_TYPE_PLA) & URE_VERSION_MASK;
>   switch (ver) {
>   case 0x4c00:
> + sc->ure_flags = URE_FLAG_8152;
>   sc->ure_chip |= URE_CHIP_VER_4C00;
>   printf("RTL8152 (0x4c00)");
>   break;
>   case 0x4c10:
> + sc->ure_flags = URE_FLAG_8152;
>   sc->ure_chip |= URE_CHIP_VER_4C10;
>   printf("RTL8152 (0x4c10)");
>   break;
> @@ -1507,18 +1613,18 @@ ure_attach(struct device *parent, struct
>   break;
>   case 0x6000:
>   sc->ure_flags = URE_FLAG_8153B;
> - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
>   printf("RTL8153B (0x6000)");
>   break;
>   case 0x6010:
>   sc->ure_flags = URE_FLAG_8153B;
> - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
>   printf("RTL8153B (0x6010)");
>   break;
>   case 0x7020:
> + sc->ure_flags = URE_FLAG_8156;
>   printf("RTL8156 (0x7020)");
>   break;
>   case 0x7030:
> + sc->ure_flags = URE_FLAG_8156;
>   printf("RTL8156 (0x7030)");
>   break;
>   default:
> @@ -1721,7 +1827,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
>   goto done;
>   }
>
> - buf += roundup(pktlen, 8);
> + buf += roundup(pktlen, URE_RX_BUF_ALIGN);
>
>   memcpy(&rxhdr, buf, sizeof(rxhdr));
>   total_len -= sizeof(rxhdr);
> @@ -1734,7 +1840,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
>   goto done;
>   }
>
> - total_len -= roundup(pktlen, 8);
> + total_len -= roundup(pktlen, URE_RX_BUF_ALIGN);
>   buf += sizeof(rxhdr);
>
>   m = m_devget(buf, pktlen, ETHER_ALIGN);
> @@ -1799,18 +1905,27 @@ ure_txeof(struct usbd_xfer *xfer, void *
>   if (usbd_is_dying(sc->ure_udev))
>   return;
>
> - DPRINTFN(2, ("tx completion\n"));
> + if (status != USBD_NORMAL_COMPLETION)
> + DPRINTF(("%s: %s uc_idx=%u : %s\n", sc->ure_dev.dv_xname,
> + __func__, c->uc_idx, usbd_errstr(status)));
>
>   s = splnet();
> - sc->ure_cdata.tx_cnt--;
> +
> + ml_purge(&c->uc_mbuf_list);
> + c->uc_buflen = 0;
> +
> + SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free, c, ure_list);
> +
>   if (status != USBD_NORMAL_COMPLETION) {
>   if (status == USBD_NOT_STARTED || status == USBD_CANCELLED) {
>   splx(s);
>   return;
>   }
> +
>   ifp->if_oerrors++;
>   printf("%s: usb error on tx: %s\n", sc->ure_dev.dv_xname,
>      usbd_errstr(status));
> +
>   if (status == USBD_STALLED)
>   usbd_clear_endpoint_stall_async(
>      sc->ure_ep[URE_ENDPT_TX]);
> @@ -1819,83 +1934,18 @@ ure_txeof(struct usbd_xfer *xfer, void *
>   }
>
>   ifp->if_timer = 0;
> - ifq_clr_oactive(&ifp->if_snd);
> -
> - m_freem(c->uc_mbuf);
> - c->uc_mbuf = NULL;
> -
> - if (!ifq_empty(&ifp->if_snd))
> - ure_start(ifp);
> -
> - splx(s);
> -}
> -
> -int
> -ure_tx_list_init(struct ure_softc *sc)
> -{
> - struct ure_cdata *cd;
> - struct ure_chain *c;
> - int i;
> -
> - cd = &sc->ure_cdata;
> - for (i = 0; i < sc->ure_tx_list_cnt; i++) {
> - c = &cd->tx_chain[i];
> - c->uc_sc = sc;
> - c->uc_idx = i;
> - c->uc_mbuf = NULL;
> - if (c->uc_xfer == NULL) {
> - c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> - if (c->uc_xfer == NULL)
> - return ENOBUFS;
> - c->uc_buf = usbd_alloc_buffer(c->uc_xfer, URE_TXBUFSZ);
> - if (c->uc_buf == NULL) {
> - usbd_free_xfer(c->uc_xfer);
> - return ENOBUFS;
> - }
> - }
> - }
> -
> - cd->tx_prod = cd->tx_cnt = 0;
> -
> - return (0);
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
>  }
>
>  int
> -ure_rx_list_init(struct ure_softc *sc)
> +ure_encap_txpkt(struct mbuf *m, char *buf, uint32_t maxlen)
>  {
> - struct ure_cdata *cd;
> - struct ure_chain *c;
> - int i;
> -
> - cd = &sc->ure_cdata;
> - for (i = 0; i < URE_RX_LIST_CNT; i++) {
> - c = &cd->rx_chain[i];
> - c->uc_sc = sc;
> - c->uc_idx = i;
> - c->uc_mbuf = NULL;
> - if (c->uc_xfer == NULL) {
> - c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> - if (c->uc_xfer == NULL)
> - return ENOBUFS;
> - c->uc_buf = usbd_alloc_buffer(c->uc_xfer,
> -    sc->ure_rxbufsz);
> - if (c->uc_buf == NULL) {
> - usbd_free_xfer(c->uc_xfer);
> - return ENOBUFS;
> - }
> - }
> - }
> -
> - return (0);
> -}
> -
> -int
> -ure_encap(struct ure_softc *sc, struct mbuf *m)
> -{
> - struct ure_chain *c;
> - usbd_status err;
>   struct ure_txpkt txhdr;
> - uint32_t frm_len = 0, cflags = 0;
> + uint32_t len = sizeof(txhdr), cflags = 0;
> +
> + if (len + m->m_pkthdr.len > maxlen)
> + return (-1);
>
>   if ((m->m_pkthdr.csum_flags &
>      (M_IPV4_CSUM_OUT | M_TCP_CSUM_OUT | M_UDP_CSUM_OUT)) != 0) {
> @@ -1911,35 +1961,41 @@ ure_encap(struct ure_softc *sc, struct m
>   cflags |= swap16(m->m_pkthdr.ether_vtag | URE_TXPKT_VLAN_TAG);
>  #endif
>
> - c = &sc->ure_cdata.tx_chain[sc->ure_cdata.tx_prod];
> -
> - /* header */
>   txhdr.ure_pktlen = htole32(m->m_pkthdr.len | URE_TXPKT_TX_FS |
>      URE_TXPKT_TX_LS);
>   txhdr.ure_vlan = htole32(cflags);
> - memcpy(c->uc_buf, &txhdr, sizeof(txhdr));
> - frm_len = sizeof(txhdr);
> + memcpy(buf, &txhdr, len);
>
> - /* packet */
> - m_copydata(m, 0, m->m_pkthdr.len, c->uc_buf + frm_len);
> - frm_len += m->m_pkthdr.len;
> + m_copydata(m, 0, m->m_pkthdr.len, buf + len);
> + len += m->m_pkthdr.len;
>
> - c->uc_mbuf = m;
> + return (len);
> +}
> +
> +int
> +ure_encap_xfer(struct ifnet *ifp, struct ure_softc *sc, struct ure_chain *c)
> +{
> + usbd_status err;
>
> - DPRINTFN(2, ("tx %d bytes\n", frm_len));
>   usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_TX], c, c->uc_buf,
> -    frm_len, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000, ure_txeof);
> +    c->uc_buflen, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000,
> +    ure_txeof);
>
>   err = usbd_transfer(c->uc_xfer);
> - if (err != 0 && err != USBD_IN_PROGRESS) {
> - c->uc_mbuf = NULL;
> + if (err != USBD_IN_PROGRESS) {
> + ml_purge(&c->uc_mbuf_list);
> + c->uc_buflen = 0;
>   ure_stop(sc);
>   return (EIO);
>   }
>
> - sc->ure_cdata.tx_cnt++;
> - sc->ure_cdata.tx_prod = (sc->ure_cdata.tx_prod + 1) %
> -    sc->ure_tx_list_cnt;
> +#if NBPFILTER > 0
> + struct mbuf *m;
> +
> + if (ifp->if_bpf)
> + MBUF_LIST_FOREACH(&c->uc_mbuf_list, m)
> + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> +#endif
>
>   return (0);
>  }

--
Regards,
 Mikolaj

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Jonathon Fletcher-3

> On Jul 27, 2020, at 11:47 AM, Mikolaj Kucharski <[hidden email]> wrote:
>
> On Mon, Jul 27, 2020 at 11:24:55AM -0700, Jonathon Fletcher wrote:
>>
>> I have attached an updated patch - which includes Kevin's changes from
>> his earlier email - that also corrects the tx buffer sizes for RTL8153
>> / RTL8153B devices.
>>
>
> Unfortunately below diff fails to apply.
>
> 2 out of 2 hunks failed
> 15 out of 15 hunks failed
>
> I've tried on fresh cvs version of if_urereg.h and if_ure.c
>
> I usually, for convenience take the diffs from marc.info <http://marc.info/>:
>
> https://marc.info/?l=openbsd-tech&m=159587449018248&q=mbox <https://marc.info/?l=openbsd-tech&m=159587449018248&q=mbox>

I appear to have a mangled whitespace problem with my email.

It applies successfully when I copy / paste the patch out of the url above and apply it from /usr/src with patch -p0 -l .

Are you ok using patch's -l option for this patch?

Thanks,
Jonathon

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Mikolaj Kucharski-3
On Mon, Jul 27, 2020 at 11:58:02AM -0700, Jonathon Fletcher wrote:
>
> Are you ok using patch's -l option for this patch?
>

Sure, can do. New kernel compiled, will switch my machine tonight. Will
let you know how things go. For meetings I would need couple of days, as
I don't have Google Meet calls that often.

--
Regards,
 Mikolaj

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Jonathon Fletcher-3
On Mon, Jul 27, 2020 at 07:13:33PM +0000, Mikolaj Kucharski wrote:
> On Mon, Jul 27, 2020 at 11:58:02AM -0700, Jonathon Fletcher wrote:
> >
> > Are you ok using patch's -l option for this patch?
>
> Sure, can do. New kernel compiled, will switch my machine tonight. Will
> let you know how things go. For meetings I would need couple of days, as
> I don't have Google Meet calls that often.

Mikolaj,

Great, thank you. I have attached the patch below with the whitespace intact.

Thanks,
Jonathon

Index: sys/dev/usb/if_urereg.h
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_urereg.h,v
retrieving revision 1.8
diff -u -p -u -r1.8 if_urereg.h
--- sys/dev/usb/if_urereg.h 7 Dec 2019 08:45:28 -0000 1.8
+++ sys/dev/usb/if_urereg.h 27 Jul 2020 17:10:09 -0000
@@ -494,28 +494,32 @@ struct ure_txpkt {
 #define URE_ENDPT_TX 1
 #define URE_ENDPT_MAX 2
 
-#define URE_TX_LIST_CNT 8
+#define URE_TX_LIST_CNT 1
 #define URE_RX_LIST_CNT 1
-#define URE_RX_BUF_ALIGN sizeof(uint64_t)
+#define URE_TX_BUF_ALIGN 4
+#define URE_RX_BUF_ALIGN 8
 
-#define URE_TXBUFSZ 16384
-#define URE_8152_RXBUFSZ 16384
-#define URE_8153_RXBUFSZ 32768
+#define URE_8152_TX_BUFSZ 16384
+#define URE_8152_RX_BUFSZ 16384
+
+#define URE_8153_TX_BUFSZ 16384
+#define URE_8153_RX_BUFSZ 32768
 
 struct ure_chain {
  struct ure_softc *uc_sc;
  struct usbd_xfer *uc_xfer;
  char *uc_buf;
- struct mbuf *uc_mbuf;
- int uc_accum;
- int uc_idx;
+ uint32_t uc_buflen;
+ uint32_t uc_bufmax;
+ struct mbuf_list uc_mbuf_list;
+ SLIST_ENTRY(ure_chain)  ure_list;
+ uint8_t uc_idx;
 };
 
 struct ure_cdata {
- struct ure_chain tx_chain[URE_TX_LIST_CNT];
- struct ure_chain rx_chain[URE_RX_LIST_CNT];
- int tx_prod;
- int tx_cnt;
+ struct ure_chain ure_rx_chain[URE_RX_LIST_CNT];
+ struct ure_chain ure_tx_chain[URE_TX_LIST_CNT];
+ SLIST_HEAD(ure_list_head, ure_chain)    ure_tx_free;
 };
 
 struct ure_softc {
@@ -541,7 +545,7 @@ struct ure_softc {
 
  struct timeval ure_rx_notice;
  int ure_rxbufsz;
- int ure_tx_list_cnt;
+ int ure_txbufsz;
 
  int ure_phyno;
 
Index: sys/dev/usb/if_ure.c
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_ure.c,v
retrieving revision 1.16
diff -u -p -u -r1.16 if_ure.c
--- sys/dev/usb/if_ure.c 10 Jul 2020 13:26:41 -0000 1.16
+++ sys/dev/usb/if_ure.c 27 Jul 2020 17:10:39 -0000
@@ -117,11 +117,13 @@ void ure_miibus_writereg(struct device
 void ure_lock_mii(struct ure_softc *);
 void ure_unlock_mii(struct ure_softc *);
 
-int ure_encap(struct ure_softc *, struct mbuf *);
+int ure_encap_txpkt(struct mbuf *, char *, uint32_t);
+int ure_encap_xfer(struct ifnet *, struct ure_softc *, struct ure_chain *);
 void ure_rxeof(struct usbd_xfer *, void *, usbd_status);
 void ure_txeof(struct usbd_xfer *, void *, usbd_status);
-int ure_rx_list_init(struct ure_softc *);
-int ure_tx_list_init(struct ure_softc *);
+int ure_xfer_list_init(struct ure_softc *, struct ure_chain *,
+    uint32_t, int);
+void ure_xfer_list_free(struct ure_softc *, struct ure_chain *, int);
 
 void ure_tick_task(void *);
 void ure_tick(void *);
@@ -625,12 +627,36 @@ void
 ure_watchdog(struct ifnet *ifp)
 {
  struct ure_softc *sc = ifp->if_softc;
+ struct ure_chain *c;
+ usbd_status err;
+ int i, s;
+
+ ifp->if_timer = 0;
 
  if (usbd_is_dying(sc->ure_udev))
  return;
 
+ if ((ifp->if_flags & (IFF_RUNNING|IFF_UP)) != (IFF_RUNNING|IFF_UP))
+ return;
+
+ sc = ifp->if_softc;
+ s = splnet();
+
  ifp->if_oerrors++;
- printf("%s: watchdog timeout\n", sc->ure_dev.dv_xname);
+ DPRINTF(("watchdog timeout\n"));
+
+ for (i = 0; i < URE_TX_LIST_CNT; i++) {
+ c = &sc->ure_cdata.ure_tx_chain[i];
+ if (ml_len(&c->uc_mbuf_list) > 0) {
+ usbd_get_xfer_status(c->uc_xfer, NULL, NULL, NULL,
+    &err);
+ ure_txeof(c->uc_xfer, c, err);
+ }
+ }
+
+ if (ifq_is_oactive(&ifp->if_snd))
+ ifq_restart(&ifp->if_snd);
+ splx(s);
 }
 
 void
@@ -653,18 +679,26 @@ ure_init(void *xsc)
  else
  ure_rtl8153_nic_reset(sc);
 
- if (ure_rx_list_init(sc) == ENOBUFS) {
+ if (ure_xfer_list_init(sc, sc->ure_cdata.ure_rx_chain,
+ sc->ure_rxbufsz, URE_RX_LIST_CNT) == ENOBUFS) {
  printf("%s: rx list init failed\n", sc->ure_dev.dv_xname);
  splx(s);
  return;
  }
 
- if (ure_tx_list_init(sc) == ENOBUFS) {
+ if (ure_xfer_list_init(sc, sc->ure_cdata.ure_tx_chain,
+ sc->ure_txbufsz, URE_TX_LIST_CNT) == ENOBUFS) {
  printf("%s: tx list init failed\n", sc->ure_dev.dv_xname);
  splx(s);
  return;
  }
 
+ /* Initialize the SLIST we are using for the multiple tx buffers */
+ SLIST_INIT(&sc->ure_cdata.ure_tx_free);
+ for (i = 0; i < URE_TX_LIST_CNT; i++)
+ SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free,
+ &sc->ure_cdata.ure_tx_chain[i], ure_list);
+
  /* Set MAC address. */
  ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_CONFIG);
  ure_write_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA | URE_BYTE_EN_SIX_BYTES,
@@ -739,9 +773,9 @@ ure_init(void *xsc)
 
  /* Start up the receive pipe. */
  for (i = 0; i < URE_RX_LIST_CNT; i++) {
- c = &sc->ure_cdata.rx_chain[i];
+ c = &sc->ure_cdata.ure_rx_chain[i];
  usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_RX],
-    c, c->uc_buf, sc->ure_rxbufsz,
+    c, c->uc_buf, c->uc_bufmax,
     USBD_SHORT_XFER_OK | USBD_NO_COPY,
     USBD_NO_TIMEOUT, ure_rxeof);
  usbd_transfer(c->uc_xfer);
@@ -763,34 +797,83 @@ void
 ure_start(struct ifnet *ifp)
 {
  struct ure_softc *sc = ifp->if_softc;
- struct mbuf *m_head = NULL;
-
- if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd) ||
-    !(sc->ure_flags & URE_FLAG_LINK))
+ struct ure_cdata *cd = &sc->ure_cdata;
+ struct ure_chain *c;
+ struct mbuf *m = NULL;
+ uint32_t new_buflen;
+ int s, mlen;
+
+ if (!(sc->ure_flags & URE_FLAG_LINK) ||
+ (ifp->if_flags & (IFF_RUNNING|IFF_UP)) !=
+    (IFF_RUNNING|IFF_UP)) {
  return;
+ }
+
+ s = splnet();
 
- for (;;) {
- if (sc->ure_cdata.tx_cnt == sc->ure_tx_list_cnt) {
- ifq_set_oactive(&ifp->if_snd);
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ while (c != NULL) {
+ m = NULL;
+ mlen = ifq_hdatalen(&ifp->if_snd);
+ if (mlen <= 0)
  break;
+
+ /*
+                 * If packet larger than remaining space, send buffer and
+ * continue.
+ */
+ new_buflen = roundup(c->uc_buflen, URE_TX_BUF_ALIGN);
+ if (new_buflen + sizeof(struct ure_txpkt) + mlen >=
+    c->uc_bufmax) {
+ SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
+ if (ure_encap_xfer(ifp, sc, c)) {
+ SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
+    ure_list);
+ break;
+ }
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ continue;
  }
 
- m_head = ifq_dequeue(&ifp->if_snd);
- if (m_head == NULL)
+ m = ifq_dequeue(&ifp->if_snd);
+ if (m == NULL)
  break;
 
- if (ure_encap(sc, m_head)) {
- m_freem(m_head);
- ifq_set_oactive(&ifp->if_snd);
+ /* Discard packet larger than buffer. */
+ if (mlen + sizeof(struct ure_txpkt) >= c->uc_bufmax) {
+ m_freem(m);
+ continue;
+ }
+
+ /* Append packet to current buffer. */
+ mlen = ure_encap_txpkt(m, c->uc_buf + new_buflen,
+    c->uc_bufmax - new_buflen);
+ if (mlen <= 0) {
+ m_freem(m);
  break;
  }
+ ml_enqueue(&c->uc_mbuf_list, m);
+ c->uc_buflen = new_buflen + mlen;
+ }
 
-#if NBPFILTER > 0
- if (ifp->if_bpf)
- bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
-#endif
- ifp->if_timer = 5;
+ if (c != NULL) {
+ /* Send current buffer unless empty */
+ if (c->uc_buflen > 0 && ml_len(&c->uc_mbuf_list) > 0) {
+ SLIST_REMOVE_HEAD(&cd->ure_tx_free, ure_list);
+ if (ure_encap_xfer(ifp, sc, c)) {
+ SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
+    ure_list);
+ }
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ }
  }
+
+ if (c == NULL)
+ ifq_set_oactive(&ifp->if_snd);
+
+ ifp->if_timer = 5;
+
+ splx(s);
 }
 
 void
@@ -810,9 +893,9 @@ ure_tick(void *xsc)
 void
 ure_stop(struct ure_softc *sc)
 {
- usbd_status err;
+ struct ure_cdata *cd;
  struct ifnet *ifp;
- int i;
+ usbd_status err;
 
  ure_reset(sc);
 
@@ -844,25 +927,54 @@ ure_stop(struct ure_softc *sc)
  sc->ure_ep[URE_ENDPT_TX] = NULL;
  }
 
- for (i = 0; i < URE_RX_LIST_CNT; i++) {
- if (sc->ure_cdata.rx_chain[i].uc_mbuf != NULL) {
- m_freem(sc->ure_cdata.rx_chain[i].uc_mbuf);
- sc->ure_cdata.rx_chain[i].uc_mbuf = NULL;
- }
- if (sc->ure_cdata.rx_chain[i].uc_xfer != NULL) {
- usbd_free_xfer(sc->ure_cdata.rx_chain[i].uc_xfer);
- sc->ure_cdata.rx_chain[i].uc_xfer = NULL;
+ cd = &sc->ure_cdata;
+ ure_xfer_list_free(sc, cd->ure_rx_chain, URE_RX_LIST_CNT);
+ ure_xfer_list_free(sc, cd->ure_tx_chain, URE_TX_LIST_CNT);
+}
+
+int
+ure_xfer_list_init(struct ure_softc *sc, struct ure_chain *ch,
+    uint32_t bufsize, int listlen)
+{
+ struct ure_chain *c;
+ int i;
+
+ for (i = 0; i < listlen; i++) {
+ c = &ch[i];
+ c->uc_sc = sc;
+ c->uc_idx = i;
+ c->uc_buflen = 0;
+ c->uc_bufmax = bufsize;
+ ml_init(&c->uc_mbuf_list);
+ if (c->uc_xfer == NULL) {
+ c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
+ if (c->uc_xfer == NULL)
+ return (ENOBUFS);
+ c->uc_buf = usbd_alloc_buffer(c->uc_xfer, c->uc_bufmax);
+ if (c->uc_buf == NULL) {
+ usbd_free_xfer(c->uc_xfer);
+ c->uc_xfer = NULL;
+ return (ENOBUFS);
+ }
  }
  }
 
- for (i = 0; i < sc->ure_tx_list_cnt; i++) {
- if (sc->ure_cdata.tx_chain[i].uc_mbuf != NULL) {
- m_freem(sc->ure_cdata.tx_chain[i].uc_mbuf);
- sc->ure_cdata.tx_chain[i].uc_mbuf = NULL;
- }
- if (sc->ure_cdata.tx_chain[i].uc_xfer != NULL) {
- usbd_free_xfer(sc->ure_cdata.tx_chain[i].uc_xfer);
- sc->ure_cdata.tx_chain[i].uc_xfer = NULL;
+ return (0);
+}
+
+void
+ure_xfer_list_free(struct ure_softc *sc, struct ure_chain *ch, int listlen)
+{
+ int i;
+
+ for (i = 0; i < listlen; i++) {
+ if (ch[i].uc_buf != NULL) {
+ ch[i].uc_buf = NULL;
+ }
+ ml_purge(&ch[i].uc_mbuf_list);
+ if (ch[i].uc_xfer != NULL) {
+ usbd_free_xfer(ch[i].uc_xfer);
+ ch[i].uc_xfer = NULL;
  }
  }
 }
@@ -1458,20 +1570,12 @@ ure_attach(struct device *parent, struct
  }
  }
 
- switch (uaa->product) {
- case USB_PRODUCT_REALTEK_RTL8152:
- sc->ure_flags |= URE_FLAG_8152;
- sc->ure_rxbufsz = URE_8152_RXBUFSZ;
- sc->ure_tx_list_cnt = 1;
- break;
- case USB_PRODUCT_REALTEK_RTL8156:
- sc->ure_flags |= URE_FLAG_8156;
- sc->ure_rxbufsz = URE_8153_RXBUFSZ;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
- break;
- default:
- sc->ure_rxbufsz = URE_8153_RXBUFSZ;
- sc->ure_tx_list_cnt = 1;
+ if (uaa->product == USB_PRODUCT_REALTEK_RTL8152) {
+ sc->ure_txbufsz = URE_8152_TX_BUFSZ;
+ sc->ure_rxbufsz = URE_8152_RX_BUFSZ;
+ } else {
+ sc->ure_txbufsz = URE_8153_TX_BUFSZ;
+ sc->ure_rxbufsz = URE_8153_RX_BUFSZ;
  }
 
  s = splnet();
@@ -1482,10 +1586,12 @@ ure_attach(struct device *parent, struct
  ver = ure_read_2(sc, URE_PLA_TCR1, URE_MCU_TYPE_PLA) & URE_VERSION_MASK;
  switch (ver) {
  case 0x4c00:
+ sc->ure_flags = URE_FLAG_8152;
  sc->ure_chip |= URE_CHIP_VER_4C00;
  printf("RTL8152 (0x4c00)");
  break;
  case 0x4c10:
+ sc->ure_flags = URE_FLAG_8152;
  sc->ure_chip |= URE_CHIP_VER_4C10;
  printf("RTL8152 (0x4c10)");
  break;
@@ -1507,18 +1613,18 @@ ure_attach(struct device *parent, struct
  break;
  case 0x6000:
  sc->ure_flags = URE_FLAG_8153B;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
  printf("RTL8153B (0x6000)");
  break;
  case 0x6010:
  sc->ure_flags = URE_FLAG_8153B;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
  printf("RTL8153B (0x6010)");
  break;
  case 0x7020:
+ sc->ure_flags = URE_FLAG_8156;
  printf("RTL8156 (0x7020)");
  break;
  case 0x7030:
+ sc->ure_flags = URE_FLAG_8156;
  printf("RTL8156 (0x7030)");
  break;
  default:
@@ -1721,7 +1827,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
  goto done;
  }
 
- buf += roundup(pktlen, 8);
+ buf += roundup(pktlen, URE_RX_BUF_ALIGN);
 
  memcpy(&rxhdr, buf, sizeof(rxhdr));
  total_len -= sizeof(rxhdr);
@@ -1734,7 +1840,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
  goto done;
  }
 
- total_len -= roundup(pktlen, 8);
+ total_len -= roundup(pktlen, URE_RX_BUF_ALIGN);
  buf += sizeof(rxhdr);
 
  m = m_devget(buf, pktlen, ETHER_ALIGN);
@@ -1799,18 +1905,27 @@ ure_txeof(struct usbd_xfer *xfer, void *
  if (usbd_is_dying(sc->ure_udev))
  return;
 
- DPRINTFN(2, ("tx completion\n"));
+ if (status != USBD_NORMAL_COMPLETION)
+ DPRINTF(("%s: %s uc_idx=%u : %s\n", sc->ure_dev.dv_xname,
+ __func__, c->uc_idx, usbd_errstr(status)));
 
  s = splnet();
- sc->ure_cdata.tx_cnt--;
+
+ ml_purge(&c->uc_mbuf_list);
+ c->uc_buflen = 0;
+
+ SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free, c, ure_list);
+
  if (status != USBD_NORMAL_COMPLETION) {
  if (status == USBD_NOT_STARTED || status == USBD_CANCELLED) {
  splx(s);
  return;
  }
+
  ifp->if_oerrors++;
  printf("%s: usb error on tx: %s\n", sc->ure_dev.dv_xname,
     usbd_errstr(status));
+
  if (status == USBD_STALLED)
  usbd_clear_endpoint_stall_async(
     sc->ure_ep[URE_ENDPT_TX]);
@@ -1819,83 +1934,18 @@ ure_txeof(struct usbd_xfer *xfer, void *
  }
 
  ifp->if_timer = 0;
- ifq_clr_oactive(&ifp->if_snd);
-
- m_freem(c->uc_mbuf);
- c->uc_mbuf = NULL;
-
- if (!ifq_empty(&ifp->if_snd))
- ure_start(ifp);
-
- splx(s);
-}
-
-int
-ure_tx_list_init(struct ure_softc *sc)
-{
- struct ure_cdata *cd;
- struct ure_chain *c;
- int i;
-
- cd = &sc->ure_cdata;
- for (i = 0; i < sc->ure_tx_list_cnt; i++) {
- c = &cd->tx_chain[i];
- c->uc_sc = sc;
- c->uc_idx = i;
- c->uc_mbuf = NULL;
- if (c->uc_xfer == NULL) {
- c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
- if (c->uc_xfer == NULL)
- return ENOBUFS;
- c->uc_buf = usbd_alloc_buffer(c->uc_xfer, URE_TXBUFSZ);
- if (c->uc_buf == NULL) {
- usbd_free_xfer(c->uc_xfer);
- return ENOBUFS;
- }
- }
- }
-
- cd->tx_prod = cd->tx_cnt = 0;
-
- return (0);
+ if (ifq_is_oactive(&ifp->if_snd))
+ ifq_restart(&ifp->if_snd);
 }
 
 int
-ure_rx_list_init(struct ure_softc *sc)
+ure_encap_txpkt(struct mbuf *m, char *buf, uint32_t maxlen)
 {
- struct ure_cdata *cd;
- struct ure_chain *c;
- int i;
-
- cd = &sc->ure_cdata;
- for (i = 0; i < URE_RX_LIST_CNT; i++) {
- c = &cd->rx_chain[i];
- c->uc_sc = sc;
- c->uc_idx = i;
- c->uc_mbuf = NULL;
- if (c->uc_xfer == NULL) {
- c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
- if (c->uc_xfer == NULL)
- return ENOBUFS;
- c->uc_buf = usbd_alloc_buffer(c->uc_xfer,
-    sc->ure_rxbufsz);
- if (c->uc_buf == NULL) {
- usbd_free_xfer(c->uc_xfer);
- return ENOBUFS;
- }
- }
- }
-
- return (0);
-}
-
-int
-ure_encap(struct ure_softc *sc, struct mbuf *m)
-{
- struct ure_chain *c;
- usbd_status err;
  struct ure_txpkt txhdr;
- uint32_t frm_len = 0, cflags = 0;
+ uint32_t len = sizeof(txhdr), cflags = 0;
+
+ if (len + m->m_pkthdr.len > maxlen)
+ return (-1);
 
  if ((m->m_pkthdr.csum_flags &
     (M_IPV4_CSUM_OUT | M_TCP_CSUM_OUT | M_UDP_CSUM_OUT)) != 0) {
@@ -1911,35 +1961,41 @@ ure_encap(struct ure_softc *sc, struct m
  cflags |= swap16(m->m_pkthdr.ether_vtag | URE_TXPKT_VLAN_TAG);
 #endif
 
- c = &sc->ure_cdata.tx_chain[sc->ure_cdata.tx_prod];
-
- /* header */
  txhdr.ure_pktlen = htole32(m->m_pkthdr.len | URE_TXPKT_TX_FS |
     URE_TXPKT_TX_LS);
  txhdr.ure_vlan = htole32(cflags);
- memcpy(c->uc_buf, &txhdr, sizeof(txhdr));
- frm_len = sizeof(txhdr);
+ memcpy(buf, &txhdr, len);
 
- /* packet */
- m_copydata(m, 0, m->m_pkthdr.len, c->uc_buf + frm_len);
- frm_len += m->m_pkthdr.len;
+ m_copydata(m, 0, m->m_pkthdr.len, buf + len);
+ len += m->m_pkthdr.len;
 
- c->uc_mbuf = m;
+ return (len);
+}
+
+int
+ure_encap_xfer(struct ifnet *ifp, struct ure_softc *sc, struct ure_chain *c)
+{
+ usbd_status err;
 
- DPRINTFN(2, ("tx %d bytes\n", frm_len));
  usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_TX], c, c->uc_buf,
-    frm_len, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000, ure_txeof);
+    c->uc_buflen, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000,
+    ure_txeof);
 
  err = usbd_transfer(c->uc_xfer);
- if (err != 0 && err != USBD_IN_PROGRESS) {
- c->uc_mbuf = NULL;
+ if (err != USBD_IN_PROGRESS) {
+ ml_purge(&c->uc_mbuf_list);
+ c->uc_buflen = 0;
  ure_stop(sc);
  return (EIO);
  }
 
- sc->ure_cdata.tx_cnt++;
- sc->ure_cdata.tx_prod = (sc->ure_cdata.tx_prod + 1) %
-    sc->ure_tx_list_cnt;
+#if NBPFILTER > 0
+ struct mbuf *m;
+
+ if (ifp->if_bpf)
+ MBUF_LIST_FOREACH(&c->uc_mbuf_list, m)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
 
  return (0);
 }

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Jonathon Fletcher-3
In reply to this post by Mikolaj Kucharski-3
On Mon, Jul 27, 2020 at 07:13:33PM +0000, Mikolaj Kucharski wrote:
> On Mon, Jul 27, 2020 at 11:58:02AM -0700, Jonathon Fletcher wrote:
> >
> > Are you ok using patch's -l option for this patch?
> >
>
> Sure, can do. New kernel compiled, will switch my machine tonight. Will
> let you know how things go. For meetings I would need couple of days, as
> I don't have Google Meet calls that often.

Mikolaj,

I hope the patch has been stable for you. I do have an update - it appears
that a splx(s) got lost along the way (from the end of ure_txeof). This
patch adds that back in and has some minor cleanup (variable name, cleanup
defines, adjust the setting of flags and buffer sizes based on device type).
I have been running this for a couple of days now.

Jonathon


Index: sys/dev/usb/if_urereg.h
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_urereg.h,v
retrieving revision 1.8
diff -u -p -u -r1.8 if_urereg.h
--- sys/dev/usb/if_urereg.h 7 Dec 2019 08:45:28 -0000 1.8
+++ sys/dev/usb/if_urereg.h 28 Jul 2020 15:30:41 -0000
@@ -494,28 +494,30 @@ struct ure_txpkt {
 #define URE_ENDPT_TX 1
 #define URE_ENDPT_MAX 2
 
-#define URE_TX_LIST_CNT 8
+#define URE_TX_LIST_CNT 1
 #define URE_RX_LIST_CNT 1
-#define URE_RX_BUF_ALIGN sizeof(uint64_t)
+#define URE_TX_BUF_ALIGN 4
+#define URE_RX_BUF_ALIGN 8
 
-#define URE_TXBUFSZ 16384
-#define URE_8152_RXBUFSZ 16384
-#define URE_8153_RXBUFSZ 32768
+#define URE_TX_BUFSZ 16384
+#define URE_8152_RX_BUFSZ 16384
+#define URE_8153_RX_BUFSZ 32768
 
 struct ure_chain {
  struct ure_softc *uc_sc;
  struct usbd_xfer *uc_xfer;
  char *uc_buf;
- struct mbuf *uc_mbuf;
- int uc_accum;
- int uc_idx;
+ uint32_t uc_buflen;
+ uint32_t uc_bufmax;
+ struct mbuf_list uc_mbuf_list;
+ SLIST_ENTRY(ure_chain)  uc_list;
+ uint8_t uc_idx;
 };
 
 struct ure_cdata {
- struct ure_chain tx_chain[URE_TX_LIST_CNT];
- struct ure_chain rx_chain[URE_RX_LIST_CNT];
- int tx_prod;
- int tx_cnt;
+ struct ure_chain ure_rx_chain[URE_RX_LIST_CNT];
+ struct ure_chain ure_tx_chain[URE_TX_LIST_CNT];
+ SLIST_HEAD(ure_list_head, ure_chain)    ure_tx_free;
 };
 
 struct ure_softc {
@@ -541,7 +543,7 @@ struct ure_softc {
 
  struct timeval ure_rx_notice;
  int ure_rxbufsz;
- int ure_tx_list_cnt;
+ int ure_txbufsz;
 
  int ure_phyno;
 
Index: sys/dev/usb/if_ure.c
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_ure.c,v
retrieving revision 1.16
diff -u -p -u -r1.16 if_ure.c
--- sys/dev/usb/if_ure.c 10 Jul 2020 13:26:41 -0000 1.16
+++ sys/dev/usb/if_ure.c 28 Jul 2020 22:56:29 -0000
@@ -117,11 +117,13 @@ void ure_miibus_writereg(struct device
 void ure_lock_mii(struct ure_softc *);
 void ure_unlock_mii(struct ure_softc *);
 
-int ure_encap(struct ure_softc *, struct mbuf *);
+int ure_encap_txpkt(struct mbuf *, char *, uint32_t);
+int ure_encap_xfer(struct ifnet *, struct ure_softc *, struct ure_chain *);
 void ure_rxeof(struct usbd_xfer *, void *, usbd_status);
 void ure_txeof(struct usbd_xfer *, void *, usbd_status);
-int ure_rx_list_init(struct ure_softc *);
-int ure_tx_list_init(struct ure_softc *);
+int ure_xfer_list_init(struct ure_softc *, struct ure_chain *,
+    uint32_t, int);
+void ure_xfer_list_free(struct ure_softc *, struct ure_chain *, int);
 
 void ure_tick_task(void *);
 void ure_tick(void *);
@@ -625,12 +627,36 @@ void
 ure_watchdog(struct ifnet *ifp)
 {
  struct ure_softc *sc = ifp->if_softc;
+ struct ure_chain *c;
+ usbd_status err;
+ int i, s;
+
+ ifp->if_timer = 0;
 
  if (usbd_is_dying(sc->ure_udev))
  return;
 
+ if ((ifp->if_flags & (IFF_RUNNING|IFF_UP)) != (IFF_RUNNING|IFF_UP))
+ return;
+
+ sc = ifp->if_softc;
+ s = splnet();
+
  ifp->if_oerrors++;
- printf("%s: watchdog timeout\n", sc->ure_dev.dv_xname);
+ DPRINTF(("%s: watchdog timeout\n", sc->ure_dev.dv_xname));
+
+ for (i = 0; i < URE_TX_LIST_CNT; i++) {
+ c = &sc->ure_cdata.ure_tx_chain[i];
+ if (ml_len(&c->uc_mbuf_list) > 0) {
+ usbd_get_xfer_status(c->uc_xfer, NULL, NULL, NULL,
+    &err);
+ ure_txeof(c->uc_xfer, c, err);
+ }
+ }
+
+ if (ifq_is_oactive(&ifp->if_snd))
+ ifq_restart(&ifp->if_snd);
+ splx(s);
 }
 
 void
@@ -653,18 +679,26 @@ ure_init(void *xsc)
  else
  ure_rtl8153_nic_reset(sc);
 
- if (ure_rx_list_init(sc) == ENOBUFS) {
+ if (ure_xfer_list_init(sc, sc->ure_cdata.ure_rx_chain,
+ sc->ure_rxbufsz, URE_RX_LIST_CNT) == ENOBUFS) {
  printf("%s: rx list init failed\n", sc->ure_dev.dv_xname);
  splx(s);
  return;
  }
 
- if (ure_tx_list_init(sc) == ENOBUFS) {
+ if (ure_xfer_list_init(sc, sc->ure_cdata.ure_tx_chain,
+ sc->ure_txbufsz, URE_TX_LIST_CNT) == ENOBUFS) {
  printf("%s: tx list init failed\n", sc->ure_dev.dv_xname);
  splx(s);
  return;
  }
 
+ /* Initialize the SLIST we are using for the multiple tx buffers */
+ SLIST_INIT(&sc->ure_cdata.ure_tx_free);
+ for (i = 0; i < URE_TX_LIST_CNT; i++)
+ SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free,
+ &sc->ure_cdata.ure_tx_chain[i], uc_list);
+
  /* Set MAC address. */
  ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_CONFIG);
  ure_write_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA | URE_BYTE_EN_SIX_BYTES,
@@ -739,9 +773,9 @@ ure_init(void *xsc)
 
  /* Start up the receive pipe. */
  for (i = 0; i < URE_RX_LIST_CNT; i++) {
- c = &sc->ure_cdata.rx_chain[i];
+ c = &sc->ure_cdata.ure_rx_chain[i];
  usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_RX],
-    c, c->uc_buf, sc->ure_rxbufsz,
+    c, c->uc_buf, c->uc_bufmax,
     USBD_SHORT_XFER_OK | USBD_NO_COPY,
     USBD_NO_TIMEOUT, ure_rxeof);
  usbd_transfer(c->uc_xfer);
@@ -763,34 +797,81 @@ void
 ure_start(struct ifnet *ifp)
 {
  struct ure_softc *sc = ifp->if_softc;
- struct mbuf *m_head = NULL;
-
- if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd) ||
-    !(sc->ure_flags & URE_FLAG_LINK))
+ struct ure_cdata *cd = &sc->ure_cdata;
+ struct ure_chain *c;
+ struct mbuf *m = NULL;
+ uint32_t new_buflen;
+ int s, mlen;
+
+ if (!(sc->ure_flags & URE_FLAG_LINK) ||
+ (ifp->if_flags & (IFF_RUNNING|IFF_UP)) !=
+    (IFF_RUNNING|IFF_UP)) {
  return;
+ }
+
+ s = splnet();
 
- for (;;) {
- if (sc->ure_cdata.tx_cnt == sc->ure_tx_list_cnt) {
- ifq_set_oactive(&ifp->if_snd);
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ while (c != NULL) {
+ m = NULL;
+ mlen = ifq_hdatalen(&ifp->if_snd);
+ if (mlen <= 0)
  break;
+
+ /*
+                 * If packet larger than remaining space, send buffer and
+ * continue.
+ */
+ new_buflen = roundup(c->uc_buflen, URE_TX_BUF_ALIGN);
+ if (new_buflen + sizeof(struct ure_txpkt) + mlen >=
+    c->uc_bufmax) {
+ SLIST_REMOVE_HEAD(&cd->ure_tx_free, uc_list);
+ if (ure_encap_xfer(ifp, sc, c)) {
+ SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
+    uc_list);
+ break;
+ }
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ continue;
  }
 
- m_head = ifq_dequeue(&ifp->if_snd);
- if (m_head == NULL)
+ m = ifq_dequeue(&ifp->if_snd);
+ if (m == NULL)
  break;
 
- if (ure_encap(sc, m_head)) {
- m_freem(m_head);
- ifq_set_oactive(&ifp->if_snd);
+ /* Discard packet larger than buffer. */
+ if (mlen + sizeof(struct ure_txpkt) >= c->uc_bufmax) {
+ m_freem(m);
+ continue;
+ }
+
+ /* Append packet to current buffer. */
+ mlen = ure_encap_txpkt(m, c->uc_buf + new_buflen,
+    c->uc_bufmax - new_buflen);
+ if (mlen <= 0) {
+ m_freem(m);
  break;
  }
+ ml_enqueue(&c->uc_mbuf_list, m);
+ c->uc_buflen = new_buflen + mlen;
+ }
 
-#if NBPFILTER > 0
- if (ifp->if_bpf)
- bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
-#endif
- ifp->if_timer = 5;
+ if (c != NULL) {
+ /* Send current buffer unless empty */
+ if (c->uc_buflen > 0 && ml_len(&c->uc_mbuf_list) > 0) {
+ SLIST_REMOVE_HEAD(&cd->ure_tx_free, uc_list);
+ if (ure_encap_xfer(ifp, sc, c)) {
+ SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
+    uc_list);
+ }
+ c = SLIST_FIRST(&cd->ure_tx_free);
+ }
  }
+
+ ifp->if_timer = 5;
+ if (c == NULL)
+ ifq_set_oactive(&ifp->if_snd);
+ splx(s);
 }
 
 void
@@ -810,9 +891,9 @@ ure_tick(void *xsc)
 void
 ure_stop(struct ure_softc *sc)
 {
- usbd_status err;
+ struct ure_cdata *cd;
  struct ifnet *ifp;
- int i;
+ usbd_status err;
 
  ure_reset(sc);
 
@@ -844,25 +925,54 @@ ure_stop(struct ure_softc *sc)
  sc->ure_ep[URE_ENDPT_TX] = NULL;
  }
 
- for (i = 0; i < URE_RX_LIST_CNT; i++) {
- if (sc->ure_cdata.rx_chain[i].uc_mbuf != NULL) {
- m_freem(sc->ure_cdata.rx_chain[i].uc_mbuf);
- sc->ure_cdata.rx_chain[i].uc_mbuf = NULL;
- }
- if (sc->ure_cdata.rx_chain[i].uc_xfer != NULL) {
- usbd_free_xfer(sc->ure_cdata.rx_chain[i].uc_xfer);
- sc->ure_cdata.rx_chain[i].uc_xfer = NULL;
+ cd = &sc->ure_cdata;
+ ure_xfer_list_free(sc, cd->ure_rx_chain, URE_RX_LIST_CNT);
+ ure_xfer_list_free(sc, cd->ure_tx_chain, URE_TX_LIST_CNT);
+}
+
+int
+ure_xfer_list_init(struct ure_softc *sc, struct ure_chain *ch,
+    uint32_t bufsize, int listlen)
+{
+ struct ure_chain *c;
+ int i;
+
+ for (i = 0; i < listlen; i++) {
+ c = &ch[i];
+ c->uc_sc = sc;
+ c->uc_idx = i;
+ c->uc_buflen = 0;
+ c->uc_bufmax = bufsize;
+ ml_init(&c->uc_mbuf_list);
+ if (c->uc_xfer == NULL) {
+ c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
+ if (c->uc_xfer == NULL)
+ return (ENOBUFS);
+ c->uc_buf = usbd_alloc_buffer(c->uc_xfer, c->uc_bufmax);
+ if (c->uc_buf == NULL) {
+ usbd_free_xfer(c->uc_xfer);
+ c->uc_xfer = NULL;
+ return (ENOBUFS);
+ }
  }
  }
 
- for (i = 0; i < sc->ure_tx_list_cnt; i++) {
- if (sc->ure_cdata.tx_chain[i].uc_mbuf != NULL) {
- m_freem(sc->ure_cdata.tx_chain[i].uc_mbuf);
- sc->ure_cdata.tx_chain[i].uc_mbuf = NULL;
- }
- if (sc->ure_cdata.tx_chain[i].uc_xfer != NULL) {
- usbd_free_xfer(sc->ure_cdata.tx_chain[i].uc_xfer);
- sc->ure_cdata.tx_chain[i].uc_xfer = NULL;
+ return (0);
+}
+
+void
+ure_xfer_list_free(struct ure_softc *sc, struct ure_chain *ch, int listlen)
+{
+ int i;
+
+ for (i = 0; i < listlen; i++) {
+ if (ch[i].uc_buf != NULL) {
+ ch[i].uc_buf = NULL;
+ }
+ ml_purge(&ch[i].uc_mbuf_list);
+ if (ch[i].uc_xfer != NULL) {
+ usbd_free_xfer(ch[i].uc_xfer);
+ ch[i].uc_xfer = NULL;
  }
  }
 }
@@ -1458,21 +1568,8 @@ ure_attach(struct device *parent, struct
  }
  }
 
- switch (uaa->product) {
- case USB_PRODUCT_REALTEK_RTL8152:
- sc->ure_flags |= URE_FLAG_8152;
- sc->ure_rxbufsz = URE_8152_RXBUFSZ;
- sc->ure_tx_list_cnt = 1;
- break;
- case USB_PRODUCT_REALTEK_RTL8156:
- sc->ure_flags |= URE_FLAG_8156;
- sc->ure_rxbufsz = URE_8153_RXBUFSZ;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
- break;
- default:
- sc->ure_rxbufsz = URE_8153_RXBUFSZ;
- sc->ure_tx_list_cnt = 1;
- }
+ sc->ure_txbufsz = URE_TX_BUFSZ;
+ sc->ure_rxbufsz = URE_8153_RX_BUFSZ;
 
  s = splnet();
 
@@ -1482,10 +1579,14 @@ ure_attach(struct device *parent, struct
  ver = ure_read_2(sc, URE_PLA_TCR1, URE_MCU_TYPE_PLA) & URE_VERSION_MASK;
  switch (ver) {
  case 0x4c00:
+ sc->ure_flags = URE_FLAG_8152;
+ sc->ure_rxbufsz = URE_8152_RX_BUFSZ;
  sc->ure_chip |= URE_CHIP_VER_4C00;
  printf("RTL8152 (0x4c00)");
  break;
  case 0x4c10:
+ sc->ure_flags = URE_FLAG_8152;
+ sc->ure_rxbufsz = URE_8152_RX_BUFSZ;
  sc->ure_chip |= URE_CHIP_VER_4C10;
  printf("RTL8152 (0x4c10)");
  break;
@@ -1507,18 +1608,18 @@ ure_attach(struct device *parent, struct
  break;
  case 0x6000:
  sc->ure_flags = URE_FLAG_8153B;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
  printf("RTL8153B (0x6000)");
  break;
  case 0x6010:
  sc->ure_flags = URE_FLAG_8153B;
- sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
  printf("RTL8153B (0x6010)");
  break;
  case 0x7020:
+ sc->ure_flags = URE_FLAG_8156;
  printf("RTL8156 (0x7020)");
  break;
  case 0x7030:
+ sc->ure_flags = URE_FLAG_8156;
  printf("RTL8156 (0x7030)");
  break;
  default:
@@ -1721,7 +1822,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
  goto done;
  }
 
- buf += roundup(pktlen, 8);
+ buf += roundup(pktlen, URE_RX_BUF_ALIGN);
 
  memcpy(&rxhdr, buf, sizeof(rxhdr));
  total_len -= sizeof(rxhdr);
@@ -1734,7 +1835,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
  goto done;
  }
 
- total_len -= roundup(pktlen, 8);
+ total_len -= roundup(pktlen, URE_RX_BUF_ALIGN);
  buf += sizeof(rxhdr);
 
  m = m_devget(buf, pktlen, ETHER_ALIGN);
@@ -1799,18 +1900,27 @@ ure_txeof(struct usbd_xfer *xfer, void *
  if (usbd_is_dying(sc->ure_udev))
  return;
 
- DPRINTFN(2, ("tx completion\n"));
+ if (status != USBD_NORMAL_COMPLETION)
+ DPRINTF(("%s: %s uc_idx=%u : %s\n", sc->ure_dev.dv_xname,
+ __func__, c->uc_idx, usbd_errstr(status)));
 
  s = splnet();
- sc->ure_cdata.tx_cnt--;
+
+ ml_purge(&c->uc_mbuf_list);
+ c->uc_buflen = 0;
+
+ SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free, c, uc_list);
+
  if (status != USBD_NORMAL_COMPLETION) {
  if (status == USBD_NOT_STARTED || status == USBD_CANCELLED) {
  splx(s);
  return;
  }
+
  ifp->if_oerrors++;
  printf("%s: usb error on tx: %s\n", sc->ure_dev.dv_xname,
     usbd_errstr(status));
+
  if (status == USBD_STALLED)
  usbd_clear_endpoint_stall_async(
     sc->ure_ep[URE_ENDPT_TX]);
@@ -1819,83 +1929,19 @@ ure_txeof(struct usbd_xfer *xfer, void *
  }
 
  ifp->if_timer = 0;
- ifq_clr_oactive(&ifp->if_snd);
-
- m_freem(c->uc_mbuf);
- c->uc_mbuf = NULL;
-
- if (!ifq_empty(&ifp->if_snd))
- ure_start(ifp);
-
+ if (ifq_is_oactive(&ifp->if_snd))
+ ifq_restart(&ifp->if_snd);
  splx(s);
 }
 
 int
-ure_tx_list_init(struct ure_softc *sc)
-{
- struct ure_cdata *cd;
- struct ure_chain *c;
- int i;
-
- cd = &sc->ure_cdata;
- for (i = 0; i < sc->ure_tx_list_cnt; i++) {
- c = &cd->tx_chain[i];
- c->uc_sc = sc;
- c->uc_idx = i;
- c->uc_mbuf = NULL;
- if (c->uc_xfer == NULL) {
- c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
- if (c->uc_xfer == NULL)
- return ENOBUFS;
- c->uc_buf = usbd_alloc_buffer(c->uc_xfer, URE_TXBUFSZ);
- if (c->uc_buf == NULL) {
- usbd_free_xfer(c->uc_xfer);
- return ENOBUFS;
- }
- }
- }
-
- cd->tx_prod = cd->tx_cnt = 0;
-
- return (0);
-}
-
-int
-ure_rx_list_init(struct ure_softc *sc)
+ure_encap_txpkt(struct mbuf *m, char *buf, uint32_t maxlen)
 {
- struct ure_cdata *cd;
- struct ure_chain *c;
- int i;
-
- cd = &sc->ure_cdata;
- for (i = 0; i < URE_RX_LIST_CNT; i++) {
- c = &cd->rx_chain[i];
- c->uc_sc = sc;
- c->uc_idx = i;
- c->uc_mbuf = NULL;
- if (c->uc_xfer == NULL) {
- c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
- if (c->uc_xfer == NULL)
- return ENOBUFS;
- c->uc_buf = usbd_alloc_buffer(c->uc_xfer,
-    sc->ure_rxbufsz);
- if (c->uc_buf == NULL) {
- usbd_free_xfer(c->uc_xfer);
- return ENOBUFS;
- }
- }
- }
-
- return (0);
-}
-
-int
-ure_encap(struct ure_softc *sc, struct mbuf *m)
-{
- struct ure_chain *c;
- usbd_status err;
  struct ure_txpkt txhdr;
- uint32_t frm_len = 0, cflags = 0;
+ uint32_t len = sizeof(txhdr), cflags = 0;
+
+ if (len + m->m_pkthdr.len > maxlen)
+ return (-1);
 
  if ((m->m_pkthdr.csum_flags &
     (M_IPV4_CSUM_OUT | M_TCP_CSUM_OUT | M_UDP_CSUM_OUT)) != 0) {
@@ -1911,35 +1957,41 @@ ure_encap(struct ure_softc *sc, struct m
  cflags |= swap16(m->m_pkthdr.ether_vtag | URE_TXPKT_VLAN_TAG);
 #endif
 
- c = &sc->ure_cdata.tx_chain[sc->ure_cdata.tx_prod];
-
- /* header */
  txhdr.ure_pktlen = htole32(m->m_pkthdr.len | URE_TXPKT_TX_FS |
     URE_TXPKT_TX_LS);
  txhdr.ure_vlan = htole32(cflags);
- memcpy(c->uc_buf, &txhdr, sizeof(txhdr));
- frm_len = sizeof(txhdr);
+ memcpy(buf, &txhdr, len);
 
- /* packet */
- m_copydata(m, 0, m->m_pkthdr.len, c->uc_buf + frm_len);
- frm_len += m->m_pkthdr.len;
+ m_copydata(m, 0, m->m_pkthdr.len, buf + len);
+ len += m->m_pkthdr.len;
 
- c->uc_mbuf = m;
+ return (len);
+}
+
+int
+ure_encap_xfer(struct ifnet *ifp, struct ure_softc *sc, struct ure_chain *c)
+{
+ usbd_status err;
 
- DPRINTFN(2, ("tx %d bytes\n", frm_len));
  usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_TX], c, c->uc_buf,
-    frm_len, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000, ure_txeof);
+    c->uc_buflen, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000,
+    ure_txeof);
 
  err = usbd_transfer(c->uc_xfer);
- if (err != 0 && err != USBD_IN_PROGRESS) {
- c->uc_mbuf = NULL;
+ if (err != USBD_IN_PROGRESS) {
+ ml_purge(&c->uc_mbuf_list);
+ c->uc_buflen = 0;
  ure_stop(sc);
  return (EIO);
  }
 
- sc->ure_cdata.tx_cnt++;
- sc->ure_cdata.tx_prod = (sc->ure_cdata.tx_prod + 1) %
-    sc->ure_tx_list_cnt;
+#if NBPFILTER > 0
+ struct mbuf *m;
+
+ if (ifp->if_bpf)
+ MBUF_LIST_FOREACH(&c->uc_mbuf_list, m)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
 
  return (0);
 }

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Mikolaj Kucharski-3
On Thu, Jul 30, 2020 at 07:58:23AM -0700, Jonathon Fletcher wrote:
>
> Mikolaj,
>
> I hope the patch has been stable for you. I do have an update - it appears
> that a splx(s) got lost along the way (from the end of ure_txeof). This
> patch adds that back in and has some minor cleanup (variable name, cleanup
> defines, adjust the setting of flags and buffer sizes based on device type).
> I have been running this for a couple of days now.

Yes, no issues so far. I managed to have on Google Meet call, audio only
and one Zoom call (was surprised it actually worked on OpenBSD, but
needed to fake user agent to Linux). Network card didn't fail like
before your diff during VoIP calls. That's really great. I just compiled
new version of the kernel with your below patch, will reboot my laptop
tonight to switch to that new kernel. Thank you!

> Jonathon
>
>
> Index: sys/dev/usb/if_urereg.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/usb/if_urereg.h,v
> retrieving revision 1.8
> diff -u -p -u -r1.8 if_urereg.h
> --- sys/dev/usb/if_urereg.h 7 Dec 2019 08:45:28 -0000 1.8
> +++ sys/dev/usb/if_urereg.h 28 Jul 2020 15:30:41 -0000
> @@ -494,28 +494,30 @@ struct ure_txpkt {
>  #define URE_ENDPT_TX 1
>  #define URE_ENDPT_MAX 2
>  
> -#define URE_TX_LIST_CNT 8
> +#define URE_TX_LIST_CNT 1
>  #define URE_RX_LIST_CNT 1
> -#define URE_RX_BUF_ALIGN sizeof(uint64_t)
> +#define URE_TX_BUF_ALIGN 4
> +#define URE_RX_BUF_ALIGN 8
>  
> -#define URE_TXBUFSZ 16384
> -#define URE_8152_RXBUFSZ 16384
> -#define URE_8153_RXBUFSZ 32768
> +#define URE_TX_BUFSZ 16384
> +#define URE_8152_RX_BUFSZ 16384
> +#define URE_8153_RX_BUFSZ 32768
>  
>  struct ure_chain {
>   struct ure_softc *uc_sc;
>   struct usbd_xfer *uc_xfer;
>   char *uc_buf;
> - struct mbuf *uc_mbuf;
> - int uc_accum;
> - int uc_idx;
> + uint32_t uc_buflen;
> + uint32_t uc_bufmax;
> + struct mbuf_list uc_mbuf_list;
> + SLIST_ENTRY(ure_chain)  uc_list;
> + uint8_t uc_idx;
>  };
>  
>  struct ure_cdata {
> - struct ure_chain tx_chain[URE_TX_LIST_CNT];
> - struct ure_chain rx_chain[URE_RX_LIST_CNT];
> - int tx_prod;
> - int tx_cnt;
> + struct ure_chain ure_rx_chain[URE_RX_LIST_CNT];
> + struct ure_chain ure_tx_chain[URE_TX_LIST_CNT];
> + SLIST_HEAD(ure_list_head, ure_chain)    ure_tx_free;
>  };
>  
>  struct ure_softc {
> @@ -541,7 +543,7 @@ struct ure_softc {
>  
>   struct timeval ure_rx_notice;
>   int ure_rxbufsz;
> - int ure_tx_list_cnt;
> + int ure_txbufsz;
>  
>   int ure_phyno;
>  
> Index: sys/dev/usb/if_ure.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/usb/if_ure.c,v
> retrieving revision 1.16
> diff -u -p -u -r1.16 if_ure.c
> --- sys/dev/usb/if_ure.c 10 Jul 2020 13:26:41 -0000 1.16
> +++ sys/dev/usb/if_ure.c 28 Jul 2020 22:56:29 -0000
> @@ -117,11 +117,13 @@ void ure_miibus_writereg(struct device
>  void ure_lock_mii(struct ure_softc *);
>  void ure_unlock_mii(struct ure_softc *);
>  
> -int ure_encap(struct ure_softc *, struct mbuf *);
> +int ure_encap_txpkt(struct mbuf *, char *, uint32_t);
> +int ure_encap_xfer(struct ifnet *, struct ure_softc *, struct ure_chain *);
>  void ure_rxeof(struct usbd_xfer *, void *, usbd_status);
>  void ure_txeof(struct usbd_xfer *, void *, usbd_status);
> -int ure_rx_list_init(struct ure_softc *);
> -int ure_tx_list_init(struct ure_softc *);
> +int ure_xfer_list_init(struct ure_softc *, struct ure_chain *,
> +    uint32_t, int);
> +void ure_xfer_list_free(struct ure_softc *, struct ure_chain *, int);
>  
>  void ure_tick_task(void *);
>  void ure_tick(void *);
> @@ -625,12 +627,36 @@ void
>  ure_watchdog(struct ifnet *ifp)
>  {
>   struct ure_softc *sc = ifp->if_softc;
> + struct ure_chain *c;
> + usbd_status err;
> + int i, s;
> +
> + ifp->if_timer = 0;
>  
>   if (usbd_is_dying(sc->ure_udev))
>   return;
>  
> + if ((ifp->if_flags & (IFF_RUNNING|IFF_UP)) != (IFF_RUNNING|IFF_UP))
> + return;
> +
> + sc = ifp->if_softc;
> + s = splnet();
> +
>   ifp->if_oerrors++;
> - printf("%s: watchdog timeout\n", sc->ure_dev.dv_xname);
> + DPRINTF(("%s: watchdog timeout\n", sc->ure_dev.dv_xname));
> +
> + for (i = 0; i < URE_TX_LIST_CNT; i++) {
> + c = &sc->ure_cdata.ure_tx_chain[i];
> + if (ml_len(&c->uc_mbuf_list) > 0) {
> + usbd_get_xfer_status(c->uc_xfer, NULL, NULL, NULL,
> +    &err);
> + ure_txeof(c->uc_xfer, c, err);
> + }
> + }
> +
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
> + splx(s);
>  }
>  
>  void
> @@ -653,18 +679,26 @@ ure_init(void *xsc)
>   else
>   ure_rtl8153_nic_reset(sc);
>  
> - if (ure_rx_list_init(sc) == ENOBUFS) {
> + if (ure_xfer_list_init(sc, sc->ure_cdata.ure_rx_chain,
> + sc->ure_rxbufsz, URE_RX_LIST_CNT) == ENOBUFS) {
>   printf("%s: rx list init failed\n", sc->ure_dev.dv_xname);
>   splx(s);
>   return;
>   }
>  
> - if (ure_tx_list_init(sc) == ENOBUFS) {
> + if (ure_xfer_list_init(sc, sc->ure_cdata.ure_tx_chain,
> + sc->ure_txbufsz, URE_TX_LIST_CNT) == ENOBUFS) {
>   printf("%s: tx list init failed\n", sc->ure_dev.dv_xname);
>   splx(s);
>   return;
>   }
>  
> + /* Initialize the SLIST we are using for the multiple tx buffers */
> + SLIST_INIT(&sc->ure_cdata.ure_tx_free);
> + for (i = 0; i < URE_TX_LIST_CNT; i++)
> + SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free,
> + &sc->ure_cdata.ure_tx_chain[i], uc_list);
> +
>   /* Set MAC address. */
>   ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_CONFIG);
>   ure_write_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA | URE_BYTE_EN_SIX_BYTES,
> @@ -739,9 +773,9 @@ ure_init(void *xsc)
>  
>   /* Start up the receive pipe. */
>   for (i = 0; i < URE_RX_LIST_CNT; i++) {
> - c = &sc->ure_cdata.rx_chain[i];
> + c = &sc->ure_cdata.ure_rx_chain[i];
>   usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_RX],
> -    c, c->uc_buf, sc->ure_rxbufsz,
> +    c, c->uc_buf, c->uc_bufmax,
>      USBD_SHORT_XFER_OK | USBD_NO_COPY,
>      USBD_NO_TIMEOUT, ure_rxeof);
>   usbd_transfer(c->uc_xfer);
> @@ -763,34 +797,81 @@ void
>  ure_start(struct ifnet *ifp)
>  {
>   struct ure_softc *sc = ifp->if_softc;
> - struct mbuf *m_head = NULL;
> -
> - if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd) ||
> -    !(sc->ure_flags & URE_FLAG_LINK))
> + struct ure_cdata *cd = &sc->ure_cdata;
> + struct ure_chain *c;
> + struct mbuf *m = NULL;
> + uint32_t new_buflen;
> + int s, mlen;
> +
> + if (!(sc->ure_flags & URE_FLAG_LINK) ||
> + (ifp->if_flags & (IFF_RUNNING|IFF_UP)) !=
> +    (IFF_RUNNING|IFF_UP)) {
>   return;
> + }
> +
> + s = splnet();
>  
> - for (;;) {
> - if (sc->ure_cdata.tx_cnt == sc->ure_tx_list_cnt) {
> - ifq_set_oactive(&ifp->if_snd);
> + c = SLIST_FIRST(&cd->ure_tx_free);
> + while (c != NULL) {
> + m = NULL;
> + mlen = ifq_hdatalen(&ifp->if_snd);
> + if (mlen <= 0)
>   break;
> +
> + /*
> +                 * If packet larger than remaining space, send buffer and
> + * continue.
> + */
> + new_buflen = roundup(c->uc_buflen, URE_TX_BUF_ALIGN);
> + if (new_buflen + sizeof(struct ure_txpkt) + mlen >=
> +    c->uc_bufmax) {
> + SLIST_REMOVE_HEAD(&cd->ure_tx_free, uc_list);
> + if (ure_encap_xfer(ifp, sc, c)) {
> + SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
> +    uc_list);
> + break;
> + }
> + c = SLIST_FIRST(&cd->ure_tx_free);
> + continue;
>   }
>  
> - m_head = ifq_dequeue(&ifp->if_snd);
> - if (m_head == NULL)
> + m = ifq_dequeue(&ifp->if_snd);
> + if (m == NULL)
>   break;
>  
> - if (ure_encap(sc, m_head)) {
> - m_freem(m_head);
> - ifq_set_oactive(&ifp->if_snd);
> + /* Discard packet larger than buffer. */
> + if (mlen + sizeof(struct ure_txpkt) >= c->uc_bufmax) {
> + m_freem(m);
> + continue;
> + }
> +
> + /* Append packet to current buffer. */
> + mlen = ure_encap_txpkt(m, c->uc_buf + new_buflen,
> +    c->uc_bufmax - new_buflen);
> + if (mlen <= 0) {
> + m_freem(m);
>   break;
>   }
> + ml_enqueue(&c->uc_mbuf_list, m);
> + c->uc_buflen = new_buflen + mlen;
> + }
>  
> -#if NBPFILTER > 0
> - if (ifp->if_bpf)
> - bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
> -#endif
> - ifp->if_timer = 5;
> + if (c != NULL) {
> + /* Send current buffer unless empty */
> + if (c->uc_buflen > 0 && ml_len(&c->uc_mbuf_list) > 0) {
> + SLIST_REMOVE_HEAD(&cd->ure_tx_free, uc_list);
> + if (ure_encap_xfer(ifp, sc, c)) {
> + SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
> +    uc_list);
> + }
> + c = SLIST_FIRST(&cd->ure_tx_free);
> + }
>   }
> +
> + ifp->if_timer = 5;
> + if (c == NULL)
> + ifq_set_oactive(&ifp->if_snd);
> + splx(s);
>  }
>  
>  void
> @@ -810,9 +891,9 @@ ure_tick(void *xsc)
>  void
>  ure_stop(struct ure_softc *sc)
>  {
> - usbd_status err;
> + struct ure_cdata *cd;
>   struct ifnet *ifp;
> - int i;
> + usbd_status err;
>  
>   ure_reset(sc);
>  
> @@ -844,25 +925,54 @@ ure_stop(struct ure_softc *sc)
>   sc->ure_ep[URE_ENDPT_TX] = NULL;
>   }
>  
> - for (i = 0; i < URE_RX_LIST_CNT; i++) {
> - if (sc->ure_cdata.rx_chain[i].uc_mbuf != NULL) {
> - m_freem(sc->ure_cdata.rx_chain[i].uc_mbuf);
> - sc->ure_cdata.rx_chain[i].uc_mbuf = NULL;
> - }
> - if (sc->ure_cdata.rx_chain[i].uc_xfer != NULL) {
> - usbd_free_xfer(sc->ure_cdata.rx_chain[i].uc_xfer);
> - sc->ure_cdata.rx_chain[i].uc_xfer = NULL;
> + cd = &sc->ure_cdata;
> + ure_xfer_list_free(sc, cd->ure_rx_chain, URE_RX_LIST_CNT);
> + ure_xfer_list_free(sc, cd->ure_tx_chain, URE_TX_LIST_CNT);
> +}
> +
> +int
> +ure_xfer_list_init(struct ure_softc *sc, struct ure_chain *ch,
> +    uint32_t bufsize, int listlen)
> +{
> + struct ure_chain *c;
> + int i;
> +
> + for (i = 0; i < listlen; i++) {
> + c = &ch[i];
> + c->uc_sc = sc;
> + c->uc_idx = i;
> + c->uc_buflen = 0;
> + c->uc_bufmax = bufsize;
> + ml_init(&c->uc_mbuf_list);
> + if (c->uc_xfer == NULL) {
> + c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> + if (c->uc_xfer == NULL)
> + return (ENOBUFS);
> + c->uc_buf = usbd_alloc_buffer(c->uc_xfer, c->uc_bufmax);
> + if (c->uc_buf == NULL) {
> + usbd_free_xfer(c->uc_xfer);
> + c->uc_xfer = NULL;
> + return (ENOBUFS);
> + }
>   }
>   }
>  
> - for (i = 0; i < sc->ure_tx_list_cnt; i++) {
> - if (sc->ure_cdata.tx_chain[i].uc_mbuf != NULL) {
> - m_freem(sc->ure_cdata.tx_chain[i].uc_mbuf);
> - sc->ure_cdata.tx_chain[i].uc_mbuf = NULL;
> - }
> - if (sc->ure_cdata.tx_chain[i].uc_xfer != NULL) {
> - usbd_free_xfer(sc->ure_cdata.tx_chain[i].uc_xfer);
> - sc->ure_cdata.tx_chain[i].uc_xfer = NULL;
> + return (0);
> +}
> +
> +void
> +ure_xfer_list_free(struct ure_softc *sc, struct ure_chain *ch, int listlen)
> +{
> + int i;
> +
> + for (i = 0; i < listlen; i++) {
> + if (ch[i].uc_buf != NULL) {
> + ch[i].uc_buf = NULL;
> + }
> + ml_purge(&ch[i].uc_mbuf_list);
> + if (ch[i].uc_xfer != NULL) {
> + usbd_free_xfer(ch[i].uc_xfer);
> + ch[i].uc_xfer = NULL;
>   }
>   }
>  }
> @@ -1458,21 +1568,8 @@ ure_attach(struct device *parent, struct
>   }
>   }
>  
> - switch (uaa->product) {
> - case USB_PRODUCT_REALTEK_RTL8152:
> - sc->ure_flags |= URE_FLAG_8152;
> - sc->ure_rxbufsz = URE_8152_RXBUFSZ;
> - sc->ure_tx_list_cnt = 1;
> - break;
> - case USB_PRODUCT_REALTEK_RTL8156:
> - sc->ure_flags |= URE_FLAG_8156;
> - sc->ure_rxbufsz = URE_8153_RXBUFSZ;
> - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
> - break;
> - default:
> - sc->ure_rxbufsz = URE_8153_RXBUFSZ;
> - sc->ure_tx_list_cnt = 1;
> - }
> + sc->ure_txbufsz = URE_TX_BUFSZ;
> + sc->ure_rxbufsz = URE_8153_RX_BUFSZ;
>  
>   s = splnet();
>  
> @@ -1482,10 +1579,14 @@ ure_attach(struct device *parent, struct
>   ver = ure_read_2(sc, URE_PLA_TCR1, URE_MCU_TYPE_PLA) & URE_VERSION_MASK;
>   switch (ver) {
>   case 0x4c00:
> + sc->ure_flags = URE_FLAG_8152;
> + sc->ure_rxbufsz = URE_8152_RX_BUFSZ;
>   sc->ure_chip |= URE_CHIP_VER_4C00;
>   printf("RTL8152 (0x4c00)");
>   break;
>   case 0x4c10:
> + sc->ure_flags = URE_FLAG_8152;
> + sc->ure_rxbufsz = URE_8152_RX_BUFSZ;
>   sc->ure_chip |= URE_CHIP_VER_4C10;
>   printf("RTL8152 (0x4c10)");
>   break;
> @@ -1507,18 +1608,18 @@ ure_attach(struct device *parent, struct
>   break;
>   case 0x6000:
>   sc->ure_flags = URE_FLAG_8153B;
> - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
>   printf("RTL8153B (0x6000)");
>   break;
>   case 0x6010:
>   sc->ure_flags = URE_FLAG_8153B;
> - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
>   printf("RTL8153B (0x6010)");
>   break;
>   case 0x7020:
> + sc->ure_flags = URE_FLAG_8156;
>   printf("RTL8156 (0x7020)");
>   break;
>   case 0x7030:
> + sc->ure_flags = URE_FLAG_8156;
>   printf("RTL8156 (0x7030)");
>   break;
>   default:
> @@ -1721,7 +1822,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
>   goto done;
>   }
>  
> - buf += roundup(pktlen, 8);
> + buf += roundup(pktlen, URE_RX_BUF_ALIGN);
>  
>   memcpy(&rxhdr, buf, sizeof(rxhdr));
>   total_len -= sizeof(rxhdr);
> @@ -1734,7 +1835,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
>   goto done;
>   }
>  
> - total_len -= roundup(pktlen, 8);
> + total_len -= roundup(pktlen, URE_RX_BUF_ALIGN);
>   buf += sizeof(rxhdr);
>  
>   m = m_devget(buf, pktlen, ETHER_ALIGN);
> @@ -1799,18 +1900,27 @@ ure_txeof(struct usbd_xfer *xfer, void *
>   if (usbd_is_dying(sc->ure_udev))
>   return;
>  
> - DPRINTFN(2, ("tx completion\n"));
> + if (status != USBD_NORMAL_COMPLETION)
> + DPRINTF(("%s: %s uc_idx=%u : %s\n", sc->ure_dev.dv_xname,
> + __func__, c->uc_idx, usbd_errstr(status)));
>  
>   s = splnet();
> - sc->ure_cdata.tx_cnt--;
> +
> + ml_purge(&c->uc_mbuf_list);
> + c->uc_buflen = 0;
> +
> + SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free, c, uc_list);
> +
>   if (status != USBD_NORMAL_COMPLETION) {
>   if (status == USBD_NOT_STARTED || status == USBD_CANCELLED) {
>   splx(s);
>   return;
>   }
> +
>   ifp->if_oerrors++;
>   printf("%s: usb error on tx: %s\n", sc->ure_dev.dv_xname,
>      usbd_errstr(status));
> +
>   if (status == USBD_STALLED)
>   usbd_clear_endpoint_stall_async(
>      sc->ure_ep[URE_ENDPT_TX]);
> @@ -1819,83 +1929,19 @@ ure_txeof(struct usbd_xfer *xfer, void *
>   }
>  
>   ifp->if_timer = 0;
> - ifq_clr_oactive(&ifp->if_snd);
> -
> - m_freem(c->uc_mbuf);
> - c->uc_mbuf = NULL;
> -
> - if (!ifq_empty(&ifp->if_snd))
> - ure_start(ifp);
> -
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
>   splx(s);
>  }
>  
>  int
> -ure_tx_list_init(struct ure_softc *sc)
> -{
> - struct ure_cdata *cd;
> - struct ure_chain *c;
> - int i;
> -
> - cd = &sc->ure_cdata;
> - for (i = 0; i < sc->ure_tx_list_cnt; i++) {
> - c = &cd->tx_chain[i];
> - c->uc_sc = sc;
> - c->uc_idx = i;
> - c->uc_mbuf = NULL;
> - if (c->uc_xfer == NULL) {
> - c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> - if (c->uc_xfer == NULL)
> - return ENOBUFS;
> - c->uc_buf = usbd_alloc_buffer(c->uc_xfer, URE_TXBUFSZ);
> - if (c->uc_buf == NULL) {
> - usbd_free_xfer(c->uc_xfer);
> - return ENOBUFS;
> - }
> - }
> - }
> -
> - cd->tx_prod = cd->tx_cnt = 0;
> -
> - return (0);
> -}
> -
> -int
> -ure_rx_list_init(struct ure_softc *sc)
> +ure_encap_txpkt(struct mbuf *m, char *buf, uint32_t maxlen)
>  {
> - struct ure_cdata *cd;
> - struct ure_chain *c;
> - int i;
> -
> - cd = &sc->ure_cdata;
> - for (i = 0; i < URE_RX_LIST_CNT; i++) {
> - c = &cd->rx_chain[i];
> - c->uc_sc = sc;
> - c->uc_idx = i;
> - c->uc_mbuf = NULL;
> - if (c->uc_xfer == NULL) {
> - c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> - if (c->uc_xfer == NULL)
> - return ENOBUFS;
> - c->uc_buf = usbd_alloc_buffer(c->uc_xfer,
> -    sc->ure_rxbufsz);
> - if (c->uc_buf == NULL) {
> - usbd_free_xfer(c->uc_xfer);
> - return ENOBUFS;
> - }
> - }
> - }
> -
> - return (0);
> -}
> -
> -int
> -ure_encap(struct ure_softc *sc, struct mbuf *m)
> -{
> - struct ure_chain *c;
> - usbd_status err;
>   struct ure_txpkt txhdr;
> - uint32_t frm_len = 0, cflags = 0;
> + uint32_t len = sizeof(txhdr), cflags = 0;
> +
> + if (len + m->m_pkthdr.len > maxlen)
> + return (-1);
>  
>   if ((m->m_pkthdr.csum_flags &
>      (M_IPV4_CSUM_OUT | M_TCP_CSUM_OUT | M_UDP_CSUM_OUT)) != 0) {
> @@ -1911,35 +1957,41 @@ ure_encap(struct ure_softc *sc, struct m
>   cflags |= swap16(m->m_pkthdr.ether_vtag | URE_TXPKT_VLAN_TAG);
>  #endif
>  
> - c = &sc->ure_cdata.tx_chain[sc->ure_cdata.tx_prod];
> -
> - /* header */
>   txhdr.ure_pktlen = htole32(m->m_pkthdr.len | URE_TXPKT_TX_FS |
>      URE_TXPKT_TX_LS);
>   txhdr.ure_vlan = htole32(cflags);
> - memcpy(c->uc_buf, &txhdr, sizeof(txhdr));
> - frm_len = sizeof(txhdr);
> + memcpy(buf, &txhdr, len);
>  
> - /* packet */
> - m_copydata(m, 0, m->m_pkthdr.len, c->uc_buf + frm_len);
> - frm_len += m->m_pkthdr.len;
> + m_copydata(m, 0, m->m_pkthdr.len, buf + len);
> + len += m->m_pkthdr.len;
>  
> - c->uc_mbuf = m;
> + return (len);
> +}
> +
> +int
> +ure_encap_xfer(struct ifnet *ifp, struct ure_softc *sc, struct ure_chain *c)
> +{
> + usbd_status err;
>  
> - DPRINTFN(2, ("tx %d bytes\n", frm_len));
>   usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_TX], c, c->uc_buf,
> -    frm_len, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000, ure_txeof);
> +    c->uc_buflen, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000,
> +    ure_txeof);
>  
>   err = usbd_transfer(c->uc_xfer);
> - if (err != 0 && err != USBD_IN_PROGRESS) {
> - c->uc_mbuf = NULL;
> + if (err != USBD_IN_PROGRESS) {
> + ml_purge(&c->uc_mbuf_list);
> + c->uc_buflen = 0;
>   ure_stop(sc);
>   return (EIO);
>   }
>  
> - sc->ure_cdata.tx_cnt++;
> - sc->ure_cdata.tx_prod = (sc->ure_cdata.tx_prod + 1) %
> -    sc->ure_tx_list_cnt;
> +#if NBPFILTER > 0
> + struct mbuf *m;
> +
> + if (ifp->if_bpf)
> + MBUF_LIST_FOREACH(&c->uc_mbuf_list, m)
> + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> +#endif
>  
>   return (0);
>  }

--
Regards,
 Mikolaj

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Jonathon Fletcher-3
On Thu, Jul 30, 2020 at 06:56:48PM +0000, Mikolaj Kucharski wrote:

> On Thu, Jul 30, 2020 at 07:58:23AM -0700, Jonathon Fletcher wrote:
> >
> > Mikolaj,
> >
> > I hope the patch has been stable for you. I do have an update - it appears
> > that a splx(s) got lost along the way (from the end of ure_txeof). This
> > patch adds that back in and has some minor cleanup (variable name, cleanup
> > defines, adjust the setting of flags and buffer sizes based on device type).
> > I have been running this for a couple of days now.
>
> Yes, no issues so far. I managed to have on Google Meet call, audio only
> and one Zoom call (was surprised it actually worked on OpenBSD, but
> needed to fake user agent to Linux). Network card didn't fail like
> before your diff during VoIP calls. That's really great. I just compiled
> new version of the kernel with your below patch, will reboot my laptop
> tonight to switch to that new kernel. Thank you!

Assuming no issues with this, I would like to get it into the tree. This
patch predates Marcus' usbd_abort_pipe patch by a few weeks. Let me know
if you want an updated patch against HEAD after his patch.

After reports from multiple people, this patch improves tx performance on
ure from about 90-110MBps to between about 250MBps (Mikolaj), 290MBps (Joshua),
550MBps (me), 1900MBps (Kevin), depending on ure device, network setup, and
underlying hardware.

Thanks,
Jonathon

> >
> > Index: sys/dev/usb/if_urereg.h
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/usb/if_urereg.h,v
> > retrieving revision 1.8
> > diff -u -p -u -r1.8 if_urereg.h
> > --- sys/dev/usb/if_urereg.h 7 Dec 2019 08:45:28 -0000 1.8
> > +++ sys/dev/usb/if_urereg.h 28 Jul 2020 15:30:41 -0000
> > @@ -494,28 +494,30 @@ struct ure_txpkt {
> >  #define URE_ENDPT_TX 1
> >  #define URE_ENDPT_MAX 2
> >  
> > -#define URE_TX_LIST_CNT 8
> > +#define URE_TX_LIST_CNT 1
> >  #define URE_RX_LIST_CNT 1
> > -#define URE_RX_BUF_ALIGN sizeof(uint64_t)
> > +#define URE_TX_BUF_ALIGN 4
> > +#define URE_RX_BUF_ALIGN 8
> >  
> > -#define URE_TXBUFSZ 16384
> > -#define URE_8152_RXBUFSZ 16384
> > -#define URE_8153_RXBUFSZ 32768
> > +#define URE_TX_BUFSZ 16384
> > +#define URE_8152_RX_BUFSZ 16384
> > +#define URE_8153_RX_BUFSZ 32768
> >  
> >  struct ure_chain {
> >   struct ure_softc *uc_sc;
> >   struct usbd_xfer *uc_xfer;
> >   char *uc_buf;
> > - struct mbuf *uc_mbuf;
> > - int uc_accum;
> > - int uc_idx;
> > + uint32_t uc_buflen;
> > + uint32_t uc_bufmax;
> > + struct mbuf_list uc_mbuf_list;
> > + SLIST_ENTRY(ure_chain)  uc_list;
> > + uint8_t uc_idx;
> >  };
> >  
> >  struct ure_cdata {
> > - struct ure_chain tx_chain[URE_TX_LIST_CNT];
> > - struct ure_chain rx_chain[URE_RX_LIST_CNT];
> > - int tx_prod;
> > - int tx_cnt;
> > + struct ure_chain ure_rx_chain[URE_RX_LIST_CNT];
> > + struct ure_chain ure_tx_chain[URE_TX_LIST_CNT];
> > + SLIST_HEAD(ure_list_head, ure_chain)    ure_tx_free;
> >  };
> >  
> >  struct ure_softc {
> > @@ -541,7 +543,7 @@ struct ure_softc {
> >  
> >   struct timeval ure_rx_notice;
> >   int ure_rxbufsz;
> > - int ure_tx_list_cnt;
> > + int ure_txbufsz;
> >  
> >   int ure_phyno;
> >  
> > Index: sys/dev/usb/if_ure.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/usb/if_ure.c,v
> > retrieving revision 1.16
> > diff -u -p -u -r1.16 if_ure.c
> > --- sys/dev/usb/if_ure.c 10 Jul 2020 13:26:41 -0000 1.16
> > +++ sys/dev/usb/if_ure.c 28 Jul 2020 22:56:29 -0000
> > @@ -117,11 +117,13 @@ void ure_miibus_writereg(struct device
> >  void ure_lock_mii(struct ure_softc *);
> >  void ure_unlock_mii(struct ure_softc *);
> >  
> > -int ure_encap(struct ure_softc *, struct mbuf *);
> > +int ure_encap_txpkt(struct mbuf *, char *, uint32_t);
> > +int ure_encap_xfer(struct ifnet *, struct ure_softc *, struct ure_chain *);
> >  void ure_rxeof(struct usbd_xfer *, void *, usbd_status);
> >  void ure_txeof(struct usbd_xfer *, void *, usbd_status);
> > -int ure_rx_list_init(struct ure_softc *);
> > -int ure_tx_list_init(struct ure_softc *);
> > +int ure_xfer_list_init(struct ure_softc *, struct ure_chain *,
> > +    uint32_t, int);
> > +void ure_xfer_list_free(struct ure_softc *, struct ure_chain *, int);
> >  
> >  void ure_tick_task(void *);
> >  void ure_tick(void *);
> > @@ -625,12 +627,36 @@ void
> >  ure_watchdog(struct ifnet *ifp)
> >  {
> >   struct ure_softc *sc = ifp->if_softc;
> > + struct ure_chain *c;
> > + usbd_status err;
> > + int i, s;
> > +
> > + ifp->if_timer = 0;
> >  
> >   if (usbd_is_dying(sc->ure_udev))
> >   return;
> >  
> > + if ((ifp->if_flags & (IFF_RUNNING|IFF_UP)) != (IFF_RUNNING|IFF_UP))
> > + return;
> > +
> > + sc = ifp->if_softc;
> > + s = splnet();
> > +
> >   ifp->if_oerrors++;
> > - printf("%s: watchdog timeout\n", sc->ure_dev.dv_xname);
> > + DPRINTF(("%s: watchdog timeout\n", sc->ure_dev.dv_xname));
> > +
> > + for (i = 0; i < URE_TX_LIST_CNT; i++) {
> > + c = &sc->ure_cdata.ure_tx_chain[i];
> > + if (ml_len(&c->uc_mbuf_list) > 0) {
> > + usbd_get_xfer_status(c->uc_xfer, NULL, NULL, NULL,
> > +    &err);
> > + ure_txeof(c->uc_xfer, c, err);
> > + }
> > + }
> > +
> > + if (ifq_is_oactive(&ifp->if_snd))
> > + ifq_restart(&ifp->if_snd);
> > + splx(s);
> >  }
> >  
> >  void
> > @@ -653,18 +679,26 @@ ure_init(void *xsc)
> >   else
> >   ure_rtl8153_nic_reset(sc);
> >  
> > - if (ure_rx_list_init(sc) == ENOBUFS) {
> > + if (ure_xfer_list_init(sc, sc->ure_cdata.ure_rx_chain,
> > + sc->ure_rxbufsz, URE_RX_LIST_CNT) == ENOBUFS) {
> >   printf("%s: rx list init failed\n", sc->ure_dev.dv_xname);
> >   splx(s);
> >   return;
> >   }
> >  
> > - if (ure_tx_list_init(sc) == ENOBUFS) {
> > + if (ure_xfer_list_init(sc, sc->ure_cdata.ure_tx_chain,
> > + sc->ure_txbufsz, URE_TX_LIST_CNT) == ENOBUFS) {
> >   printf("%s: tx list init failed\n", sc->ure_dev.dv_xname);
> >   splx(s);
> >   return;
> >   }
> >  
> > + /* Initialize the SLIST we are using for the multiple tx buffers */
> > + SLIST_INIT(&sc->ure_cdata.ure_tx_free);
> > + for (i = 0; i < URE_TX_LIST_CNT; i++)
> > + SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free,
> > + &sc->ure_cdata.ure_tx_chain[i], uc_list);
> > +
> >   /* Set MAC address. */
> >   ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_CONFIG);
> >   ure_write_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA | URE_BYTE_EN_SIX_BYTES,
> > @@ -739,9 +773,9 @@ ure_init(void *xsc)
> >  
> >   /* Start up the receive pipe. */
> >   for (i = 0; i < URE_RX_LIST_CNT; i++) {
> > - c = &sc->ure_cdata.rx_chain[i];
> > + c = &sc->ure_cdata.ure_rx_chain[i];
> >   usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_RX],
> > -    c, c->uc_buf, sc->ure_rxbufsz,
> > +    c, c->uc_buf, c->uc_bufmax,
> >      USBD_SHORT_XFER_OK | USBD_NO_COPY,
> >      USBD_NO_TIMEOUT, ure_rxeof);
> >   usbd_transfer(c->uc_xfer);
> > @@ -763,34 +797,81 @@ void
> >  ure_start(struct ifnet *ifp)
> >  {
> >   struct ure_softc *sc = ifp->if_softc;
> > - struct mbuf *m_head = NULL;
> > -
> > - if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd) ||
> > -    !(sc->ure_flags & URE_FLAG_LINK))
> > + struct ure_cdata *cd = &sc->ure_cdata;
> > + struct ure_chain *c;
> > + struct mbuf *m = NULL;
> > + uint32_t new_buflen;
> > + int s, mlen;
> > +
> > + if (!(sc->ure_flags & URE_FLAG_LINK) ||
> > + (ifp->if_flags & (IFF_RUNNING|IFF_UP)) !=
> > +    (IFF_RUNNING|IFF_UP)) {
> >   return;
> > + }
> > +
> > + s = splnet();
> >  
> > - for (;;) {
> > - if (sc->ure_cdata.tx_cnt == sc->ure_tx_list_cnt) {
> > - ifq_set_oactive(&ifp->if_snd);
> > + c = SLIST_FIRST(&cd->ure_tx_free);
> > + while (c != NULL) {
> > + m = NULL;
> > + mlen = ifq_hdatalen(&ifp->if_snd);
> > + if (mlen <= 0)
> >   break;
> > +
> > + /*
> > +                 * If packet larger than remaining space, send buffer and
> > + * continue.
> > + */
> > + new_buflen = roundup(c->uc_buflen, URE_TX_BUF_ALIGN);
> > + if (new_buflen + sizeof(struct ure_txpkt) + mlen >=
> > +    c->uc_bufmax) {
> > + SLIST_REMOVE_HEAD(&cd->ure_tx_free, uc_list);
> > + if (ure_encap_xfer(ifp, sc, c)) {
> > + SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
> > +    uc_list);
> > + break;
> > + }
> > + c = SLIST_FIRST(&cd->ure_tx_free);
> > + continue;
> >   }
> >  
> > - m_head = ifq_dequeue(&ifp->if_snd);
> > - if (m_head == NULL)
> > + m = ifq_dequeue(&ifp->if_snd);
> > + if (m == NULL)
> >   break;
> >  
> > - if (ure_encap(sc, m_head)) {
> > - m_freem(m_head);
> > - ifq_set_oactive(&ifp->if_snd);
> > + /* Discard packet larger than buffer. */
> > + if (mlen + sizeof(struct ure_txpkt) >= c->uc_bufmax) {
> > + m_freem(m);
> > + continue;
> > + }
> > +
> > + /* Append packet to current buffer. */
> > + mlen = ure_encap_txpkt(m, c->uc_buf + new_buflen,
> > +    c->uc_bufmax - new_buflen);
> > + if (mlen <= 0) {
> > + m_freem(m);
> >   break;
> >   }
> > + ml_enqueue(&c->uc_mbuf_list, m);
> > + c->uc_buflen = new_buflen + mlen;
> > + }
> >  
> > -#if NBPFILTER > 0
> > - if (ifp->if_bpf)
> > - bpf_mtap(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
> > -#endif
> > - ifp->if_timer = 5;
> > + if (c != NULL) {
> > + /* Send current buffer unless empty */
> > + if (c->uc_buflen > 0 && ml_len(&c->uc_mbuf_list) > 0) {
> > + SLIST_REMOVE_HEAD(&cd->ure_tx_free, uc_list);
> > + if (ure_encap_xfer(ifp, sc, c)) {
> > + SLIST_INSERT_HEAD(&cd->ure_tx_free, c,
> > +    uc_list);
> > + }
> > + c = SLIST_FIRST(&cd->ure_tx_free);
> > + }
> >   }
> > +
> > + ifp->if_timer = 5;
> > + if (c == NULL)
> > + ifq_set_oactive(&ifp->if_snd);
> > + splx(s);
> >  }
> >  
> >  void
> > @@ -810,9 +891,9 @@ ure_tick(void *xsc)
> >  void
> >  ure_stop(struct ure_softc *sc)
> >  {
> > - usbd_status err;
> > + struct ure_cdata *cd;
> >   struct ifnet *ifp;
> > - int i;
> > + usbd_status err;
> >  
> >   ure_reset(sc);
> >  
> > @@ -844,25 +925,54 @@ ure_stop(struct ure_softc *sc)
> >   sc->ure_ep[URE_ENDPT_TX] = NULL;
> >   }
> >  
> > - for (i = 0; i < URE_RX_LIST_CNT; i++) {
> > - if (sc->ure_cdata.rx_chain[i].uc_mbuf != NULL) {
> > - m_freem(sc->ure_cdata.rx_chain[i].uc_mbuf);
> > - sc->ure_cdata.rx_chain[i].uc_mbuf = NULL;
> > - }
> > - if (sc->ure_cdata.rx_chain[i].uc_xfer != NULL) {
> > - usbd_free_xfer(sc->ure_cdata.rx_chain[i].uc_xfer);
> > - sc->ure_cdata.rx_chain[i].uc_xfer = NULL;
> > + cd = &sc->ure_cdata;
> > + ure_xfer_list_free(sc, cd->ure_rx_chain, URE_RX_LIST_CNT);
> > + ure_xfer_list_free(sc, cd->ure_tx_chain, URE_TX_LIST_CNT);
> > +}
> > +
> > +int
> > +ure_xfer_list_init(struct ure_softc *sc, struct ure_chain *ch,
> > +    uint32_t bufsize, int listlen)
> > +{
> > + struct ure_chain *c;
> > + int i;
> > +
> > + for (i = 0; i < listlen; i++) {
> > + c = &ch[i];
> > + c->uc_sc = sc;
> > + c->uc_idx = i;
> > + c->uc_buflen = 0;
> > + c->uc_bufmax = bufsize;
> > + ml_init(&c->uc_mbuf_list);
> > + if (c->uc_xfer == NULL) {
> > + c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> > + if (c->uc_xfer == NULL)
> > + return (ENOBUFS);
> > + c->uc_buf = usbd_alloc_buffer(c->uc_xfer, c->uc_bufmax);
> > + if (c->uc_buf == NULL) {
> > + usbd_free_xfer(c->uc_xfer);
> > + c->uc_xfer = NULL;
> > + return (ENOBUFS);
> > + }
> >   }
> >   }
> >  
> > - for (i = 0; i < sc->ure_tx_list_cnt; i++) {
> > - if (sc->ure_cdata.tx_chain[i].uc_mbuf != NULL) {
> > - m_freem(sc->ure_cdata.tx_chain[i].uc_mbuf);
> > - sc->ure_cdata.tx_chain[i].uc_mbuf = NULL;
> > - }
> > - if (sc->ure_cdata.tx_chain[i].uc_xfer != NULL) {
> > - usbd_free_xfer(sc->ure_cdata.tx_chain[i].uc_xfer);
> > - sc->ure_cdata.tx_chain[i].uc_xfer = NULL;
> > + return (0);
> > +}
> > +
> > +void
> > +ure_xfer_list_free(struct ure_softc *sc, struct ure_chain *ch, int listlen)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < listlen; i++) {
> > + if (ch[i].uc_buf != NULL) {
> > + ch[i].uc_buf = NULL;
> > + }
> > + ml_purge(&ch[i].uc_mbuf_list);
> > + if (ch[i].uc_xfer != NULL) {
> > + usbd_free_xfer(ch[i].uc_xfer);
> > + ch[i].uc_xfer = NULL;
> >   }
> >   }
> >  }
> > @@ -1458,21 +1568,8 @@ ure_attach(struct device *parent, struct
> >   }
> >   }
> >  
> > - switch (uaa->product) {
> > - case USB_PRODUCT_REALTEK_RTL8152:
> > - sc->ure_flags |= URE_FLAG_8152;
> > - sc->ure_rxbufsz = URE_8152_RXBUFSZ;
> > - sc->ure_tx_list_cnt = 1;
> > - break;
> > - case USB_PRODUCT_REALTEK_RTL8156:
> > - sc->ure_flags |= URE_FLAG_8156;
> > - sc->ure_rxbufsz = URE_8153_RXBUFSZ;
> > - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
> > - break;
> > - default:
> > - sc->ure_rxbufsz = URE_8153_RXBUFSZ;
> > - sc->ure_tx_list_cnt = 1;
> > - }
> > + sc->ure_txbufsz = URE_TX_BUFSZ;
> > + sc->ure_rxbufsz = URE_8153_RX_BUFSZ;
> >  
> >   s = splnet();
> >  
> > @@ -1482,10 +1579,14 @@ ure_attach(struct device *parent, struct
> >   ver = ure_read_2(sc, URE_PLA_TCR1, URE_MCU_TYPE_PLA) & URE_VERSION_MASK;
> >   switch (ver) {
> >   case 0x4c00:
> > + sc->ure_flags = URE_FLAG_8152;
> > + sc->ure_rxbufsz = URE_8152_RX_BUFSZ;
> >   sc->ure_chip |= URE_CHIP_VER_4C00;
> >   printf("RTL8152 (0x4c00)");
> >   break;
> >   case 0x4c10:
> > + sc->ure_flags = URE_FLAG_8152;
> > + sc->ure_rxbufsz = URE_8152_RX_BUFSZ;
> >   sc->ure_chip |= URE_CHIP_VER_4C10;
> >   printf("RTL8152 (0x4c10)");
> >   break;
> > @@ -1507,18 +1608,18 @@ ure_attach(struct device *parent, struct
> >   break;
> >   case 0x6000:
> >   sc->ure_flags = URE_FLAG_8153B;
> > - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
> >   printf("RTL8153B (0x6000)");
> >   break;
> >   case 0x6010:
> >   sc->ure_flags = URE_FLAG_8153B;
> > - sc->ure_tx_list_cnt = URE_TX_LIST_CNT;
> >   printf("RTL8153B (0x6010)");
> >   break;
> >   case 0x7020:
> > + sc->ure_flags = URE_FLAG_8156;
> >   printf("RTL8156 (0x7020)");
> >   break;
> >   case 0x7030:
> > + sc->ure_flags = URE_FLAG_8156;
> >   printf("RTL8156 (0x7030)");
> >   break;
> >   default:
> > @@ -1721,7 +1822,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
> >   goto done;
> >   }
> >  
> > - buf += roundup(pktlen, 8);
> > + buf += roundup(pktlen, URE_RX_BUF_ALIGN);
> >  
> >   memcpy(&rxhdr, buf, sizeof(rxhdr));
> >   total_len -= sizeof(rxhdr);
> > @@ -1734,7 +1835,7 @@ ure_rxeof(struct usbd_xfer *xfer, void *
> >   goto done;
> >   }
> >  
> > - total_len -= roundup(pktlen, 8);
> > + total_len -= roundup(pktlen, URE_RX_BUF_ALIGN);
> >   buf += sizeof(rxhdr);
> >  
> >   m = m_devget(buf, pktlen, ETHER_ALIGN);
> > @@ -1799,18 +1900,27 @@ ure_txeof(struct usbd_xfer *xfer, void *
> >   if (usbd_is_dying(sc->ure_udev))
> >   return;
> >  
> > - DPRINTFN(2, ("tx completion\n"));
> > + if (status != USBD_NORMAL_COMPLETION)
> > + DPRINTF(("%s: %s uc_idx=%u : %s\n", sc->ure_dev.dv_xname,
> > + __func__, c->uc_idx, usbd_errstr(status)));
> >  
> >   s = splnet();
> > - sc->ure_cdata.tx_cnt--;
> > +
> > + ml_purge(&c->uc_mbuf_list);
> > + c->uc_buflen = 0;
> > +
> > + SLIST_INSERT_HEAD(&sc->ure_cdata.ure_tx_free, c, uc_list);
> > +
> >   if (status != USBD_NORMAL_COMPLETION) {
> >   if (status == USBD_NOT_STARTED || status == USBD_CANCELLED) {
> >   splx(s);
> >   return;
> >   }
> > +
> >   ifp->if_oerrors++;
> >   printf("%s: usb error on tx: %s\n", sc->ure_dev.dv_xname,
> >      usbd_errstr(status));
> > +
> >   if (status == USBD_STALLED)
> >   usbd_clear_endpoint_stall_async(
> >      sc->ure_ep[URE_ENDPT_TX]);
> > @@ -1819,83 +1929,19 @@ ure_txeof(struct usbd_xfer *xfer, void *
> >   }
> >  
> >   ifp->if_timer = 0;
> > - ifq_clr_oactive(&ifp->if_snd);
> > -
> > - m_freem(c->uc_mbuf);
> > - c->uc_mbuf = NULL;
> > -
> > - if (!ifq_empty(&ifp->if_snd))
> > - ure_start(ifp);
> > -
> > + if (ifq_is_oactive(&ifp->if_snd))
> > + ifq_restart(&ifp->if_snd);
> >   splx(s);
> >  }
> >  
> >  int
> > -ure_tx_list_init(struct ure_softc *sc)
> > -{
> > - struct ure_cdata *cd;
> > - struct ure_chain *c;
> > - int i;
> > -
> > - cd = &sc->ure_cdata;
> > - for (i = 0; i < sc->ure_tx_list_cnt; i++) {
> > - c = &cd->tx_chain[i];
> > - c->uc_sc = sc;
> > - c->uc_idx = i;
> > - c->uc_mbuf = NULL;
> > - if (c->uc_xfer == NULL) {
> > - c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> > - if (c->uc_xfer == NULL)
> > - return ENOBUFS;
> > - c->uc_buf = usbd_alloc_buffer(c->uc_xfer, URE_TXBUFSZ);
> > - if (c->uc_buf == NULL) {
> > - usbd_free_xfer(c->uc_xfer);
> > - return ENOBUFS;
> > - }
> > - }
> > - }
> > -
> > - cd->tx_prod = cd->tx_cnt = 0;
> > -
> > - return (0);
> > -}
> > -
> > -int
> > -ure_rx_list_init(struct ure_softc *sc)
> > +ure_encap_txpkt(struct mbuf *m, char *buf, uint32_t maxlen)
> >  {
> > - struct ure_cdata *cd;
> > - struct ure_chain *c;
> > - int i;
> > -
> > - cd = &sc->ure_cdata;
> > - for (i = 0; i < URE_RX_LIST_CNT; i++) {
> > - c = &cd->rx_chain[i];
> > - c->uc_sc = sc;
> > - c->uc_idx = i;
> > - c->uc_mbuf = NULL;
> > - if (c->uc_xfer == NULL) {
> > - c->uc_xfer = usbd_alloc_xfer(sc->ure_udev);
> > - if (c->uc_xfer == NULL)
> > - return ENOBUFS;
> > - c->uc_buf = usbd_alloc_buffer(c->uc_xfer,
> > -    sc->ure_rxbufsz);
> > - if (c->uc_buf == NULL) {
> > - usbd_free_xfer(c->uc_xfer);
> > - return ENOBUFS;
> > - }
> > - }
> > - }
> > -
> > - return (0);
> > -}
> > -
> > -int
> > -ure_encap(struct ure_softc *sc, struct mbuf *m)
> > -{
> > - struct ure_chain *c;
> > - usbd_status err;
> >   struct ure_txpkt txhdr;
> > - uint32_t frm_len = 0, cflags = 0;
> > + uint32_t len = sizeof(txhdr), cflags = 0;
> > +
> > + if (len + m->m_pkthdr.len > maxlen)
> > + return (-1);
> >  
> >   if ((m->m_pkthdr.csum_flags &
> >      (M_IPV4_CSUM_OUT | M_TCP_CSUM_OUT | M_UDP_CSUM_OUT)) != 0) {
> > @@ -1911,35 +1957,41 @@ ure_encap(struct ure_softc *sc, struct m
> >   cflags |= swap16(m->m_pkthdr.ether_vtag | URE_TXPKT_VLAN_TAG);
> >  #endif
> >  
> > - c = &sc->ure_cdata.tx_chain[sc->ure_cdata.tx_prod];
> > -
> > - /* header */
> >   txhdr.ure_pktlen = htole32(m->m_pkthdr.len | URE_TXPKT_TX_FS |
> >      URE_TXPKT_TX_LS);
> >   txhdr.ure_vlan = htole32(cflags);
> > - memcpy(c->uc_buf, &txhdr, sizeof(txhdr));
> > - frm_len = sizeof(txhdr);
> > + memcpy(buf, &txhdr, len);
> >  
> > - /* packet */
> > - m_copydata(m, 0, m->m_pkthdr.len, c->uc_buf + frm_len);
> > - frm_len += m->m_pkthdr.len;
> > + m_copydata(m, 0, m->m_pkthdr.len, buf + len);
> > + len += m->m_pkthdr.len;
> >  
> > - c->uc_mbuf = m;
> > + return (len);
> > +}
> > +
> > +int
> > +ure_encap_xfer(struct ifnet *ifp, struct ure_softc *sc, struct ure_chain *c)
> > +{
> > + usbd_status err;
> >  
> > - DPRINTFN(2, ("tx %d bytes\n", frm_len));
> >   usbd_setup_xfer(c->uc_xfer, sc->ure_ep[URE_ENDPT_TX], c, c->uc_buf,
> > -    frm_len, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000, ure_txeof);
> > +    c->uc_buflen, USBD_FORCE_SHORT_XFER | USBD_NO_COPY, 10000,
> > +    ure_txeof);
> >  
> >   err = usbd_transfer(c->uc_xfer);
> > - if (err != 0 && err != USBD_IN_PROGRESS) {
> > - c->uc_mbuf = NULL;
> > + if (err != USBD_IN_PROGRESS) {
> > + ml_purge(&c->uc_mbuf_list);
> > + c->uc_buflen = 0;
> >   ure_stop(sc);
> >   return (EIO);
> >   }
> >  
> > - sc->ure_cdata.tx_cnt++;
> > - sc->ure_cdata.tx_prod = (sc->ure_cdata.tx_prod + 1) %
> > -    sc->ure_tx_list_cnt;
> > +#if NBPFILTER > 0
> > + struct mbuf *m;
> > +
> > + if (ifp->if_bpf)
> > + MBUF_LIST_FOREACH(&c->uc_mbuf_list, m)
> > + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> > +#endif
> >  
> >   return (0);
> >  }
>
> --
> Regards,
>  Mikolaj

Reply | Threaded
Open this post in threaded view
|

Re: Improve ure(4) performance

Mikolaj Kucharski-3
On Mon, Aug 03, 2020 at 09:07:39AM -0700, Jonathon Fletcher wrote:

>
> Assuming no issues with this, I would like to get it into the tree. This
> patch predates Marcus' usbd_abort_pipe patch by a few weeks. Let me know
> if you want an updated patch against HEAD after his patch.
>
> After reports from multiple people, this patch improves tx performance on
> ure from about 90-110MBps to between about 250MBps (Mikolaj), 290MBps (Joshua),
> 550MBps (me), 1900MBps (Kevin), depending on ure device, network setup, and
> underlying hardware.
>

No issues on my end. All works really nice. I managed to have couple of
Zoom calls on ure(4) and card didn't crash like before Jonathon's diff.
I've also updated the tree with the diff after usbd_abort_pipe commit
from mglocker@ and so far I don't see any issues. Network card seems
stable. At the time of the writing machine has 21 hours of uptime.

--
Regards,
 Mikolaj