Xen virtual network (Netfront) driver

Xen virtual network (Netfront) driver

Mike Belopuhov
There's still stuff to do, but it receives and transmits reliably
(at least on modern Xen) so I'd like to get it in.  Man page will
follow.

OK?

diff --git sys/arch/amd64/conf/GENERIC sys/arch/amd64/conf/GENERIC
index fca4459..77e07cc 100644
--- sys/arch/amd64/conf/GENERIC
+++ sys/arch/amd64/conf/GENERIC
@@ -67,10 +67,11 @@ mpbios0 at bios0
 ipmi0 at mainbus? disable # IPMI
 
 vmt0 at pvbus? # VMware Tools
 
 #xen0 at pvbus? # Xen HVM domU
+#xnf* at xen? # Xen Netfront
 
 option PCIVERBOSE
 option USBVERBOSE
 
 pchb* at pci? # PCI-Host bridges
diff --git sys/dev/pv/files.pv sys/dev/pv/files.pv
index d0e3b8c..e1272b2 100644
--- sys/dev/pv/files.pv
+++ sys/dev/pv/files.pv
@@ -16,5 +16,9 @@ file dev/pv/vmt.c vmt needs-flag
 # Xen
 device xen {}
 attach xen at pvbus
 file dev/pv/xen.c xen needs-flag
 file dev/pv/xenstore.c xen
+
+device xnf: ether, ifnet, ifmedia
+attach xnf at xen
+file dev/pv/if_xnf.c xnf
diff --git sys/dev/pv/if_xnf.c sys/dev/pv/if_xnf.c
new file mode 100644
index 0000000..7f8b08e
--- /dev/null
+++ sys/dev/pv/if_xnf.c
@@ -0,0 +1,1022 @@
+/*
+ * Copyright (c) 2015 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bpfilter.h"
+#include "vlan.h"
+#include "xen.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/atomic.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/queue.h>
+#include <sys/timeout.h>
+#include <sys/pool.h>
+
+#include <machine/bus.h>
+
+#include <dev/pv/xenreg.h>
+#include <dev/pv/xenvar.h>
+
+#include <net/if.h>
+#include <net/if_media.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+
+
+/*
+ * Rx ring
+ */
+
+struct xnf_rx_req {
+ uint16_t rxq_id;
+ uint16_t rxq_pad;
+ uint32_t rxq_ref;
+} __packed;
+
+struct xnf_rx_rsp {
+ uint16_t rxp_id;
+ uint16_t rxp_offset;
+ uint16_t rxp_flags;
+#define  XNF_RXF_CSUM  0x0001
+#define  XNF_RXF_BLANK  0x0002
+#define  XNF_RXF_CHUNK  0x0004
+#define  XNF_RXF_EXTRA  0x0008
+ int16_t rxp_status;
+} __packed;
+
+union xnf_rx_desc {
+ struct xnf_rx_req rxd_req;
+ struct xnf_rx_rsp rxd_rsp;
+} __packed;
+
+#define XNF_RX_DESC 256
+#define XNF_MCLEN PAGE_SIZE
+#define XNF_RX_MIN 32
+
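+/*
+ * Ring layout shared with the backend: the first 64 bytes of the
+ * page hold the free-running producer/consumer indices and event
+ * counters, followed by the descriptor array.
+ */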
+struct xnf_rx_ring {
+ uint32_t rxr_prod;
+ uint32_t rxr_req_evt;
+ uint32_t rxr_cons;
+ uint32_t rxr_rsp_evt;
+ uint32_t rxr_reserved[12];
+ union xnf_rx_desc rxr_desc[XNF_RX_DESC];
+} __packed;
+
+
+/*
+ * Tx ring
+ */
+
+struct xnf_tx_req {
+ uint32_t txq_ref;
+ uint16_t txq_offset;
+ uint16_t txq_flags;
+#define  XNF_TXF_CSUM  0x0001
+#define  XNF_TXF_VALID  0x0002
+#define  XNF_TXF_CHUNK  0x0004
+#define  XNF_TXF_ETXRA  0x0008
+ uint16_t txq_id;
+ uint16_t txq_size;
+} __packed;
+
+struct xnf_tx_rsp {
+ uint16_t txp_id;
+ int16_t txp_status;
+} __packed;
+
+union xnf_tx_desc {
+ struct xnf_tx_req txd_req;
+ struct xnf_tx_rsp txd_rsp;
+} __packed;
+
+#define XNF_TX_DESC 256
+#define XNF_TX_FRAG 8 /* down from 18 */
+
+struct xnf_tx_ring {
+ uint32_t txr_prod;
+ uint32_t txr_req_evt;
+ uint32_t txr_cons;
+ uint32_t txr_rsp_evt;
+ uint32_t txr_reserved[12];
+ union xnf_tx_desc txr_desc[XNF_TX_DESC];
+} __packed;
+
+
+/* Management frame, "extra info" in Xen parlance */
+struct xnf_mgmt {
+ uint8_t mg_type;
+#define  XNF_MGMT_MCAST_ADD 2
+#define  XNF_MGMT_MCAST_DEL 3
+ uint8_t mg_flags;
+ union {
+ uint8_t mgu_mcaddr[ETHER_ADDR_LEN];
+ uint16_t mgu_pad[3];
+ } u;
+#define mg_mcaddr u.mgu_mcaddr
+} __packed;
+
+
+struct xnf_softc {
+ struct device sc_dev;
+ struct xen_attach_args sc_xa;
+ struct xen_softc *sc_xen;
+ bus_dma_tag_t sc_dmat;
+
+ struct arpcom sc_ac;
+ struct ifmedia sc_media;
+
+ xen_intr_handle_t sc_xih;
+
+ /* Rx ring */
+ struct xnf_rx_ring *sc_rx_ring;
+ int sc_rx_cons;
+ bus_dmamap_t sc_rx_rmap;  /* map for the ring */
+ bus_dma_segment_t sc_rx_seg;
+ uint32_t sc_rx_ref;  /* grant table ref */
+ struct mbuf *sc_rx_buf[XNF_RX_DESC];
+ bus_dmamap_t sc_rx_dmap[XNF_RX_DESC]; /* maps for packets */
+ struct mbuf *sc_rx_cbuf[2];    /* chain handling */
+ struct if_rxring sc_rx_slots;
+ struct timeout sc_rx_fill;
+
+ /* Tx ring */
+ struct xnf_tx_ring *sc_tx_ring;
+ int sc_tx_cons;
+ bus_dmamap_t sc_tx_rmap;  /* map for the ring */
+ bus_dma_segment_t sc_tx_seg;
+ uint32_t sc_tx_ref;  /* grant table ref */
+ struct mbuf *sc_tx_buf[XNF_TX_DESC];
+ bus_dmamap_t sc_tx_dmap[XNF_TX_DESC]; /* maps for packets */
+};
+
+int xnf_match(struct device *, void *, void *);
+void xnf_attach(struct device *, struct device *, void *);
+int xnf_lladdr(struct xnf_softc *);
+int xnf_ioctl(struct ifnet *, u_long, caddr_t);
+int xnf_media_change(struct ifnet *);
+void xnf_media_status(struct ifnet *, struct ifmediareq *);
+int xnf_iff(struct xnf_softc *);
+void xnf_init(struct xnf_softc *);
+void xnf_stop(struct xnf_softc *);
+void xnf_start(struct ifnet *);
+int xnf_encap(struct xnf_softc *, struct mbuf *, uint32_t *);
+void xnf_intr(void *);
+int xnf_txeof(struct xnf_softc *);
+int xnf_rxeof(struct xnf_softc *);
+void xnf_rx_ring_fill(void *);
+int xnf_rx_ring_create(struct xnf_softc *);
+void xnf_rx_ring_drain(struct xnf_softc *);
+void xnf_rx_ring_destroy(struct xnf_softc *);
+int xnf_tx_ring_create(struct xnf_softc *);
+void xnf_tx_ring_drain(struct xnf_softc *);
+void xnf_tx_ring_destroy(struct xnf_softc *);
+int xnf_init_backend(struct xnf_softc *);
+int xnf_stop_backend(struct xnf_softc *);
+
+struct cfdriver xnf_cd = {
+ NULL, "xnf", DV_IFNET
+};
+
+const struct cfattach xnf_ca = {
+ sizeof(struct xnf_softc), xnf_match, xnf_attach
+};
+
+int
+xnf_match(struct device *parent, void *match, void *aux)
+{
+ struct xen_attach_args *xa = aux;
+ char type[64];
+
+ if (strcmp("vif", xa->xa_name))
+ return (0);
+
+ if (xs_getprop(xa, "type", type, sizeof(type)) == 0 &&
+    ((strcmp("vif", type) == 0) || (strcmp("front", type) == 0)))
+ return (1);
+
+ return (0);
+}
+
+void
+xnf_attach(struct device *parent, struct device *self, void *aux)
+{
+ struct xen_attach_args *xa = aux;
+ struct xnf_softc *sc = (struct xnf_softc *)self;
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+
+ sc->sc_xa = *xa;
+ sc->sc_xen = xa->xa_parent;
+ sc->sc_dmat = xa->xa_dmat;
+
+ strlcpy(ifp->if_xname, sc->sc_dev.dv_xname, IFNAMSIZ);
+
+ if (xnf_lladdr(sc)) {
+ printf(": failed to obtain MAC address\n");
+ return;
+ }
+
+ if (xen_intr_establish(0, &sc->sc_xih, xnf_intr, sc, ifp->if_xname)) {
+ printf("%s: failed to establish an interrupt\n", ifp->if_xname);
+ return;
+ }
+
+ printf(": event channel %u, address %s\n", sc->sc_xih,
+    ether_sprintf(sc->sc_ac.ac_enaddr));
+
+ if (xnf_rx_ring_create(sc)) {
+ xen_intr_disestablish(sc->sc_xih);
+ return;
+ }
+ if (xnf_tx_ring_create(sc)) {
+ xen_intr_disestablish(sc->sc_xih);
+ xnf_rx_ring_destroy(sc);
+ return;
+ }
+ if (xnf_init_backend(sc)) {
+ xen_intr_disestablish(sc->sc_xih);
+ xnf_rx_ring_destroy(sc);
+ xnf_tx_ring_destroy(sc);
+ return;
+ }
+
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_xflags = IFXF_MPSAFE;
+ ifp->if_ioctl = xnf_ioctl;
+ ifp->if_start = xnf_start;
+ ifp->if_softc = sc;
+
+ ifp->if_capabilities = IFCAP_VLAN_MTU;
+
+ IFQ_SET_MAXLEN(&ifp->if_snd, XNF_TX_DESC - 1);
+ IFQ_SET_READY(&ifp->if_snd);
+
+ ifmedia_init(&sc->sc_media, IFM_IMASK, xnf_media_change,
+    xnf_media_status);
+ ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_MANUAL, 0, NULL);
+ ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_MANUAL);
+
+ if_attach(ifp);
+ ether_ifattach(ifp);
+
+ timeout_set(&sc->sc_rx_fill, xnf_rx_ring_fill, sc);
+}
+
+static int
+nibble(int ch)
+{
+ if (ch >= '0' && ch <= '9')
+ return (ch - '0');
+ if (ch >= 'A' && ch <= 'F')
+ return (10 + ch - 'A');
+ if (ch >= 'a' && ch <= 'f')
+ return (10 + ch - 'a');
+ return (-1);
+}
+
+int
+xnf_lladdr(struct xnf_softc *sc)
+{
+ char enaddr[ETHER_ADDR_LEN];
+ char mac[32];
+ int i, j, lo, hi;
+
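+ /*
+  * The XenStore "mac" property is a colon-separated hex string,
+  * e.g. "00:16:3e:00:00:01" (illustrative value).
+  */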
+ if (xs_getprop(&sc->sc_xa, "mac", mac, sizeof(mac)))
+ return (-1);
+
+ for (i = 0, j = 0; j < ETHER_ADDR_LEN; i += 3) {
+ if ((hi = nibble(mac[i])) == -1 ||
+    (lo = nibble(mac[i+1])) == -1)
+ return (-1);
+ enaddr[j++] = hi << 4 | lo;
+ }
+
+ memcpy(sc->sc_ac.ac_enaddr, enaddr, ETHER_ADDR_LEN);
+ return (0);
+}
+
+int
+xnf_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+ struct xnf_softc *sc = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq *)data;
+ int s, error = 0;
+
+ s = splnet();
+
+ switch (command) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ if (!(ifp->if_flags & IFF_RUNNING))
+ xnf_init(sc);
+ break;
+ case SIOCSIFFLAGS:
+ if (ifp->if_flags & IFF_UP) {
+ if (ifp->if_flags & IFF_RUNNING)
+ error = ENETRESET;
+ else
+ xnf_init(sc);
+ } else {
+ if (ifp->if_flags & IFF_RUNNING)
+ xnf_stop(sc);
+ }
+ break;
+ case SIOCGIFMEDIA:
+ case SIOCSIFMEDIA:
+ error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, command);
+ break;
+ case SIOCGIFRXR:
+ error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
+    NULL, XNF_MCLEN, &sc->sc_rx_slots);
+ break;
+ default:
+ error = ether_ioctl(ifp, &sc->sc_ac, command, data);
+ break;
+ }
+
+ if (error == ENETRESET) {
+ if (ifp->if_flags & IFF_RUNNING)
+ xnf_iff(sc);
+ error = 0;
+ }
+
+ splx(s);
+
+ return (error);
+}
+
+int
+xnf_media_change(struct ifnet *ifp)
+{
+ return (0);
+}
+
+void
+xnf_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+ ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
+ ifmr->ifm_active = IFM_ETHER | IFM_MANUAL;
+}
+
+int
+xnf_iff(struct xnf_softc *sc)
+{
+ return (0);
+}
+
+void
+xnf_init(struct xnf_softc *sc)
+{
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+
+ xnf_stop(sc);
+
+ xnf_iff(sc);
+
+ if (xen_intr_unmask(sc->sc_xih)) {
+ printf("%s: failed to enable interrupts\n", ifp->if_xname);
+ xnf_stop(sc);
+ return;
+ }
+
+ ifp->if_flags |= IFF_RUNNING;
+ ifq_clr_oactive(&ifp->if_snd);
+}
+
+void
+xnf_stop(struct xnf_softc *sc)
+{
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+
+ ifp->if_flags &= ~IFF_RUNNING;
+
+ xen_intr_mask(sc->sc_xih);
+
+ timeout_del(&sc->sc_rx_fill);
+
+ ifq_barrier(&ifp->if_snd);
+ intr_barrier(&sc->sc_xih);
+
+ ifq_clr_oactive(&ifp->if_snd);
+
+ if (sc->sc_tx_ring)
+ xnf_tx_ring_drain(sc);
+ if (sc->sc_rx_ring)
+ xnf_rx_ring_drain(sc);
+}
+
+void
+xnf_start(struct ifnet *ifp)
+{
+ struct xnf_softc *sc = ifp->if_softc;
+ struct xnf_tx_ring *txr = sc->sc_tx_ring;
+ struct mbuf *m;
+ int error, pkts = 0;
+ uint32_t prod;
+
+ if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
+ return;
+
+ prod = txr->txr_prod;
+ membar_consumer();
+
+ for (;;) {
+ m = ifq_deq_begin(&ifp->if_snd);
+ if (m == NULL)
+ break;
+
+ error = xnf_encap(sc, m, &prod);
+ if (error == ENOENT) {
+ /* transient */
+ ifq_deq_rollback(&ifp->if_snd, m);
+ ifq_set_oactive(&ifp->if_snd);
+ break;
+ } else if (error) {
+ /* the chain is too large */
+ ifq_deq_commit(&ifp->if_snd, m);
+ m_freem(m);
+ continue;
+ }
+ ifq_deq_commit(&ifp->if_snd, m);
+
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+ bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+ pkts++;
+ }
+ if (pkts > 0) {
+ txr->txr_prod = prod;
+ xen_intr_signal(sc->sc_xih);
+ }
+}
+
+int
+xnf_encap(struct xnf_softc *sc, struct mbuf *m, uint32_t *prod)
+{
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+ struct xnf_tx_ring *txr = sc->sc_tx_ring;
+ union xnf_tx_desc *txd;
+ bus_dmamap_t dmap;
+ int error, i, n = 0;
+
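+ /*
+  * prod and cons are free-running counters; the masked difference
+  * is the number of free descriptors.  Bail out unless a maximally
+  * fragmented chain (XNF_TX_FRAG segments) still fits.
+  */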
+ if (((txr->txr_cons - *prod - 1) & (XNF_TX_DESC - 1)) < XNF_TX_FRAG) {
+ error = ENOENT;
+ goto errout;
+ }
+
+ i = *prod & (XNF_TX_DESC - 1);
+ dmap = sc->sc_tx_dmap[i];
+
+ error = bus_dmamap_load_mbuf(sc->sc_dmat, dmap, m, BUS_DMA_WRITE |
+    BUS_DMA_NOWAIT);
+ if (error == EFBIG) {
+ if (m_defrag(m, M_DONTWAIT) ||
+    bus_dmamap_load_mbuf(sc->sc_dmat, dmap, m, BUS_DMA_WRITE |
+     BUS_DMA_NOWAIT))
+ goto errout;
+ } else if (error)
+ goto errout;
+
+ for (n = 0; n < dmap->dm_nsegs; n++, (*prod)++) {
+ i = *prod & (XNF_TX_DESC - 1);
+ if (sc->sc_tx_buf[i])
+ panic("%s: save vs spell: %d\n", ifp->if_xname, i);
+ txd = &txr->txr_desc[i];
+ if (n == 0) {
+ sc->sc_tx_buf[i] = m;
+ if (0 && m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
+ txd->txd_req.txq_flags = XNF_TXF_CSUM |
+    XNF_TXF_VALID;
+ txd->txd_req.txq_size = m->m_pkthdr.len;
+ } else
+ txd->txd_req.txq_size = dmap->dm_segs[n].ds_len;
+ if (n != dmap->dm_nsegs - 1)
+ txd->txd_req.txq_flags |= XNF_TXF_CHUNK;
+ txd->txd_req.txq_ref = dmap->dm_segs[n].ds_addr;
+ txd->txd_req.txq_offset = dmap->dm_segs[n].ds_offset;
+ }
+
+ ifp->if_opackets++;
+ return (0);
+
+ errout:
+ ifp->if_oerrors++;
+ return (error);
+}
+
+void
+xnf_intr(void *arg)
+{
+ struct xnf_softc *sc = arg;
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+
+ if (ifp->if_flags & IFF_RUNNING) {
+ xnf_rxeof(sc);
+ xnf_txeof(sc);
+ }
+}
+
+int
+xnf_txeof(struct xnf_softc *sc)
+{
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+ struct xnf_tx_ring *txr = sc->sc_tx_ring;
+ union xnf_tx_desc *txd;
+ struct mbuf *m;
+ bus_dmamap_t dmap;
+ volatile uint32_t r;
+ uint32_t cons;
+ int i, id, pkts = 0;
+
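+ /*
+  * Loop until the ring is drained: the backend may produce more
+  * responses after we have updated the event counter.
+  */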
+ do {
+ for (cons = sc->sc_tx_cons; cons != txr->txr_cons; cons++) {
+ membar_consumer();
+ i = cons & (XNF_TX_DESC - 1);
+ txd = &txr->txr_desc[i];
+ id = txd->txd_rsp.txp_id;
+ memset(txd, 0, sizeof(*txd));
+ txd->txd_req.txq_id = id;
+ membar_producer();
+ if (sc->sc_tx_buf[i]) {
+ dmap = sc->sc_tx_dmap[i];
+ bus_dmamap_unload(sc->sc_dmat, dmap);
+ m = sc->sc_tx_buf[i];
+ sc->sc_tx_buf[i] = NULL;
+ m_freem(m);
+ }
+ pkts++;
+ }
+
+ if (pkts > 0) {
+ sc->sc_tx_cons = cons;
+ membar_producer();
+ txr->txr_rsp_evt = cons + 1;
+ pkts = 0;
+ }
+
+ r = txr->txr_cons - sc->sc_tx_cons;
+ membar_consumer();
+ } while (r > 0);
+
+ if (ifq_is_oactive(&ifp->if_snd))
+ ifq_restart(&ifp->if_snd);
+
+ return (0);
+}
+
+int
+xnf_rxeof(struct xnf_softc *sc)
+{
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+ struct xnf_rx_ring *rxr = sc->sc_rx_ring;
+ union xnf_rx_desc *rxd;
+ struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+ struct mbuf *fmp = sc->sc_rx_cbuf[0];
+ struct mbuf *lmp = sc->sc_rx_cbuf[1];
+ struct mbuf *m;
+ bus_dmamap_t dmap;
+ volatile uint32_t r;
+ uint32_t cons;
+ int i, id, flags, len, offset, pkts = 0;
+
+ do {
+ for (cons = sc->sc_rx_cons; cons != rxr->rxr_cons; cons++) {
+ membar_consumer();
+ i = cons & (XNF_RX_DESC - 1);
+ rxd = &rxr->rxr_desc[i];
+ dmap = sc->sc_rx_dmap[i];
+
+ len = rxd->rxd_rsp.rxp_status;
+ flags = rxd->rxd_rsp.rxp_flags;
+ offset = rxd->rxd_rsp.rxp_offset;
+ id = rxd->rxd_rsp.rxp_id;
+ memset(rxd, 0, sizeof(*rxd));
+ rxd->rxd_req.rxq_id = id;
+ membar_producer();
+
+ bus_dmamap_unload(sc->sc_dmat, dmap);
+
+ m = sc->sc_rx_buf[i];
+ KASSERT(m != NULL);
+ sc->sc_rx_buf[i] = NULL;
+
+ if (flags & XNF_RXF_EXTRA)
+ printf("%s: management data present\n",
+    ifp->if_xname);
+
+ if (flags & XNF_RXF_CSUM)
+ m->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
+
+ if_rxr_put(&sc->sc_rx_slots, 1);
+ pkts++;
+
+ if (len < 0 || (len + offset > PAGE_SIZE)) {
+ ifp->if_ierrors++;
+ m_freem(m);
+ continue;
+ }
+
+ m->m_len = len;
+ m->m_data += offset;
+
+ if (fmp == NULL) {
+ m->m_pkthdr.len = len;
+ fmp = m;
+ } else {
+ m->m_flags &= ~M_PKTHDR;
+ lmp->m_next = m;
+ fmp->m_pkthdr.len += m->m_len;
+ }
+ lmp = m;
+
+ if (flags & XNF_RXF_CHUNK) {
+ sc->sc_rx_cbuf[0] = fmp;
+ sc->sc_rx_cbuf[1] = lmp;
+ continue;
+ }
+
+ m = fmp;
+
+ ml_enqueue(&ml, m);
+ sc->sc_rx_cbuf[0] = sc->sc_rx_cbuf[1] =
+    fmp = lmp = NULL;
+ }
+
+ if (pkts > 0) {
+ sc->sc_rx_cons = cons;
+ membar_producer();
+ rxr->rxr_rsp_evt = cons + 1;
+ pkts = 0;
+ }
+
+ r = rxr->rxr_cons - sc->sc_rx_cons;
+ membar_consumer();
+ } while (r > 0);
+
+ if (!ml_empty(&ml)) {
+ if_input(ifp, &ml);
+
+ xnf_rx_ring_fill(sc);
+ }
+
+ return (0);
+}
+
+void
+xnf_rx_ring_fill(void *arg)
+{
+ struct xnf_softc *sc = arg;
+ struct ifnet *ifp = &sc->sc_ac.ac_if;
+ struct xnf_rx_ring *rxr = sc->sc_rx_ring;
+ bus_dmamap_t dmap;
+ struct mbuf *m;
+ uint32_t cons, prod;
+ static int timer = 0;
+ int i, n;
+
+ cons = rxr->rxr_cons;
+ prod = rxr->rxr_prod;
+
+ n = if_rxr_get(&sc->sc_rx_slots, XNF_RX_DESC);
+
+ /* Less than XNF_RX_MIN slots available? */
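+ /* If so, retry via timeout; the delay doubles up to 2^10 ticks. */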
+ if (n == 0 && prod - cons < XNF_RX_MIN) {
+ if (ifp->if_flags & IFF_RUNNING)
+ timeout_add(&sc->sc_rx_fill, 1 << timer);
+ if (timer < 10)
+ timer++;
+ return;
+ }
+
+ for (; n > 0; prod++, n--) {
+ i = prod & (XNF_RX_DESC - 1);
+ if (sc->sc_rx_buf[i])
+ break;
+ m = MCLGETI(NULL, M_DONTWAIT, NULL, XNF_MCLEN);
+ if (m == NULL)
+ break;
+ m->m_len = m->m_pkthdr.len = XNF_MCLEN;
+ dmap = sc->sc_rx_dmap[i];
+ if (bus_dmamap_load_mbuf(sc->sc_dmat, dmap, m, BUS_DMA_READ |
+    BUS_DMA_NOWAIT)) {
+ m_freem(m);
+ break;
+ }
+ sc->sc_rx_buf[i] = m;
+ rxr->rxr_desc[i].rxd_req.rxq_ref = dmap->dm_segs[0].ds_addr;
+ }
+
+ if (n > 0)
+ if_rxr_put(&sc->sc_rx_slots, n);
+
+ membar_producer();
+ rxr->rxr_prod = prod;
+
+ xen_intr_signal(sc->sc_xih);
+}
+
+int
+xnf_rx_ring_create(struct xnf_softc *sc)
+{
+ int i, rsegs;
+
+ /* Allocate a page of memory for the ring */
+ if (bus_dmamem_alloc(sc->sc_dmat, PAGE_SIZE, PAGE_SIZE, 0,
+    &sc->sc_rx_seg, 1, &rsegs, BUS_DMA_ZERO | BUS_DMA_WAITOK)) {
+ printf("%s: failed to allocate memory for the rx ring\n",
+    sc->sc_dev.dv_xname);
+ return (-1);
+ }
+ /* Map in the allocated memory into the ring structure */
+ if (bus_dmamem_map(sc->sc_dmat, &sc->sc_rx_seg, 1, PAGE_SIZE,
+    (caddr_t *)(&sc->sc_rx_ring), BUS_DMA_WAITOK)) {
+ printf("%s: failed to map memory for the rx ring\n",
+    sc->sc_dev.dv_xname);
+ goto errout;
+ }
+ /* Create a map to load the ring memory into */
+ if (bus_dmamap_create(sc->sc_dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
+    BUS_DMA_WAITOK, &sc->sc_rx_rmap)) {
+ printf("%s: failed to create a memory map for the rx ring\n",
+    sc->sc_dev.dv_xname);
+ goto errout;
+ }
+ /* Load the ring into the ring map to extract the PA */
+ if (bus_dmamap_load(sc->sc_dmat, sc->sc_rx_rmap, sc->sc_rx_ring,
+    PAGE_SIZE, NULL, BUS_DMA_WAITOK)) {
+ printf("%s: failed to load the rx ring map\n",
+    sc->sc_dev.dv_xname);
+ goto errout;
+ }
+ sc->sc_rx_ref = sc->sc_rx_rmap->dm_segs[0].ds_addr;
+
+ sc->sc_rx_ring->rxr_req_evt = sc->sc_rx_ring->rxr_rsp_evt = 1;
+
+ for (i = 0; i < XNF_RX_DESC; i++) {
+ if (bus_dmamap_create(sc->sc_dmat, XNF_MCLEN, 1,
+    XNF_MCLEN, 0, BUS_DMA_WAITOK, &sc->sc_rx_dmap[i])) {
+ printf("%s: failed to create a memory map for the rx "
+    "slot %d/%d\n", sc->sc_dev.dv_xname, i,
+    XNF_RX_DESC);
+ goto errout;
+ }
+ sc->sc_rx_ring->rxr_desc[i].rxd_req.rxq_id = i;
+ }
+
+ if_rxr_init(&sc->sc_rx_slots, XNF_RX_MIN, XNF_RX_DESC);
+ xnf_rx_ring_fill(sc);
+
+ return (0);
+
+ errout:
+ xnf_rx_ring_destroy(sc);
+ return (-1);
+}
+
+void
+xnf_rx_ring_drain(struct xnf_softc *sc)
+{
+ struct xnf_rx_ring *rxr = sc->sc_rx_ring;
+
+ if (sc->sc_rx_cons != rxr->rxr_cons)
+ xnf_rxeof(sc);
+}
+
+void
+xnf_rx_ring_destroy(struct xnf_softc *sc)
+{
+ int i, slots = 0;
+
+ for (i = 0; i < XNF_RX_DESC; i++) {
+ if (sc->sc_rx_buf[i] == NULL)
+ continue;
+ bus_dmamap_unload(sc->sc_dmat, sc->sc_rx_dmap[i]);
+ m_freem(sc->sc_rx_buf[i]);
+ sc->sc_rx_buf[i] = NULL;
+ slots++;
+ }
+ printf("%s: unload done\n", __func__);
+ if_rxr_put(&sc->sc_rx_slots, slots);
+ printf("%s: rxr_put done\n", __func__);
+
+ for (i = 0; i < XNF_RX_DESC; i++) {
+ if (sc->sc_rx_dmap[i] == NULL)
+ continue;
+ bus_dmamap_destroy(sc->sc_dmat, sc->sc_rx_dmap[i]);
+ sc->sc_rx_dmap[i] = NULL;
+ }
+ printf("%s: desc map destroy done\n", __func__);
+ if (sc->sc_rx_rmap) {
+ bus_dmamap_unload(sc->sc_dmat, sc->sc_rx_rmap);
+ bus_dmamap_destroy(sc->sc_dmat, sc->sc_rx_rmap);
+ }
+ printf("%s: ring map destroy done\n", __func__);
+ if (sc->sc_rx_ring) {
+ bus_dmamem_unmap(sc->sc_dmat, (caddr_t)sc->sc_rx_ring,
+    PAGE_SIZE);
+ bus_dmamem_free(sc->sc_dmat, &sc->sc_rx_seg, 1);
+ }
+ printf("%s: ring mem free done\n", __func__);
+ sc->sc_rx_ring = NULL;
+ sc->sc_rx_rmap = NULL;
+ sc->sc_rx_cons = 0;
+}
+
+int
+xnf_tx_ring_create(struct xnf_softc *sc)
+{
+ int i, rsegs;
+
+ /* Allocate a page of memory for the ring */
+ if (bus_dmamem_alloc(sc->sc_dmat, PAGE_SIZE, PAGE_SIZE, 0,
+    &sc->sc_tx_seg, 1, &rsegs, BUS_DMA_ZERO | BUS_DMA_WAITOK)) {
+ printf("%s: failed to allocate memory for the tx ring\n",
+    sc->sc_dev.dv_xname);
+ return (-1);
+ }
+ /* Map in the allocated memory into the ring structure */
+ if (bus_dmamem_map(sc->sc_dmat, &sc->sc_tx_seg, 1, PAGE_SIZE,
+    (caddr_t *)&sc->sc_tx_ring, BUS_DMA_WAITOK)) {
+ printf("%s: failed to map memory for the tx ring\n",
+    sc->sc_dev.dv_xname);
+ goto errout;
+ }
+ /* Create a map to load the ring memory into */
+ if (bus_dmamap_create(sc->sc_dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
+    BUS_DMA_WAITOK, &sc->sc_tx_rmap)) {
+ printf("%s: failed to create a memory map for the tx ring\n",
+    sc->sc_dev.dv_xname);
+ goto errout;
+ }
+ /* Load the ring into the ring map to extract the PA */
+ if (bus_dmamap_load(sc->sc_dmat, sc->sc_tx_rmap, sc->sc_tx_ring,
+    PAGE_SIZE, NULL, BUS_DMA_WAITOK)) {
+ printf("%s: failed to load the tx ring map\n",
+    sc->sc_dev.dv_xname);
+ goto errout;
+ }
+ sc->sc_tx_ref = sc->sc_tx_rmap->dm_segs[0].ds_addr;
+
+ sc->sc_tx_ring->txr_req_evt = sc->sc_tx_ring->txr_rsp_evt = 1;
+
+ for (i = 0; i < XNF_TX_DESC; i++) {
+ if (bus_dmamap_create(sc->sc_dmat, XNF_MCLEN, XNF_TX_FRAG,
+    XNF_MCLEN, 0, BUS_DMA_WAITOK, &sc->sc_tx_dmap[i])) {
+ printf("%s: failed to create a memory map for the tx "
+    "slot %d/%d\n", sc->sc_dev.dv_xname, i,
+    XNF_TX_DESC);
+ goto errout;
+ }
+ sc->sc_tx_ring->txr_desc[i].txd_req.txq_id = i;
+ }
+
+ return (0);
+
+ errout:
+ xnf_tx_ring_destroy(sc);
+ return (-1);
+}
+
+void
+xnf_tx_ring_drain(struct xnf_softc *sc)
+{
+ struct xnf_tx_ring *txr = sc->sc_tx_ring;
+
+ if (sc->sc_tx_cons != txr->txr_cons)
+ xnf_txeof(sc);
+}
+
+void
+xnf_tx_ring_destroy(struct xnf_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < XNF_TX_DESC; i++) {
+ if (sc->sc_tx_dmap[i] == NULL)
+ continue;
+ bus_dmamap_unload(sc->sc_dmat, sc->sc_tx_dmap[i]);
+ if (sc->sc_tx_buf[i] == NULL)
+ continue;
+ m_freem(sc->sc_tx_buf[i]);
+ sc->sc_tx_buf[i] = NULL;
+ }
+ for (i = 0; i < XNF_TX_DESC; i++) {
+ if (sc->sc_tx_dmap[i] == NULL)
+ continue;
+ bus_dmamap_destroy(sc->sc_dmat, sc->sc_tx_dmap[i]);
+ sc->sc_tx_dmap[i] = NULL;
+ }
+ if (sc->sc_tx_rmap) {
+ bus_dmamap_unload(sc->sc_dmat, sc->sc_tx_rmap);
+ bus_dmamap_destroy(sc->sc_dmat, sc->sc_tx_rmap);
+ }
+ if (sc->sc_tx_ring) {
+ bus_dmamem_unmap(sc->sc_dmat, (caddr_t)sc->sc_tx_ring,
+    PAGE_SIZE);
+ bus_dmamem_free(sc->sc_dmat, &sc->sc_tx_seg, 1);
+ }
+ sc->sc_tx_ring = NULL;
+ sc->sc_tx_rmap = NULL;
+}
+
+int
+xnf_init_backend(struct xnf_softc *sc)
+{
+ const char *prop;
+ char val[32];
+
+ /* Plumb the Rx ring */
+ prop = "rx-ring-ref";
+ snprintf(val, sizeof(val), "%u", sc->sc_rx_ref);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+ /* Enable "copy" mode */
+ prop = "request-rx-copy";
+ snprintf(val, sizeof(val), "%u", 1);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+ /* Enable notify mode */
+ prop = "feature-rx-notify";
+ snprintf(val, sizeof(val), "%u", 1);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+ /* Request multicast filtering */
+ prop = "request-multicast-control";
+ snprintf(val, sizeof(val), "%u", 1);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+
+ /* Plumb the Tx ring */
+ prop = "tx-ring-ref";
+ snprintf(val, sizeof(val), "%u", sc->sc_tx_ref);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+ /* Enable transmit scatter-gather mode */
+ prop = "feature-sg";
+ snprintf(val, sizeof(val), "%u", 1);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+
+ /* Disable TCP/UDP checksum offload */
+ prop = "feature-csum-offload";
+ if (xs_setprop(&sc->sc_xa, prop, NULL, 0))
+ goto errout;
+ prop = "feature-no-csum-offload";
+ snprintf(val, sizeof(val), "%u", 1);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+ prop = "feature-ipv6-csum-offload";
+ if (xs_setprop(&sc->sc_xa, prop, NULL, 0))
+ goto errout;
+ prop = "feature-no-ipv6-csum-offload";
+ snprintf(val, sizeof(val), "%u", 1);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+
+ /* Plumb the event channel port */
+ prop = "event-channel";
+ snprintf(val, sizeof(val), "%u", sc->sc_xih);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+
+ /* Connect the device */
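+ /* XenBus state 4 is XenbusStateConnected */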
+ prop = "state";
+ snprintf(val, sizeof(val), "%u", 4);
+ if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
+ goto errout;
+
+ return (0);
+
+ errout:
+ printf("%s: failed to set \"%s\" property to \"%s\"\n",
+    sc->sc_dev.dv_xname, prop, val);
+ return (-1);
+}
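
As an aside, the free-slot arithmetic used in xnf_encap() and the ring
fill path is easier to see in isolation.  A minimal standalone sketch
(plain C, hypothetical names, not part of the diff):

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 256 /* a power of two, like XNF_TX_DESC */

/*
 * Free descriptors between free-running producer/consumer counters;
 * one slot is always kept empty to distinguish "full" from "empty".
 */
static unsigned int
ring_free(uint32_t cons, uint32_t prod)
{
	return ((cons - prod - 1) & (RING_SIZE - 1));
}

int
main(void)
{
	uint32_t prod = 300, cons = 260; /* counters wrap freely */

	printf("used %u free %u\n",
	    (prod - cons) & (RING_SIZE - 1), ring_free(cons, prod));
	return (0);
}

With 40 descriptors in flight this prints "used 40 free 215"; used and
free always sum to RING_SIZE - 1.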


Re: Xen virtual network (Netfront) driver

Mike Belopuhov
On Wed, Jan 06, 2016 at 16:37 +0100, Mike Belopuhov wrote:
> There's still stuff to do, but it receives and transmits reliably
> (at least on modern Xen) so I'd like to get it in.  Man page will
> follow.
>
> OK?
>

Just noticed that a couple of debug printfs have sneaked in.
I'm not going to commit them.

> +void
> +xnf_rx_ring_destroy(struct xnf_softc *sc)
> +{
> + int i, slots = 0;
> +
> + for (i = 0; i < XNF_RX_DESC; i++) {
> + if (sc->sc_rx_buf[i] == NULL)
> + continue;
> + bus_dmamap_unload(sc->sc_dmat, sc->sc_rx_dmap[i]);
> + m_freem(sc->sc_rx_buf[i]);
> + sc->sc_rx_buf[i] = NULL;
> + slots++;
> + }
> + printf("%s: unload done\n", __func__);
> + if_rxr_put(&sc->sc_rx_slots, slots);
> + printf("%s: rxr_put done\n", __func__);
> +
> + for (i = 0; i < XNF_RX_DESC; i++) {
> + if (sc->sc_rx_dmap[i] == NULL)
> + continue;
> + bus_dmamap_destroy(sc->sc_dmat, sc->sc_rx_dmap[i]);
> + sc->sc_rx_dmap[i] = NULL;
> + }
> + printf("%s: desc map destroy done\n", __func__);
> + if (sc->sc_rx_rmap) {
> + bus_dmamap_unload(sc->sc_dmat, sc->sc_rx_rmap);
> + bus_dmamap_destroy(sc->sc_dmat, sc->sc_rx_rmap);
> + }
> + printf("%s: ring map destroy done\n", __func__);
> + if (sc->sc_rx_ring) {
> + bus_dmamem_unmap(sc->sc_dmat, (caddr_t)sc->sc_rx_ring,
> +    PAGE_SIZE);
> + bus_dmamem_free(sc->sc_dmat, &sc->sc_rx_seg, 1);
> + }
> + printf("%s: ring mem free done\n", __func__);
> + sc->sc_rx_ring = NULL;
> + sc->sc_rx_rmap = NULL;
> + sc->sc_rx_cons = 0;
> +}
> +


Re: Xen virtual network (Netfront) driver

Stefan Fritsch
In reply to this post by Mike Belopuhov
On Wed, 6 Jan 2016, Mike Belopuhov wrote:

> There's still stuff to do, but it receives and transmits reliably
> (at least on modern Xen) so I'd like to get it in.  Man page will
> follow.

I only had a quick glance at the code, but I have one comment about your
use of memory barriers. The membar_* macros are pure compiler barriers
when the openbsd kernel is compiled for UP. But since the host machine and
xen may use SMP even in this case, I suspect that you need hardware
memory barriers even if MULTIPROCESSOR is not defined. This does not seem
relevant for x86 because you don't use membar_sync, but it may become
relevant for arm, which is also supported by xen.

I had the same problem in virtio and introduced the virtio_membar_* macros
for this purpose. Maybe they should be renamed to a more generic name and
you should use them, too?
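
A rough sketch of what such generic macros might look like (hypothetical
pv_membar_* names, illustrative only; the actual virtio_membar_*
definitions may differ):

/*
 * Always order accesses to the shared ring against the hypervisor,
 * even on !MULTIPROCESSOR kernels: the backend runs on other CPUs
 * no matter how the guest kernel was configured.
 */
#if defined(__amd64__) || defined(__i386__)
/* x86 is TSO; a compiler barrier is enough for producer/consumer
 * ordering, but a full sync still needs a fence. */
#define pv_membar_producer()	__asm volatile("" ::: "memory")
#define pv_membar_consumer()	__asm volatile("" ::: "memory")
#define pv_membar_sync()	__asm volatile("mfence" ::: "memory")
#elif defined(__arm__)
#define pv_membar_producer()	__asm volatile("dmb st" ::: "memory")
#define pv_membar_consumer()	__asm volatile("dmb" ::: "memory")
#define pv_membar_sync()	__asm volatile("dmb" ::: "memory")
#endif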


> + if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
> + return;
> +
> + prod = txr->txr_prod;
> + membar_consumer();
> +
> + for (;;) {
> + m = ifq_deq_begin(&ifp->if_snd);
> + if (m == NULL)
> + break;
> +
> + error = xnf_encap(sc, m, &prod);
> + if (error == ENOENT) {
> + /* transient */
> + ifq_deq_rollback(&ifp->if_snd, m);
> + ifq_set_oactive(&ifp->if_snd);
> + break;
> + } else if (error) {
> + /* the chain is too large */
> + ifq_deq_commit(&ifp->if_snd, m);
> + m_freem(m);
> + continue;
> + }
> + ifq_deq_commit(&ifp->if_snd, m);
> +
> +#if NBPFILTER > 0
> + if (ifp->if_bpf)
> + bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> +#endif
> + pkts++;
> + }
> + if (pkts > 0) {
> + txr->txr_prod = prod;
> + xen_intr_signal(sc->sc_xih);
> + }
> +}
> +
> +int
> +xnf_encap(struct xnf_softc *sc, struct mbuf *m, uint32_t *prod)
> +{
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> + struct xnf_tx_ring *txr = sc->sc_tx_ring;
> + union xnf_tx_desc *txd;
> + bus_dmamap_t dmap;
> + int error, i, n = 0;
> +
> + if (((txr->txr_cons - *prod - 1) & (XNF_TX_DESC - 1)) < XNF_TX_FRAG) {
> + error = ENOENT;
> + goto errout;
> + }
> +
> + i = *prod & (XNF_TX_DESC - 1);
> + dmap = sc->sc_tx_dmap[i];
> +
> + error = bus_dmamap_load_mbuf(sc->sc_dmat, dmap, m, BUS_DMA_WRITE |
> +    BUS_DMA_NOWAIT);
> + if (error == EFBIG) {
> + if (m_defrag(m, M_DONTWAIT) ||
> +    bus_dmamap_load_mbuf(sc->sc_dmat, dmap, m, BUS_DMA_WRITE |
> +     BUS_DMA_NOWAIT))
> + goto errout;
> + } else if (error)
> + goto errout;
> +
> + for (n = 0; n < dmap->dm_nsegs; n++, (*prod)++) {
> + i = *prod & (XNF_TX_DESC - 1);
> + if (sc->sc_tx_buf[i])
> + panic("%s: save vs spell: %d\n", ifp->if_xname, i);
> + txd = &txr->txr_desc[i];
> + if (n == 0) {
> + sc->sc_tx_buf[i] = m;
> + if (0 && m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
> + txd->txd_req.txq_flags = XNF_TXF_CSUM |
> +    XNF_TXF_VALID;
> + txd->txd_req.txq_size = m->m_pkthdr.len;
> + } else
> + txd->txd_req.txq_size = dmap->dm_segs[n].ds_len;
> + if (n != dmap->dm_nsegs - 1)
> + txd->txd_req.txq_flags |= XNF_TXF_CHUNK;
> + txd->txd_req.txq_ref = dmap->dm_segs[n].ds_addr;
> + txd->txd_req.txq_offset = dmap->dm_segs[n].ds_offset;
> + }
> +
> + ifp->if_opackets++;
> + return (0);
> +
> + errout:
> + ifp->if_oerrors++;
> + return (error);
> +}
> +
> +void
> +xnf_intr(void *arg)
> +{
> + struct xnf_softc *sc = arg;
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> +
> + if (ifp->if_flags & IFF_RUNNING) {
> + xnf_rxeof(sc);
> + xnf_txeof(sc);
> + }
> +}
> +
> +int
> +xnf_txeof(struct xnf_softc *sc)
> +{
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> + struct xnf_tx_ring *txr = sc->sc_tx_ring;
> + union xnf_tx_desc *txd;
> + struct mbuf *m;
> + bus_dmamap_t dmap;
> + volatile uint32_t r;
> + uint32_t cons;
> + int i, id, pkts = 0;
> +
> + do {
> + for (cons = sc->sc_tx_cons; cons != txr->txr_cons; cons++) {
> + membar_consumer();
> + i = cons & (XNF_TX_DESC - 1);
> + txd = &txr->txr_desc[i];
> + id = txd->txd_rsp.txp_id;
> + memset(txd, 0, sizeof(*txd));
> + txd->txd_req.txq_id = id;
> + membar_producer();
> + if (sc->sc_tx_buf[i]) {
> + dmap = sc->sc_tx_dmap[i];
> + bus_dmamap_unload(sc->sc_dmat, dmap);
> + m = sc->sc_tx_buf[i];
> + sc->sc_tx_buf[i] = NULL;
> + m_freem(m);
> + }
> + pkts++;
> + }
> +
> + if (pkts > 0) {
> + sc->sc_tx_cons = cons;
> + membar_producer();
> + txr->txr_rsp_evt = cons + 1;
> + pkts = 0;
> + }
> +
> + r = txr->txr_cons - sc->sc_tx_cons;
> + membar_consumer();
> + } while (r > 0);
> +
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
> +
> + return (0);
> +}


Re: Xen virtual network (Netfront) driver

Reyk Floeter
In reply to this post by Mike Belopuhov
On Wed, Jan 06, 2016 at 04:37:36PM +0100, Mike Belopuhov wrote:
> There's still stuff to do, but it receives and transmits reliably
> (at least on modern Xen) so I'd like to get it in.  Man page will
> follow.
>
> OK?
>

I can see it works now and as mentioned in icb:
I just had the first contact with OpenBSD in an EC2 instance.
(once again, we need emoji in xterm to see the U+1F596)

Two bugs:
- It didn't work on m4.10xlarge (see cvs:~reyk/dmesg.m4.10xlarge).
- One time, xnf stopped while copying a large file to a remote machine.

I think it is good enough to go in and be tweaked in the tree.

OK reyk@

> diff --git sys/arch/amd64/conf/GENERIC sys/arch/amd64/conf/GENERIC
> index fca4459..77e07cc 100644
> --- sys/arch/amd64/conf/GENERIC
> +++ sys/arch/amd64/conf/GENERIC
> @@ -67,10 +67,11 @@ mpbios0 at bios0
>  ipmi0 at mainbus? disable # IPMI
>  
>  vmt0 at pvbus? # VMware Tools
>  
>  #xen0 at pvbus? # Xen HVM domU
> +#xnf* at xen? # Xen Netfront
>  
>  option PCIVERBOSE
>  option USBVERBOSE
>  
>  pchb* at pci? # PCI-Host bridges
> diff --git sys/dev/pv/files.pv sys/dev/pv/files.pv
> index d0e3b8c..e1272b2 100644
> --- sys/dev/pv/files.pv
> +++ sys/dev/pv/files.pv
> @@ -16,5 +16,9 @@ file dev/pv/vmt.c vmt needs-flag
>  # Xen
>  device xen {}
>  attach xen at pvbus
>  file dev/pv/xen.c xen needs-flag
>  file dev/pv/xenstore.c xen
> +
> +device xnf: ether, ifnet, ifmedia
> +attach xnf at xen
> +file dev/pv/if_xnf.c xnf
> diff --git sys/dev/pv/if_xnf.c sys/dev/pv/if_xnf.c
> new file mode 100644
> index 0000000..7f8b08e
> --- /dev/null
> +++ sys/dev/pv/if_xnf.c
> @@ -0,0 +1,1022 @@
> +/*
> + * Copyright (c) 2015 Mike Belopuhov
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include "bpfilter.h"
> +#include "vlan.h"
> +#include "xen.h"
> +
> +#include <sys/param.h>
> +#include <sys/systm.h>
> +#include <sys/atomic.h>
> +#include <sys/malloc.h>
> +#include <sys/mbuf.h>
> +#include <sys/kernel.h>
> +#include <sys/device.h>
> +#include <sys/socket.h>
> +#include <sys/sockio.h>
> +#include <sys/queue.h>
> +#include <sys/timeout.h>
> +#include <sys/pool.h>
> +
> +#include <machine/bus.h>
> +
> +#include <dev/pv/xenreg.h>
> +#include <dev/pv/xenvar.h>
> +
> +#include <net/if.h>
> +#include <net/if_media.h>
> +
> +#include <netinet/in.h>
> +#include <netinet/if_ether.h>
> +
> +#ifdef INET6
> +#include <netinet/ip6.h>
> +#endif
> +
> +#if NBPFILTER > 0
> +#include <net/bpf.h>
> +#endif
> +
> +
> +/*
> + * Rx ring
> + */
> +
> +struct xnf_rx_req {
> + uint16_t rxq_id;
> + uint16_t rxq_pad;
> + uint32_t rxq_ref;
> +} __packed;
> +
> +struct xnf_rx_rsp {
> + uint16_t rxp_id;
> + uint16_t rxp_offset;
> + uint16_t rxp_flags;
> +#define  XNF_RXF_CSUM  0x0001
> +#define  XNF_RXF_BLANK  0x0002
> +#define  XNF_RXF_CHUNK  0x0004
> +#define  XNF_RXF_EXTRA  0x0008
> + int16_t rxp_status;
> +} __packed;
> +
> +union xnf_rx_desc {
> + struct xnf_rx_req rxd_req;
> + struct xnf_rx_rsp rxd_rsp;
> +} __packed;
> +
> +#define XNF_RX_DESC 256
> +#define XNF_MCLEN PAGE_SIZE
> +#define XNF_RX_MIN 32
> +
> +struct xnf_rx_ring {
> + uint32_t rxr_prod;
> + uint32_t rxr_req_evt;
> + uint32_t rxr_cons;
> + uint32_t rxr_rsp_evt;
> + uint32_t rxr_reserved[12];
> + union xnf_rx_desc rxr_desc[XNF_RX_DESC];
> +} __packed;
> +
> +
> +/*
> + * Tx ring
> + */
> +
> +struct xnf_tx_req {
> + uint32_t txq_ref;
> + uint16_t txq_offset;
> + uint16_t txq_flags;
> +#define  XNF_TXF_CSUM  0x0001
> +#define  XNF_TXF_VALID  0x0002
> +#define  XNF_TXF_CHUNK  0x0004
> +#define  XNF_TXF_ETXRA  0x0008
> + uint16_t txq_id;
> + uint16_t txq_size;
> +} __packed;
> +
> +struct xnf_tx_rsp {
> + uint16_t txp_id;
> + int16_t txp_status;
> +} __packed;
> +
> +union xnf_tx_desc {
> + struct xnf_tx_req txd_req;
> + struct xnf_tx_rsp txd_rsp;
> +} __packed;
> +
> +#define XNF_TX_DESC 256
> +#define XNF_TX_FRAG 8 /* down from 18 */
> +
> +struct xnf_tx_ring {
> + uint32_t txr_prod;
> + uint32_t txr_req_evt;
> + uint32_t txr_cons;
> + uint32_t txr_rsp_evt;
> + uint32_t txr_reserved[12];
> + union xnf_tx_desc txr_desc[XNF_TX_DESC];
> +} __packed;
> +
> +
> +/* Management frame, "extra info" in Xen parlance */
> +struct xnf_mgmt {
> + uint8_t mg_type;
> +#define  XNF_MGMT_MCAST_ADD 2
> +#define  XNF_MGMT_MCAST_DEL 3
> + uint8_t mg_flags;
> + union {
> + uint8_t mgu_mcaddr[ETHER_ADDR_LEN];
> + uint16_t mgu_pad[3];
> + } u;
> +#define mg_mcaddr u.mgu_mcaddr
> +} __packed;
> +
> +
> +struct xnf_softc {
> + struct device sc_dev;
> + struct xen_attach_args sc_xa;
> + struct xen_softc *sc_xen;
> + bus_dma_tag_t sc_dmat;
> +
> + struct arpcom sc_ac;
> + struct ifmedia sc_media;
> +
> + xen_intr_handle_t sc_xih;
> +
> + /* Rx ring */
> + struct xnf_rx_ring *sc_rx_ring;
> + int sc_rx_cons;
> + bus_dmamap_t sc_rx_rmap;  /* map for the ring */
> + bus_dma_segment_t sc_rx_seg;
> + uint32_t sc_rx_ref;  /* grant table ref */
> + struct mbuf *sc_rx_buf[XNF_RX_DESC];
> + bus_dmamap_t sc_rx_dmap[XNF_RX_DESC]; /* maps for packets */
> + struct mbuf *sc_rx_cbuf[2];    /* chain handling */
> + struct if_rxring sc_rx_slots;
> + struct timeout sc_rx_fill;
> +
> + /* Tx ring */
> + struct xnf_tx_ring *sc_tx_ring;
> + int sc_tx_cons;
> + bus_dmamap_t sc_tx_rmap;  /* map for the ring */
> + bus_dma_segment_t sc_tx_seg;
> + uint32_t sc_tx_ref;  /* grant table ref */
> + struct mbuf *sc_tx_buf[XNF_TX_DESC];
> + bus_dmamap_t sc_tx_dmap[XNF_TX_DESC]; /* maps for packets */
> +};
> +
> +int xnf_match(struct device *, void *, void *);
> +void xnf_attach(struct device *, struct device *, void *);
> +int xnf_lladdr(struct xnf_softc *);
> +int xnf_ioctl(struct ifnet *, u_long, caddr_t);
> +int xnf_media_change(struct ifnet *);
> +void xnf_media_status(struct ifnet *, struct ifmediareq *);
> +int xnf_iff(struct xnf_softc *);
> +void xnf_init(struct xnf_softc *);
> +void xnf_stop(struct xnf_softc *);
> +void xnf_start(struct ifnet *);
> +int xnf_encap(struct xnf_softc *, struct mbuf *, uint32_t *);
> +void xnf_intr(void *);
> +int xnf_txeof(struct xnf_softc *);
> +int xnf_rxeof(struct xnf_softc *);
> +void xnf_rx_ring_fill(void *);
> +int xnf_rx_ring_create(struct xnf_softc *);
> +void xnf_rx_ring_drain(struct xnf_softc *);
> +void xnf_rx_ring_destroy(struct xnf_softc *);
> +int xnf_tx_ring_create(struct xnf_softc *);
> +void xnf_tx_ring_drain(struct xnf_softc *);
> +void xnf_tx_ring_destroy(struct xnf_softc *);
> +int xnf_init_backend(struct xnf_softc *);
> +int xnf_stop_backend(struct xnf_softc *);
> +
> +struct cfdriver xnf_cd = {
> + NULL, "xnf", DV_IFNET
> +};
> +
> +const struct cfattach xnf_ca = {
> + sizeof(struct xnf_softc), xnf_match, xnf_attach
> +};
> +
> +int
> +xnf_match(struct device *parent, void *match, void *aux)
> +{
> + struct xen_attach_args *xa = aux;
> + char type[64];
> +
> + if (strcmp("vif", xa->xa_name))
> + return (0);
> +
> + if (xs_getprop(xa, "type", type, sizeof(type)) == 0 &&
> +    ((strcmp("vif", type) == 0) || (strcmp("front", type) == 0)))
> + return (1);
> +
> + return (0);
> +}
> +
> +void
> +xnf_attach(struct device *parent, struct device *self, void *aux)
> +{
> + struct xen_attach_args *xa = aux;
> + struct xnf_softc *sc = (struct xnf_softc *)self;
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> +
> + sc->sc_xa = *xa;
> + sc->sc_xen = xa->xa_parent;
> + sc->sc_dmat = xa->xa_dmat;
> +
> + strlcpy(ifp->if_xname, sc->sc_dev.dv_xname, IFNAMSIZ);
> +
> + if (xnf_lladdr(sc)) {
> + printf(": failed to obtain MAC address\n");
> + return;
> + }
> +
> + if (xen_intr_establish(0, &sc->sc_xih, xnf_intr, sc, ifp->if_xname)) {
> + printf("%s: failed to establish an interrupt\n", ifp->if_xname);
> + return;
> + }
> +
> + printf(": event channel %u, address %s\n", sc->sc_xih,
> +    ether_sprintf(sc->sc_ac.ac_enaddr));
> +
> + if (xnf_rx_ring_create(sc)) {
> + xen_intr_disestablish(sc->sc_xih);
> + return;
> + }
> + if (xnf_tx_ring_create(sc)) {
> + xen_intr_disestablish(sc->sc_xih);
> + xnf_rx_ring_destroy(sc);
> + return;
> + }
> + if (xnf_init_backend(sc)) {
> + xen_intr_disestablish(sc->sc_xih);
> + xnf_rx_ring_destroy(sc);
> + xnf_tx_ring_destroy(sc);
> + return;
> + }
> +
> + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> + ifp->if_xflags = IFXF_MPSAFE;
> + ifp->if_ioctl = xnf_ioctl;
> + ifp->if_start = xnf_start;
> + ifp->if_softc = sc;
> +
> + ifp->if_capabilities = IFCAP_VLAN_MTU;
> +
> + IFQ_SET_MAXLEN(&ifp->if_snd, XNF_TX_DESC - 1);
> + IFQ_SET_READY(&ifp->if_snd);
> +
> + ifmedia_init(&sc->sc_media, IFM_IMASK, xnf_media_change,
> +    xnf_media_status);
> + ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_MANUAL, 0, NULL);
> + ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_MANUAL);
> +
> + if_attach(ifp);
> + ether_ifattach(ifp);
> +
> + timeout_set(&sc->sc_rx_fill, xnf_rx_ring_fill, sc);
> +}
> +
> +static int
> +nibble(int ch)
> +{
> + if (ch >= '0' && ch <= '9')
> + return (ch - '0');
> + if (ch >= 'A' && ch <= 'F')
> + return (10 + ch - 'A');
> + if (ch >= 'a' && ch <= 'f')
> + return (10 + ch - 'a');
> + return (-1);
> +}
> +
> +int
> +xnf_lladdr(struct xnf_softc *sc)
> +{
> + char enaddr[ETHER_ADDR_LEN];
> + char mac[32];
> + int i, j, lo, hi;
> +
> + if (xs_getprop(&sc->sc_xa, "mac", mac, sizeof(mac)))
> + return (-1);
> +
> + for (i = 0, j = 0; j < ETHER_ADDR_LEN; i += 3) {
> + if ((hi = nibble(mac[i])) == -1 ||
> +    (lo = nibble(mac[i+1])) == -1)
> + return (-1);
> + enaddr[j++] = hi << 4 | lo;
> + }
> +
> + memcpy(sc->sc_ac.ac_enaddr, enaddr, ETHER_ADDR_LEN);
> + return (0);
> +}
> +
> +int
> +xnf_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
> +{
> + struct xnf_softc *sc = ifp->if_softc;
> + struct ifreq *ifr = (struct ifreq *)data;
> + int s, error = 0;
> +
> + s = splnet();
> +
> + switch (command) {
> + case SIOCSIFADDR:
> + ifp->if_flags |= IFF_UP;
> + if (!(ifp->if_flags & IFF_RUNNING))
> + xnf_init(sc);
> + break;
> + case SIOCSIFFLAGS:
> + if (ifp->if_flags & IFF_UP) {
> + if (ifp->if_flags & IFF_RUNNING)
> + error = ENETRESET;
> + else
> + xnf_init(sc);
> + } else {
> + if (ifp->if_flags & IFF_RUNNING)
> + xnf_stop(sc);
> + }
> + break;
> + case SIOCGIFMEDIA:
> + case SIOCSIFMEDIA:
> + error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, command);
> + break;
> + case SIOCGIFRXR:
> + error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
> +    NULL, XNF_MCLEN, &sc->sc_rx_slots);
> + break;
> + default:
> + error = ether_ioctl(ifp, &sc->sc_ac, command, data);
> + break;
> + }
> +
> + if (error == ENETRESET) {
> + if (ifp->if_flags & IFF_RUNNING)
> + xnf_iff(sc);
> + error = 0;
> + }
> +
> + splx(s);
> +
> + return (error);
> +}
> +
> +int
> +xnf_media_change(struct ifnet *ifp)
> +{
> + return (0);
> +}
> +
> +void
> +xnf_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
> +{
> + ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
> + ifmr->ifm_active = IFM_ETHER | IFM_MANUAL;
> +}
> +
> +int
> +xnf_iff(struct xnf_softc *sc)
> +{
> + return (0);
> +}
> +
> +void
> +xnf_init(struct xnf_softc *sc)
> +{
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> +
> + xnf_stop(sc);
> +
> + xnf_iff(sc);
> +
> + if (xen_intr_unmask(sc->sc_xih)) {
> + printf("%s: failed to enable interrupts\n", ifp->if_xname);
> + xnf_stop(sc);
> + return;
> + }
> +
> + ifp->if_flags |= IFF_RUNNING;
> + ifq_clr_oactive(&ifp->if_snd);
> +}
> +
> +void
> +xnf_stop(struct xnf_softc *sc)
> +{
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> +
> + ifp->if_flags &= ~IFF_RUNNING;
> +
> + xen_intr_mask(sc->sc_xih);
> +
> + timeout_del(&sc->sc_rx_fill);
> +
> + ifq_barrier(&ifp->if_snd);
> + intr_barrier(&sc->sc_xih);
> +
> + ifq_clr_oactive(&ifp->if_snd);
> +
> + if (sc->sc_tx_ring)
> + xnf_tx_ring_drain(sc);
> + if (sc->sc_rx_ring)
> + xnf_rx_ring_drain(sc);
> +}
> +
> +void
> +xnf_start(struct ifnet *ifp)
> +{
> + struct xnf_softc *sc = ifp->if_softc;
> + struct xnf_tx_ring *txr = sc->sc_tx_ring;
> + struct mbuf *m;
> + int error, pkts = 0;
> + uint32_t prod;
> +
> + if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
> + return;
> +
> + prod = txr->txr_prod;
> + membar_consumer();
> +
> + for (;;) {
> + m = ifq_deq_begin(&ifp->if_snd);
> + if (m == NULL)
> + break;
> +
> + error = xnf_encap(sc, m, &prod);
> + if (error == ENOENT) {
> + /* transient */
> + ifq_deq_rollback(&ifp->if_snd, m);
> + ifq_set_oactive(&ifp->if_snd);
> + break;
> + } else if (error) {
> + /* the chain is too large */
> + ifq_deq_commit(&ifp->if_snd, m);
> + m_freem(m);
> + continue;
> + }
> + ifq_deq_commit(&ifp->if_snd, m);
> +
> +#if NBPFILTER > 0
> + if (ifp->if_bpf)
> + bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> +#endif
> + pkts++;
> + }
> + if (pkts > 0) {
> + txr->txr_prod = prod;
> + xen_intr_signal(sc->sc_xih);
> + }
> +}
> +
> +int
> +xnf_encap(struct xnf_softc *sc, struct mbuf *m, uint32_t *prod)
> +{
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> + struct xnf_tx_ring *txr = sc->sc_tx_ring;
> + union xnf_tx_desc *txd;
> + bus_dmamap_t dmap;
> + int error, i, n = 0;
> +
> + if (((txr->txr_cons - *prod - 1) & (XNF_TX_DESC - 1)) < XNF_TX_FRAG) {
> + error = ENOENT;
> + goto errout;
> + }
> +
> + i = *prod & (XNF_TX_DESC - 1);
> + dmap = sc->sc_tx_dmap[i];
> +
> + error = bus_dmamap_load_mbuf(sc->sc_dmat, dmap, m, BUS_DMA_WRITE |
> +    BUS_DMA_NOWAIT);
> + if (error == EFBIG) {
> + if (m_defrag(m, M_DONTWAIT) ||
> +    bus_dmamap_load_mbuf(sc->sc_dmat, dmap, m, BUS_DMA_WRITE |
> +     BUS_DMA_NOWAIT))
> + goto errout;
> + } else if (error)
> + goto errout;
> +
> + for (n = 0; n < dmap->dm_nsegs; n++, (*prod)++) {
> + i = *prod & (XNF_TX_DESC - 1);
> + if (sc->sc_tx_buf[i])
> + panic("%s: save vs spell: %d\n", ifp->if_xname, i);
> + txd = &txr->txr_desc[i];
> + if (n == 0) {
> + sc->sc_tx_buf[i] = m;
> + if (0 && m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
> + txd->txd_req.txq_flags = XNF_TXF_CSUM |
> +    XNF_TXF_VALID;
> + txd->txd_req.txq_size = m->m_pkthdr.len;
> + } else
> + txd->txd_req.txq_size = dmap->dm_segs[n].ds_len;
> + if (n != dmap->dm_nsegs - 1)
> + txd->txd_req.txq_flags |= XNF_TXF_CHUNK;
> + txd->txd_req.txq_ref = dmap->dm_segs[n].ds_addr;
> + txd->txd_req.txq_offset = dmap->dm_segs[n].ds_offset;
> + }
> +
> + ifp->if_opackets++;
> + return (0);
> +
> + errout:
> + ifp->if_oerrors++;
> + return (error);
> +}
> +
> +void
> +xnf_intr(void *arg)
> +{
> + struct xnf_softc *sc = arg;
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> +
> + if (ifp->if_flags & IFF_RUNNING) {
> + xnf_rxeof(sc);
> + xnf_txeof(sc);
> + }
> +}
> +
> +int
> +xnf_txeof(struct xnf_softc *sc)
> +{
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> + struct xnf_tx_ring *txr = sc->sc_tx_ring;
> + union xnf_tx_desc *txd;
> + struct mbuf *m;
> + bus_dmamap_t dmap;
> + volatile uint32_t r;
> + uint32_t cons;
> + int i, id, pkts = 0;
> +
> + do {
> + for (cons = sc->sc_tx_cons; cons != txr->txr_cons; cons++) {
> + membar_consumer();
> + i = cons & (XNF_TX_DESC - 1);
> + txd = &txr->txr_desc[i];
> + id = txd->txd_rsp.txp_id;
> + memset(txd, 0, sizeof(*txd));
> + txd->txd_req.txq_id = id;
> + membar_producer();
> + if (sc->sc_tx_buf[i]) {
> + dmap = sc->sc_tx_dmap[i];
> + bus_dmamap_unload(sc->sc_dmat, dmap);
> + m = sc->sc_tx_buf[i];
> + sc->sc_tx_buf[i] = NULL;
> + m_freem(m);
> + }
> + pkts++;
> + }
> +
> + if (pkts > 0) {
> + sc->sc_tx_cons = cons;
> + membar_producer();
> + txr->txr_rsp_evt = cons + 1;
> + pkts = 0;
> + }
> +
> + r = txr->txr_cons - sc->sc_tx_cons;
> + membar_consumer();
> + } while (r > 0);
> +
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
> +
> + return (0);
> +}
> +
> +int
> +xnf_rxeof(struct xnf_softc *sc)
> +{
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> + struct xnf_rx_ring *rxr = sc->sc_rx_ring;
> + union xnf_rx_desc *rxd;
> + struct mbuf_list ml = MBUF_LIST_INITIALIZER();
> + struct mbuf *fmp = sc->sc_rx_cbuf[0];
> + struct mbuf *lmp = sc->sc_rx_cbuf[1];
> + struct mbuf *m;
> + bus_dmamap_t dmap;
> + volatile uint32_t r;
> + uint32_t cons;
> + int i, id, flags, len, offset, pkts = 0;
> +
> + do {
> + for (cons = sc->sc_rx_cons; cons != rxr->rxr_cons; cons++) {
> + membar_consumer();
> + i = cons & (XNF_RX_DESC - 1);
> + rxd = &rxr->rxr_desc[i];
> + dmap = sc->sc_rx_dmap[i];
> +
> + len = rxd->rxd_rsp.rxp_status;
> + flags = rxd->rxd_rsp.rxp_flags;
> + offset = rxd->rxd_rsp.rxp_offset;
> + id = rxd->rxd_rsp.rxp_id;
> + memset(rxd, 0, sizeof(*rxd));
> + rxd->rxd_req.rxq_id = id;
> + membar_producer();
> +
> + bus_dmamap_unload(sc->sc_dmat, dmap);
> +
> + m = sc->sc_rx_buf[i];
> + KASSERT(m != NULL);
> + sc->sc_rx_buf[i] = NULL;
> +
> + if (flags & XNF_RXF_EXTRA)
> + printf("%s: management data present\n",
> +    ifp->if_xname);
> +
> + if (flags & XNF_RXF_CSUM)
> + m->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
> +
> + if_rxr_put(&sc->sc_rx_slots, 1);
> + pkts++;
> +
> + if (len < 0 || (len + offset > PAGE_SIZE)) {
> + ifp->if_ierrors++;
> + m_freem(m);
> + continue;
> + }
> +
> + m->m_len = len;
> + m->m_data += offset;
> +
> + if (fmp == NULL) {
> + m->m_pkthdr.len = len;
> + fmp = m;
> + } else {
> + m->m_flags &= ~M_PKTHDR;
> + lmp->m_next = m;
> + fmp->m_pkthdr.len += m->m_len;
> + }
> + lmp = m;
> +
> + if (flags & XNF_RXF_CHUNK) {
> + sc->sc_rx_cbuf[0] = fmp;
> + sc->sc_rx_cbuf[1] = lmp;
> + continue;
> + }
> +
> + m = fmp;
> +
> + ml_enqueue(&ml, m);
> + sc->sc_rx_cbuf[0] = sc->sc_rx_cbuf[1] =
> +    fmp = lmp = NULL;
> + }
> +
> + if (pkts > 0) {
> + sc->sc_rx_cons = cons;
> + membar_producer();
> + rxr->rxr_rsp_evt = cons + 1;
> + pkts = 0;
> + }
> +
> + r = rxr->rxr_cons - sc->sc_rx_cons;
> + membar_consumer();
> + } while (r > 0);
> +
> + if (!ml_empty(&ml)) {
> + if_input(ifp, &ml);
> +
> + xnf_rx_ring_fill(sc);
> + }
> +
> + return (0);
> +}
> +
> +void
> +xnf_rx_ring_fill(void *arg)
> +{
> + struct xnf_softc *sc = arg;
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> + struct xnf_rx_ring *rxr = sc->sc_rx_ring;
> + bus_dmamap_t dmap;
> + struct mbuf *m;
> + uint32_t cons, prod;
> + static int timer = 0;
> + int i, n;
> +
> + cons = rxr->rxr_cons;
> + prod = rxr->rxr_prod;
> +
> + n = if_rxr_get(&sc->sc_rx_slots, XNF_RX_DESC);
> +
> + /* Less than XNF_RX_MIN slots available? */
> + if (n == 0 && prod - cons < XNF_RX_MIN) {
> + if (ifp->if_flags & IFF_RUNNING)
> + timeout_add(&sc->sc_rx_fill, 1 << timer);
> + if (timer < 10)
> + timer++;
> + return;
> + }
> +
> + for (; n > 0; prod++, n--) {
> + i = prod & (XNF_RX_DESC - 1);
> + if (sc->sc_rx_buf[i])
> + break;
> + m = MCLGETI(NULL, M_DONTWAIT, NULL, XNF_MCLEN);
> + if (m == NULL)
> + break;
> + m->m_len = m->m_pkthdr.len = XNF_MCLEN;
> + dmap = sc->sc_rx_dmap[i];
> + if (bus_dmamap_load_mbuf(sc->sc_dmat, dmap, m, BUS_DMA_READ |
> +    BUS_DMA_NOWAIT)) {
> + m_freem(m);
> + break;
> + }
> + sc->sc_rx_buf[i] = m;
> + rxr->rxr_desc[i].rxd_req.rxq_ref = dmap->dm_segs[0].ds_addr;
> + }
> +
> + if (n > 0)
> + if_rxr_put(&sc->sc_rx_slots, n);
> +
> + membar_producer();
> + rxr->rxr_prod = prod;
> +
> + xen_intr_signal(sc->sc_xih);
> +}
> +
> +int
> +xnf_rx_ring_create(struct xnf_softc *sc)
> +{
> + int i, rsegs;
> +
> + /* Allocate a page of memory for the ring */
> + if (bus_dmamem_alloc(sc->sc_dmat, PAGE_SIZE, PAGE_SIZE, 0,
> +    &sc->sc_rx_seg, 1, &rsegs, BUS_DMA_ZERO | BUS_DMA_WAITOK)) {
> + printf("%s: failed to allocate memory for the rx ring\n",
> +    sc->sc_dev.dv_xname);
> + return (-1);
> + }
> + /* Map in the allocated memory into the ring structure */
> + if (bus_dmamem_map(sc->sc_dmat, &sc->sc_rx_seg, 1, PAGE_SIZE,
> +    (caddr_t *)(&sc->sc_rx_ring), BUS_DMA_WAITOK)) {
> + printf("%s: failed to map memory for the rx ring\n",
> +    sc->sc_dev.dv_xname);
> + goto errout;
> + }
> + /* Create a map to load the ring memory into */
> + if (bus_dmamap_create(sc->sc_dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
> +    BUS_DMA_WAITOK, &sc->sc_rx_rmap)) {
> + printf("%s: failed to create a memory map for the rx ring\n",
> +    sc->sc_dev.dv_xname);
> + goto errout;
> + }
> + /* Load the ring into the ring map to extract the PA */
> + if (bus_dmamap_load(sc->sc_dmat, sc->sc_rx_rmap, sc->sc_rx_ring,
> +    PAGE_SIZE, NULL, BUS_DMA_WAITOK)) {
> + printf("%s: failed to load the rx ring map\n",
> +    sc->sc_dev.dv_xname);
> + goto errout;
> + }
> + sc->sc_rx_ref = sc->sc_rx_rmap->dm_segs[0].ds_addr;
> +
> + sc->sc_rx_ring->rxr_req_evt = sc->sc_rx_ring->rxr_rsp_evt = 1;
> +
> + for (i = 0; i < XNF_RX_DESC; i++) {
> + if (bus_dmamap_create(sc->sc_dmat, XNF_MCLEN, 1,
> +    XNF_MCLEN, 0, BUS_DMA_WAITOK, &sc->sc_rx_dmap[i])) {
> + printf("%s: failed to create a memory map for the rx "
> +    "slot %d/%d\n", sc->sc_dev.dv_xname, i,
> +    XNF_RX_DESC);
> + goto errout;
> + }
> + sc->sc_rx_ring->rxr_desc[i].rxd_req.rxq_id = i;
> + }
> +
> + if_rxr_init(&sc->sc_rx_slots, XNF_RX_MIN, XNF_RX_DESC);
> + xnf_rx_ring_fill(sc);
> +
> + return (0);
> +
> + errout:
> + xnf_rx_ring_destroy(sc);
> + return (-1);
> +}
> +
> +void
> +xnf_rx_ring_drain(struct xnf_softc *sc)
> +{
> + struct xnf_rx_ring *rxr = sc->sc_rx_ring;
> +
> + if (sc->sc_rx_cons != rxr->rxr_cons)
> + xnf_rxeof(sc);
> +}
> +
> +void
> +xnf_rx_ring_destroy(struct xnf_softc *sc)
> +{
> + int i, slots = 0;
> +
> + for (i = 0; i < XNF_RX_DESC; i++) {
> + if (sc->sc_rx_buf[i] == NULL)
> + continue;
> + bus_dmamap_unload(sc->sc_dmat, sc->sc_rx_dmap[i]);
> + m_freem(sc->sc_rx_buf[i]);
> + sc->sc_rx_buf[i] = NULL;
> + slots++;
> + }
> + printf("%s: unload done\n", __func__);
> + if_rxr_put(&sc->sc_rx_slots, slots);
> + printf("%s: rxr_put done\n", __func__);
> +
> + for (i = 0; i < XNF_RX_DESC; i++) {
> + if (sc->sc_rx_dmap[i] == NULL)
> + continue;
> + bus_dmamap_destroy(sc->sc_dmat, sc->sc_rx_dmap[i]);
> + sc->sc_rx_dmap[i] = NULL;
> + }
> + printf("%s: desc map destroy done\n", __func__);
> + if (sc->sc_rx_rmap) {
> + bus_dmamap_unload(sc->sc_dmat, sc->sc_rx_rmap);
> + bus_dmamap_destroy(sc->sc_dmat, sc->sc_rx_rmap);
> + }
> + printf("%s: ring map destroy done\n", __func__);
> + if (sc->sc_rx_ring) {
> + bus_dmamem_unmap(sc->sc_dmat, (caddr_t)sc->sc_rx_ring,
> +    PAGE_SIZE);
> + bus_dmamem_free(sc->sc_dmat, &sc->sc_rx_seg, 1);
> + }
> + printf("%s: ring mem free done\n", __func__);
> + sc->sc_rx_ring = NULL;
> + sc->sc_rx_rmap = NULL;
> + sc->sc_rx_cons = 0;
> +}
> +
> +int
> +xnf_tx_ring_create(struct xnf_softc *sc)
> +{
> + int i, rsegs;
> +
> + /* Allocate a page of memory for the ring */
> + if (bus_dmamem_alloc(sc->sc_dmat, PAGE_SIZE, PAGE_SIZE, 0,
> +    &sc->sc_tx_seg, 1, &rsegs, BUS_DMA_ZERO | BUS_DMA_WAITOK)) {
> + printf("%s: failed to allocate memory for the tx ring\n",
> +    sc->sc_dev.dv_xname);
> + return (-1);
> + }
> + /* Map in the allocated memory into the ring structure */
> + if (bus_dmamem_map(sc->sc_dmat, &sc->sc_tx_seg, 1, PAGE_SIZE,
> +    (caddr_t *)&sc->sc_tx_ring, BUS_DMA_WAITOK)) {
> + printf("%s: failed to map memory for the tx ring\n",
> +    sc->sc_dev.dv_xname);
> + goto errout;
> + }
> + /* Create a map to load the ring memory into */
> + if (bus_dmamap_create(sc->sc_dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
> +    BUS_DMA_WAITOK, &sc->sc_tx_rmap)) {
> + printf("%s: failed to create a memory map for the tx ring\n",
> +    sc->sc_dev.dv_xname);
> + goto errout;
> + }
> + /* Load the ring into the ring map to extract the PA */
> + if (bus_dmamap_load(sc->sc_dmat, sc->sc_tx_rmap, sc->sc_tx_ring,
> +    PAGE_SIZE, NULL, BUS_DMA_WAITOK)) {
> + printf("%s: failed to load the tx ring map\n",
> +    sc->sc_dev.dv_xname);
> + goto errout;
> + }
> + sc->sc_tx_ref = sc->sc_tx_rmap->dm_segs[0].ds_addr;
> +
> + sc->sc_tx_ring->txr_req_evt = sc->sc_tx_ring->txr_rsp_evt = 1;
> +
> + for (i = 0; i < XNF_TX_DESC; i++) {
> + if (bus_dmamap_create(sc->sc_dmat, XNF_MCLEN, XNF_TX_FRAG,
> +    XNF_MCLEN, 0, BUS_DMA_WAITOK, &sc->sc_tx_dmap[i])) {
> + printf("%s: failed to create a memory map for the tx "
> +    "slot %d/%d\n", sc->sc_dev.dv_xname, i,
> +    XNF_TX_DESC);
> + goto errout;
> + }
> + sc->sc_tx_ring->txr_desc[i].txd_req.txq_id = i;
> + }
> +
> + return (0);
> +
> + errout:
> + xnf_tx_ring_destroy(sc);
> + return (-1);
> +}
> +
> +void
> +xnf_tx_ring_drain(struct xnf_softc *sc)
> +{
> + struct xnf_tx_ring *txr = sc->sc_tx_ring;
> +
> + if (sc->sc_tx_cons != txr->txr_cons)
> + xnf_txeof(sc);
> +}
> +
> +void
> +xnf_tx_ring_destroy(struct xnf_softc *sc)
> +{
> +	int i;
> +
> +	for (i = 0; i < XNF_TX_DESC; i++) {
> +		if (sc->sc_tx_dmap[i] == NULL)
> +			continue;
> +		bus_dmamap_unload(sc->sc_dmat, sc->sc_tx_dmap[i]);
> +		if (sc->sc_tx_buf[i] == NULL)
> +			continue;
> +		m_freem(sc->sc_tx_buf[i]);
> +		sc->sc_tx_buf[i] = NULL;
> +	}
> +	for (i = 0; i < XNF_TX_DESC; i++) {
> +		if (sc->sc_tx_dmap[i] == NULL)
> +			continue;
> +		bus_dmamap_destroy(sc->sc_dmat, sc->sc_tx_dmap[i]);
> +		sc->sc_tx_dmap[i] = NULL;
> +	}
> +	if (sc->sc_tx_rmap) {
> +		bus_dmamap_unload(sc->sc_dmat, sc->sc_tx_rmap);
> +		bus_dmamap_destroy(sc->sc_dmat, sc->sc_tx_rmap);
> +	}
> +	if (sc->sc_tx_ring) {
> +		bus_dmamem_unmap(sc->sc_dmat, (caddr_t)sc->sc_tx_ring,
> +		    PAGE_SIZE);
> +		bus_dmamem_free(sc->sc_dmat, &sc->sc_tx_seg, 1);
> +	}
> +	sc->sc_tx_ring = NULL;
> +	sc->sc_tx_rmap = NULL;
> +}
> +
> +int
> +xnf_init_backend(struct xnf_softc *sc)
> +{
> +	const char *prop;
> +	char val[32];
> +
> +	/* Plumb the Rx ring */
> +	prop = "rx-ring-ref";
> +	snprintf(val, sizeof(val), "%u", sc->sc_rx_ref);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +	/* Enable "copy" mode */
> +	prop = "request-rx-copy";
> +	snprintf(val, sizeof(val), "%u", 1);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +	/* Enable notify mode */
> +	prop = "feature-rx-notify";
> +	snprintf(val, sizeof(val), "%u", 1);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +	/* Request multicast filtering */
> +	prop = "request-multicast-control";
> +	snprintf(val, sizeof(val), "%u", 1);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +
> +	/* Plumb the Tx ring */
> +	prop = "tx-ring-ref";
> +	snprintf(val, sizeof(val), "%u", sc->sc_tx_ref);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +	/* Enable transmit scatter-gather mode */
> +	prop = "feature-sg";
> +	snprintf(val, sizeof(val), "%u", 1);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +
> +	/* Disable TCP/UDP checksum offload */
> +	prop = "feature-csum-offload";
> +	if (xs_setprop(&sc->sc_xa, prop, NULL, 0))
> +		goto errout;
> +	prop = "feature-no-csum-offload";
> +	snprintf(val, sizeof(val), "%u", 1);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +	prop = "feature-ipv6-csum-offload";
> +	if (xs_setprop(&sc->sc_xa, prop, NULL, 0))
> +		goto errout;
> +	prop = "feature-no-ipv6-csum-offload";
> +	snprintf(val, sizeof(val), "%u", 1);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +
> +	/* Plumb the event channel port */
> +	prop = "event-channel";
> +	snprintf(val, sizeof(val), "%u", sc->sc_xih);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +
> +	/* Connect the device */
> +	prop = "state";
> +	snprintf(val, sizeof(val), "%u", 4);
> +	if (xs_setprop(&sc->sc_xa, prop, val, strlen(val)))
> +		goto errout;
> +
> +	return (0);
> +
> + errout:
> +	printf("%s: failed to set \"%s\" property to \"%s\"\n",
> +	    sc->sc_dev.dv_xname, prop, val);
> +	return (-1);
> +}
>

--


Re: Xen virtual network (Netfront) driver

Mike Belopuhov-5
In reply to this post by Stefan Fritsch
On 6 January 2016 at 17:58, Stefan Fritsch <[hidden email]> wrote:

> On Wed, 6 Jan 2016, Mike Belopuhov wrote:
>
>> There's still stuff to do, but it receives and transmits reliably
>> (at least on modern Xen) so I'd like to get it in.  Man page will
>> follow.
>
> I only had a quick glance at the code, but I have one comment about your
> use of memory barriers. The membar_* macros are pure compiler barriers
> when the openbsd kernel is compiled for UP. But since the host machine and
> xen may use SMP even in this case, I suspect that you need hardware
> memory barriers even if MULTIPROCESSOR is not defined. This does not seem
> relevant for x86 because you don't use membar_sync, but it may become
> relevant for arm, which is also supported by xen.
>

membar_{producer,consumer} are defined on arm to perform store and
load memory barriers.  Our arm code currently does not distinguish
between an MP case and non-MP case regarding the definition of these
macros, so I'm not entirely certain what you are trying to say.

However, I'm thankful that you brought this up and I'll spend some time
figuring out if I need actual fences in my code.  For instance, the
CAS loop in xen_grant_table_remove runs for more than 10000 iterations
in the normal case.  I've changed the code to perform bus_dmamap_unload
after zeroing the descriptor out so that, technically, there won't be
any dangling grant table references, but I didn't remeasure the CAS
loop.  Possibly, due to caching and CPU migration on the host, we lose
out and could perhaps get a performance boost by adding a memory
barrier.

> I had the same problem in virtio and introduced the virtio_membar_* macros
> for this purpose. Maybe they should be renamed to a more generic name and
> you should use them, too?
>

I'm not sure, because I don't think x86 needs any explicit membars, but
I'll do some tests and report on this.


Re: Xen virtual network (Netfront) driver

Mark Kettenis
> From: Mike Belopuhov <[hidden email]>
> Date: Thu, 7 Jan 2016 12:02:23 +0100
>
> On 6 January 2016 at 17:58, Stefan Fritsch <[hidden email]> wrote:
> > On Wed, 6 Jan 2016, Mike Belopuhov wrote:
> >
> >> There's still stuff to do, but it receives and transmits reliably
> >> (at least on modern Xen) so I'd like to get it in.  Man page will
> >> follow.
> >
> > I only had a quick glance at the code, but I have one comment about your
> > use of memory barriers. The membar_* macros are pure compiler barriers
> > when the openbsd kernel is compiled for UP. But since the host machine and
> > xen may use SMP even in this case, I suspect that you need hardware
> > memory barriers even if MULTIPROCESSOR is not defined. This does not seem
> > relevant for x86 because you don't use membar_sync, but it may become
> > relevant for arm, which is also supported by xen.
> >
>
> membar_{producer,consumer} are defined on arm to perform store and
> load memory barriers.  Our arm code currently does not distinguish
> between an MP case and non-MP case regarding the definition of these
> macros, so I'm not entirely certain what you are trying to say.

Not sure ARM is a good example to look at.

In principle I think that the membar_xxx() interfaces could be simple
compiler barriers on all our architectures, at least as long as the
CPU will observe its own stores in the same order as they were
emitted.  But I think all sane CPU architectures make those
guarantees.  At least for "normal" memory.  However, we treat that as
an optimization.  And we haven't done that for all our architectures.
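
For illustration, the difference is roughly the following (an amd64-ish
sketch for this mail, not our actual macro definitions):

	/* Only stops the compiler from reordering accesses across it. */
	#define compiler_barrier()	__asm volatile("" ::: "memory")

	/* Also orders the CPU's own accesses as seen by other CPUs. */
	#define hardware_fence()	__asm volatile("mfence" ::: "memory")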

The problem with virtualization is of course that even a non-MP kernel
is actually running in an MP environment.  If data structures are
shared with the hypervisor or another domain running on a different
CPU, proper memory barriers must be used to guarantee the other side
sees our stores in the right order.  The typical case would be
populating a descriptor with some sort of validity bit.  There you
want to make sure the other side doesn't see the valid bit set until
all the other parts of the descriptor have been filled in and are
visible.  In that case a simple compiler barrier may not be enough.
This is why the virtio_membar_xxx() primitives were introduced.
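
To make that concrete, a minimal sketch of the publish side of such a
descriptor; the layout and the DESC_VALID flag are made up for the
example, virtio_membar_producer() is the existing primitive:

	struct desc {
		uint64_t	d_addr;		/* buffer address */
		uint32_t	d_len;		/* buffer length */
		uint32_t	d_flags;	/* DESC_VALID marks it ready */
	};
	#define DESC_VALID	0x0001

	void
	desc_publish(struct desc *d, uint64_t addr, uint32_t len)
	{
		d->d_addr = addr;
		d->d_len = len;
		/*
		 * Make the descriptor body visible to the other side
		 * before the valid bit; on a weakly ordered CPU a
		 * compiler barrier alone is not enough here.
		 */
		virtio_membar_producer();
		d->d_flags |= DESC_VALID;
	}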

This is actually not all that different from handling DMA to real
hardware devices.  There we must make sure that stores become visible
to the hardware device in the right order.  That matters even on
non-MP kernels too and is handled by bus_dmamap_sync(9).

Since you have embraced bus_dma(9) for the xen stuff, it would make
sense to add a xen-specific bus_dmamap_sync() implementation that
issues the appropriate memory barrier.  I think it should be
virtio_membar_consumer() for BUS_DMASYNC_PREREAD and
virtio_membar_producer() for BUS_DMASYNC_POSTWRITE.  But you'd better
double-check, because I always get confused!
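
A sketch of what such a hook could look like; the mapping of barriers
to sync operations follows the guess above and still needs the
double-checking mentioned:

	void
	xen_bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map,
	    bus_addr_t offset, bus_size_t size, int op)
	{
		/* Hypothetical hook dispatched from the xen bus_dma tag. */
		if (op & BUS_DMASYNC_PREREAD)
			virtio_membar_consumer();
		if (op & BUS_DMASYNC_POSTWRITE)
			virtio_membar_producer();
	}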

BTW, your xen bus_dma(9) implementation relies on the internals of the
MD bus_dma(9) implementation.  Don't expect it to work on other
architectures.  I'm not even sure I want to be held responsible if
changes in the MD code break it.

> However, I'm thankful that you brought this up and I'll spend some time
> figuring out if I need actual fences in my code.  For instance, the
> CAS loop in xen_grant_table_remove runs for more than 10000 iterations
> in the normal case.  I've changed the code to perform bus_dmamap_unload
> after zeroing the descriptor out so that, technically, there won't be
> any dangling grant table references, but I didn't remeasure the CAS
> loop.  Possibly, due to caching and CPU migration on the host, we lose
> out and could perhaps get a performance boost by adding a memory
> barrier.

Not sure what memory fences have to do with this.  The hypervisor
should definitely issue any appropriate barriers as part of the
context switching.

> > I had the same problem in virtio and introduced the virtio_membar_* macros
> > for this purpose. Maybe they should be renamed to a more generic name and
> > you should use them, too?
> >
>
> I'm not sure, because I don't think x86 needs any explicit membars, but
> I'll do some tests and report on this.

It's a grey area.  The x86 memory model evolved over time and isn't
all that well specified.  On top of that it seems the actual hardware
is a bit more strongly ordered than the specification.  It is fairly
strongly ordered, which means that memory barriers can be omitted in
most cases that deal with "normal" memory.  But our implementation
does issue memory barriers for membar_enter() and membar_sync().  I'm
not 100% certain it is correct.


Re: Xen virtual network (Netfront) driver

Mike Belopuhov-5
On 7 January 2016 at 13:17, Mark Kettenis <[hidden email]> wrote:

>> From: Mike Belopuhov <[hidden email]>
>> Date: Thu, 7 Jan 2016 12:02:23 +0100
>>
>> On 6 January 2016 at 17:58, Stefan Fritsch <[hidden email]> wrote:
>> > On Wed, 6 Jan 2016, Mike Belopuhov wrote:
>> >
>> >> There's still stuff to do, but it receives and transmits reliably
>> >> (at least on modern Xen) so I'd like to get it in.  Man page will
>> >> follow.
>> >
>> > I only had a quick glance at the code, but I have one comment about your
>> > use of memory barriers. The membar_* macros are pure compiler barriers
>> > when the openbsd kernel is compiled for UP. But since the host machine and
>> > xen may use SMP even in this case, I suspect that you need hardware
>> > memory barriers even if MULTIPROCESSOR is not defined. This does not seem
>> > relevant for x86 because you don't use membar_sync, but it may become
>> > relevant for arm, which is also supported by xen.
>> >
>>
>> membar_{producer,consumer} are defined on arm to perform store and
>> load memory barriers.  Our arm code currently does not distinguish
>> between an MP case and non-MP case regarding the definition of these
>> macros, so I'm not entirely certain what you are trying to say.
>
> Not sure ARM is a good example to look at.
>

The only architectures that Xen dom0 is implemented for are i386,
amd64 and arm, so there's no real need to look at anything else.

> In principle I think that the membar_xxx() interfaces could be simple
> compiler barriers on all our architectures, at least as long as the
> CPU will observe its own stores in the same order as they were
> emitted.  But I think all sane CPU architectures make those
> guarantees.  At least for "normal" memory.  However, we treat that as
> an optimization.  And we haven't done that for all our architectures.
>
> The problem with virtualization is of course that even a non-MP kernel
> is actually running in an MP environment.  If data structures are
> shared with the hypervisor or another domain running on a different
> CPU, proper memory barriers must be used to guarantee the other side
> sees our stores in the right order.  The typical case would be
> populating a descriptor with some sort of validity bit.  There you
> want to make sure the other side doesn't see the valid bit set until
> all the other parts of the descriptor have been filled in and are
> visible.  In that case a simple compiler barrier may not be enough.

That's what I was referring to in my example below.

> This is why the virtio_membar_xxx() primitives were introduced.
>

Any idea why store and load barriers weren't implemented separately?

> This is actually not all that different from handling DMA to real
> hardware devices.  There we must make sure that stores become visible
> to the hardware device in the right order.  That matters even on
> non-MP kernels too and is handled by bus_dmamap_sync(9).
>

Except that bus_dmamap_sync is not needed on amd64 and is in
fact empty.

> Since you have embraced bus_dma(9) for the xen stuff, it would make
> sense to add a xen-specific bus_dmamap_sync() implementation that
> issues the appropriate memory barrier.  I think it should be
> virtio_membar_consumer() for BUS_DMASYNC_PREREAD and
> virtio_membar_producer() for BUS_DMASYNC_POSTWRITE.  But you'd better
> double-check, because I always get confused!
>

Will do.

> BTW, your xen bus_dma(9) implementation relies on the internals of the
> MD bus_dma(9) implementation.  Don't expect it to work on other
> architectures.  I'm not even sure I want to be held responsible if
> changes in the MD code break it.
>

If that's _ds_boundary you're referring to, it has been there for 15 years,
so it's unlikely that it's going away.  And in any case we can easily add
another member to this opaque data type, so I don't think it's a big deal
at all.

>> However, I'm thankful that you brought this up and I'll spend some time
>> figuring out if I need actual fences in my code.  For instance, the
>> CAS loop in xen_grant_table_remove runs for more than 10000 iterations
>> in the normal case.  I've changed the code to perform bus_dmamap_unload
>> after zeroing the descriptor out so that, technically, there won't be
>> any dangling grant table references, but I didn't remeasure the CAS
>> loop.  Possibly, due to caching and CPU migration on the host, we lose
>> out and could perhaps get a performance boost by adding a memory
>> barrier.
>
> Not sure what memory fences have to do with this.
>

It's what you've described above.  I possibly need to make sure that
the hypervisor sees one store before the other.

> The hypervisor should definitely issue any appropriate barriers as part
> of the context switching.

There's no context switching in between.

>> > I had the same problem in virtio and introduced the virtio_membar_* macros
>> > for this purpose. Maybe they should be renamed to a more generic name and
>> > you should use them, too?
>> >
>>
>> I'm not sure, because I don't think x86 needs any explicit membars, but
>> I'll do some tests and report on this.
>
> It's a grey area.  The x86 memory model evolved over time and isn't
> all that well specified.  On top of that it seems the actual hardware
> is a bit more strongly ordered than the specification.  It is fairly
> strongly ordered, which means that memory barriers can be omitted in
> most cases that deal with "normal" memory.  But our implementation
> does issue memory barriers for membar_enter() and membar_sync().  I'm
> not 100% certain it is correct.


Re: Xen virtual network (Netfront) driver

Stefan Fritsch
On Thu, 7 Jan 2016, Mike Belopuhov wrote:

> On 7 January 2016 at 13:17, Mark Kettenis <[hidden email]> wrote:
> >> From: Mike Belopuhov <[hidden email]>
> >> Date: Thu, 7 Jan 2016 12:02:23 +0100
> >>
> >> On 6 January 2016 at 17:58, Stefan Fritsch <[hidden email]> wrote:
> >> > On Wed, 6 Jan 2016, Mike Belopuhov wrote:
> >> >
> >> >> There's still stuff to do, but it receives and transmits reliably
> >> >> (at least on modern Xen) so I'd like to get it in.  Man page will
> >> >> follow.
> >> >
> >> > I only had a quick glance at the code, but I have one comment about your
> >> > use of memory barriers. The membar_* macros are pure compiler barriers
> >> > when the openbsd kernel is compiled for UP. But since the host machine and
> >> > xen may use SMP even in this case, I suspect that you need hardware
> >> > memory barriers even if MULTIPROCESSOR is not defined. This does not seem
> >> > relevant for x86 because you don't use membar_sync, but it may become
> >> > relevant for arm, which is also supported by xen.
> >> >
> >>
> >> membar_{producer,consumer} are defined on arm to perform store and
> >> load memory barriers.  Our arm code currently does not distinguish
> >> between an MP case and non-MP case regarding the definition of these
> >> macros, so I'm not entirely certain what you are trying to say.

I didn't check arm's implementation but knew that it had non-empty
membar_{producer,consumer}. So, if it does not distinguish between an MP
case and non-MP case, then there is no problem there. But maybe you should
document somewhere which assumptions about the architecture you make, so
that they can be checked when adding a new architecture. I guess arm64
will come sooner or later and I don't know if it has exactly the same
memory model as 32-bit arm.

> >
> > Not sure ARM is a good example to look at.
> >
>
> The only architectures that Xen dom0 is implemented for are i386,
> amd64 and arm, so there's no real need to look at anything else.
>
> > In principle I think that the membar_xxx() interfaces could be simple
> > compiler barriers on all our architectures, at least as long as the
> > CPU will observe its own stores in the same order as they were
> > emitted.  But I think all sane CPU architectures make those
> > guarantees.  At least for "normal" memory.  However, we treat that as
> > an optimization.  And we haven't done that for all our architectures.
> >
> > The problem with virtualization is of course that even a non-MP kernel
> > is actually running in an MP environment.  If data structures are
> > shared with the hypervisor or another domain running on a different
> > CPU, proper memory barriers must be used to guarantee the other side
> > sees our stores in the right order.  The typical case would be
> > populating a descriptor with some sort of validity bit.  There you
> > want to make sure the other side doesn't see the valid bit set until
> > all the other parts of the descriptor have been filled in and are
> > visible.  In that case a simple compiler barrier may not be enough.

Yes.  With Intel it's the "Reads may be reordered with older writes to
different locations but not with older writes to the same location" part
of the memory model that is causing problems.  So you have to check if
xen hits this case.  virtio does (and removing the memory barriers causes
observable hangs).

>
> That's what I was referring to in my example below.
>
> > This is why the virtio_membar_xxx() primitives were introduced.
> >
>
> Any idea why store and load barriers weren't implemented separately?

No idea. virtio_membar_xxx() was modeled after the existing membar_xxx().
But AIUI membar_consumer() plus membar_producer() is not equivalent to
membar_sync() (which also prevents read vs. write reordering).
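
For illustration, the store-load case that only a full barrier covers,
with two made-up flags; each side stores its own flag and then loads
the other's:

	volatile int req, ack;

	void
	side_a(void)		/* e.g. the guest */
	{
		req = 1;
		/* producer/consumer barriers would not stop the load
		   below from being reordered before the store above */
		membar_sync();
		if (ack == 0) {
			/* A concludes it went first... */
		}
	}

	void
	side_b(void)		/* e.g. the host */
	{
		ack = 1;
		membar_sync();
		if (req == 0) {
			/* ...and without full barriers B could too. */
		}
	}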


Re: Xen virtual network (Netfront) driver

Anders Berggren-2
In reply to this post by Reyk Floeter-2
> On 06 Jan 2016, at 18:49, Reyk Floeter <[hidden email]> wrote:
> - It didn't work on m4.10xlarge (see cvs:~reyk/dmesg.m4.10xlarge).

I didn’t see any mention of it in the dmesg (https://gist.github.com/reyk/b372af303eb86bab3fee), but could it be that those machine classes (*x*large-ish) use Intel NICs with SR-IOV (ixgbe/ixv-ish) by default? See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html. Last time I tried, sriovNetSupport couldn’t be disabled after the AMI/VM was created, and I had to use the "aws ec2 register-image …" commands, because the AWS web console didn’t offer any way to create a machine without it...

Re: Xen virtual network (Netfront) driver

Reyk Floeter-2

> On 23.01.2016, at 12:12, Anders Berggren <[hidden email]> wrote:
>
>> On 06 Jan 2016, at 18:49, Reyk Floeter <[hidden email]> wrote:
>> - It didn't work on m4.10xlarge (see cvs:~reyk/dmesg.m4.10xlarge).
>
> I didn’t see any mention of it in the dmesg (https://gist.github.com/reyk/b372af303eb86bab3fee), but could it be that those machine classes (*x*large-ish) use Intel NICs with SR-IOV (ixgbe/ixv-ish) by default? See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html. Last time I tried, sriovNetSupport couldn’t be disabled after the AMI/VM was created, and I had to use the "aws ec2 register-image …" commands, because the AWS web console didn’t offer any way to create a machine without it...

No, you have to *enable* SR-IOV in the image.

Machines with the Intel NIC will not show any netfront in the device list via XenStore (just try Ubuntu).

Reyk

Re: Xen virtual network (Netfront) driver

Jonathon Sisson
On Sat, Jan 23, 2016 at 12:19:29PM +0100, Reyk Floeter wrote:
> No, you have to *enable* SR-IOV in the image.
>
> Machines with the Intel NIC will not show any netfront in the device list via XenStore (just try Ubuntu).
>
> Reyk

That's correct, but I think what was being pointed out is that
an instance with SRIOV enabled cannot have it *disabled* (i.e.
to switch back to xnf NICs).  I was able to get xnf operational
on a c3.large (enhanced networking-capable) by creating an instance
with CentOS and swapping the root volume out.  Any AMI constructed
on Amazon Linux or Ubuntu will have enhanced networking enabled
by default, whereas CentOS doesn't appear to have it enabled (unless
you manually enable it).


Re: Xen virtual network (Netfront) driver

Reyk Floeter-2

> On 23.01.2016, at 22:27, Jonathon Sisson <[hidden email]> wrote:
>
> On Sat, Jan 23, 2016 at 12:19:29PM +0100, Reyk Floeter wrote:
>> No, you have to *enable* SR-IOV in the image.
>>
>> Machines with the Intel NIC will not show any netfront in the device list via XenStore (just try Ubuntu).
>>
>> Reyk
>
> That's correct, but I think what was being pointed out is that
> an instance with SRIOV enabled cannot have it *disabled* (i.e.
> to switch back to xnf NICs).  I was able to get xnf operational
> on a c3.large (enhanced networking-capable) by creating an instance
> with CentOS and swapping the root volume out.  Any AMI constructed
> on Amazon Linux or Ubuntu will have enhanced networking enabled
> by default, whereas CentOS doesn't appear to have it enabled (unless
> you manually enable it).

Ah, OK.

I recommend uploading new images or using my public OpenBSD
images to bootstrap new AMIs.

The "dd from Linux" trick is just a hack if you don't want to install the
aws and ec2 cli tools - but we have ports now.

Reyk


Re: Xen virtual network (Netfront) driver

Jonathon Sisson
On Sat, Jan 23, 2016 at 10:57:21PM +0100, Reyk Floeter wrote:

>
> > On 23.01.2016, at 22:27, Jonathon Sisson <[hidden email]> wrote:
> >
> > On Sat, Jan 23, 2016 at 12:19:29PM +0100, Reyk Floeter wrote:
> >> No, you have to *enable* SR-IOV in the image.
> >>
> >> Machines with the Intel NIC will not show any netfront in the device list via XenStore (just try Ubuntu).
> >>
> >> Reyk
> >
> > That's correct, but I think what was being pointed out is that
> > an instance with SRIOV enabled cannot have it *disabled* (i.e.
> > to switch back to xnf NICs).  I was able to get xnf operational
> > on a c3.large (enhanced networking-capable) by creating an instance
> > with CentOS and swapping the root volume out.  Any AMI constructed
> > on Amazon Linux or Ubuntu will have enhanced networking enabled
> > by default, whereas CentOS doesn't appear to have it enabled (unless
> > you manually enable it).
>
> Ah, OK.
>
> I recommend uploading new images or using my public OpenBSD
> images to bootstrap new AMIs.
>
> The "dd from Linux" trick is just a hack if you don't want to install the
> aws and ec2 cli tools - but we have ports now.
>
> Reyk
>
Fair enough =)

I wasn't certain if the experimental images were considered ready
for testing.  I'll switch to using them for any other testing I do.

Speaking of testing, is there any particular area non-devs could
assist with at this time?  Gathering dmesgs for different instance
types?


Re: Xen virtual network (Netfront) driver

Jonathon Sisson
On Sat, Jan 23, 2016 at 02:18:17PM -0800, Jonathon Sisson wrote:
> Speaking of testing, is there any particular area non-devs could
> assist with at this time?  Gathering dmesgs for different instance
> types?
>
I decided to spin up one of each instance type and grab the console
output in case it would be beneficial to the ongoing work:

http://update.j3z.org/dmesg/c3.2xlarge_dmesg.txt
http://update.j3z.org/dmesg/c3.4xlarge_dmesg.txt
http://update.j3z.org/dmesg/c3.8xlarge_dmesg.txt
http://update.j3z.org/dmesg/c3.large_dmesg.txt
http://update.j3z.org/dmesg/c3.xlarge_dmesg.txt
http://update.j3z.org/dmesg/c4.2xlarge_dmesg.txt
http://update.j3z.org/dmesg/c4.4xlarge_dmesg.txt
http://update.j3z.org/dmesg/c4.8xlarge_dmesg.txt
http://update.j3z.org/dmesg/c4.large_dmesg.txt
http://update.j3z.org/dmesg/c4.xlarge_dmesg.txt
http://update.j3z.org/dmesg/d2.2xlarge_dmesg.txt
http://update.j3z.org/dmesg/d2.4xlarge_dmesg.txt
http://update.j3z.org/dmesg/d2.8xlarge_dmesg.txt
http://update.j3z.org/dmesg/d2.xlarge_dmesg.txt
http://update.j3z.org/dmesg/g2.2xlarge_dmesg.txt
http://update.j3z.org/dmesg/g2.8xlarge_dmesg.txt
http://update.j3z.org/dmesg/i2.2xlarge_dmesg.txt
http://update.j3z.org/dmesg/i2.4xlarge_dmesg.txt
http://update.j3z.org/dmesg/i2.8xlarge_dmesg.txt
http://update.j3z.org/dmesg/i2.xlarge_dmesg.txt
http://update.j3z.org/dmesg/m3.2xlarge_dmesg.txt
http://update.j3z.org/dmesg/m3.large_dmesg.txt
http://update.j3z.org/dmesg/m3.medium_dmesg.txt
http://update.j3z.org/dmesg/m3.xlarge_dmesg.txt
http://update.j3z.org/dmesg/m4.10xlarge_dmesg.txt
http://update.j3z.org/dmesg/m4.2xlarge_dmesg.txt
http://update.j3z.org/dmesg/m4.4xlarge_dmesg.txt
http://update.j3z.org/dmesg/m4.large_dmesg.txt
http://update.j3z.org/dmesg/m4.xlarge_dmesg.txt
http://update.j3z.org/dmesg/r3.2xlarge_dmesg.txt
http://update.j3z.org/dmesg/r3.4xlarge_dmesg.txt
http://update.j3z.org/dmesg/r3.8xlarge_dmesg.txt
http://update.j3z.org/dmesg/r3.large_dmesg.txt
http://update.j3z.org/dmesg/r3.xlarge_dmesg.txt
http://update.j3z.org/dmesg/t2.large_dmesg.txt
http://update.j3z.org/dmesg/t2.medium_dmesg.txt
http://update.j3z.org/dmesg/t2.micro_dmesg.txt
http://update.j3z.org/dmesg/t2.nano_dmesg.txt
http://update.j3z.org/dmesg/t2.small_dmesg.txt

If it is deemed helpful, I can keep them updated as
new AMIs come out.

Thanks!

-Jonathon


Re: Xen virtual network (Netfront) driver

Mike Belopuhov-5
Hi Jonathon,

Thanks a lot for taking the time to test this.

On 24 January 2016 at 06:49, Jonathon Sisson <[hidden email]> wrote:
> On Sat, Jan 23, 2016 at 02:18:17PM -0800, Jonathon Sisson wrote:
>> Speaking of testing, is there any particular area non-devs could
>> assist with at this time?  Gathering dmesgs for different instance
>> types?
>>

Trying newer kernels would be the most helpful.  I've just enabled TCP/UDP
checksum offloading in xnf on Friday and would welcome any network
tests.

> I decided to spin up one of each instance type and grab the console
> output in case it would be beneficial to the ongoing work:
>
> [39 dmesg links snipped; see the previous message]
>
> If it is deemed helpful, I can keep them updated as
> new AMIs come out.
>

It would be very interesting to see newer code run on these.

> Thanks!
>
> -Jonathon

Cheers,
Mike


Re: Xen virtual network (Netfront) driver

Jonathon Sisson
On Sun, Jan 24, 2016 at 02:16:37PM +0100, Mike Belopuhov wrote:
> Hi Jonathon,
>
> Thanks a lot for taking the time to test this.
>
No, thank you guys for all of the work you're doing to get
this working.  I'm just a user heh.
 
>
> Trying newer kernels would be the most helpful. I've just enabled tcp/udp
> checksum offloading in the xnf on Friday and would welcome any network
> tests.
>
I rebuilt from a source checkout earlier today, and after
rebooting into the new kernel I can't seem to get a DHCP lease.
I'm working on building userland to determine if there is
some issue with dhclient, but I haven't finished that step
yet.  Has anyone else noticed the DHCP issue?
 


Re: Xen virtual network (Netfront) driver

Mike Belopuhov-5
On 24 January 2016 at 20:55, Jonathon Sisson <[hidden email]> wrote:

> On Sun, Jan 24, 2016 at 02:16:37PM +0100, Mike Belopuhov wrote:
>> Hi Jonathon,
>>
>> Thanks a lot for taking the time to test this.
>>
> No, thank you guys for all of the work you're doing to get
> this working.  I'm just a user heh.
>
>>
>> Trying newer kernels would be the most helpful. I've just enabled tcp/udp
>> checksum offloading in the xnf on Friday and would welcome any network
>> tests.
>>
> I rebuilt from a source checkout earlier today, and after
> rebooting into the new kernel I can't seem to get a DHCP lease.
> I'm working on building userland to determine if there is
> some issue with dhclient, but I haven't finished that step
> yet.  Has anyone else noticed the DHCP issue?
>

I haven't seen that on my test box (not AWS), but maybe reverting
the minimum number of rx slots back to 32 can help?

http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/dev/pv/if_xnf.c.diff?r1=1.9&r2=1.10
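
For reference, the knob in question is the low watermark given to the
rxr machinery when the receive ring is brought up; a sketch assuming
the usual if_rxr_init(9) pattern, with the constants from the posted
patch (the interim value that got reverted isn't shown in this thread):

	/* XNF_RX_MIN is 32 and XNF_RX_DESC is 256 in the posted patch */
	if_rxr_init(&sc->sc_rx_slots, XNF_RX_MIN, XNF_RX_DESC);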


Re: Xen virtual network (Netfront) driver

Jonathon Sisson
On Sun, Jan 24, 2016 at 09:08:32PM +0100, Mike Belopuhov wrote:

> On 24 January 2016 at 20:55, Jonathon Sisson <[hidden email]> wrote:
> > On Sun, Jan 24, 2016 at 02:16:37PM +0100, Mike Belopuhov wrote:
> >> Hi Jonathon,
> >>
> >> Thanks a lot for taking the time to test this.
> >>
> > No, thank you guys for all of the work you're doing to get
> > this working.  I'm just a user heh.
> >
> >>
> >> Trying newer kernels would be the most helpful. I've just enabled tcp/udp
> >> checksum offloading in the xnf on Friday and would welcome any network
> >> tests.
> >>
> > I rebuilt from a source checkout earlier today, and after
> > rebooting into the new kernel I can't seem to get a DHCP lease.
> > I'm working on building userland to determine if there is
> > some issue with dhclient, but I haven't finished that step
> > yet.  Has anyone else noticed the DHCP issue?
> >
>
> I haven't seen that on my test box (not AWS), but maybe reverting
> the minimum number of rx slots back to 32 can help?
>
> http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/dev/pv/if_xnf.c.diff?r1=1.9&r2=1.10
>
Reverting to 32 fixed the DHCP issue.

I'll go ahead and get those dmesgs for you now =)

Thanks again!


Re: Xen virtual network (Netfront) driver

Jonathon Sisson
On Sun, Jan 24, 2016 at 01:22:20PM -0800, Jonathon Sisson wrote:

> On Sun, Jan 24, 2016 at 09:08:32PM +0100, Mike Belopuhov wrote:
> > I haven't seen that on my test box (not AWS), but maybe reverting
> > the minimum number of rx slots back to 32 can help?
> >
> > http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/dev/pv/if_xnf.c.diff?r1=1.9&r2=1.10
> >
> Reverting to 32 fixed the DHCP issue.
>
> I'll go ahead and get those dmesgs for you now =)
>
> Thanks again!
>
Mike,

A series of quick iperf tests showed the following:

iperf server:  iperf -s (Amazon Linux)
iperf client:  iperf -c $SRVIP -dt 300 -i 30

Amazon Linux <-> Amazon Linux (same AZ/VPC subnet)
~690 Mbit/s (m3.large instances)

OpenBSD-CURRENT <-> Amazon Linux (same AZ/VPC subnet)
~400 Mbit/s (m3.large instances again)

In each test, I used the same Amazon Linux instance as the server.

Since I was running bi-directional tests, I did notice that the
OpenBSD machine was slower accepting incoming traffic than
sending it:

[  4]  0.0-30.0 sec  1.48 GBytes   422 Mbits/sec
[  5]  0.0-30.0 sec  1.08 GBytes   310 Mbits/sec

I chose M3 due to the lack of support for SRIOV, so the Amazon
Linux instances wouldn't utilize it:

[root@ip-172-31-46-242 ~]# ethtool -i eth0 | grep driver
driver: vif

I'm gathering the dmesgs and will post links when I have them
uploaded.

-Jonathon


Re: Xen virtual network (Netfront) driver

Jonathon Sisson
tech@,

I've uploaded a few of the dmesgs gathered to dmesgd.nycbug.org:

http://dmesgd.nycbug.org/index.cgi?action=dmesgd&do=index&fts=Jonathon

Currently I have m4.10xlarge, c4.8xlarge, m3.medium, and t2.nano
uploaded for perusal.

I noticed some new output in the m4.10xlarge console output here:

starting network
DHCPDISCOVER on xnf0 - interval 3
DHCPDISCOVER on xnf0 - interval 5
xnf0: tx prod 2 cons 2,0 evt 3,1
DHCPDISCOVER on xnf0 - interval 8
xnf0: tx prod 3 cons 3,0 evt 4,1
DHCPDISCOVER on xnf0 - interval 10
xnf0: tx prod 4 cons 4,0 evt 5,1
DHCPDISCOVER on xnf0 - interval 15
xnf0: tx prod 5 cons 5,0 evt 6,1
DHCPDISCOVER on xnf0 - interval 20
xnf0: tx prod 6 cons 6,0 evt 7,1
No acceptable DHCPOFFERS received.
No working leases in persistent database - sleeping.

I'm not certain whether this is debug output put there intentionally
or some error condition.  At any rate, there it is =)

-Jonathon
