split ether_output up into resolution, encapsulation, and output

classic Classic list List threaded Threaded
5 messages Options
Reply | Threaded
Open this post in threaded view
|

split ether_output up into resolution, encapsulation, and output

David Gwynne-5
I have a plan to allow virtual interfaces (e.g., vlan, etherip, etc.) to
provide their own output functions so they can bypass the ifq machinery
and push the packet onto the underlying layer directly.

They'll still need to get an Ethernet header, though. vlan needs to get
the Ethernet header and put the vlan shim into it, so ether_resolve is
exposed. etherip doesn't need a shim; it just wants the payload
encapsulated in Ethernet before adding its own headers to the packet,
so there is ether_encap.

does this make sense?

ok?

Index: net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.253
diff -u -p -r1.253 if_ethersubr.c
--- net/if_ethersubr.c 13 Mar 2018 01:31:48 -0000 1.253
+++ net/if_ethersubr.c 30 Nov 2018 02:02:58 -0000
@@ -178,24 +178,18 @@ ether_rtrequest(struct ifnet *ifp, int r
  break;
  }
 }
-/*
- * Ethernet output routine.
- * Encapsulate a packet of type family for the local net.
- * Assumes that ifp is actually pointer to arpcom structure.
- */
+
 int
-ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
-    struct rtentry *rt)
+ether_resolve(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt, struct ether_header *eh)
 {
- u_int16_t etype;
- u_char edst[ETHER_ADDR_LEN];
- u_char *esrc;
- struct mbuf *mcopy = NULL;
- struct ether_header *eh;
  struct arpcom *ac = (struct arpcom *)ifp;
  sa_family_t af = dst->sa_family;
  int error = 0;
 
+ if (!ISSET(ifp->if_flags, IFF_RUNNING))
+ senderr(ENETDOWN);
+
  KASSERT(rt != NULL || ISSET(m->m_flags, M_MCAST|M_BCAST) ||
  af == AF_UNSPEC || af == pseudo_AF_HDRCMPLT);
 
@@ -207,28 +201,31 @@ ether_output(struct ifnet *ifp, struct m
  }
 #endif
 
- esrc = ac->ac_enaddr;
-
- if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
- senderr(ENETDOWN);
-
  switch (af) {
  case AF_INET:
- error = arpresolve(ifp, rt, m, dst, edst);
+ error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
  if (error)
- return (error == EAGAIN ? 0 : error);
+ return (error);
+ eh->ether_type = htons(ETHERTYPE_IP);
+
  /* If broadcasting on a simplex interface, loopback a copy */
- if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) &&
-    !m->m_pkthdr.pf.routed)
+ if (ISSET(m->m_flags, M_BCAST) &&
+    ISSET(ifp->if_flags, IFF_SIMPLEX) &&
+    !m->m_pkthdr.pf.routed) {
+ struct mbuf *mcopy;
+
+ /* XXX Should we input an unencrypted IPsec packet? */
  mcopy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
- etype = htons(ETHERTYPE_IP);
+ if (mcopy != NULL)
+ if_input_local(ifp, mcopy, af);
+ }
  break;
 #ifdef INET6
  case AF_INET6:
- error = nd6_resolve(ifp, rt, m, dst, edst);
+ error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
  if (error)
- return (error == EAGAIN ? 0 : error);
- etype = htons(ETHERTYPE_IPV6);
+ return (error);
+ eh->ether_type = htons(ETHERTYPE_IPV6);
  break;
 #endif
 #ifdef MPLS
@@ -242,72 +239,102 @@ ether_output(struct ifnet *ifp, struct m
  senderr(ENETUNREACH);
 
  switch (dst->sa_family) {
- case AF_LINK:
- if (satosdl(dst)->sdl_alen < sizeof(edst))
- senderr(EHOSTUNREACH);
- memcpy(edst, LLADDR(satosdl(dst)),
-    sizeof(edst));
- break;
+ case AF_LINK:
+ if (satosdl(dst)->sdl_alen < sizeof(eh->ether_dhost))
+ senderr(EHOSTUNREACH);
+ memcpy(eh->ether_dhost, LLADDR(satosdl(dst)),
+    sizeof(eh->ether_dhost));
+ break;
 #ifdef INET6
- case AF_INET6:
- error = nd6_resolve(ifp, rt, m, dst, edst);
- if (error)
- return (error == EAGAIN ? 0 : error);
- break;
+ case AF_INET6:
+ error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+ if (error)
+ return (error == EAGAIN ? 0 : error);
+ break;
 #endif
- case AF_INET:
- case AF_MPLS:
- error = arpresolve(ifp, rt, m, dst, edst);
- if (error)
- return (error == EAGAIN ? 0 : error);
- break;
- default:
- senderr(EHOSTUNREACH);
+ case AF_INET:
+ case AF_MPLS:
+ error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
+ if (error)
+ return (error == EAGAIN ? 0 : error);
+ break;
+ default:
+ senderr(EHOSTUNREACH);
  }
  /* XXX handling for simplex devices in case of M/BCAST ?? */
  if (m->m_flags & (M_BCAST | M_MCAST))
- etype = htons(ETHERTYPE_MPLS_MCAST);
+ eh->ether_type = htons(ETHERTYPE_MPLS_MCAST);
  else
- etype = htons(ETHERTYPE_MPLS);
+ eh->ether_type = htons(ETHERTYPE_MPLS);
  break;
 #endif /* MPLS */
  case pseudo_AF_HDRCMPLT:
- eh = (struct ether_header *)dst->sa_data;
- esrc = eh->ether_shost;
- /* FALLTHROUGH */
+ /* take the whole header from the sa */
+ memcpy(eh, dst->sa_data, sizeof(*eh));
+ return (0);
 
  case AF_UNSPEC:
- eh = (struct ether_header *)dst->sa_data;
- memcpy(edst, eh->ether_dhost, sizeof(edst));
- /* AF_UNSPEC doesn't swap the byte order of the ether_type. */
- etype = eh->ether_type;
+ /* take the dst and type from the sa, but get src below */
+ memcpy(eh, dst->sa_data, sizeof(*eh));
  break;
 
  default:
- printf("%s: can't handle af%d\n", ifp->if_xname,
- dst->sa_family);
+ printf("%s: can't handle af%d\n", ifp->if_xname, af);
  senderr(EAFNOSUPPORT);
  }
 
- /* XXX Should we feed-back an unencrypted IPsec packet ? */
- if (mcopy)
- if_input_local(ifp, mcopy, dst->sa_family);
+ memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
 
- M_PREPEND(m, sizeof(struct ether_header) + ETHER_ALIGN, M_DONTWAIT);
- if (m == NULL)
- return (ENOBUFS);
- m_adj(m, ETHER_ALIGN);
- eh = mtod(m, struct ether_header *);
- eh->ether_type = etype;
- memcpy(eh->ether_dhost, edst, sizeof(eh->ether_dhost));
- memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
+ return (0);
 
- return (if_enqueue(ifp, m));
 bad:
  m_freem(m);
  return (error);
 }
 
+struct mbuf*
+ether_encap(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt, int *errorp)
+{
+ struct ether_header eh;
+ int error;
+
+ error = ether_resolve(ifp, m, dst, rt, &eh);
+ switch (error) {
+ case 0:
+ break;
+ case EAGAIN:
+ error = 0;
+ default:
+ *errorp = error;
+ return (NULL);
+ }
+
+ m = m_prepend(m, ETHER_ALIGN + sizeof(eh), M_DONTWAIT);
+ if (m == NULL) {
+ *errorp = ENOBUFS;
+ return (NULL);
+ }
+
+ m_adj(m, ETHER_ALIGN);
+ memcpy(mtod(m, struct ether_header *), &eh, sizeof(eh));
+
+ return (m);
+}
+
+int
+ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+ int error;
+
+ m = ether_encap(ifp, m, dst, rt, &error);
+ if (m == NULL)
+ return (error);
+
+ return (if_enqueue(ifp, m));
+}
+
 /*
  * Process a received Ethernet packet;
  * the packet is in the mbuf chain m without
Index: netinet/if_ether.h
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.h,v
retrieving revision 1.73
diff -u -p -r1.73 if_ether.h
--- netinet/if_ether.h 29 Nov 2016 10:09:57 -0000 1.73
+++ netinet/if_ether.h 30 Nov 2018 02:02:58 -0000
@@ -240,8 +245,13 @@ void ether_ifattach(struct ifnet *);
 void ether_ifdetach(struct ifnet *);
 int ether_ioctl(struct ifnet *, struct arpcom *, u_long, caddr_t);
 int ether_input(struct ifnet *, struct mbuf *, void *);
-int ether_output(struct ifnet *,
-    struct mbuf *, struct sockaddr *, struct rtentry *);
+int ether_resolve(struct ifnet *, struct mbuf *, struct sockaddr *,
+    struct rtentry *, struct ether_header *);
+struct mbuf *
+ ether_encap(struct ifnet *, struct mbuf *, struct sockaddr *,
+    struct rtentry *, int *);
+int ether_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+    struct rtentry *);
 void ether_rtrequest(struct ifnet *, int, struct rtentry *);
 char *ether_sprintf(u_char *);
 

Reply | Threaded
Open this post in threaded view
|

Re: split ether_output up into resolution, encapsulation, and output

David Gwynne-5
On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote:

> i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to
> provide their own output functions so they can bypass the ifq machinery
> and push the packet onto the underlying layer directly.
>
> they'll still need to get an ethernet header though. vlan needs to get
> the ethernet header and put the vlan shim into it, therefore
> ether_resolve is exposed. etherip doesnt need a shim, it just wants
> ethernet encapsulating the payload before adding its own headers to the
> packet, therefore there is ether_encap.
>
> does this make sense?
>
> ok?

This diff shows vlan and etherip using the new functions.

Index: net/if_etherip.c
===================================================================
RCS file: /cvs/src/sys/net/if_etherip.c,v
retrieving revision 1.40
diff -u -p -r1.40 if_etherip.c
--- net/if_etherip.c 12 Nov 2018 23:57:06 -0000 1.40
+++ net/if_etherip.c 30 Nov 2018 02:24:09 -0000
@@ -102,7 +102,10 @@ void etheripattach(int);
 int etherip_clone_create(struct if_clone *, int);
 int etherip_clone_destroy(struct ifnet *);
 int etherip_ioctl(struct ifnet *, u_long, caddr_t);
-void etherip_start(struct ifnet *);
+int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+    struct rtentry *rt);
+void etherip_start(struct ifqueue *);
+void etherip_send(struct ifnet *, struct mbuf *);
 int etherip_media_change(struct ifnet *);
 void etherip_media_status(struct ifnet *, struct ifmediareq *);
 int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *);
@@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if
  ifp->if_softc = sc;
  ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
  ifp->if_ioctl = etherip_ioctl;
- ifp->if_start = etherip_start;
+ ifp->if_output = etherip_output;
+ ifp->if_qstart = etherip_start;
  ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_xflags = IFXF_CLONED;
+ ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED;
  IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
  ifp->if_capabilities = IFCAP_VLAN_MTU;
  ether_fakeaddr(ifp);
@@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp,
 }
 
 void
-etherip_start(struct ifnet *ifp)
+etherip_send(struct ifnet *ifp, struct mbuf *m)
 {
  struct etherip_softc *sc = ifp->if_softc;
- struct mbuf *m;
  int error;
-#if NBPFILTER > 0
- caddr_t if_bpf;
-#endif
 
- while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
 #if NBPFILTER > 0
- if_bpf = ifp->if_bpf;
- if (if_bpf)
- bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
+ caddr_t if_bpf = ifp->if_bpf;
+ if (if_bpf)
+ bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
 #endif
 
- switch (sc->sc_tunnel.t_af) {
- case AF_INET:
- error = ip_etherip_output(ifp, m);
- break;
+ switch (sc->sc_tunnel.t_af) {
+ case AF_INET:
+ error = ip_etherip_output(ifp, m);
+ break;
 #ifdef INET6
- case AF_INET6:
- error = ip6_etherip_output(ifp, m);
- break;
+ case AF_INET6:
+ error = ip6_etherip_output(ifp, m);
+ break;
 #endif
- default:
- /* unhandled_af(sc->sc_tunnel.t_af); */
- m_freem(m);
- continue;
- }
+ default:
+ /* unhandled_af(sc->sc_tunnel.t_af); */
+ m_freem(m);
+ error = ENETDOWN;
+ break;
+ }
 
- if (error)
- ifp->if_oerrors++;
+ if (error)
+ ifp->if_oerrors++;
+}
+
+void
+etherip_start(struct ifqueue *ifq)
+{
+ struct ifnet *ifp = ifq->ifq_if;
+ struct mbuf *m;
+
+ while ((m = ifq_dequeue(ifq)) != NULL)
+ etherip_send(ifp, m);
+}
+
+int
+etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+ int error;
+
+ m = ether_encap(ifp, m, dst, rt, &error);
+ if (m == NULL)
+ return (error);
+
+ if (ifq_is_priq(&ifp->if_snd)) {
+ etherip_send(ifp, m);
+ return (0);
  }
+
+ return (ifq_enqueue(&ifp->if_snd, m));
 }
 
 int
Index: net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.253
diff -u -p -r1.253 if_ethersubr.c
--- net/if_ethersubr.c 13 Mar 2018 01:31:48 -0000 1.253
+++ net/if_ethersubr.c 30 Nov 2018 02:24:09 -0000
@@ -483,7 +510,8 @@ ether_ifattach(struct ifnet *ifp)
  ifp->if_addrlen = ETHER_ADDR_LEN;
  ifp->if_hdrlen = ETHER_HDR_LEN;
  ifp->if_mtu = ETHERMTU;
- ifp->if_output = ether_output;
+ if (ifp->if_output == NULL)
+ ifp->if_output = ether_output;
  ifp->if_rtrequest = ether_rtrequest;
 
  if_ih_insert(ifp, ether_input, NULL);
Index: net/if_vlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.179
diff -u -p -r1.179 if_vlan.c
--- net/if_vlan.c 16 Nov 2018 08:43:08 -0000 1.179
+++ net/if_vlan.c 30 Nov 2018 02:24:09 -0000
@@ -58,6 +58,7 @@
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/rwlock.h>
+#include <sys/percpu.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
@@ -85,6 +86,8 @@ int vlan_clone_create(struct if_clone *,
 int vlan_clone_destroy(struct ifnet *);
 
 int vlan_input(struct ifnet *, struct mbuf *, void *);
+int vlan_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+    struct rtentry *rt);
 void vlan_start(struct ifqueue *ifq);
 int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
 
@@ -177,6 +180,7 @@ vlan_clone_create(struct if_clone *ifc,
 
  ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
  ifp->if_xflags = IFXF_CLONED|IFXF_MPSAFE;
+ ifp->if_output = vlan_output;
  ifp->if_qstart = vlan_start;
  ifp->if_ioctl = vlan_ioctl;
  ifp->if_hardmtu = 0xffff;
@@ -185,6 +189,8 @@ vlan_clone_create(struct if_clone *ifc,
  ether_ifattach(ifp);
  ifp->if_hdrlen = EVL_ENCAPLEN;
 
+ if_counters_alloc(ifp);
+
  return (0);
 }
 
@@ -239,6 +245,110 @@ vlan_mplstunnel(int ifidx)
 #endif
 }
 
+static uint16_t
+vlan_tag(const struct ifvlan *ifv, const struct mbuf *m)
+{
+ int txprio = ifv->ifv_prio;
+ uint16_t prio;
+
+ prio = (txprio == IF_HDRPRIO_PACKET) ?
+    m->m_pkthdr.pf.prio : txprio;
+
+ /* IEEE 802.1p has prio 0 and 1 swapped */
+ if (prio <= 1)
+ prio = !prio;
+
+ return (ifv->if_prio | (prio << EVL_PRIO_BITS));
+}
+
+int
+vlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+ struct ifvlan *ifv;
+ struct ifnet *ifp0;
+#if NBPFILTER > 0
+ caddr_t if_bpf;
+#endif
+ struct sockaddr vdst;
+ struct ether_header *eh = (struct ether_header *)vdst.sa_data;
+ int txprio;
+ uint16_t vtag;
+ int error;
+ unsigned int bytes;
+
+ if (!ifq_is_priq(&ifp->if_snd)) {
+ /*
+ * user wants to delay packets, which relies on the ifq
+ * machinery. fall back to if_enqueue via ether_output.
+ */
+ return (ether_output(ifp, m, dst, rt));
+ }
+
+ error = ether_resolve(ifp, m, dst, rt, eh);
+ switch (error) {
+ case 0:
+ break;
+ case EAGAIN:
+ return (0);
+ default:
+ return (error);
+ }
+
+ ifv = ifp->if_softc;
+ ifp0 = if_get(ifv->ifv_ifp0);
+ if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
+ m_freem(m);
+ error = ENETDOWN;
+ goto leave;
+ }
+
+#if NBPFILTER > 0
+ if_bpf = ifp->if_bpf;
+ if (if_bpf) {
+ bpf_mtap_hdr(if_bpf, (caddr_t)eh, sizeof(*eh), m,
+    BPF_DIRECTION_OUT, NULL);
+ }
+#endif
+
+ vtag = vlan_tag(ifv, m);
+
+ bytes = sizeof(*eh) + m->m_pkthdr.len; /* plus shim? */
+
+ /*
+ * If the underlying interface cannot do VLAN tag insertion
+ * itself, create an encapsulation header.
+ */
+ if (ISSET(ifp0->if_capabilities, IFCAP_VLAN_HWTAGGING) &&
+    ifv->ifv_type == ETHERTYPE_VLAN) {
+ m->m_pkthdr.ether_vtag = vtag;
+ m->m_flags |= M_VLANTAG;
+ } else {
+ struct ether_vlan_shim *evl;
+
+ M_PREPEND(m, sizeof(*evl), M_DONTWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto leave;
+ }
+
+ evl = mtod(m, struct ether_vlan_shim *);
+ evl->evl_tag = htons(vtag);
+ evl->evl_proto = eh->ether_type;
+
+ eh->ether_type = htons(ifv->ifv_type);
+ }
+
+ counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes, bytes);
+
+ vdst.sa_family = pseudo_AF_HDRCMPLT;
+ error = ifp0->if_output(ifp0, m, &vdst, NULL);
+
+leave:
+ if_put(ifp0);
+ return (error);
+}
+
 void
 vlan_start(struct ifqueue *ifq)
 {
@@ -246,31 +356,22 @@ vlan_start(struct ifqueue *ifq)
  struct ifvlan   *ifv;
  struct ifnet *ifp0;
  struct mbuf *m;
- int txprio;
- uint8_t prio;
+ uint16_t vtag;
 
  ifv = ifp->if_softc;
  ifp0 = if_get(ifv->ifv_ifp0);
- if (ifp0 == NULL || (ifp0->if_flags & (IFF_UP|IFF_RUNNING)) !=
-    (IFF_UP|IFF_RUNNING)) {
+ if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
  ifq_purge(ifq);
  goto leave;
  }
 
- txprio = ifv->ifv_prio;
-
  while ((m = ifq_dequeue(ifq)) != NULL) {
 #if NBPFILTER > 0
  if (ifp->if_bpf)
  bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 #endif /* NBPFILTER > 0 */
 
- prio = (txprio == IF_HDRPRIO_PACKET) ?
-    m->m_pkthdr.pf.prio : txprio;
-
- /* IEEE 802.1p has prio 0 and 1 swapped */
- if (prio <= 1)
- prio = !prio;
+ vtag = vlan_tag(ifv, m);
 
  /*
  * If this packet came from a pseudowire it means it already
@@ -285,23 +386,17 @@ vlan_start(struct ifqueue *ifq)
  */
  } else if ((ifp0->if_capabilities & IFCAP_VLAN_HWTAGGING) &&
     (ifv->ifv_type == ETHERTYPE_VLAN)) {
- m->m_pkthdr.ether_vtag = ifv->ifv_tag +
-    (prio << EVL_PRIO_BITS);
+ m->m_pkthdr.ether_vtag = vtag;
  m->m_flags |= M_VLANTAG;
  } else {
- m = vlan_inject(m, ifv->ifv_type, ifv->ifv_tag |
-    (prio << EVL_PRIO_BITS));
+ m = vlan_inject(m, ifv->ifv_type, vtag);
  if (m == NULL) {
- ifp->if_oerrors++;
+ ifq->ifq_oerrors++;
  continue;
  }
  }
 
- if (if_enqueue(ifp0, m)) {
- ifp->if_oerrors++;
- ifq->ifq_errors++;
- continue;
- }
+ if_enqueue(ifp0, m);
  }
 
 leave:
Index: net/ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
retrieving revision 1.20
diff -u -p -r1.20 ifq.h
--- net/ifq.h 4 Jan 2018 11:02:57 -0000 1.20
+++ net/ifq.h 30 Nov 2018 02:24:09 -0000
@@ -421,6 +422,7 @@ void ifq_barrier(struct ifqueue *);
 #define ifq_len(_ifq) ((_ifq)->ifq_len)
 #define ifq_empty(_ifq) (ifq_len(_ifq) == 0)
 #define ifq_set_maxlen(_ifq, _l) ((_ifq)->ifq_maxlen = (_l))
+#define ifq_is_priq(_ifq) ((_ifq)->ifq_ops == ifq_priq_ops)
 
 static inline void
 ifq_set_oactive(struct ifqueue *ifq)
Index: netinet/if_ether.h
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.h,v
retrieving revision 1.73
diff -u -p -r1.73 if_ether.h
--- netinet/if_ether.h 29 Nov 2016 10:09:57 -0000 1.73
+++ netinet/if_ether.h 30 Nov 2018 02:24:09 -0000
@@ -92,6 +92,11 @@ struct  ether_vlan_header {
         u_int16_t evl_proto;
 };
 
+struct ether_vlan_shim {
+ u_int16_t evl_tag;
+ u_int16_t evl_proto;
+};
+
 #define EVL_VLID_MASK 0xFFF
 #define EVL_VLID_NULL 0x000
 /* 0x000 and 0xfff are reserved */
Index: net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.90
diff -u -p -r1.90 if_var.h
--- net/if_var.h 10 Sep 2018 16:18:34 -0000 1.90
+++ net/if_var.h 30 Nov 2018 02:24:09 -0000
@@ -76,6 +76,7 @@
 struct rtentry;
 struct ifnet;
 struct task;
+struct cpumem;
 
 /*
  * Structure describing a `cloning' interface.
@@ -144,6 +145,7 @@ struct ifnet { /* and the entries */
  unsigned short if_flags; /* [N] up/down, broadcast, etc. */
  int if_xflags; /* [N] extra softnet flags */
  struct if_data if_data; /* stats and other data about if */
+ struct cpumem *if_counters; /* per cpu stats */
  uint32_t if_hardmtu; /* [d] maximum MTU device supports */
  char if_description[IFDESCRSIZE]; /* [c] interface description */
  u_short if_rtlabelid; /* [c] next route label */
@@ -202,6 +204,23 @@ struct ifnet { /* and the entries */
 #define if_capabilities if_data.ifi_capabilities
 #define if_rdomain if_data.ifi_rdomain
 
+enum if_counters {
+ ifc_ipackets, /* packets received on interface */
+ ifc_ierrors, /* input errors on interface */
+ ifc_opackets, /* packets sent on interface */
+ ifc_oerrors, /* output errors on interface */
+ ifc_collisions, /* collisions on csma interfaces */
+ ifc_ibytes, /* total number of octets received */
+ ifc_obytes, /* total number of octets sent */
+ ifc_imcasts, /* packets received via multicast */
+ ifc_omcasts, /* packets sent via multicast */
+ ifc_iqdrops, /* dropped on input, this interface */
+ ifc_oqdrops, /* dropped on output, this interface */
+ ifc_noproto, /* destined for unsupported protocol */
+
+ ifc_ncounters
+};
+
 /*
  * The ifaddr structure contains information about one address
  * of an interface.  They are maintained by the different address families,
@@ -356,6 +375,9 @@ u_int if_rxr_get(struct if_rxring *, u_i
 int if_rxr_info_ioctl(struct if_rxrinfo *, u_int, struct if_rxring_info *);
 int if_rxr_ioctl(struct if_rxrinfo *, const char *, u_int,
     struct if_rxring *);
+
+void if_counters_alloc(struct ifnet *);
+void if_counters_free(struct ifnet *);
 
 #endif /* _KERNEL */
 
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.568
diff -u -p -r1.568 if.c
--- net/if.c 29 Nov 2018 00:11:49 -0000 1.568
+++ net/if.c 30 Nov 2018 02:24:09 -0000
@@ -84,6 +84,7 @@
 #include <sys/domain.h>
 #include <sys/task.h>
 #include <sys/atomic.h>
+#include <sys/percpu.h>
 #include <sys/proc.h>
 
 #include <dev/rndvar.h>
@@ -1103,6 +1104,9 @@ if_detach(struct ifnet *ifp)
  splx(s);
  NET_UNLOCK();
 
+ if (ifp->if_counters != NULL)
+ if_counters_free(ifp);
+
  for (i = 0; i < ifp->if_nifqs; i++)
  ifq_destroy(ifp->if_ifqs[i]);
  if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
@@ -2362,11 +2366,47 @@ ifconf(caddr_t data)
 }
 
 void
+if_counters_alloc(struct ifnet *ifp)
+{
+ KASSERT(ifp->if_counters == NULL);
+
+ ifp->if_counters = counters_alloc(ifc_ncounters);
+}
+
+void
+if_counters_free(struct ifnet *ifp)
+{
+ KASSERT(ifp->if_counters != NULL);
+
+ counters_free(ifp->if_counters, ifc_ncounters);
+ ifp->if_counters = NULL;
+}
+
+void
 if_getdata(struct ifnet *ifp, struct if_data *data)
 {
  unsigned int i;
 
  *data = ifp->if_data;
+
+ if (ifp->if_counters != NULL) {
+ uint64_t counters[ifc_ncounters];
+
+ counters_read(ifp->if_counters, counters, nitems(counters));
+
+ data->ifi_ipackets += counters[ifc_ipackets];
+ data->ifi_ierrors += counters[ifc_ierrors];
+ data->ifi_opackets += counters[ifc_opackets];
+ data->ifi_oerrors += counters[ifc_oerrors];
+ data->ifi_collisions += counters[ifc_collisions];
+ data->ifi_ibytes += counters[ifc_ibytes];
+ data->ifi_obytes += counters[ifc_obytes];
+ data->ifi_imcasts += counters[ifc_imcasts];
+ data->ifi_omcasts += counters[ifc_omcasts];
+ data->ifi_iqdrops += counters[ifc_iqdrops];
+ data->ifi_oqdrops += counters[ifc_oqdrops];
+ data->ifi_noproto += counters[ifc_noproto];
+ }
 
  for (i = 0; i < ifp->if_nifqs; i++) {
  struct ifqueue *ifq = ifp->if_ifqs[i];

Reply | Threaded
Open this post in threaded view
|

Re: split ether_output up into resolution, encapsulation, and output

Martin Pieuchot
On 30/11/18(Fri) 12:35, David Gwynne wrote:

> On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote:
> > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to
> > provide their own output functions so they can bypass the ifq machinery
> > and push the packet onto the underlying layer directly.
> >
> > they'll still need to get an ethernet header though. vlan needs to get
> > the ethernet header and put the vlan shim into it, therefore
> > ether_resolve is exposed. etherip doesnt need a shim, it just wants
> > ethernet encapsulating the payload before adding its own headers to the
> > packet, therefore there is ether_encap.
> >
> > does this make sense?
> >
> > ok?
>
> this shows vlan and etherip using the new functions.

You're still using ifq_enqueue() and ifq_dequeue(), so I'm not sure I
understand what the gain of this change is.

> Index: net/if_etherip.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_etherip.c,v
> retrieving revision 1.40
> diff -u -p -r1.40 if_etherip.c
> --- net/if_etherip.c 12 Nov 2018 23:57:06 -0000 1.40
> +++ net/if_etherip.c 30 Nov 2018 02:24:09 -0000
> @@ -102,7 +102,10 @@ void etheripattach(int);
>  int etherip_clone_create(struct if_clone *, int);
>  int etherip_clone_destroy(struct ifnet *);
>  int etherip_ioctl(struct ifnet *, u_long, caddr_t);
> -void etherip_start(struct ifnet *);
> +int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> +    struct rtentry *rt);
> +void etherip_start(struct ifqueue *);
> +void etherip_send(struct ifnet *, struct mbuf *);
>  int etherip_media_change(struct ifnet *);
>  void etherip_media_status(struct ifnet *, struct ifmediareq *);
>  int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *);
> @@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if
>   ifp->if_softc = sc;
>   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
>   ifp->if_ioctl = etherip_ioctl;
> - ifp->if_start = etherip_start;
> + ifp->if_output = etherip_output;
> + ifp->if_qstart = etherip_start;
>   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> - ifp->if_xflags = IFXF_CLONED;
> + ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED;
>   IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
>   ether_fakeaddr(ifp);
> @@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp,
>  }
>  
>  void
> -etherip_start(struct ifnet *ifp)
> +etherip_send(struct ifnet *ifp, struct mbuf *m)
>  {
>   struct etherip_softc *sc = ifp->if_softc;
> - struct mbuf *m;
>   int error;
> -#if NBPFILTER > 0
> - caddr_t if_bpf;
> -#endif
>  
> - while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
>  #if NBPFILTER > 0
> - if_bpf = ifp->if_bpf;
> - if (if_bpf)
> - bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
> + caddr_t if_bpf = ifp->if_bpf;
> + if (if_bpf)
> + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
>  #endif
>  
> - switch (sc->sc_tunnel.t_af) {
> - case AF_INET:
> - error = ip_etherip_output(ifp, m);
> - break;
> + switch (sc->sc_tunnel.t_af) {
> + case AF_INET:
> + error = ip_etherip_output(ifp, m);
> + break;
>  #ifdef INET6
> - case AF_INET6:
> - error = ip6_etherip_output(ifp, m);
> - break;
> + case AF_INET6:
> + error = ip6_etherip_output(ifp, m);
> + break;
>  #endif
> - default:
> - /* unhandled_af(sc->sc_tunnel.t_af); */
> - m_freem(m);
> - continue;
> - }
> + default:
> + /* unhandled_af(sc->sc_tunnel.t_af); */
> + m_freem(m);
> + error = ENETDOWN;
> + break;
> + }
>  
> - if (error)
> - ifp->if_oerrors++;
> + if (error)
> + ifp->if_oerrors++;
> +}
> +
> +void
> +etherip_start(struct ifqueue *ifq)
> +{
> + struct ifnet *ifp = ifq->ifq_if;
> + struct mbuf *m;
> +
> + while ((m = ifq_dequeue(ifq)) != NULL)
> + etherip_send(ifp, m);
> +}
> +
> +int
> +etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> +    struct rtentry *rt)
> +{
> + int error;
> +
> + m = ether_encap(ifp, m, dst, rt, &error);
> + if (m == NULL)
> + return (error);
> +
> + if (ifq_is_priq(&ifp->if_snd)) {
> + etherip_send(ifp, m);
> + return (0);
>   }
> +
> + return (ifq_enqueue(&ifp->if_snd, m));
>  }
>  
>  int
> Index: net/if_ethersubr.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.253
> diff -u -p -r1.253 if_ethersubr.c
> --- net/if_ethersubr.c 13 Mar 2018 01:31:48 -0000 1.253
> +++ net/if_ethersubr.c 30 Nov 2018 02:24:09 -0000
> @@ -483,7 +510,8 @@ ether_ifattach(struct ifnet *ifp)
>   ifp->if_addrlen = ETHER_ADDR_LEN;
>   ifp->if_hdrlen = ETHER_HDR_LEN;
>   ifp->if_mtu = ETHERMTU;
> - ifp->if_output = ether_output;
> + if (ifp->if_output == NULL)
> + ifp->if_output = ether_output;
>   ifp->if_rtrequest = ether_rtrequest;
>  
>   if_ih_insert(ifp, ether_input, NULL);
> Index: net/if_vlan.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_vlan.c,v
> retrieving revision 1.179
> diff -u -p -r1.179 if_vlan.c
> --- net/if_vlan.c 16 Nov 2018 08:43:08 -0000 1.179
> +++ net/if_vlan.c 30 Nov 2018 02:24:09 -0000
> @@ -58,6 +58,7 @@
>  #include <sys/sockio.h>
>  #include <sys/systm.h>
>  #include <sys/rwlock.h>
> +#include <sys/percpu.h>
>  
>  #include <net/if.h>
>  #include <net/if_dl.h>
> @@ -85,6 +86,8 @@ int vlan_clone_create(struct if_clone *,
>  int vlan_clone_destroy(struct ifnet *);
>  
>  int vlan_input(struct ifnet *, struct mbuf *, void *);
> +int vlan_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> +    struct rtentry *rt);
>  void vlan_start(struct ifqueue *ifq);
>  int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
>  
> @@ -177,6 +180,7 @@ vlan_clone_create(struct if_clone *ifc,
>  
>   ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
>   ifp->if_xflags = IFXF_CLONED|IFXF_MPSAFE;
> + ifp->if_output = vlan_output;
>   ifp->if_qstart = vlan_start;
>   ifp->if_ioctl = vlan_ioctl;
>   ifp->if_hardmtu = 0xffff;
> @@ -185,6 +189,8 @@ vlan_clone_create(struct if_clone *ifc,
>   ether_ifattach(ifp);
>   ifp->if_hdrlen = EVL_ENCAPLEN;
>  
> + if_counters_alloc(ifp);
> +
>   return (0);
>  }
>  
> @@ -239,6 +245,110 @@ vlan_mplstunnel(int ifidx)
>  #endif
>  }
>  
> +static uint16_t
> +vlan_tag(const struct ifvlan *ifv, const struct mbuf *m)
> +{
> + int txprio = ifv->ifv_prio;
> + uint16_t prio;
> +
> + prio = (txprio == IF_HDRPRIO_PACKET) ?
> +    m->m_pkthdr.pf.prio : txprio;
> +
> + /* IEEE 802.1p has prio 0 and 1 swapped */
> + if (prio <= 1)
> + prio = !prio;
> +
> + return (ifv->if_prio | (prio << EVL_PRIO_BITS));
> +}
> +
> +int
> +vlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> +    struct rtentry *rt)
> +{
> + struct ifvlan *ifv;
> + struct ifnet *ifp0;
> +#if NBPFILTER > 0
> + caddr_t if_bpf;
> +#endif
> + struct sockaddr vdst;
> + struct ether_header *eh = (struct ether_header *)vdst.sa_data;
> + int txprio;
> + uint16_t vtag;
> + int error;
> + unsigned int bytes;
> +
> + if (!ifq_is_priq(&ifp->if_snd)) {
> + /*
> + * user wants to delay packets, which relies on the ifq
> + * machinery. fall back to if_enqueue via ether_output.
> + */
> + return (ether_output(ifp, m, dst, rt));
> + }
> +
> + error = ether_resolve(ifp, m, dst, rt, eh);
> + switch (error) {
> + case 0:
> + break;
> + case EAGAIN:
> + return (0);
> + default:
> + return (error);
> + }
> +
> + ifv = ifp->if_softc;
> + ifp0 = if_get(ifv->ifv_ifp0);
> + if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
> + m_freem(m);
> + error = ENETDOWN;
> + goto leave;
> + }
> +
> +#if NBPFILTER > 0
> + if_bpf = ifp->if_bpf;
> + if (if_bpf) {
> + bpf_mtap_hdr(if_bpf, (caddr_t)eh, sizeof(*eh), m,
> +    BPF_DIRECTION_OUT, NULL);
> + }
> +#endif
> +
> + vtag = vlan_tag(ifv, m);
> +
> + bytes = sizeof(*eh) + m->m_pkthdr.len; /* plus shim? */
> +
> + /*
> + * If the underlying interface cannot do VLAN tag insertion
> + * itself, create an encapsulation header.
> + */
> + if (ISSET(ifp0->if_capabilities, IFCAP_VLAN_HWTAGGING) &&
> +    ifv->ifv_type == ETHERTYPE_VLAN) {
> + m->m_pkthdr.ether_vtag = vtag;
> + m->m_flags |= M_VLANTAG;
> + } else {
> + struct ether_vlan_shim *evl;
> +
> + M_PREPEND(m, sizeof(*evl), M_DONTWAIT);
> + if (m == NULL) {
> + error = ENOBUFS;
> + goto leave;
> + }
> +
> + evl = mtod(m, struct ether_vlan_shim *);
> + evl->evl_tag = htons(vtag);
> + evl->evl_proto = eh->ether_type;
> +
> + eh->ether_type = htons(ifv->ifv_type);
> + }
> +
> + counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes, bytes);
> +
> + vdst.sa_family = pseudo_AF_HDRCMPLT;
> + error = ifp0->if_output(ifp0, m, &vdst, NULL);
> +
> +leave:
> + if_put(ifp0);
> + return (error);
> +}
> +
>  void
>  vlan_start(struct ifqueue *ifq)
>  {
> @@ -246,31 +356,22 @@ vlan_start(struct ifqueue *ifq)
>   struct ifvlan   *ifv;
>   struct ifnet *ifp0;
>   struct mbuf *m;
> - int txprio;
> - uint8_t prio;
> + uint16_t vtag;
>  
>   ifv = ifp->if_softc;
>   ifp0 = if_get(ifv->ifv_ifp0);
> - if (ifp0 == NULL || (ifp0->if_flags & (IFF_UP|IFF_RUNNING)) !=
> -    (IFF_UP|IFF_RUNNING)) {
> + if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
>   ifq_purge(ifq);
>   goto leave;
>   }
>  
> - txprio = ifv->ifv_prio;
> -
>   while ((m = ifq_dequeue(ifq)) != NULL) {
>  #if NBPFILTER > 0
>   if (ifp->if_bpf)
>   bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
>  #endif /* NBPFILTER > 0 */
>  
> - prio = (txprio == IF_HDRPRIO_PACKET) ?
> -    m->m_pkthdr.pf.prio : txprio;
> -
> - /* IEEE 802.1p has prio 0 and 1 swapped */
> - if (prio <= 1)
> - prio = !prio;
> + vtag = vlan_tag(ifv, m);
>  
>   /*
>   * If this packet came from a pseudowire it means it already
> @@ -285,23 +386,17 @@ vlan_start(struct ifqueue *ifq)
>   */
>   } else if ((ifp0->if_capabilities & IFCAP_VLAN_HWTAGGING) &&
>      (ifv->ifv_type == ETHERTYPE_VLAN)) {
> - m->m_pkthdr.ether_vtag = ifv->ifv_tag +
> -    (prio << EVL_PRIO_BITS);
> + m->m_pkthdr.ether_vtag = vtag;
>   m->m_flags |= M_VLANTAG;
>   } else {
> - m = vlan_inject(m, ifv->ifv_type, ifv->ifv_tag |
> -    (prio << EVL_PRIO_BITS));
> + m = vlan_inject(m, ifv->ifv_type, vtag);
>   if (m == NULL) {
> - ifp->if_oerrors++;
> + ifq->ifq_oerrors++;
>   continue;
>   }
>   }
>  
> - if (if_enqueue(ifp0, m)) {
> - ifp->if_oerrors++;
> - ifq->ifq_errors++;
> - continue;
> - }
> + if_enqueue(ifp0, m);
>   }
>  
>  leave:
> Index: net/ifq.h
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.h,v
> retrieving revision 1.20
> diff -u -p -r1.20 ifq.h
> --- net/ifq.h 4 Jan 2018 11:02:57 -0000 1.20
> +++ net/ifq.h 30 Nov 2018 02:24:09 -0000
> @@ -421,6 +422,7 @@ void ifq_barrier(struct ifqueue *);
>  #define ifq_len(_ifq) ((_ifq)->ifq_len)
>  #define ifq_empty(_ifq) (ifq_len(_ifq) == 0)
>  #define ifq_set_maxlen(_ifq, _l) ((_ifq)->ifq_maxlen = (_l))
> +#define ifq_is_priq(_ifq) ((_ifq)->ifq_ops == ifq_priq_ops)
>  
>  static inline void
>  ifq_set_oactive(struct ifqueue *ifq)
> Index: netinet/if_ether.h
> ===================================================================
> RCS file: /cvs/src/sys/netinet/if_ether.h,v
> retrieving revision 1.73
> diff -u -p -r1.73 if_ether.h
> --- netinet/if_ether.h 29 Nov 2016 10:09:57 -0000 1.73
> +++ netinet/if_ether.h 30 Nov 2018 02:24:09 -0000
> @@ -92,6 +92,11 @@ struct  ether_vlan_header {
>          u_int16_t evl_proto;
>  };
>  
> +struct ether_vlan_shim {
> + u_int16_t evl_tag;
> + u_int16_t evl_proto;
> +};
> +
>  #define EVL_VLID_MASK 0xFFF
>  #define EVL_VLID_NULL 0x000
>  /* 0x000 and 0xfff are reserved */
> Index: net/if_var.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_var.h,v
> retrieving revision 1.90
> diff -u -p -r1.90 if_var.h
> --- net/if_var.h 10 Sep 2018 16:18:34 -0000 1.90
> +++ net/if_var.h 30 Nov 2018 02:24:09 -0000
> @@ -76,6 +76,7 @@
>  struct rtentry;
>  struct ifnet;
>  struct task;
> +struct cpumem;
>  
>  /*
>   * Structure describing a `cloning' interface.
> @@ -144,6 +145,7 @@ struct ifnet { /* and the entries */
>   unsigned short if_flags; /* [N] up/down, broadcast, etc. */
>   int if_xflags; /* [N] extra softnet flags */
>   struct if_data if_data; /* stats and other data about if */
> + struct cpumem *if_counters; /* per cpu stats */
>   uint32_t if_hardmtu; /* [d] maximum MTU device supports */
>   char if_description[IFDESCRSIZE]; /* [c] interface description */
>   u_short if_rtlabelid; /* [c] next route label */
> @@ -202,6 +204,23 @@ struct ifnet { /* and the entries */
>  #define if_capabilities if_data.ifi_capabilities
>  #define if_rdomain if_data.ifi_rdomain
>  
> +enum if_counters {
> + ifc_ipackets, /* packets received on interface */
> + ifc_ierrors, /* input errors on interface */
> + ifc_opackets, /* packets sent on interface */
> + ifc_oerrors, /* output errors on interface */
> + ifc_collisions, /* collisions on csma interfaces */
> + ifc_ibytes, /* total number of octets received */
> + ifc_obytes, /* total number of octets sent */
> + ifc_imcasts, /* packets received via multicast */
> + ifc_omcasts, /* packets sent via multicast */
> + ifc_iqdrops, /* dropped on input, this interface */
> + ifc_oqdrops, /* dropped on output, this interface */
> + ifc_noproto, /* destined for unsupported protocol */
> +
> + ifc_ncounters
> +};
> +
>  /*
>   * The ifaddr structure contains information about one address
>   * of an interface.  They are maintained by the different address families,
> @@ -356,6 +375,9 @@ u_int if_rxr_get(struct if_rxring *, u_i
>  int if_rxr_info_ioctl(struct if_rxrinfo *, u_int, struct if_rxring_info *);
>  int if_rxr_ioctl(struct if_rxrinfo *, const char *, u_int,
>      struct if_rxring *);
> +
> +void if_counters_alloc(struct ifnet *);
> +void if_counters_free(struct ifnet *);
>  
>  #endif /* _KERNEL */
>  
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.568
> diff -u -p -r1.568 if.c
> --- net/if.c 29 Nov 2018 00:11:49 -0000 1.568
> +++ net/if.c 30 Nov 2018 02:24:09 -0000
> @@ -84,6 +84,7 @@
>  #include <sys/domain.h>
>  #include <sys/task.h>
>  #include <sys/atomic.h>
> +#include <sys/percpu.h>
>  #include <sys/proc.h>
>  
>  #include <dev/rndvar.h>
> @@ -1103,6 +1104,9 @@ if_detach(struct ifnet *ifp)
>   splx(s);
>   NET_UNLOCK();
>  
> + if (ifp->if_counters != NULL)
> + if_counters_free(ifp);
> +
>   for (i = 0; i < ifp->if_nifqs; i++)
>   ifq_destroy(ifp->if_ifqs[i]);
>   if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
> @@ -2362,11 +2366,47 @@ ifconf(caddr_t data)
>  }
>  
>  void
> +if_counters_alloc(struct ifnet *ifp)
> +{
> + KASSERT(ifp->if_counters == NULL);
> +
> + ifp->if_counters = counters_alloc(ifc_ncounters);
> +}
> +
> +void
> +if_counters_free(struct ifnet *ifp)
> +{
> + KASSERT(ifp->if_counters != NULL);
> +
> + counters_free(ifp->if_counters, ifc_ncounters);
> + ifp->if_counters = NULL;
> +}
> +
> +void
>  if_getdata(struct ifnet *ifp, struct if_data *data)
>  {
>   unsigned int i;
>  
>   *data = ifp->if_data;
> +
> + if (ifp->if_counters != NULL) {
> + uint64_t counters[ifc_ncounters];
> +
> + counters_read(ifp->if_counters, counters, nitems(counters));
> +
> + data->ifi_ipackets += counters[ifc_ipackets];
> + data->ifi_ierrors += counters[ifc_ierrors];
> + data->ifi_opackets += counters[ifc_opackets];
> + data->ifi_oerrors += counters[ifc_oerrors];
> + data->ifi_collisions += counters[ifc_collisions];
> + data->ifi_ibytes += counters[ifc_ibytes];
> + data->ifi_obytes += counters[ifc_obytes];
> + data->ifi_imcasts += counters[ifc_imcasts];
> + data->ifi_omcasts += counters[ifc_omcasts];
> + data->ifi_iqdrops += counters[ifc_iqdrops];
> + data->ifi_oqdrops += counters[ifc_oqdrops];
> + data->ifi_noproto += counters[ifc_noproto];
> + }
>  
>   for (i = 0; i < ifp->if_nifqs; i++) {
>   struct ifqueue *ifq = ifp->if_ifqs[i];
>

Reply | Threaded
Open this post in threaded view
|

Re: split ether_output up into resolution, encapsulation, and output

Claudio Jeker
On Fri, Nov 30, 2018 at 02:04:40PM -0200, Martin Pieuchot wrote:

> On 30/11/18(Fri) 12:35, David Gwynne wrote:
> > On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote:
> > > i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to
> > > provide their own output functions so they can bypass the ifq machinery
> > > and push the packet onto the underlying layer directly.
> > >
> > > they'll still need to get an ethernet header though. vlan needs to get
> > > the ethernet header and put the vlan shim into it, therefore
> > > ether_resolve is exposed. etherip doesnt need a shim, it just wants
> > > ethernet encapsulating the payload before adding its own headers to the
> > > packet, therefore there is ether_encap.
> > >
> > > does this make sense?
> > >
> > > ok?
> >
> > this shows vlan and etherip using the new functions.
>
> You're still using ifq_enqueue() and ifq_dequeue(), so I'm not sure I
> understand what the gain of this change is.

If I understood the change correctly, it will bypass queueing when no HFSC
queueing is configured on that interface. If there is no traffic shaping
in use, the if_output function will directly call the next if_output
function or, in the case of etherip / gre / gif, it will call ip_send.

IMO this is a step in the right direction.
 

> > Index: net/if_etherip.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_etherip.c,v
> > retrieving revision 1.40
> > diff -u -p -r1.40 if_etherip.c
> > --- net/if_etherip.c 12 Nov 2018 23:57:06 -0000 1.40
> > +++ net/if_etherip.c 30 Nov 2018 02:24:09 -0000
> > @@ -102,7 +102,10 @@ void etheripattach(int);
> >  int etherip_clone_create(struct if_clone *, int);
> >  int etherip_clone_destroy(struct ifnet *);
> >  int etherip_ioctl(struct ifnet *, u_long, caddr_t);
> > -void etherip_start(struct ifnet *);
> > +int etherip_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> > +    struct rtentry *rt);
> > +void etherip_start(struct ifqueue *);
> > +void etherip_send(struct ifnet *, struct mbuf *);
> >  int etherip_media_change(struct ifnet *);
> >  void etherip_media_status(struct ifnet *, struct ifmediareq *);
> >  int etherip_set_tunnel(struct etherip_softc *, struct if_laddrreq *);
> > @@ -144,9 +147,10 @@ etherip_clone_create(struct if_clone *if
> >   ifp->if_softc = sc;
> >   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
> >   ifp->if_ioctl = etherip_ioctl;
> > - ifp->if_start = etherip_start;
> > + ifp->if_output = etherip_output;
> > + ifp->if_qstart = etherip_start;
> >   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> > - ifp->if_xflags = IFXF_CLONED;
> > + ifp->if_xflags = IFXF_MPSAFE | IFXF_CLONED;
> >   IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
> >   ifp->if_capabilities = IFCAP_VLAN_MTU;
> >   ether_fakeaddr(ifp);
> > @@ -201,40 +205,63 @@ etherip_media_status(struct ifnet *ifp,
> >  }
> >  
> >  void
> > -etherip_start(struct ifnet *ifp)
> > +etherip_send(struct ifnet *ifp, struct mbuf *m)
> >  {
> >   struct etherip_softc *sc = ifp->if_softc;
> > - struct mbuf *m;
> >   int error;
> > -#if NBPFILTER > 0
> > - caddr_t if_bpf;
> > -#endif
> >  
> > - while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
> >  #if NBPFILTER > 0
> > - if_bpf = ifp->if_bpf;
> > - if (if_bpf)
> > - bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
> > + caddr_t if_bpf = ifp->if_bpf;
> > + if (if_bpf)
> > + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
> >  #endif
> >  
> > - switch (sc->sc_tunnel.t_af) {
> > - case AF_INET:
> > - error = ip_etherip_output(ifp, m);
> > - break;
> > + switch (sc->sc_tunnel.t_af) {
> > + case AF_INET:
> > + error = ip_etherip_output(ifp, m);
> > + break;
> >  #ifdef INET6
> > - case AF_INET6:
> > - error = ip6_etherip_output(ifp, m);
> > - break;
> > + case AF_INET6:
> > + error = ip6_etherip_output(ifp, m);
> > + break;
> >  #endif
> > - default:
> > - /* unhandled_af(sc->sc_tunnel.t_af); */
> > - m_freem(m);
> > - continue;
> > - }
> > + default:
> > + /* unhandled_af(sc->sc_tunnel.t_af); */
> > + m_freem(m);
> > + error = ENETDOWN;
> > + break;
> > + }
> >  
> > - if (error)
> > - ifp->if_oerrors++;
> > + if (error)
> > + ifp->if_oerrors++;
> > +}
> > +
> > +void
> > +etherip_start(struct ifqueue *ifq)
> > +{
> > + struct ifnet *ifp = ifq->ifq_if;
> > + struct mbuf *m;
> > +
> > + while ((m = ifq_dequeue(ifq)) != NULL)
> > + etherip_send(ifp, m);
> > +}
> > +
> > +int
> > +etherip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> > +    struct rtentry *rt)
> > +{
> > + int error;
> > +
> > + m = ether_encap(ifp, m, dst, rt, &error);
> > + if (m == NULL)
> > + return (error);
> > +
> > + if (ifq_is_priq(&ifp->if_snd)) {
> > + etherip_send(ifp, m);
> > + return (0);
> >   }
> > +
> > + return (ifq_enqueue(&ifp->if_snd, m));
> >  }
> >  
> >  int
> > Index: net/if_ethersubr.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> > retrieving revision 1.253
> > diff -u -p -r1.253 if_ethersubr.c
> > --- net/if_ethersubr.c 13 Mar 2018 01:31:48 -0000 1.253
> > +++ net/if_ethersubr.c 30 Nov 2018 02:24:09 -0000
> > @@ -483,7 +510,8 @@ ether_ifattach(struct ifnet *ifp)
> >   ifp->if_addrlen = ETHER_ADDR_LEN;
> >   ifp->if_hdrlen = ETHER_HDR_LEN;
> >   ifp->if_mtu = ETHERMTU;
> > - ifp->if_output = ether_output;
> > + if (ifp->if_output == NULL)
> > + ifp->if_output = ether_output;
> >   ifp->if_rtrequest = ether_rtrequest;
> >  
> >   if_ih_insert(ifp, ether_input, NULL);
> > Index: net/if_vlan.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_vlan.c,v
> > retrieving revision 1.179
> > diff -u -p -r1.179 if_vlan.c
> > --- net/if_vlan.c 16 Nov 2018 08:43:08 -0000 1.179
> > +++ net/if_vlan.c 30 Nov 2018 02:24:09 -0000
> > @@ -58,6 +58,7 @@
> >  #include <sys/sockio.h>
> >  #include <sys/systm.h>
> >  #include <sys/rwlock.h>
> > +#include <sys/percpu.h>
> >  
> >  #include <net/if.h>
> >  #include <net/if_dl.h>
> > @@ -85,6 +86,8 @@ int vlan_clone_create(struct if_clone *,
> >  int vlan_clone_destroy(struct ifnet *);
> >  
> >  int vlan_input(struct ifnet *, struct mbuf *, void *);
> > +int vlan_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> > +    struct rtentry *rt);
> >  void vlan_start(struct ifqueue *ifq);
> >  int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
> >  
> > @@ -177,6 +180,7 @@ vlan_clone_create(struct if_clone *ifc,
> >  
> >   ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
> >   ifp->if_xflags = IFXF_CLONED|IFXF_MPSAFE;
> > + ifp->if_output = vlan_output;
> >   ifp->if_qstart = vlan_start;
> >   ifp->if_ioctl = vlan_ioctl;
> >   ifp->if_hardmtu = 0xffff;
> > @@ -185,6 +189,8 @@ vlan_clone_create(struct if_clone *ifc,
> >   ether_ifattach(ifp);
> >   ifp->if_hdrlen = EVL_ENCAPLEN;
> >  
> > + if_counters_alloc(ifp);
> > +
> >   return (0);
> >  }
> >  
> > @@ -239,6 +245,110 @@ vlan_mplstunnel(int ifidx)
> >  #endif
> >  }
> >  
> > +static uint16_t
> > +vlan_tag(const struct ifvlan *ifv, const struct mbuf *m)
> > +{
> > + int txprio = ifv->ifv_prio;
> > + uint16_t prio;
> > +
> > + prio = (txprio == IF_HDRPRIO_PACKET) ?
> > +    m->m_pkthdr.pf.prio : txprio;
> > +
> > + /* IEEE 802.1p has prio 0 and 1 swapped */
> > + if (prio <= 1)
> > + prio = !prio;
> > +
> > + return (ifv->if_prio | (prio << EVL_PRIO_BITS));
> > +}
> > +
> > +int
> > +vlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> > +    struct rtentry *rt)
> > +{
> > + struct ifvlan *ifv;
> > + struct ifnet *ifp0;
> > +#if NBPFILTER > 0
> > + caddr_t if_bpf;
> > +#endif
> > + struct sockaddr vdst;
> > + struct ether_header *eh = (struct ether_header *)vdst.sa_data;
> > + int txprio;
> > + uint16_t vtag;
> > + int error;
> > + unsigned int bytes;
> > +
> > + if (!ifq_is_priq(&ifp->if_snd)) {
> > + /*
> > + * user wants to delay packets, which relies on the ifq
> > + * machinery. fall back to if_enqueue via ether_output.
> > + */
> > + return (ether_output(ifp, m, dst, rt));
> > + }
> > +
> > + error = ether_resolve(ifp, m, dst, rt, eh);
> > + switch (error) {
> > + case 0:
> > + break;
> > + case EAGAIN:
> > + return (0);
> > + default:
> > + return (error);
> > + }
> > +
> > + ifv = ifp->if_softc;
> > + ifp0 = if_get(ifv->ifv_ifp0);
> > + if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
> > + m_freem(m);
> > + error = ENETDOWN;
> > + goto leave;
> > + }
> > +
> > +#if NBPFILTER > 0
> > + if_bpf = ifp->if_bpf;
> > + if (if_bpf) {
> > + bpf_mtap_hdr(if_bpf, (caddr_t)eh, sizeof(*eh), m,
> > +    BPF_DIRECTION_OUT, NULL);
> > + }
> > +#endif
> > +
> > + vtag = vlan_tag(ifv, m);
> > +
> > + bytes = sizeof(*eh) + m->m_pkthdr.len; /* plus shim? */
> > +
> > + /*
> > + * If the underlying interface cannot do VLAN tag insertion
> > + * itself, create an encapsulation header.
> > + */
> > + if (ISSET(ifp0->if_capabilities, IFCAP_VLAN_HWTAGGING) &&
> > +    ifv->ifv_type == ETHERTYPE_VLAN) {
> > + m->m_pkthdr.ether_vtag = vtag;
> > + m->m_flags |= M_VLANTAG;
> > + } else {
> > + struct ether_vlan_shim *evl;
> > +
> > + M_PREPEND(m, sizeof(*evl), M_DONTWAIT);
> > + if (m == NULL) {
> > + error = ENOBUFS;
> > + goto leave;
> > + }
> > +
> > + evl = mtod(m, struct ether_vlan_shim *);
> > + evl->evl_tag = htons(vtag);
> > + evl->evl_proto = eh->ether_type;
> > +
> > + eh->ether_type = htons(ifv->ifv_type);
> > + }
> > +
> > + counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes, bytes);
> > +
> > + vdst.sa_family = pseudo_AF_HDRCMPLT;
> > + error = ifp0->if_output(ifp0, m, &vdst, NULL);
> > +
> > +leave:
> > + if_put(ifp0);
> > + return (error);
> > +}
> > +
> >  void
> >  vlan_start(struct ifqueue *ifq)
> >  {
> > @@ -246,31 +356,22 @@ vlan_start(struct ifqueue *ifq)
> >   struct ifvlan   *ifv;
> >   struct ifnet *ifp0;
> >   struct mbuf *m;
> > - int txprio;
> > - uint8_t prio;
> > + uint16_t vtag;
> >  
> >   ifv = ifp->if_softc;
> >   ifp0 = if_get(ifv->ifv_ifp0);
> > - if (ifp0 == NULL || (ifp0->if_flags & (IFF_UP|IFF_RUNNING)) !=
> > -    (IFF_UP|IFF_RUNNING)) {
> > + if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
> >   ifq_purge(ifq);
> >   goto leave;
> >   }
> >  
> > - txprio = ifv->ifv_prio;
> > -
> >   while ((m = ifq_dequeue(ifq)) != NULL) {
> >  #if NBPFILTER > 0
> >   if (ifp->if_bpf)
> >   bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> >  #endif /* NBPFILTER > 0 */
> >  
> > - prio = (txprio == IF_HDRPRIO_PACKET) ?
> > -    m->m_pkthdr.pf.prio : txprio;
> > -
> > - /* IEEE 802.1p has prio 0 and 1 swapped */
> > - if (prio <= 1)
> > - prio = !prio;
> > + vtag = vlan_tag(ifv, m);
> >  
> >   /*
> >   * If this packet came from a pseudowire it means it already
> > @@ -285,23 +386,17 @@ vlan_start(struct ifqueue *ifq)
> >   */
> >   } else if ((ifp0->if_capabilities & IFCAP_VLAN_HWTAGGING) &&
> >      (ifv->ifv_type == ETHERTYPE_VLAN)) {
> > - m->m_pkthdr.ether_vtag = ifv->ifv_tag +
> > -    (prio << EVL_PRIO_BITS);
> > + m->m_pkthdr.ether_vtag = vtag;
> >   m->m_flags |= M_VLANTAG;
> >   } else {
> > - m = vlan_inject(m, ifv->ifv_type, ifv->ifv_tag |
> > -    (prio << EVL_PRIO_BITS));
> > + m = vlan_inject(m, ifv->ifv_type, vtag);
> >   if (m == NULL) {
> > - ifp->if_oerrors++;
> > + ifq->ifq_oerrors++;
> >   continue;
> >   }
> >   }
> >  
> > - if (if_enqueue(ifp0, m)) {
> > - ifp->if_oerrors++;
> > - ifq->ifq_errors++;
> > - continue;
> > - }
> > + if_enqueue(ifp0, m);
> >   }
> >  
> >  leave:
> > Index: net/ifq.h
> > ===================================================================
> > RCS file: /cvs/src/sys/net/ifq.h,v
> > retrieving revision 1.20
> > diff -u -p -r1.20 ifq.h
> > --- net/ifq.h 4 Jan 2018 11:02:57 -0000 1.20
> > +++ net/ifq.h 30 Nov 2018 02:24:09 -0000
> > @@ -421,6 +422,7 @@ void ifq_barrier(struct ifqueue *);
> >  #define ifq_len(_ifq) ((_ifq)->ifq_len)
> >  #define ifq_empty(_ifq) (ifq_len(_ifq) == 0)
> >  #define ifq_set_maxlen(_ifq, _l) ((_ifq)->ifq_maxlen = (_l))
> > +#define ifq_is_priq(_ifq) ((_ifq)->ifq_ops == ifq_priq_ops)
> >  
> >  static inline void
> >  ifq_set_oactive(struct ifqueue *ifq)
> > Index: netinet/if_ether.h
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/if_ether.h,v
> > retrieving revision 1.73
> > diff -u -p -r1.73 if_ether.h
> > --- netinet/if_ether.h 29 Nov 2016 10:09:57 -0000 1.73
> > +++ netinet/if_ether.h 30 Nov 2018 02:24:09 -0000
> > @@ -92,6 +92,11 @@ struct  ether_vlan_header {
> >          u_int16_t evl_proto;
> >  };
> >  
> > +struct ether_vlan_shim {
> > + u_int16_t evl_tag;
> > + u_int16_t evl_proto;
> > +};
> > +
> >  #define EVL_VLID_MASK 0xFFF
> >  #define EVL_VLID_NULL 0x000
> >  /* 0x000 and 0xfff are reserved */
> > Index: net/if_var.h
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_var.h,v
> > retrieving revision 1.90
> > diff -u -p -r1.90 if_var.h
> > --- net/if_var.h 10 Sep 2018 16:18:34 -0000 1.90
> > +++ net/if_var.h 30 Nov 2018 02:24:09 -0000
> > @@ -76,6 +76,7 @@
> >  struct rtentry;
> >  struct ifnet;
> >  struct task;
> > +struct cpumem;
> >  
> >  /*
> >   * Structure describing a `cloning' interface.
> > @@ -144,6 +145,7 @@ struct ifnet { /* and the entries */
> >   unsigned short if_flags; /* [N] up/down, broadcast, etc. */
> >   int if_xflags; /* [N] extra softnet flags */
> >   struct if_data if_data; /* stats and other data about if */
> > + struct cpumem *if_counters; /* per cpu stats */
> >   uint32_t if_hardmtu; /* [d] maximum MTU device supports */
> >   char if_description[IFDESCRSIZE]; /* [c] interface description */
> >   u_short if_rtlabelid; /* [c] next route label */
> > @@ -202,6 +204,23 @@ struct ifnet { /* and the entries */
> >  #define if_capabilities if_data.ifi_capabilities
> >  #define if_rdomain if_data.ifi_rdomain
> >  
> > +enum if_counters {
> > + ifc_ipackets, /* packets received on interface */
> > + ifc_ierrors, /* input errors on interface */
> > + ifc_opackets, /* packets sent on interface */
> > + ifc_oerrors, /* output errors on interface */
> > + ifc_collisions, /* collisions on csma interfaces */
> > + ifc_ibytes, /* total number of octets received */
> > + ifc_obytes, /* total number of octets sent */
> > + ifc_imcasts, /* packets received via multicast */
> > + ifc_omcasts, /* packets sent via multicast */
> > + ifc_iqdrops, /* dropped on input, this interface */
> > + ifc_oqdrops, /* dropped on output, this interface */
> > + ifc_noproto, /* destined for unsupported protocol */
> > +
> > + ifc_ncounters
> > +};
> > +
> >  /*
> >   * The ifaddr structure contains information about one address
> >   * of an interface.  They are maintained by the different address families,
> > @@ -356,6 +375,9 @@ u_int if_rxr_get(struct if_rxring *, u_i
> >  int if_rxr_info_ioctl(struct if_rxrinfo *, u_int, struct if_rxring_info *);
> >  int if_rxr_ioctl(struct if_rxrinfo *, const char *, u_int,
> >      struct if_rxring *);
> > +
> > +void if_counters_alloc(struct ifnet *);
> > +void if_counters_free(struct ifnet *);
> >  
> >  #endif /* _KERNEL */
> >  
> > Index: net/if.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if.c,v
> > retrieving revision 1.568
> > diff -u -p -r1.568 if.c
> > --- net/if.c 29 Nov 2018 00:11:49 -0000 1.568
> > +++ net/if.c 30 Nov 2018 02:24:09 -0000
> > @@ -84,6 +84,7 @@
> >  #include <sys/domain.h>
> >  #include <sys/task.h>
> >  #include <sys/atomic.h>
> > +#include <sys/percpu.h>
> >  #include <sys/proc.h>
> >  
> >  #include <dev/rndvar.h>
> > @@ -1103,6 +1104,9 @@ if_detach(struct ifnet *ifp)
> >   splx(s);
> >   NET_UNLOCK();
> >  
> > + if (ifp->if_counters != NULL)
> > + if_counters_free(ifp);
> > +
> >   for (i = 0; i < ifp->if_nifqs; i++)
> >   ifq_destroy(ifp->if_ifqs[i]);
> >   if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
> > @@ -2362,11 +2366,47 @@ ifconf(caddr_t data)
> >  }
> >  
> >  void
> > +if_counters_alloc(struct ifnet *ifp)
> > +{
> > + KASSERT(ifp->if_counters == NULL);
> > +
> > + ifp->if_counters = counters_alloc(ifc_ncounters);
> > +}
> > +
> > +void
> > +if_counters_free(struct ifnet *ifp)
> > +{
> > + KASSERT(ifp->if_counters != NULL);
> > +
> > + counters_free(ifp->if_counters, ifc_ncounters);
> > + ifp->if_counters = NULL;
> > +}
> > +
> > +void
> >  if_getdata(struct ifnet *ifp, struct if_data *data)
> >  {
> >   unsigned int i;
> >  
> >   *data = ifp->if_data;
> > +
> > + if (ifp->if_counters != NULL) {
> > + uint64_t counters[ifc_ncounters];
> > +
> > + counters_read(ifp->if_counters, counters, nitems(counters));
> > +
> > + data->ifi_ipackets += counters[ifc_ipackets];
> > + data->ifi_ierrors += counters[ifc_ierrors];
> > + data->ifi_opackets += counters[ifc_opackets];
> > + data->ifi_oerrors += counters[ifc_oerrors];
> > + data->ifi_collisions += counters[ifc_collisions];
> > + data->ifi_ibytes += counters[ifc_ibytes];
> > + data->ifi_obytes += counters[ifc_obytes];
> > + data->ifi_imcasts += counters[ifc_imcasts];
> > + data->ifi_omcasts += counters[ifc_omcasts];
> > + data->ifi_iqdrops += counters[ifc_iqdrops];
> > + data->ifi_oqdrops += counters[ifc_oqdrops];
> > + data->ifi_noproto += counters[ifc_noproto];
> > + }
> >  
> >   for (i = 0; i < ifp->if_nifqs; i++) {
> >   struct ifqueue *ifq = ifp->if_ifqs[i];
> >
>

--
:wq Claudio

Reply | Threaded
Open this post in threaded view
|

Re: split ether_output up into resolution, encapsulation, and output

Claudio Jeker
In reply to this post by David Gwynne-5
On Fri, Nov 30, 2018 at 12:21:11PM +1000, David Gwynne wrote:

> i have a plan to allow virtual interfaces (eg, vlan, etherip, etc) to
> provide their own output functions so they can bypass the ifq machinery
> and push the packet onto the underlying layer directly.
>
> they'll still need to get an ethernet header though. vlan needs to get
> the ethernet header and put the vlan shim into it, therefore
> ether_resolve is exposed. etherip doesnt need a shim, it just wants
> ethernet encapsulating the payload before adding its own headers to the
> packet, therefore there is ether_encap.
>
> does this make sense?
>
> ok?

Two comments below. Apart from that OK claudio@

> Index: net/if_ethersubr.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.253
> diff -u -p -r1.253 if_ethersubr.c
> --- net/if_ethersubr.c 13 Mar 2018 01:31:48 -0000 1.253
> +++ net/if_ethersubr.c 30 Nov 2018 02:02:58 -0000
> @@ -178,24 +178,18 @@ ether_rtrequest(struct ifnet *ifp, int r
>   break;
>   }
>  }
> -/*
> - * Ethernet output routine.
> - * Encapsulate a packet of type family for the local net.
> - * Assumes that ifp is actually pointer to arpcom structure.
> - */
> +
>  int
> -ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> -    struct rtentry *rt)
> +ether_resolve(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> +    struct rtentry *rt, struct ether_header *eh)
>  {
> - u_int16_t etype;
> - u_char edst[ETHER_ADDR_LEN];
> - u_char *esrc;
> - struct mbuf *mcopy = NULL;
> - struct ether_header *eh;
>   struct arpcom *ac = (struct arpcom *)ifp;
>   sa_family_t af = dst->sa_family;
>   int error = 0;
>  
> + if (!ISSET(ifp->if_flags, IFF_RUNNING))
> + senderr(ENETDOWN);
> +
>   KASSERT(rt != NULL || ISSET(m->m_flags, M_MCAST|M_BCAST) ||
>   af == AF_UNSPEC || af == pseudo_AF_HDRCMPLT);
>  
> @@ -207,28 +201,31 @@ ether_output(struct ifnet *ifp, struct m
>   }
>  #endif
>  
> - esrc = ac->ac_enaddr;
> -
> - if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
> - senderr(ENETDOWN);
> -
>   switch (af) {
>   case AF_INET:
> - error = arpresolve(ifp, rt, m, dst, edst);
> + error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
>   if (error)
> - return (error == EAGAIN ? 0 : error);
> + return (error);
> + eh->ether_type = htons(ETHERTYPE_IP);
> +
>   /* If broadcasting on a simplex interface, loopback a copy */
> - if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) &&
> -    !m->m_pkthdr.pf.routed)
> + if (ISSET(m->m_flags, M_BCAST) &&
> +    ISSET(ifp->if_flags, IFF_SIMPLEX) &&
> +    !m->m_pkthdr.pf.routed) {
> + struct mbuf *mcopy;
> +
> + /* XXX Should we input an unencrypted IPsec packet? */
>   mcopy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> - etype = htons(ETHERTYPE_IP);
> + if (mcopy != NULL)
> + if_input_local(ifp, mcopy, af);
> + }
>   break;
>  #ifdef INET6
>   case AF_INET6:
> - error = nd6_resolve(ifp, rt, m, dst, edst);
> + error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
>   if (error)
> - return (error == EAGAIN ? 0 : error);
> - etype = htons(ETHERTYPE_IPV6);
> + return (error);
> + eh->ether_type = htons(ETHERTYPE_IPV6);
>   break;
>  #endif
>  #ifdef MPLS
> @@ -242,72 +239,102 @@ ether_output(struct ifnet *ifp, struct m
>   senderr(ENETUNREACH);
>  
>   switch (dst->sa_family) {
> - case AF_LINK:
> - if (satosdl(dst)->sdl_alen < sizeof(edst))
> - senderr(EHOSTUNREACH);
> - memcpy(edst, LLADDR(satosdl(dst)),
> -    sizeof(edst));
> - break;
> + case AF_LINK:
> + if (satosdl(dst)->sdl_alen < sizeof(eh->ether_dhost))
> + senderr(EHOSTUNREACH);
> + memcpy(eh->ether_dhost, LLADDR(satosdl(dst)),
> +    sizeof(eh->ether_dhost));
> + break;
>  #ifdef INET6
> - case AF_INET6:
> - error = nd6_resolve(ifp, rt, m, dst, edst);
> - if (error)
> - return (error == EAGAIN ? 0 : error);
> - break;
> + case AF_INET6:
> + error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
> + if (error)
> + return (error == EAGAIN ? 0 : error);

In other places you removed the EAGAIN handling, why not here...

> + break;
>  #endif
> - case AF_INET:
> - case AF_MPLS:
> - error = arpresolve(ifp, rt, m, dst, edst);
> - if (error)
> - return (error == EAGAIN ? 0 : error);
> - break;
> - default:
> - senderr(EHOSTUNREACH);
> + case AF_INET:
> + case AF_MPLS:
> + error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
> + if (error)
> + return (error == EAGAIN ? 0 : error);

... and here.

> + break;
> + default:
> + senderr(EHOSTUNREACH);
>   }
>   /* XXX handling for simplex devices in case of M/BCAST ?? */
>   if (m->m_flags & (M_BCAST | M_MCAST))
> - etype = htons(ETHERTYPE_MPLS_MCAST);
> + eh->ether_type = htons(ETHERTYPE_MPLS_MCAST);
>   else
> - etype = htons(ETHERTYPE_MPLS);
> + eh->ether_type = htons(ETHERTYPE_MPLS);
>   break;
>  #endif /* MPLS */
>   case pseudo_AF_HDRCMPLT:
> - eh = (struct ether_header *)dst->sa_data;
> - esrc = eh->ether_shost;
> - /* FALLTHROUGH */
> + /* take the whole header from the sa */
> + memcpy(eh, dst->sa_data, sizeof(*eh));
> + return (0);
>  
>   case AF_UNSPEC:
> - eh = (struct ether_header *)dst->sa_data;
> - memcpy(edst, eh->ether_dhost, sizeof(edst));
> - /* AF_UNSPEC doesn't swap the byte order of the ether_type. */
> - etype = eh->ether_type;
> + /* take the dst and type from the sa, but get src below */
> + memcpy(eh, dst->sa_data, sizeof(*eh));
>   break;
>  
>   default:
> - printf("%s: can't handle af%d\n", ifp->if_xname,
> - dst->sa_family);
> + printf("%s: can't handle af%d\n", ifp->if_xname, af);
>   senderr(EAFNOSUPPORT);
>   }
>  
> - /* XXX Should we feed-back an unencrypted IPsec packet ? */
> - if (mcopy)
> - if_input_local(ifp, mcopy, dst->sa_family);
> + memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
>  
> - M_PREPEND(m, sizeof(struct ether_header) + ETHER_ALIGN, M_DONTWAIT);
> - if (m == NULL)
> - return (ENOBUFS);
> - m_adj(m, ETHER_ALIGN);
> - eh = mtod(m, struct ether_header *);
> - eh->ether_type = etype;
> - memcpy(eh->ether_dhost, edst, sizeof(eh->ether_dhost));
> - memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
> + return (0);
>  
> - return (if_enqueue(ifp, m));
>  bad:
>   m_freem(m);
>   return (error);
>  }
>  
> +struct mbuf*
> +ether_encap(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> +    struct rtentry *rt, int *errorp)
> +{
> + struct ether_header eh;
> + int error;
> +
> + error = ether_resolve(ifp, m, dst, rt, &eh);
> + switch (error) {
> + case 0:
> + break;
> + case EAGAIN:
> + error = 0;
> + default:
> + *errorp = error;
> + return (NULL);
> + }
> +
> + m = m_prepend(m, ETHER_ALIGN + sizeof(eh), M_DONTWAIT);
> + if (m == NULL) {
> + *errorp = ENOBUFS;
> + return (NULL);
> + }
> +
> + m_adj(m, ETHER_ALIGN);
> + memcpy(mtod(m, struct ether_header *), &eh, sizeof(eh));
> +
> + return (m);
> +}
> +
> +int
> +ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
> +    struct rtentry *rt)
> +{
> + int error;
> +
> + m = ether_encap(ifp, m, dst, rt, &error);
> + if (m == NULL)
> + return (error);
> +
> + return (if_enqueue(ifp, m));
> +}
> +
>  /*
>   * Process a received Ethernet packet;
>   * the packet is in the mbuf chain m without
> Index: netinet/if_ether.h
> ===================================================================
> RCS file: /cvs/src/sys/netinet/if_ether.h,v
> retrieving revision 1.73
> diff -u -p -r1.73 if_ether.h
> --- netinet/if_ether.h 29 Nov 2016 10:09:57 -0000 1.73
> +++ netinet/if_ether.h 30 Nov 2018 02:02:58 -0000
> @@ -240,8 +245,13 @@ void ether_ifattach(struct ifnet *);
>  void ether_ifdetach(struct ifnet *);
>  int ether_ioctl(struct ifnet *, struct arpcom *, u_long, caddr_t);
>  int ether_input(struct ifnet *, struct mbuf *, void *);
> -int ether_output(struct ifnet *,
> -    struct mbuf *, struct sockaddr *, struct rtentry *);
> +int ether_resolve(struct ifnet *, struct mbuf *, struct sockaddr *,
> +    struct rtentry *, struct ether_header *);
> +struct mbuf *
> + ether_encap(struct ifnet *, struct mbuf *, struct sockaddr *,
> +    struct rtentry *, int *);
> +int ether_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> +    struct rtentry *);
>  void ether_rtrequest(struct ifnet *, int, struct rtentry *);
>  char *ether_sprintf(u_char *);
>  
>

--
:wq Claudio