make prio fields in encap protocols configurable

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

make prio fields in encap protocols configurable

David Gwynne-5
a lot of encap protocols have fields to carry a priority or class of
service associated with the relevant packet. some concrete examples are
the dot1q priority field in vlan(4) headers, the now repurposed exp
field in mpls shims, and the tos/traffic class/dscp field in ip headers
around gre, gif, etherip, and so on.

currently the implementation of these protocols hardcodes a decision
about what these priority fields should be. this diff aims to make the
pri fields in all these protocols configurable, leaving the current
decisions as the default.

there are three options, one of which is not available depending on what
is being encapsulated. the options are:

1. hardcoding the priority field to a value between 0 and 7
2. using the packet (mbuf) priority field for the value
3. copying the equivalent field in the inner protocol payload

currently most of the ethernet over ip encapsulators use option 1, and
pick 0 as that value. vlan and svlan use option 2 unless the link0 button
i added is in effect, in which case they use the llprio instead. the ip
in ip things like gif and gre tend to use option 3.

this diff makes it possible to configure interfaces to use different
options if so desired. for example, if you're terminating the nbn in .au
on an openbsd box, you will probably need to hardcode the vlan prio
field to 1 to be able to talk. right now there's the llprio and link0
buttons, but that causes arp to be prioritised in what could be
considered the wrong way. making the config separate and explicit makes
it more obvious what is going on.

so, this adds a txprio option to ifconfig. you can pass it a number or
"packet" or "payload" to configure which of the 3 options above you
want. ethernet itself doesn't have a priority field, so ethernet over ip
things don't allow "txprio payload" to be configured on themselves.
the state of txprio is added to the encap line. it is debatable whether
it should have been added to the tunnel line instead, but the tunnel
line is getting full.

this updates most of the encap drivers to implement the txprio
behaviour.

the name should give a hint, but note that this diff does not
implement any change in behaviour on the receive side, ie, nothing is
done with the prio field coming into the system, except on vlan and
svlan interfaces.

some examples:

vlan7: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
        lladdr 00:1b:21:bc:25:e7
        index 5 priority 0 llprio 3
        encap: vnetid 7 parent ix1 txprio packet
        groups: vlan
        media: Ethernet autoselect (10GSFP+Cu full-duplex,rxpause,txpause)
        status: active
        inet 192.168.0.1 netmask 0xffffff00 broadcast 192.168.0.255
vxlan0: flags=8802<BROADCAST,SIMPLEX,MULTICAST> mtu 1500
        lladdr fe:e1:ba:d0:89:4f
        index 8 priority 0 llprio 3
        encap: vnetid none txprio 0
        groups: vxlan
        media: Ethernet autoselect
        status: active
        tunnel: (unset) nodf
gre0: flags=8010<POINTOPOINT,MULTICAST> mtu 1476
        index 9 priority 0 llprio 6
        encap: vnetid none txprio payload
        groups: gre
        tunnel: (unset) ttl 64 nodf

i have built ifconfig with SMALL and it still works.

ok?

Index: sbin/ifconfig/ifconfig.c
===================================================================
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.380
diff -u -p -r1.380 ifconfig.c
--- sbin/ifconfig/ifconfig.c 15 Oct 2018 11:25:55 -0000 1.380
+++ sbin/ifconfig/ifconfig.c 11 Nov 2018 12:54:58 -0000
@@ -136,6 +136,9 @@ struct ifencap {
 #define IFE_PARENT_NONE 0x100
 #define IFE_PARENT_SET 0x200
  char ife_parent[IFNAMSIZ];
+
+#define IFE_TXHPRIO_SET 0x1000
+ int ife_txhprio;
 };
 
 struct ifreq ifr, ridreq;
@@ -288,6 +291,8 @@ void pfsync_status(void);
 void setvnetflowid(const char *, int);
 void delvnetflowid(const char *, int);
 void getvnetflowid(struct ifencap *);
+void gettxprio(struct ifencap *);
+void settxprio(const char *, int);
 void settunneldf(const char *, int);
 void settunnelnodf(const char *, int);
 void setpppoe_dev(const char *,int);
@@ -486,6 +491,7 @@ const struct cmd {
  { "-tunneldf", 0, 0, settunnelnodf },
  { "vnetflowid", 0, 0, setvnetflowid },
  { "-vnetflowid", 0, 0, delvnetflowid },
+ { "txprio", NEXTARG, 0, settxprio },
  { "pppoedev", NEXTARG, 0, setpppoe_dev },
  { "pppoesvc", NEXTARG, 0, setpppoe_svc },
  { "-pppoesvc", 1, 0, setpppoe_svc },
@@ -3982,6 +3988,46 @@ getifparent(struct ifencap *ife)
  }
 }
 
+#ifndef SMALL
+void
+gettxprio(struct ifencap *ife)
+{
+ if (strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)) >=
+    sizeof(ifr.ifr_name))
+ errx(1, "tx prio: name is too long");
+ /* a failing ioctl is silently skipped: ife is left untouched */
+ if (ioctl(s, SIOCGTXHPRIO, (caddr_t)&ifr) == -1)
+ return;
+ /* flag the value as valid so getencap() prints it */
+ ife->ife_flags |= IFE_TXHPRIO_SET;
+ ife->ife_txhprio = ifr.ifr_hdrprio;
+}
+
+void
+settxprio(const char *val, int d)
+{
+ const char *errmsg = NULL;
+
+ if (strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)) >=
+    sizeof(ifr.ifr_name))
+ errx(1, "tx prio: name is too long");
+ /* "packet" and "payload" are keywords; anything else must be a number */
+ if (strcmp(val, "packet") == 0)
+ ifr.ifr_hdrprio = IF_HDRPRIO_PACKET;
+ else if (strcmp(val, "payload") == 0)
+ ifr.ifr_hdrprio = IF_HDRPRIO_PAYLOAD;
+ else {
+ ifr.ifr_hdrprio = strtonum(val,
+    IF_HDRPRIO_MIN, IF_HDRPRIO_MAX, &errmsg);
+ if (errmsg)
+ errx(1, "tx prio %s: %s", val, errmsg);
+ }
+ /* a refusal by the driver is only warned about, not fatal */
+ if (ioctl(s, SIOCSTXHPRIO, (caddr_t)&ifr) == -1)
+ warn("SIOCSTXHPRIO");
+}
+#endif
+
 void
 getencap(void)
 {
@@ -3990,6 +4036,9 @@ getencap(void)
  getvnetid(&ife);
  getvnetflowid(&ife);
  getifparent(&ife);
+#ifndef SMALL
+ gettxprio(&ife);
+#endif
 
  if (ife.ife_flags == 0)
  return;
@@ -4018,6 +4067,22 @@ getencap(void)
  printf(" parent %s", ife.ife_parent);
  break;
  }
+
+#ifndef SMALL
+ if (ife.ife_flags & IFE_TXHPRIO_SET) {
+ switch (ife.ife_txhprio) {
+ case IF_HDRPRIO_PACKET:
+ printf(" txprio packet");
+ break;
+ case IF_HDRPRIO_PAYLOAD:
+ printf(" txprio payload");
+ break;
+ default:
+ printf(" txprio %d", ife.ife_txhprio);
+ break;
+ }
+ }
+#endif
 
  printf("\n");
 }
Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.566
diff -u -p -r1.566 if.c
--- sys/net/if.c 1 Oct 2018 12:38:32 -0000 1.566
+++ sys/net/if.c 11 Nov 2018 12:54:58 -0000
@@ -2136,6 +2136,7 @@ ifioctl(struct socket *so, u_long cmd, c
  case SIOCSIFMEDIA:
  case SIOCSVNETID:
  case SIOCSVNETFLOWID:
+ case SIOCSTXHPRIO:
  case SIOCSIFPAIR:
  case SIOCSIFPARENT:
  case SIOCDIFPARENT:
Index: sys/net/if.h
===================================================================
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.196
diff -u -p -r1.196 if.h
--- sys/net/if.h 11 Nov 2018 05:55:10 -0000 1.196
+++ sys/net/if.h 11 Nov 2018 12:54:59 -0000
@@ -398,7 +398,13 @@ struct ifreq {
 #define ifr_data ifr_ifru.ifru_data /* for use by interface */
 #define ifr_index ifr_ifru.ifru_index /* interface index */
 #define ifr_llprio ifr_ifru.ifru_metric /* link layer priority */
+#define ifr_hdrprio ifr_ifru.ifru_metric /* header prio field config */
 };
+
+#define IF_HDRPRIO_MIN IFQ_MINPRIO
+#define IF_HDRPRIO_MAX IFQ_MAXPRIO
+#define IF_HDRPRIO_PACKET -1 /* use mbuf prio */
+#define IF_HDRPRIO_PAYLOAD -2 /* copy payload prio */
 
 struct ifaliasreq {
  char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */
Index: sys/net/if_etherip.c
===================================================================
RCS file: /cvs/src/sys/net/if_etherip.c,v
retrieving revision 1.38
diff -u -p -r1.38 if_etherip.c
--- sys/net/if_etherip.c 29 Oct 2018 09:51:20 -0000 1.38
+++ sys/net/if_etherip.c 11 Nov 2018 12:54:59 -0000
@@ -85,6 +85,7 @@ struct etherip_softc {
  struct etherip_tunnel sc_tunnel; /* must be first */
  struct arpcom sc_ac;
  struct ifmedia sc_media;
+ int sc_txhprio;
  uint16_t sc_df;
  uint8_t sc_ttl;
 };
@@ -137,6 +138,7 @@ etherip_clone_create(struct if_clone *if
     ifc->ifc_name, unit);
 
  sc->sc_ttl = ip_defttl;
+ sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */
  sc->sc_df = htons(0);
 
  ifp->if_softc = sc;
@@ -282,6 +284,21 @@ etherip_ioctl(struct ifnet *ifp, u_long
  error = etherip_del_tunnel(sc);
  break;
 
+ case SIOCSTXHPRIO:
+ if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET) /* use mbuf prio */
+ ;
+ else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN ||
+    ifr->ifr_hdrprio > IF_HDRPRIO_MAX) {
+ error = EINVAL;
+ break;
+ }
+
+ sc->sc_txhprio = ifr->ifr_hdrprio;
+ break;
+ case SIOCGTXHPRIO:
+ ifr->ifr_hdrprio = sc->sc_txhprio;
+                break;
+
  case SIOCSLIFPHYTTL:
  if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
  error = EINVAL;
@@ -486,7 +503,8 @@ ip_etherip_output(struct ifnet *ifp, str
 
  ip->ip_v = IPVERSION;
  ip->ip_hl = sizeof(*ip) >> 2;
- ip->ip_tos = IPTOS_LOWDELAY;
+ ip->ip_tos = IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ?
+    m->m_pkthdr.pf.prio : sc->sc_txhprio);
  ip->ip_len = htons(m->m_pkthdr.len);
  ip->ip_id = htons(ip_randomid());
  ip->ip_off = sc->sc_df;
@@ -622,6 +640,7 @@ ip6_etherip_output(struct ifnet *ifp, st
  struct ip6_hdr *ip6;
  struct etherip_header *eip;
  uint16_t len;
+ uint32_t flow;
 
  if (IN6_IS_ADDR_UNSPECIFIED(&sc->sc_tunnel.t_dst6)) {
  m_freem(m);
@@ -636,10 +655,12 @@ ip6_etherip_output(struct ifnet *ifp, st
  return ENOBUFS;
  }
 
+ flow = IPV6_VERSION << 24; /* byte-wide vfc value goes in the top byte */
+ flow |= IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ?
+     m->m_pkthdr.pf.prio : sc->sc_txhprio) << 20;
+
  ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_flow = 0;
- ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
- ip6->ip6_vfc |= IPV6_VERSION;
+ htobem32(&ip6->ip6_flow, flow);
  ip6->ip6_nxt  = IPPROTO_ETHERIP;
  ip6->ip6_hlim = sc->sc_ttl;
  ip6->ip6_plen = htons(len);
Index: sys/net/if_gif.c
===================================================================
RCS file: /cvs/src/sys/net/if_gif.c,v
retrieving revision 1.117
diff -u -p -r1.117 if_gif.c
--- sys/net/if_gif.c 11 Nov 2018 12:47:04 -0000 1.117
+++ sys/net/if_gif.c 11 Nov 2018 12:54:59 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_gif.c,v 1.117 2018/11/11 12:47:04 dlg Exp $ */
+/* $OpenBSD: if_gif.c,v 1.116 2018/04/18 13:24:07 bluhm Exp $ */
 /* $KAME: if_gif.c,v 1.43 2001/02/20 08:51:07 itojun Exp $ */
 
 /*
@@ -106,6 +106,7 @@ struct gif_softc {
  struct ifnet sc_if;
  uint16_t sc_df;
  int sc_ttl;
+ int sc_txhprio;
 };
 
 struct gif_list gif_list = TAILQ_HEAD_INITIALIZER(gif_list);
@@ -153,6 +154,7 @@ gif_clone_create(struct if_clone *ifc, i
 
  sc->sc_df = htons(0);
  sc->sc_ttl = ip_defttl;
+ sc->sc_txhprio = IF_HDRPRIO_PAYLOAD;
 
  snprintf(ifp->if_xname, sizeof(ifp->if_xname),
     "%s%d", ifc->ifc_name, unit);
@@ -266,7 +268,6 @@ gif_start(struct ifnet *ifp)
  tos = (shim >> MPLS_EXP_OFFSET) << 5;
 
  ttloff = 3;
-
  proto = IPPROTO_MPLS;
  break;
  }
@@ -282,6 +283,18 @@ gif_start(struct ifnet *ifp)
  } else
  ttl = tttl;
 
+ switch (sc->sc_txhprio) {
+ case IF_HDRPRIO_PAYLOAD:
+ /* tos is already set */
+ break;
+ case IF_HDRPRIO_PACKET:
+ tos = IFQ_PRIO2TOS(m->m_pkthdr.pf.prio);
+ break;
+ default:
+ tos = IFQ_PRIO2TOS(sc->sc_txhprio);
+ break;
+ }
+
  gif_send(sc, m, proto, ttl, tos);
  }
 }
@@ -527,6 +540,22 @@ gif_ioctl(struct ifnet *ifp, u_long cmd,
  break;
  case SIOCGLIFPHYDF:
  ifr->ifr_df = sc->sc_df ? 1 : 0;
+ break;
+
+ case SIOCSTXHPRIO:
+ if (ifr->ifr_hdrprio == IF_HDRPRIO_PAYLOAD ||
+    ifr->ifr_hdrprio == IF_HDRPRIO_PACKET)
+ ; /* ok, fall through */
+ else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN ||
+    ifr->ifr_hdrprio > IF_HDRPRIO_MAX) {
+ error = EINVAL;
+ break;
+ }
+
+ sc->sc_txhprio = ifr->ifr_hdrprio;
+ break;
+ case SIOCGTXHPRIO:
+ ifr->ifr_hdrprio = sc->sc_txhprio;
  break;
 
  default:
Index: sys/net/if_gre.c
===================================================================
RCS file: /cvs/src/sys/net/if_gre.c,v
retrieving revision 1.134
diff -u -p -r1.134 if_gre.c
--- sys/net/if_gre.c 11 Nov 2018 06:35:41 -0000 1.134
+++ sys/net/if_gre.c 11 Nov 2018 12:54:59 -0000
@@ -191,6 +191,7 @@ struct gre_tunnel {
 #define t_dst4 t_dst.in4
 #define t_dst6 t_dst.in6
  int t_ttl;
+ int t_txhprio;
  uint16_t t_df;
  sa_family_t t_af;
 };
@@ -230,6 +231,13 @@ static int
 static int gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
     u_long, void *);
 
+static int gre_l2_txhprio(struct gre_tunnel *, int);
+static int gre_l3_txhprio(struct gre_tunnel *, int);
+
+static uint8_t gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
+static uint8_t gre_l3_tos(const struct gre_tunnel *,
+    const struct mbuf *, uint8_t);
+
 /*
  * layer 3 GRE tunnels
  */
@@ -576,6 +584,7 @@ gre_clone_create(struct if_clone *ifc, i
  ifp->if_rtrequest = p2p_rtrequest;
 
  sc->sc_tunnel.t_ttl = ip_defttl;
+ sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
  sc->sc_tunnel.t_df = htons(0);
 
  timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
@@ -641,6 +650,7 @@ mgre_clone_create(struct if_clone *ifc,
  ifp->if_ioctl = mgre_ioctl;
 
  sc->sc_tunnel.t_ttl = ip_defttl;
+ sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
  sc->sc_tunnel.t_df = htons(0);
 
  if_attach(ifp);
@@ -692,6 +702,7 @@ egre_clone_create(struct if_clone *ifc,
  ether_fakeaddr(ifp);
 
  sc->sc_tunnel.t_ttl = ip_defttl;
+ sc->sc_tunnel.t_txhprio = 0;
  sc->sc_tunnel.t_df = htons(0);
 
  ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
@@ -752,6 +763,7 @@ nvgre_clone_create(struct if_clone *ifc,
 
  tunnel = &sc->sc_tunnel;
  tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
+ tunnel->t_txhprio = 0;
  tunnel->t_df = htons(IP_DF);
  tunnel->t_key_mask = GRE_KEY_ENTROPY;
  tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
@@ -817,6 +829,7 @@ eoip_clone_create(struct if_clone *ifc,
  ether_fakeaddr(ifp);
 
  sc->sc_tunnel.t_ttl = ip_defttl;
+ sc->sc_tunnel.t_txhprio = 0;
  sc->sc_tunnel.t_df = htons(0);
 
  sc->sc_ka_timeo = 10;
@@ -1822,7 +1835,7 @@ mgre_start(struct ifnet *ifp)
  }
 #endif
 
- if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
+ if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
  ifp->if_oerrors++;
  continue;
  }
@@ -1863,7 +1876,7 @@ egre_start(struct ifnet *ifp)
  m->m_len = 0;
 
  m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
-    sc->sc_tunnel.t_ttl, 0);
+    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
  if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
  ifp->if_oerrors++;
  continue;
@@ -1942,6 +1955,8 @@ gre_l3_encap_dst(const struct gre_tunnel
  } else
  ttl = tttl;
 
+ tos = gre_l3_tos(tunnel, m, tos);
+
  return (gre_encap_dst(tunnel, dst, m, proto, ttl, tos));
 }
 
@@ -2143,6 +2158,77 @@ gre_tunnel_ioctl(struct ifnet *ifp, stru
 }
 
 static int
+gre_l2_txhprio(struct gre_tunnel *t, int hdrprio)
+{
+ switch (hdrprio) {
+ case IF_HDRPRIO_PACKET:
+ break;
+ default:
+ if (hdrprio < IF_HDRPRIO_MIN || hdrprio > IF_HDRPRIO_MAX)
+ return (EINVAL);
+ break;
+ }
+
+ t->t_txhprio = hdrprio;
+
+ return (0);
+}
+
+static int
+gre_l3_txhprio(struct gre_tunnel *t, int hdrprio)
+{
+ int keyword, fixed;
+
+ /* layer 3 tunnels take both keywords as well as a fixed prio */
+ keyword = (hdrprio == IF_HDRPRIO_PACKET ||
+    hdrprio == IF_HDRPRIO_PAYLOAD);
+ fixed = (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX);
+
+ if (!keyword && !fixed)
+ return (EINVAL);
+
+ t->t_txhprio = hdrprio;
+
+ return (0);
+}
+
+static uint8_t
+gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
+{
+ uint8_t prio;
+
+ /*
+  * use the prio carried in the mbuf header for "packet",
+  * otherwise t_txhprio is the fixed prio to send with.
+  */
+ if (t->t_txhprio == IF_HDRPRIO_PACKET)
+ prio = m->m_pkthdr.pf.prio;
+ else
+ prio = t->t_txhprio;
+
+ return (IFQ_PRIO2TOS(prio));
+}
+
+static uint8_t
+gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
+{
+ uint8_t prio;
+
+ switch (t->t_txhprio) {
+ case IF_HDRPRIO_PAYLOAD: /* keep the tos the caller passed in */
+ return (tos);
+ case IF_HDRPRIO_PACKET: /* prio carried in the mbuf header */
+ prio = m->m_pkthdr.pf.prio;
+ break;
+ default: /* fixed prio configured on the tunnel */
+ prio = t->t_txhprio;
+ break;
+ }
+
+ return (IFQ_PRIO2TOS(prio));
+}
+
+static int
 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
  struct gre_softc *sc = ifp->if_softc;
@@ -2216,6 +2302,13 @@ gre_ioctl(struct ifnet *ifp, u_long cmd,
  ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
  break;
 
+ case SIOCSTXHPRIO:
+ error = gre_l3_txhprio(&sc->sc_tunnel, ifr->ifr_hdrprio);
+ break;
+ case SIOCGTXHPRIO:
+ ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
+ break;
+
  default:
  error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
  break;
@@ -2272,6 +2365,13 @@ mgre_ioctl(struct ifnet *ifp, u_long cmd
  error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
  break;
 
+ case SIOCSTXHPRIO:
+ error = gre_l3_txhprio(&sc->sc_tunnel, ifr->ifr_hdrprio);
+ break;
+ case SIOCGTXHPRIO:
+ ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
+ break;
+
  case SIOCSVNETID:
  case SIOCDVNETID:
  case SIOCDIFPHYADDR:
@@ -2424,6 +2524,13 @@ egre_ioctl(struct ifnet *ifp, u_long cmd
  ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
  break;
 
+ case SIOCSTXHPRIO:
+ error = gre_l2_txhprio(&sc->sc_tunnel, ifr->ifr_hdrprio);
+ break;
+ case SIOCGTXHPRIO:
+ ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
+ break;
+
  case SIOCSVNETID:
  case SIOCDVNETID:
  case SIOCSVNETFLOWID:
@@ -2580,6 +2687,13 @@ nvgre_ioctl(struct ifnet *ifp, u_long cm
  ifr->ifr_ttl = tunnel->t_ttl;
  break;
 
+ case SIOCSTXHPRIO:
+ error = gre_l2_txhprio(&sc->sc_tunnel, ifr->ifr_hdrprio);
+ break;
+ case SIOCGTXHPRIO:
+ ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
+ break;
+
  case SIOCBRDGSCACHE:
  if (bparam->ifbrp_csize < 1) {
  error = EINVAL;
@@ -2751,6 +2865,13 @@ eoip_ioctl(struct ifnet *ifp, u_long cmd
  ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
  break;
 
+ case SIOCSTXHPRIO:
+ error = gre_l2_txhprio(&sc->sc_tunnel, ifr->ifr_hdrprio);
+ break;
+ case SIOCGTXHPRIO:
+ ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
+ break;
+
  default:
  error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
  break;
@@ -3556,7 +3677,8 @@ nvgre_start(struct ifnet *ifp)
  m->m_len = 0;
 
  m = gre_encap_dst(tunnel, &gateway, m,
-    htons(ETHERTYPE_TRANSETHER), tunnel->t_ttl, 0);
+    htons(ETHERTYPE_TRANSETHER),
+    tunnel->t_ttl, gre_l2_tos(tunnel, m));
  if (m == NULL)
  continue;
 
@@ -3729,7 +3851,7 @@ eoip_start(struct ifnet *ifp)
  MH_ALIGN(m, 0);
  m->m_len = 0;
 
- m = eoip_encap(sc, m, 0);
+ m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
  if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
  ifp->if_oerrors++;
  continue;
Index: sys/net/if_vlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.178
diff -u -p -r1.178 if_vlan.c
--- sys/net/if_vlan.c 11 Jul 2018 14:20:18 -0000 1.178
+++ sys/net/if_vlan.c 11 Nov 2018 12:54:59 -0000
@@ -173,6 +173,7 @@ vlan_clone_create(struct if_clone *ifc,
  ifv->ifv_type = ETHERTYPE_VLAN;
 
  refcnt_init(&ifv->ifv_refcnt);
+ ifv->ifv_prio = IF_HDRPRIO_PACKET;
 
  ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
  ifp->if_xflags = IFXF_CLONED|IFXF_MPSAFE;
@@ -261,8 +262,8 @@ vlan_start(struct ifqueue *ifq)
  bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 #endif /* NBPFILTER > 0 */
 
- prio = ISSET(ifp->if_flags, IFF_LINK0) ?
-    ifp->if_llprio : m->m_pkthdr.pf.prio;
+ prio = ifv->ifv_prio == IF_HDRPRIO_PACKET ?
+    m->m_pkthdr.pf.prio : ifv->ifv_prio;
 
  /* IEEE 802.1p has prio 0 and 1 swapped */
  if (prio <= 1)
@@ -712,6 +713,21 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd
  break;
  case SIOCGETVLAN:
  error = vlan_get_compat(ifp, ifr);
+ break;
+
+ case SIOCSTXHPRIO:
+ if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET)
+ ;
+ else if (ifr->ifr_hdrprio > IF_HDRPRIO_MAX ||
+    ifr->ifr_hdrprio < IF_HDRPRIO_MIN) {
+ error = EINVAL;
+ break;
+ }
+
+ ifv->ifv_prio = ifr->ifr_hdrprio;
+ break;
+ case SIOCGTXHPRIO:
+ ifr->ifr_hdrprio = ifv->ifv_prio;
  break;
 
  default:
Index: sys/net/if_vlan_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_vlan_var.h,v
retrieving revision 1.37
diff -u -p -r1.37 if_vlan_var.h
--- sys/net/if_vlan_var.h 24 Jan 2017 10:08:30 -0000 1.37
+++ sys/net/if_vlan_var.h 11 Nov 2018 12:54:59 -0000
@@ -63,9 +63,9 @@ struct ifvlan {
  struct arpcom ifv_ac; /* make this an interface */
  unsigned int ifv_ifp0; /* parent interface of this vlan */
  struct ifv_linkmib {
+ int ifvm_prio; /* prio to apply on packet leaving if */
  u_int16_t ifvm_proto; /* encapsulation ethertype */
  u_int16_t ifvm_tag; /* tag to apply on packets leaving if */
- u_int16_t ifvm_prio; /* prio to apply on packet leaving if */
  u_int16_t ifvm_type; /* non-standard ethertype or 0x8100 */
  } ifv_mib;
  LIST_HEAD(__vlan_mchead, vlan_mc_entry) vlan_mc_listhead;
Index: sys/net/if_vxlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.68
diff -u -p -r1.68 if_vxlan.c
--- sys/net/if_vxlan.c 17 Aug 2018 01:53:31 -0000 1.68
+++ sys/net/if_vxlan.c 11 Nov 2018 12:54:59 -0000
@@ -73,6 +73,7 @@ struct vxlan_softc {
  int64_t sc_vnetid;
  uint16_t sc_df;
  u_int8_t sc_ttl;
+ int sc_txhprio;
 
  struct task sc_sendtask;
 
@@ -136,6 +137,7 @@ vxlan_clone_create(struct if_clone *ifc,
  sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
  sc->sc_dstport = htons(VXLAN_PORT);
  sc->sc_vnetid = VXLAN_VNI_UNSET;
+ sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */
  sc->sc_df = htons(0);
  task_set(&sc->sc_sendtask, vxlan_send_dispatch, sc);
 
@@ -506,6 +508,21 @@ vxlanioctl(struct ifnet *ifp, u_long cmd
  ifr->ifr_df = sc->sc_df ? 1 : 0;
  break;
 
+ case SIOCSTXHPRIO:
+ if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET)
+ ; /* fall through */
+ else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN ||
+    ifr->ifr_hdrprio > IF_HDRPRIO_MAX) {
+ error = EINVAL;
+ break;
+ }
+
+ sc->sc_txhprio = ifr->ifr_hdrprio;
+ break;
+ case SIOCGTXHPRIO:
+ ifr->ifr_hdrprio = sc->sc_txhprio;
+ break;
+
  case SIOCSVNETID:
  if (sc->sc_vnetid == ifr->ifr_vnetid)
  break;
@@ -756,7 +773,8 @@ vxlan_encap4(struct ifnet *ifp, struct m
  ip->ip_id = htons(ip_randomid());
  ip->ip_off = sc->sc_df;
  ip->ip_p = IPPROTO_UDP;
- ip->ip_tos = IPTOS_LOWDELAY;
+ ip->ip_tos = IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ?
+    m->m_pkthdr.pf.prio : sc->sc_txhprio);
  ip->ip_len = htons(m->m_pkthdr.len);
 
  ip->ip_src = satosin(src)->sin_addr;
@@ -778,6 +796,7 @@ vxlan_encap6(struct ifnet *ifp, struct m
  struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc;
  struct ip6_hdr *ip6;
  struct in6_addr *in6a;
+ uint32_t flow;
 
  /*
  * Remove multicast and broadcast flags or encapsulated packet
@@ -789,8 +808,11 @@ vxlan_encap6(struct ifnet *ifp, struct m
  if (m == NULL)
  return (NULL);
 
+ flow = (uint32_t)IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ?
+    m->m_pkthdr.pf.prio : sc->sc_txhprio) << 20;
+
  ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_flow = 0;
+ ip6->ip6_flow = htonl(flow);
  ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
  ip6->ip6_vfc |= IPV6_VERSION;
  ip6->ip6_nxt = IPPROTO_UDP;
Index: sys/sys/sockio.h
===================================================================
RCS file: /cvs/src/sys/sys/sockio.h,v
retrieving revision 1.76
diff -u -p -r1.76 sockio.h
--- sys/sys/sockio.h 30 May 2018 18:15:47 -0000 1.76
+++ sys/sys/sockio.h 11 Nov 2018 12:54:59 -0000
@@ -198,6 +198,9 @@
 #define SIOCSVNETFLOWID _IOW('i', 195, struct ifreq) /* set vnet flowid */
 #define SIOCGVNETFLOWID _IOWR('i', 196, struct ifreq) /* get vnet flowid */
 
+#define SIOCSTXHPRIO _IOW('i', 197, struct ifreq) /* set tx hdr prio */
+#define SIOCGTXHPRIO _IOWR('i', 198, struct ifreq) /* get tx hdr prio */
+
 #define SIOCSVH _IOWR('i', 245, struct ifreq) /* set carp param */
 #define SIOCGVH _IOWR('i', 246, struct ifreq) /* get carp param */