rfc: let etherip(4) depend on another interface for link state

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

rfc: let etherip(4) depend on another interface for link state

David Gwynne-5
this is not a request for oks, this is me backing up a hack i did to fix
a problem i needed a quick solution for.

we're in the process of moving a bunch of boxes to a new site, and are
lucky that we can take our address space with us. to let us gradually
migrate hosts i want to stretch the layer 2 network between sites, but
i only have ip connectivity between them. therefore i'm bridging vlans
over etherip.

i also care about having some redundancy during this process (which is
going to take weeks at a minimum), so i have a pair of boxes at each
site act as endpoints to the bridges. to keep it simple im using carp
on each end to elect the active bridge endpoint on each side. this means
only one of a pair rxes the etherip packets from the remote site
to be forwarded into the DC the pair sit in.

unfortunately i discovered that carp has no influence on sending etherip
to the remote DC. once i had both pairs of bridges set up i managed
to introduce a loop at layer 2 which DoSsed both DCs. in more detail,
say I have DCs A and B, and each site has 2 bridge boxes that are called
A0, A1, B0, and B1. A0 and B0 are the carp masters. say A0 rxes a
broadcast packet from a vlan on its physical interface. it will forward
that to B0, which transmits it to its physical interface. B1 will rx
the broadcast packet and send it to DC A via A0. A0 pushes the broadcast
packet to its physical interface, which is rxed by A1. A1 pushes it to
B0, and so on.

my quick and dirty hack is to make transmission over the etherip leg of
a bridge depend on the state of the carp interface. this is implemented
by hacking up etherip so it supports the configuration of a parent
interface. etherip then watches whether the parent is running and has
link.

so i have the following config in DC A:

xdlg@dca-bridge0 ~$ sudo cat /etc/hostname.carp423
carpdev vlan423
vhid 23 pass secret
inet 172.23.84.113 255.255.255.248 NONE
xdlg@dca-bridge0 ~$ sudo cat /etc/hostname.etherip0
tunnel 172.23.84.113 172.23.84.121
parent carp423
up

this sits underneath this:

xdlg@dca-bridge0 ~$ for i in vlan374 vlan10374 bridge374; do ifconfig $i; done
vlan374: flags=8943<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> mtu 9000
        lladdr 00:50:56:a1:d7:f7
        description: labs-servers
        index 29 priority 0 llprio 3
        encap: vnetid 374 parent em0 txprio packet rxprio outer
        groups: vlan overlay
        media: Ethernet autoselect (1000baseT full-duplex,master)
        status: active
vlan10374: flags=8943<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> mtu 9000
        lladdr fe:e1:ba:d0:3f:da
        description: labs-servers
        index 30 priority 0 llprio 3
        encap: vnetid 374 parent etherip0 txprio packet rxprio outer
        groups: vlan overlay
        media: Ethernet autoselect
        status: active
bridge374: flags=41<UP,RUNNING>
        description: labs-servers
        index 31 llprio 3
        groups: bridge
        priority 32768 hellotime 2 fwddelay 15 maxage 20 holdcnt 6 proto rstp
        designated: id 00:00:00:00:00:00 priority 0
        vlan10374 flags=2007<LEARNING,DISCOVER,BLOCKNONIP>
                port 30 ifpriority 0 ifcost 0
        vlan374 flags=2007<LEARNING,DISCOVER,BLOCKNONIP>
                port 29 ifpriority 0 ifcost 0
        Addresses (max cache: 100, timeout: 240):

systat if on dca-bridge0 has this:

IFACE     STATE DESC             IPKTS IBYTES IFAILS  OPKTS OBYTES OFAILS  COLLS
em0       up:U                    1505 276472      0      7   1066      0      0
enc0      dn:U                       0      0      0      0      0      0      0
lo0       up                         0      0      0      0      0      0      0
carp423   up:D                       4    252      0      0      0      0      0
etherip0  up:D                       0      0      0     10    914     10      0
vlan374   up:U  labs-servers         0      0      0      0      0      0      0
vlan10374 up:D  labs-servers         0      0      0      0      0      0      0
bridge374 up    labs-servers         0      0      0      0      0      0      0

and dca-bridge1:

IFACE     STATE DESC             IPKTS IBYTES IFAILS  OPKTS OBYTES OFAILS  COLLS
em0       up:U                     841 140375      0     45   6265      0      0
enc0      dn:U                       0      0      0      0      0      0      0
lo0       up                         0      0      0      0      0      0      0
carp423   up:U                       2    126      1      3    126      0      0
etherip0  up:U                       8   1242      0     27   2751      0      0
vlan374   up:U  labs-servers        20   4755      0      8   3861      0      0
vlan10374 up:U  labs-servers        10   3981      0     12   3499      0      0
bridge374 up    labs-servers        26   8496      0     20   7360      0      0

is there something obvious im missing here? how else do i make the
backup bridge not transmit to the other DC?

Index: if_etherip.c
===================================================================
RCS file: /cvs/src/sys/net/if_etherip.c,v
retrieving revision 1.45
diff -u -p -r1.45 if_etherip.c
--- if_etherip.c 23 Apr 2019 10:53:45 -0000 1.45
+++ if_etherip.c 16 Jul 2019 09:50:34 -0000
@@ -90,6 +90,10 @@ struct etherip_softc {
  int sc_rxhprio;
  uint16_t sc_df;
  uint8_t sc_ttl;
+
+ unsigned int sc_parent;
+ void *sc_lcookie;
+ void *sc_dcookie;
 };
 
 /*
@@ -115,6 +119,12 @@ int etherip_down(struct etherip_softc *)
 struct etherip_softc *etherip_find(const struct etherip_tunnel *);
 int etherip_input(struct etherip_tunnel *, struct mbuf *, uint8_t, int);
 
+static int etherip_get_parent(struct etherip_softc *, struct if_parent *);
+static int etherip_set_parent(struct etherip_softc *,
+    const struct if_parent *);
+static void etherip_del_parent(void *);
+static void etherip_parent_link(void *);
+
 struct if_clone etherip_cloner = IF_CLONE_INITIALIZER("etherip",
     etherip_clone_create, etherip_clone_destroy);
 
@@ -176,6 +186,8 @@ etherip_clone_destroy(struct ifnet *ifp)
  struct etherip_softc *sc = ifp->if_softc;
 
  NET_LOCK();
+ etherip_del_parent(sc);
+
  if (ISSET(ifp->if_flags, IFF_RUNNING))
  etherip_down(sc);
 
@@ -214,6 +226,11 @@ etherip_start(struct ifnet *ifp)
  caddr_t if_bpf;
 #endif
 
+ if (ifp->if_link_state == LINK_STATE_DOWN) {
+ ifq_purge(&ifp->if_snd);
+ return;
+ }
+
  while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
 #if NBPFILTER > 0
  if_bpf = ifp->if_bpf;
@@ -341,6 +358,19 @@ etherip_ioctl(struct ifnet *ifp, u_long
  case SIOCDELMULTI:
  break;
 
+ case SIOCGIFPARENT:
+ error = etherip_get_parent(sc, (struct if_parent *)data);
+ break;
+
+ case SIOCSIFPARENT:
+ error = etherip_set_parent(sc, (struct if_parent *)data);
+ break;
+
+ case SIOCDIFPARENT:
+ etherip_del_parent(sc);
+ error = 0;
+ break;
+
  default:
  error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
  break;
@@ -352,6 +382,98 @@ etherip_ioctl(struct ifnet *ifp, u_long
  }
 
  return (error);
+}
+
+/*
+ * SIOCGIFPARENT: copy the name of the configured parent interface into
+ * parent->ifp_parent.
+ *
+ * Returns 0 on success, ENOTTY when if_get() finds no interface for
+ * sc_parent, and EINVAL when the name does not fit in the buffer.
+ */
+static int
+etherip_get_parent(struct etherip_softc *sc, struct if_parent *parent)
+{
+	struct ifnet *ifp0;
+	int error = 0;
+
+	ifp0 = if_get(sc->sc_parent);
+	if (ifp0 == NULL)
+		return (ENOTTY); /* invisible to ifconfig until set */
+
+	/* strlcpy returns strlen(src); >= size means the copy was truncated */
+	if (strlcpy(parent->ifp_parent, ifp0->if_xname,
+	    sizeof(parent->ifp_parent)) >= sizeof(parent->ifp_parent))
+		error = EINVAL; /* XXX */
+
+	/* always drop the reference taken by if_get(), even on error */
+	if_put(ifp0);
+
+	return (error);
+}
+
+/*
+ * Link-state hook: mirror the parent's state onto the etherip interface.
+ *
+ * The tunnel reports LINK_STATE_FULL_DUPLEX only while the parent is both
+ * IFF_RUNNING and has link, LINK_STATE_DOWN when the parent exists but
+ * fails that test, and LINK_STATE_UNKNOWN when there is no parent.
+ * if_link_state_change() is only called when the state actually changes.
+ */
+static void
+etherip_parent_link(void *arg)
+{
+	struct etherip_softc *sc = arg;
+	struct ifnet *ifp = &sc->sc_ac.ac_if;
+	struct ifnet *parent;
+	u_char state;
+
+	parent = if_get(sc->sc_parent);
+	if (parent == NULL)
+		state = LINK_STATE_UNKNOWN;
+	else if (ISSET(parent->if_flags, IFF_RUNNING) &&
+	    LINK_STATE_IS_UP(parent->if_link_state))
+		state = LINK_STATE_FULL_DUPLEX;
+	else
+		state = LINK_STATE_DOWN;
+	if_put(parent);
+
+	if (ifp->if_link_state == state)
+		return;
+
+	ifp->if_link_state = state;
+	if_link_state_change(ifp);
+}
+
+/*
+ * SIOCSIFPARENT: make the tunnel's link state follow the interface named
+ * in parent->ifp_parent.
+ *
+ * Returns EINVAL if no such interface exists, 0 otherwise (including the
+ * case where that interface is already the parent).
+ */
+static int
+etherip_set_parent(struct etherip_softc *sc, const struct if_parent *parent)
+{
+	struct ifnet *ifp0;
+
+	NET_ASSERT_LOCKED();
+
+	ifp0 = ifunit(parent->ifp_parent); /* no ref */
+	if (ifp0 == NULL)
+		return (EINVAL);
+
+	if (sc->sc_parent == ifp0->if_index)
+		return (0); /* nop */
+
+	/*
+	 * Switching parents: unhook from the old one first, otherwise its
+	 * hooks stay registered while the cookies needed to remove them
+	 * are overwritten below.
+	 */
+	if (sc->sc_parent != 0)
+		etherip_del_parent(sc);
+
+	sc->sc_parent = ifp0->if_index;
+
+	sc->sc_lcookie = hook_establish(ifp0->if_linkstatehooks, 1,
+	    etherip_parent_link, sc);
+
+	sc->sc_dcookie = hook_establish(ifp0->if_detachhooks, 0,
+	    etherip_del_parent, sc);
+
+	/* pick up the new parent's current state right away */
+	etherip_parent_link(sc);
+
+	return (0);
+}
+
+/*
+ * Detach from the parent interface, if any: remove both hooks, forget the
+ * parent index and return the link state to LINK_STATE_UNKNOWN.
+ *
+ * Takes a void * because it is also registered as the parent's detach
+ * hook by etherip_set_parent().
+ */
+static void
+etherip_del_parent(void *arg)
+{
+	struct etherip_softc *sc = arg;
+	struct ifnet *ifp = &sc->sc_ac.ac_if;
+	struct ifnet *parent;
+
+	NET_ASSERT_LOCKED();
+
+	parent = if_get(sc->sc_parent);
+	if (parent != NULL) {
+		hook_disestablish(parent->if_detachhooks, sc->sc_dcookie);
+		hook_disestablish(parent->if_linkstatehooks, sc->sc_lcookie);
+	}
+	if_put(parent);
+
+	sc->sc_parent = 0;
+
+	/* only signal a link-state change when the state really changes */
+	if (ifp->if_link_state == LINK_STATE_UNKNOWN)
+		return;
+
+	ifp->if_link_state = LINK_STATE_UNKNOWN;
+	if_link_state_change(ifp);
 }
 
 int

Reply | Threaded
Open this post in threaded view
|

Re: rfc: let etherip(4) depend on another interface for link state

Stefan Sperling-5
On Tue, Jul 16, 2019 at 08:29:11PM +1000, David Gwynne wrote:
> is there something obvious im missing here? how else do i make the
> backup bridge not transmit to the other DC?

A workaround which does not involve kernel hacking could be to
monitor carp state with ifstated(8) and have it put etherip
interfaces up or down based on carp state.