bgpd, Adj-RIB-Out support

classic Classic list List threaded Threaded
11 messages Options
Reply | Threaded
Open this post in threaded view
|

bgpd, Adj-RIB-Out support

Claudio Jeker
This diff introduces a real Adj-RIB-Out. It is the minimal change to
introduce the new RIB. This removes the update_rib introduced before 6.4
lock because that is now replaced with a real RIB.
The code used by bgpctl show rib is now a fair amount easier since now one
RIB runner can be used for all cases.
path_update() is now returning if a prefix was not modified which is
needed in the update path to suppress unneeded updates.
The softreconfig out case becomes simpler because there is no more the
need to run with both filters and check results.
Last the shutdown code of the RDE had to be adjusted a bit to still work
with the Adj-RIB-Out.

This may cause memory consumption to go up but my testing has shown that
the result is actually not significant. Needs the commits from earlier
today to apply.
--
:wq Claudio

Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.444
diff -u -p -r1.444 rde.c
--- rde.c 31 Oct 2018 14:50:07 -0000 1.444
+++ rde.c 31 Oct 2018 15:09:39 -0000
@@ -1395,7 +1395,7 @@ rde_update_update(struct rde_peer *peer,
 
  /* add original path to the Adj-RIB-In */
  if (path_update(&ribs[RIB_ADJ_IN].rib, peer, in, prefix, prefixlen,
-    vstate))
+    vstate) == 1)
  peer->prefix_cnt++;
 
  /* max prefix checker */
@@ -2124,16 +2124,17 @@ rde_reflector(struct rde_peer *peer, str
  * control specific functions
  */
 static void
-rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp,
-    struct nexthop *nexthop, pid_t pid, int flags)
+rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
 {
  struct ctl_show_rib rib;
  struct ibuf *wbuf;
  struct attr *a;
+ struct nexthop *nexthop;
  void *bp;
  time_t staletime;
  u_int8_t l;
 
+ nexthop = prefix_nexthop(p);
  bzero(&rib, sizeof(rib));
  rib.lastchange = p->lastchange;
  rib.local_pref = asp->lpref;
@@ -2208,70 +2209,37 @@ rde_dump_rib_as(struct prefix *p, struct
 }
 
 static void
-rde_dump_filterout(struct rde_peer *peer, struct prefix *p,
-    struct ctl_show_rib_request *req)
-{
- struct filterstate state;
- enum filter_actions a;
-
- if (up_test_update(peer, p) != 1)
- return;
-
- rde_filterstate_prep(&state, prefix_aspath(p), prefix_nexthop(p),
-    prefix_nhflags(p));
- a = rde_filter(out_rules, peer, p, &state);
-
- if (a == ACTION_ALLOW)
- rde_dump_rib_as(p, &state.aspath, state.nexthop, req->pid,
-    req->flags);
-
- rde_filterstate_clean(&state);
-}
-
-static void
 rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
 {
- struct rde_peer *peer;
  struct rde_aspath *asp;
 
- if (req->flags & F_CTL_ADJ_OUT) {
- if (p->re->active != p)
- /* only consider active prefix */
- return;
- if (req->peerid) {
- if ((peer = peer_get(req->peerid)) != NULL)
- rde_dump_filterout(peer, p, req);
- return;
- }
- } else {
- asp = prefix_aspath(p);
- if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
- return;
- if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
- return;
- if ((req->flags & F_CTL_INVALID) &&
-    (asp->flags & F_ATTR_PARSE_ERR) == 0)
- return;
- if (req->type == IMSG_CTL_SHOW_RIB_AS &&
-    !aspath_match(asp->aspath->data, asp->aspath->len,
-    &req->as, 0))
- return;
- if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
-    !community_match(asp, req->community.as,
-    req->community.type))
- return;
- if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
-    !community_ext_match(asp, &req->extcommunity, 0))
- return;
- if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
-    !community_large_match(asp, req->large_community.as,
-    req->large_community.ld1, req->large_community.ld2))
- return;
- if (!ovs_match(p, req->flags))
- return;
- rde_dump_rib_as(p, asp, prefix_nexthop(p), req->pid,
-    req->flags);
- }
+ if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
+ return;
+
+ asp = prefix_aspath(p);
+ if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
+ return;
+ if ((req->flags & F_CTL_INVALID) &&
+    (asp->flags & F_ATTR_PARSE_ERR) == 0)
+ return;
+ if (req->type == IMSG_CTL_SHOW_RIB_AS &&
+    !aspath_match(asp->aspath->data, asp->aspath->len,
+    &req->as, 0))
+ return;
+ if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
+    !community_match(asp, req->community.as,
+    req->community.type))
+ return;
+ if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
+    !community_ext_match(asp, &req->extcommunity, 0))
+ return;
+ if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
+    !community_large_match(asp, req->large_community.as,
+    req->large_community.ld1, req->large_community.ld2))
+ return;
+ if (!ovs_match(p, req->flags))
+ return;
+ rde_dump_rib_as(p, asp, req->pid, req->flags);
 }
 
 static void
@@ -2342,6 +2310,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req
  }
  if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
  rid = RIB_ADJ_IN;
+ } else if (req->flags & F_CTL_ADJ_OUT) {
+ rid = RIB_ADJ_OUT;
  } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
  log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
  error = CTL_RES_NOSUCHPEER;
@@ -2591,6 +2561,19 @@ rde_up_dump_upcall(struct rib_entry *re,
 }
 
 static void
+rde_up_flush_upcall(struct rib_entry *re, void *ptr)
+{
+ struct rde_peer *peer = ptr;
+ struct prefix *p, *np;
+
+ LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+ if (peer != prefix_peer(p))
+ continue;
+ up_generate_updates(out_rules, peer, NULL, p);
+ }
+}
+
+static void
 rde_up_dump_done(void *ptr, u_int8_t aid)
 {
  struct rde_peer *peer = ptr;
@@ -2805,6 +2788,8 @@ rde_reload_done(void)
  u_int16_t rid;
  int reload = 0;
 
+ softreconfig = 0;
+
  /* first merge the main config */
  if ((conf->flags & BGPD_FLAG_NO_EVALUATE) &&
     (nconf->flags & BGPD_FLAG_NO_EVALUATE) == 0) {
@@ -2887,11 +2872,16 @@ rde_reload_done(void)
  char *p = log_fmt_peer(&peer->conf);
  log_debug("rib change: reloading peer %s", p);
  free(p);
- up_withdraw_all(peer);
  peer->loc_rib_id = rib_find(peer->conf.rib);
  if (peer->loc_rib_id == RIB_NOTFOUND)
  fatalx("King Bula's peer met an unknown RIB");
  peer->reconf_rib = 1;
+ softreconfig++;
+ if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC,
+    RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
+    rde_softreconfig_in_done, NULL) == -1)
+ fatal("%s: rib_dump_new", __func__);
+ log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
  continue;
  }
  if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) {
@@ -2941,14 +2931,13 @@ rde_reload_done(void)
  }
  log_info("RDE reconfigured");
 
- softreconfig = 0;
  if (reload > 0) {
- log_info("running softreconfig in");
  softreconfig++;
  if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
     RDE_RUNNER_ROUNDS, &ribs[RIB_ADJ_IN], rde_softreconfig_in,
     rde_softreconfig_in_done, NULL) == -1)
  fatal("%s: rib_dump_new", __func__);
+ log_info("running softreconfig in");
  } else {
  rde_softreconfig_in_done(NULL, AID_UNSPEC);
  }
@@ -3113,61 +3102,18 @@ rde_softreconfig_in(struct rib_entry *re
 }
 
 static void
-rde_softreconfig_out_peer(struct rib_entry *re, struct rde_peer *peer)
-{
- struct filterstate ostate, nstate;
- struct bgpd_addr addr;
- struct prefix *p = re->active;
- struct pt_entry *pt;
- enum filter_actions oa, na;
-
- pt = re->prefix;
- pt_getaddr(pt, &addr);
-
- if (up_test_update(peer, p) != 1)
- return;
-
- rde_filterstate_prep(&ostate, prefix_aspath(p), prefix_nexthop(p),
-    prefix_nhflags(p));
- rde_filterstate_prep(&nstate, prefix_aspath(p), prefix_nexthop(p),
-    prefix_nhflags(p));
- oa = rde_filter(out_rules_tmp, peer, p, &ostate);
- na = rde_filter(out_rules, peer, p, &nstate);
-
- /* go through all 4 possible combinations */
- /* if (oa == ACTION_DENY && na == ACTION_DENY) */
- /* nothing todo */
- if (oa == ACTION_DENY && na == ACTION_ALLOW) {
- /* send update */
- up_rib_add(peer, re);
- up_generate(peer, &nstate, &addr, pt->prefixlen);
- } else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
- /* send withdraw */
- up_rib_remove(peer, re);
- up_generate(peer, NULL, &addr, pt->prefixlen);
- } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
- /* send update if anything changed */
- if (nstate.nhflags != ostate.nhflags ||
-    nstate.nexthop != ostate.nexthop ||
-    path_compare(&nstate.aspath, &ostate.aspath) != 0)
- up_generate(peer, &nstate, &addr, pt->prefixlen);
- }
-
- rde_filterstate_clean(&ostate);
- rde_filterstate_clean(&nstate);
-}
-
-static void
 rde_softreconfig_out(struct rib_entry *re, void *bula)
 {
+ struct prefix *new = re->active;
  struct rde_peer *peer;
 
- if (re->active == NULL)
+ if (new == NULL)
  return;
 
  LIST_FOREACH(peer, &peerlist, peer_l) {
  if (peer->loc_rib_id == re->rib_id && peer->reconf_out)
- rde_softreconfig_out_peer(re, peer);
+ /* Regenerate all updates. */
+ up_generate_updates(out_rules, peer, new, new);
  }
 }
 
@@ -3687,7 +3633,7 @@ network_add(struct network_config *nc, i
  vstate = rde_roa_validity(&conf->rde_roa, &nc->prefix,
     nc->prefixlen, asp->source_as);
  if (path_update(&ribs[RIB_ADJ_IN].rib, peerself, &state, &nc->prefix,
-    nc->prefixlen, vstate))
+    nc->prefixlen, vstate) == 1)
  peerself->prefix_cnt++;
  for (i = RIB_LOC_START; i < rib_size; i++) {
  if (!rib_valid(i))
@@ -3835,21 +3781,20 @@ rde_shutdown(void)
  * rde_shutdown depends on this.
  */
 
- /*
- * All peers go down
- */
+ /* Frist all peers go down */
  for (i = 0; i <= peertable.peer_hashmask; i++)
  while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
  peer_down(p->conf.id);
 
+ /* then since decision process is off, kill RIB_ADJ_OUT */
+ rib_free(rib_byid(RIB_ADJ_OUT));
+
  /* free filters */
  filterlist_free(out_rules);
- for (i = 0; i < rib_size; i++) {
- if (!rib_valid(i))
- continue;
- filterlist_free(ribs[i].in_rules);
- }
+ filterlist_free(out_rules_tmp);
 
+ /* now check everything */
+ rib_shutdown();
  nexthop_shutdown();
  path_shutdown();
  aspath_shutdown();
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.201
diff -u -p -r1.201 rde.h
--- rde.h 31 Oct 2018 14:50:07 -0000 1.201
+++ rde.h 31 Oct 2018 15:09:39 -0000
@@ -79,7 +79,6 @@ LIST_HEAD(attr_list, attr);
 LIST_HEAD(aspath_head, rde_aspath);
 RB_HEAD(uptree_prefix, update_prefix);
 RB_HEAD(uptree_attr, update_attr);
-RB_HEAD(uptree_rib, update_rib);
 
 TAILQ_HEAD(uplist_prefix, update_prefix);
 TAILQ_HEAD(uplist_attr, update_attr);
@@ -91,7 +90,6 @@ struct rde_peer {
  struct bgpd_addr remote_addr;
  struct bgpd_addr local_v4_addr;
  struct bgpd_addr local_v6_addr;
- struct uptree_rib up_rib;
  struct uptree_prefix up_prefix;
  struct uptree_attr up_attrs;
  struct uplist_attr updates[AID_MAX];
@@ -440,6 +438,7 @@ struct rib *rib_byid(u_int16_t);
 u_int16_t rib_find(char *);
 struct rib_desc *rib_desc(struct rib *);
 void rib_free(struct rib *);
+void rib_shutdown(void);
 struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
 struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *);
 int rib_dump_pending(void);
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.184
diff -u -p -r1.184 rde_rib.c
--- rde_rib.c 31 Oct 2018 14:50:07 -0000 1.184
+++ rde_rib.c 31 Oct 2018 15:09:39 -0000
@@ -216,6 +216,21 @@ rib_free(struct rib *rib)
  bzero(rd, sizeof(struct rib_desc));
 }
 
+void
+rib_shutdown(void)
+{
+ u_int16_t id;
+
+ for (id = 0; id < rib_size; id++) {
+ if (!rib_valid(id))
+ continue;
+ if (!RB_EMPTY(rib_tree(&ribs[id].rib)))
+ log_warnx("%s: rib %s is not empty", __func__,
+    ribs[id].name);
+ rib_free(&ribs[id].rib);
+ }
+}
+
 struct rib_entry *
 rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
 {
@@ -553,6 +568,11 @@ path_hash_stats(struct rde_hashstats *hs
  }
 }
 
+/*
+ * Update a prefix belonging to a possible new aspath.
+ * Return 1 if prefix was newly added, 0 if it was just changed or 2 if no
+ * change happened at all.
+ */
 int
 path_update(struct rib *rib, struct rde_peer *peer, struct filterstate *state,
     struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate)
@@ -575,7 +595,7 @@ path_update(struct rib *rib, struct rde_
  /* no change, update last change */
  p->lastchange = time(NULL);
  p->validation_state = vstate;
- return (0);
+ return (2);
  }
  }
 
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.102
diff -u -p -r1.102 rde_update.c
--- rde_update.c 24 Oct 2018 08:26:37 -0000 1.102
+++ rde_update.c 31 Oct 2018 15:09:39 -0000
@@ -53,15 +53,9 @@ struct update_attr {
  u_int16_t mpattr_len;
 };
 
-struct update_rib {
- RB_ENTRY(update_rib) entry;
- struct rib_entry *re;
-};
-
 void up_clear(struct uplist_attr *, struct uplist_prefix *);
 int up_prefix_cmp(struct update_prefix *, struct update_prefix *);
 int up_attr_cmp(struct update_attr *, struct update_attr *);
-int up_rib_cmp(struct update_rib *, struct update_rib *);
 int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *);
 
 RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
@@ -70,9 +64,6 @@ RB_GENERATE(uptree_prefix, update_prefix
 RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp)
 RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp)
 
-RB_PROTOTYPE(uptree_rib, update_rib, entry, up_rib_cmp)
-RB_GENERATE(uptree_rib, update_rib, entry, up_rib_cmp)
-
 SIPHASH_KEY uptree_key;
 
 void
@@ -86,7 +77,6 @@ up_init(struct rde_peer *peer)
  }
  RB_INIT(&peer->up_prefix);
  RB_INIT(&peer->up_attrs);
- RB_INIT(&peer->up_rib);
  peer->up_pcnt = 0;
  peer->up_acnt = 0;
  peer->up_nlricnt = 0;
@@ -120,7 +110,6 @@ up_clear(struct uplist_attr *updates, st
 void
 up_down(struct rde_peer *peer)
 {
- struct update_rib *ur, *nur;
  u_int8_t i;
 
  for (i = 0; i < AID_MAX; i++)
@@ -128,10 +117,6 @@ up_down(struct rde_peer *peer)
 
  RB_INIT(&peer->up_prefix);
  RB_INIT(&peer->up_attrs);
- RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
- RB_REMOVE(uptree_rib, &peer->up_rib, ur);
- free(ur);
- }
 
  peer->up_pcnt = 0;
  peer->up_acnt = 0;
@@ -210,58 +195,6 @@ up_attr_cmp(struct update_attr *a, struc
 }
 
 int
-up_rib_cmp(struct update_rib *a, struct update_rib *b)
-{
- if (a->re != b->re)
- return (a->re > b->re ? 1 : -1);
- return 0;
-}
-
-int
-up_rib_remove(struct rde_peer *peer, struct rib_entry *re)
-{
- struct update_rib *ur, u;
- u.re = re;
-
- ur = RB_FIND(uptree_rib, &peer->up_rib, &u);
- if (ur != NULL) {
- RB_REMOVE(uptree_rib, &peer->up_rib, ur);
- free(ur);
- return 1;
- } else
- return 0;
-}
-
-void
-up_rib_add(struct rde_peer *peer, struct rib_entry *re)
-{
- struct update_rib *ur;
-
- if ((ur = calloc(1, sizeof(*ur))) == NULL)
- fatal("%s", __func__);
- ur->re = re;
-
- if (RB_INSERT(uptree_rib, &peer->up_rib, ur) != NULL)
- free(ur);
-}
-
-void
-up_withdraw_all(struct rde_peer *peer)
-{
- struct bgpd_addr addr;
- struct update_rib *ur, *nur;
-
- RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
- RB_REMOVE(uptree_rib, &peer->up_rib, ur);
-
- /* withdraw prefix */
- pt_getaddr(ur->re->prefix, &addr);
- up_generate(peer, NULL, &addr, ur->re->prefix->prefixlen);
- free(ur);
- }
-}
-
-int
 up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a)
 {
  struct update_attr *na = NULL;
@@ -473,14 +406,16 @@ up_generate_updates(struct filter_head *
 
  if (new == NULL) {
 withdraw:
- if (up_test_update(peer, old) != 1)
- return;
-
- if (!up_rib_remove(peer, old->re))
+ if (old == NULL)
  return;
 
  /* withdraw prefix */
  pt_getaddr(old->re->prefix, &addr);
+ if (prefix_remove(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
+    old->re->prefix->prefixlen) == 0) {
+ /* not in table, no need to send withdraw */
+ return;
+ }
  up_generate(peer, NULL, &addr, old->re->prefix->prefixlen);
  } else {
  switch (up_test_update(peer, new)) {
@@ -500,8 +435,12 @@ withdraw:
  }
 
  pt_getaddr(new->re->prefix, &addr);
- up_generate(peer, &state, &addr, new->re->prefix->prefixlen);
- up_rib_add(peer, new->re);
+ if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
+    new->re->prefix->prefixlen, prefix_vstate(new)) != 2) {
+ /* only send update if path changed */
+ up_generate(peer, &state, &addr,
+    new->re->prefix->prefixlen);
+ }
 
  rde_filterstate_clean(&state);
  }

Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

Denis Fondras
On Wed, Oct 31, 2018 at 04:24:49PM +0100, Claudio Jeker wrote:

> This diff introduces a real Adj-RIB-Out. It is the minimal change to
> introduce the new RIB. This removes the update_rib introduced before 6.4
> lock because that is now replaced with a real RIB.
> The code used by bgpctl show rib is now a fair amount easier since now one
> RIB runner can be used for all cases.
> path_update() is now returning if a prefix was not modified which is
> needed in the update path to suppress unneeded updates.
> The softreconfig out case becomes simpler because there is no more the
> need to run with both filters and check results.
> Last the shutdown code of the RDE had to be adjusted a bit to still work
> with the Adj-RIB-Out.
>
> This may cause memory consumption to go up but my testing has shown that
> the result is actually not significant. Needs the commits from earlier
> today to apply.

On my Route Server, it is quite a big change (in percentage).
* Before :
RDE memory statistics
     11561 IPv4 unicast network entries using 452K of memory
       131 IPv6 unicast network entries using 7.2K of memory
     23370 rib entries using 1.4M of memory
     23517 prefix entries using 1.8M of memory
      4894 BGP path attribute entries using 344K of memory
           and holding 23517 references
      1720 BGP AS-PATH attribute entries using 76.4K of memory
           and holding 4894 references
      1061 BGP attributes entries using 41.4K of memory
           and holding 10565 references
      1060 BGP attributes using 25.6K of memory
    101809 as-set elements in 58041 tables using 2.1M of memory
    114429 prefix-set elments using 4.7M of memory
RIB using 4.1M of memory
Sets using 6.9M of memory

RDE hash statistics
        path hash: size 131072, 4894 entires
            min 0 max 3 avg/std-dev = 0.037/0.000
        aspath hash: size 131072, 1720 entires
            min 0 max 2 avg/std-dev = 0.013/0.000
        attr hash: size 16384, 1061 entires
            min 0 max 2 avg/std-dev = 0.065/0.000

* After:
RDE memory statistics
     11560 IPv4 unicast network entries using 452K of memory
       145 IPv6 unicast network entries using 7.9K of memory
     34991 rib entries using 2.1M of memory
     69844 prefix entries using 5.3M of memory
      4929 BGP path attribute entries using 347K of memory
           and holding 69844 references
      1727 BGP AS-PATH attribute entries using 76.6K of memory
           and holding 4929 references
      1066 BGP attributes entries using 41.6K of memory
           and holding 10643 references
      1065 BGP attributes using 25.6K of memory
    101809 as-set elements in 58041 tables using 2.1M of memory
    114429 prefix-set elments using 4.7M of memory
RIB using 8.4M of memory
Sets using 6.9M of memory

RDE hash statistics
        path hash: size 131072, 4929 entires
            min 0 max 3 avg/std-dev = 0.038/0.000
        aspath hash: size 131072, 1727 entires
            min 0 max 2 avg/std-dev = 0.013/0.000
        attr hash: size 16384, 1066 entires
            min 0 max 2 avg/std-dev = 0.065/0.000

I need to test it on a router with a fullview.

Comments below.

> --
> :wq Claudio
>
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.444
> diff -u -p -r1.444 rde.c
> --- rde.c 31 Oct 2018 14:50:07 -0000 1.444
> +++ rde.c 31 Oct 2018 15:09:39 -0000
> @@ -1395,7 +1395,7 @@ rde_update_update(struct rde_peer *peer,
>  
>   /* add original path to the Adj-RIB-In */
>   if (path_update(&ribs[RIB_ADJ_IN].rib, peer, in, prefix, prefixlen,
> -    vstate))
> +    vstate) == 1)
>   peer->prefix_cnt++;
>  
>   /* max prefix checker */
> @@ -2124,16 +2124,17 @@ rde_reflector(struct rde_peer *peer, str
>   * control specific functions
>   */
>  static void
> -rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp,
> -    struct nexthop *nexthop, pid_t pid, int flags)
> +rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
>  {
>   struct ctl_show_rib rib;
>   struct ibuf *wbuf;
>   struct attr *a;
> + struct nexthop *nexthop;
>   void *bp;
>   time_t staletime;
>   u_int8_t l;
>  
> + nexthop = prefix_nexthop(p);
>   bzero(&rib, sizeof(rib));
>   rib.lastchange = p->lastchange;
>   rib.local_pref = asp->lpref;
> @@ -2208,70 +2209,37 @@ rde_dump_rib_as(struct prefix *p, struct
>  }
>  
>  static void
> -rde_dump_filterout(struct rde_peer *peer, struct prefix *p,
> -    struct ctl_show_rib_request *req)
> -{
> - struct filterstate state;
> - enum filter_actions a;
> -
> - if (up_test_update(peer, p) != 1)
> - return;
> -
> - rde_filterstate_prep(&state, prefix_aspath(p), prefix_nexthop(p),
> -    prefix_nhflags(p));
> - a = rde_filter(out_rules, peer, p, &state);
> -
> - if (a == ACTION_ALLOW)
> - rde_dump_rib_as(p, &state.aspath, state.nexthop, req->pid,
> -    req->flags);
> -
> - rde_filterstate_clean(&state);
> -}
> -
> -static void
>  rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
>  {
> - struct rde_peer *peer;
>   struct rde_aspath *asp;
>  
> - if (req->flags & F_CTL_ADJ_OUT) {
> - if (p->re->active != p)
> - /* only consider active prefix */
> - return;
> - if (req->peerid) {
> - if ((peer = peer_get(req->peerid)) != NULL)
> - rde_dump_filterout(peer, p, req);
> - return;
> - }
> - } else {
> - asp = prefix_aspath(p);
> - if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
> - return;
> - if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
> - return;
> - if ((req->flags & F_CTL_INVALID) &&
> -    (asp->flags & F_ATTR_PARSE_ERR) == 0)
> - return;
> - if (req->type == IMSG_CTL_SHOW_RIB_AS &&
> -    !aspath_match(asp->aspath->data, asp->aspath->len,
> -    &req->as, 0))
> - return;
> - if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
> -    !community_match(asp, req->community.as,
> -    req->community.type))
> - return;
> - if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
> -    !community_ext_match(asp, &req->extcommunity, 0))
> - return;
> - if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
> -    !community_large_match(asp, req->large_community.as,
> -    req->large_community.ld1, req->large_community.ld2))
> - return;
> - if (!ovs_match(p, req->flags))
> - return;
> - rde_dump_rib_as(p, asp, prefix_nexthop(p), req->pid,
> -    req->flags);
> - }
> + if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
> + return;
> +
> + asp = prefix_aspath(p);
> + if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
> + return;

You can move it up one line. It only saves a call to prefix_aspath() in a few
case though.

> + if ((req->flags & F_CTL_INVALID) &&
> +    (asp->flags & F_ATTR_PARSE_ERR) == 0)
> + return;
> + if (req->type == IMSG_CTL_SHOW_RIB_AS &&
> +    !aspath_match(asp->aspath->data, asp->aspath->len,
> +    &req->as, 0))
> + return;
> + if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
> +    !community_match(asp, req->community.as,
> +    req->community.type))
> + return;
> + if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
> +    !community_ext_match(asp, &req->extcommunity, 0))
> + return;
> + if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
> +    !community_large_match(asp, req->large_community.as,
> +    req->large_community.ld1, req->large_community.ld2))
> + return;
> + if (!ovs_match(p, req->flags))
> + return;
> + rde_dump_rib_as(p, asp, req->pid, req->flags);
>  }
>  
>  static void
> @@ -2342,6 +2310,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req
>   }
>   if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
>   rid = RIB_ADJ_IN;
> + } else if (req->flags & F_CTL_ADJ_OUT) {
> + rid = RIB_ADJ_OUT;
>   } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
>   log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
>   error = CTL_RES_NOSUCHPEER;
> @@ -2591,6 +2561,19 @@ rde_up_dump_upcall(struct rib_entry *re,
>  }
>  
>  static void
> +rde_up_flush_upcall(struct rib_entry *re, void *ptr)
> +{
> + struct rde_peer *peer = ptr;
> + struct prefix *p, *np;
> +
> + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> + if (peer != prefix_peer(p))
> + continue;
> + up_generate_updates(out_rules, peer, NULL, p);
> + }
> +}
> +
> +static void
>  rde_up_dump_done(void *ptr, u_int8_t aid)
>  {
>   struct rde_peer *peer = ptr;
> @@ -2805,6 +2788,8 @@ rde_reload_done(void)
>   u_int16_t rid;
>   int reload = 0;
>  
> + softreconfig = 0;
> +
>   /* first merge the main config */
>   if ((conf->flags & BGPD_FLAG_NO_EVALUATE) &&
>      (nconf->flags & BGPD_FLAG_NO_EVALUATE) == 0) {
> @@ -2887,11 +2872,16 @@ rde_reload_done(void)
>   char *p = log_fmt_peer(&peer->conf);
>   log_debug("rib change: reloading peer %s", p);
>   free(p);
> - up_withdraw_all(peer);
>   peer->loc_rib_id = rib_find(peer->conf.rib);
>   if (peer->loc_rib_id == RIB_NOTFOUND)
>   fatalx("King Bula's peer met an unknown RIB");
>   peer->reconf_rib = 1;
> + softreconfig++;
> + if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC,
> +    RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
> +    rde_softreconfig_in_done, NULL) == -1)
> + fatal("%s: rib_dump_new", __func__);
> + log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
>   continue;
>   }
>   if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) {
> @@ -2941,14 +2931,13 @@ rde_reload_done(void)
>   }
>   log_info("RDE reconfigured");
>  
> - softreconfig = 0;
>   if (reload > 0) {
> - log_info("running softreconfig in");
>   softreconfig++;
>   if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
>      RDE_RUNNER_ROUNDS, &ribs[RIB_ADJ_IN], rde_softreconfig_in,
>      rde_softreconfig_in_done, NULL) == -1)
>   fatal("%s: rib_dump_new", __func__);
> + log_info("running softreconfig in");
>   } else {
>   rde_softreconfig_in_done(NULL, AID_UNSPEC);
>   }
> @@ -3113,61 +3102,18 @@ rde_softreconfig_in(struct rib_entry *re
>  }
>  
>  static void
> -rde_softreconfig_out_peer(struct rib_entry *re, struct rde_peer *peer)
> -{
> - struct filterstate ostate, nstate;
> - struct bgpd_addr addr;
> - struct prefix *p = re->active;
> - struct pt_entry *pt;
> - enum filter_actions oa, na;
> -
> - pt = re->prefix;
> - pt_getaddr(pt, &addr);
> -
> - if (up_test_update(peer, p) != 1)
> - return;
> -
> - rde_filterstate_prep(&ostate, prefix_aspath(p), prefix_nexthop(p),
> -    prefix_nhflags(p));
> - rde_filterstate_prep(&nstate, prefix_aspath(p), prefix_nexthop(p),
> -    prefix_nhflags(p));
> - oa = rde_filter(out_rules_tmp, peer, p, &ostate);
> - na = rde_filter(out_rules, peer, p, &nstate);
> -
> - /* go through all 4 possible combinations */
> - /* if (oa == ACTION_DENY && na == ACTION_DENY) */
> - /* nothing todo */
> - if (oa == ACTION_DENY && na == ACTION_ALLOW) {
> - /* send update */
> - up_rib_add(peer, re);
> - up_generate(peer, &nstate, &addr, pt->prefixlen);
> - } else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
> - /* send withdraw */
> - up_rib_remove(peer, re);
> - up_generate(peer, NULL, &addr, pt->prefixlen);
> - } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
> - /* send update if anything changed */
> - if (nstate.nhflags != ostate.nhflags ||
> -    nstate.nexthop != ostate.nexthop ||
> -    path_compare(&nstate.aspath, &ostate.aspath) != 0)
> - up_generate(peer, &nstate, &addr, pt->prefixlen);
> - }
> -
> - rde_filterstate_clean(&ostate);
> - rde_filterstate_clean(&nstate);
> -}
> -
> -static void
>  rde_softreconfig_out(struct rib_entry *re, void *bula)
>  {
> + struct prefix *new = re->active;
>   struct rde_peer *peer;
>  
> - if (re->active == NULL)
> + if (new == NULL)
>   return;
>  
>   LIST_FOREACH(peer, &peerlist, peer_l) {
>   if (peer->loc_rib_id == re->rib_id && peer->reconf_out)
> - rde_softreconfig_out_peer(re, peer);
> + /* Regenerate all updates. */
> + up_generate_updates(out_rules, peer, new, new);
>   }
>  }
>  
> @@ -3687,7 +3633,7 @@ network_add(struct network_config *nc, i
>   vstate = rde_roa_validity(&conf->rde_roa, &nc->prefix,
>      nc->prefixlen, asp->source_as);
>   if (path_update(&ribs[RIB_ADJ_IN].rib, peerself, &state, &nc->prefix,
> -    nc->prefixlen, vstate))
> +    nc->prefixlen, vstate) == 1)

Isn't there too many tabs here ?

>   peerself->prefix_cnt++;
>   for (i = RIB_LOC_START; i < rib_size; i++) {
>   if (!rib_valid(i))
> @@ -3835,21 +3781,20 @@ rde_shutdown(void)
>   * rde_shutdown depends on this.
>   */
>  
> - /*
> - * All peers go down
> - */
> + /* Frist all peers go down */

Frist ?! :)

>   for (i = 0; i <= peertable.peer_hashmask; i++)
>   while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
>   peer_down(p->conf.id);
>  
> + /* then since decision process is off, kill RIB_ADJ_OUT */
> + rib_free(rib_byid(RIB_ADJ_OUT));
> +
>   /* free filters */
>   filterlist_free(out_rules);
> - for (i = 0; i < rib_size; i++) {
> - if (!rib_valid(i))
> - continue;
> - filterlist_free(ribs[i].in_rules);
> - }
> + filterlist_free(out_rules_tmp);
>  
> + /* now check everything */
> + rib_shutdown();
>   nexthop_shutdown();
>   path_shutdown();
>   aspath_shutdown();
> Index: rde.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> retrieving revision 1.201
> diff -u -p -r1.201 rde.h
> --- rde.h 31 Oct 2018 14:50:07 -0000 1.201
> +++ rde.h 31 Oct 2018 15:09:39 -0000
> @@ -79,7 +79,6 @@ LIST_HEAD(attr_list, attr);
>  LIST_HEAD(aspath_head, rde_aspath);
>  RB_HEAD(uptree_prefix, update_prefix);
>  RB_HEAD(uptree_attr, update_attr);
> -RB_HEAD(uptree_rib, update_rib);
>  
>  TAILQ_HEAD(uplist_prefix, update_prefix);
>  TAILQ_HEAD(uplist_attr, update_attr);
> @@ -91,7 +90,6 @@ struct rde_peer {
>   struct bgpd_addr remote_addr;
>   struct bgpd_addr local_v4_addr;
>   struct bgpd_addr local_v6_addr;
> - struct uptree_rib up_rib;
>   struct uptree_prefix up_prefix;
>   struct uptree_attr up_attrs;
>   struct uplist_attr updates[AID_MAX];
> @@ -440,6 +438,7 @@ struct rib *rib_byid(u_int16_t);
>  u_int16_t rib_find(char *);
>  struct rib_desc *rib_desc(struct rib *);
>  void rib_free(struct rib *);
> +void rib_shutdown(void);
>  struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
>  struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *);
>  int rib_dump_pending(void);
> Index: rde_rib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> retrieving revision 1.184
> diff -u -p -r1.184 rde_rib.c
> --- rde_rib.c 31 Oct 2018 14:50:07 -0000 1.184
> +++ rde_rib.c 31 Oct 2018 15:09:39 -0000
> @@ -216,6 +216,21 @@ rib_free(struct rib *rib)
>   bzero(rd, sizeof(struct rib_desc));
>  }
>  
> +void
> +rib_shutdown(void)
> +{
> + u_int16_t id;
> +
> + for (id = 0; id < rib_size; id++) {
> + if (!rib_valid(id))
> + continue;
> + if (!RB_EMPTY(rib_tree(&ribs[id].rib)))
> + log_warnx("%s: rib %s is not empty", __func__,
> +    ribs[id].name);
> + rib_free(&ribs[id].rib);
> + }
> +}
> +
>  struct rib_entry *
>  rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
>  {
> @@ -553,6 +568,11 @@ path_hash_stats(struct rde_hashstats *hs
>   }
>  }
>  
> +/*
> + * Update a prefix belonging to a possible new aspath.
> + * Return 1 if prefix was newly added, 0 if it was just changed or 2 if no
> + * change happened at all.
> + */
>  int
>  path_update(struct rib *rib, struct rde_peer *peer, struct filterstate *state,
>      struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate)
> @@ -575,7 +595,7 @@ path_update(struct rib *rib, struct rde_
>   /* no change, update last change */
>   p->lastchange = time(NULL);
>   p->validation_state = vstate;
> - return (0);
> + return (2);
>   }
>   }
>  
> Index: rde_update.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 rde_update.c
> --- rde_update.c 24 Oct 2018 08:26:37 -0000 1.102
> +++ rde_update.c 31 Oct 2018 15:09:39 -0000
> @@ -53,15 +53,9 @@ struct update_attr {
>   u_int16_t mpattr_len;
>  };
>  
> -struct update_rib {
> - RB_ENTRY(update_rib) entry;
> - struct rib_entry *re;
> -};
> -
>  void up_clear(struct uplist_attr *, struct uplist_prefix *);
>  int up_prefix_cmp(struct update_prefix *, struct update_prefix *);
>  int up_attr_cmp(struct update_attr *, struct update_attr *);
> -int up_rib_cmp(struct update_rib *, struct update_rib *);
>  int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *);
>  
>  RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
> @@ -70,9 +64,6 @@ RB_GENERATE(uptree_prefix, update_prefix
>  RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp)
>  RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp)
>  
> -RB_PROTOTYPE(uptree_rib, update_rib, entry, up_rib_cmp)
> -RB_GENERATE(uptree_rib, update_rib, entry, up_rib_cmp)
> -
>  SIPHASH_KEY uptree_key;
>  
>  void
> @@ -86,7 +77,6 @@ up_init(struct rde_peer *peer)
>   }
>   RB_INIT(&peer->up_prefix);
>   RB_INIT(&peer->up_attrs);
> - RB_INIT(&peer->up_rib);
>   peer->up_pcnt = 0;
>   peer->up_acnt = 0;
>   peer->up_nlricnt = 0;
> @@ -120,7 +110,6 @@ up_clear(struct uplist_attr *updates, st
>  void
>  up_down(struct rde_peer *peer)
>  {
> - struct update_rib *ur, *nur;
>   u_int8_t i;
>  
>   for (i = 0; i < AID_MAX; i++)
> @@ -128,10 +117,6 @@ up_down(struct rde_peer *peer)
>  
>   RB_INIT(&peer->up_prefix);
>   RB_INIT(&peer->up_attrs);
> - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
> - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> - free(ur);
> - }
>  
>   peer->up_pcnt = 0;
>   peer->up_acnt = 0;
> @@ -210,58 +195,6 @@ up_attr_cmp(struct update_attr *a, struc
>  }
>  
>  int
> -up_rib_cmp(struct update_rib *a, struct update_rib *b)
> -{
> - if (a->re != b->re)
> - return (a->re > b->re ? 1 : -1);
> - return 0;
> -}
> -
> -int
> -up_rib_remove(struct rde_peer *peer, struct rib_entry *re)
> -{
> - struct update_rib *ur, u;
> - u.re = re;
> -
> - ur = RB_FIND(uptree_rib, &peer->up_rib, &u);
> - if (ur != NULL) {
> - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> - free(ur);
> - return 1;
> - } else
> - return 0;
> -}
> -
> -void
> -up_rib_add(struct rde_peer *peer, struct rib_entry *re)
> -{
> - struct update_rib *ur;
> -
> - if ((ur = calloc(1, sizeof(*ur))) == NULL)
> - fatal("%s", __func__);
> - ur->re = re;
> -
> - if (RB_INSERT(uptree_rib, &peer->up_rib, ur) != NULL)
> - free(ur);
> -}
> -
> -void
> -up_withdraw_all(struct rde_peer *peer)
> -{
> - struct bgpd_addr addr;
> - struct update_rib *ur, *nur;
> -
> - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
> - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> -
> - /* withdraw prefix */
> - pt_getaddr(ur->re->prefix, &addr);
> - up_generate(peer, NULL, &addr, ur->re->prefix->prefixlen);
> - free(ur);
> - }
> -}
> -
> -int
>  up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a)
>  {
>   struct update_attr *na = NULL;
> @@ -473,14 +406,16 @@ up_generate_updates(struct filter_head *
>  
>   if (new == NULL) {
>  withdraw:
> - if (up_test_update(peer, old) != 1)
> - return;
> -
> - if (!up_rib_remove(peer, old->re))
> + if (old == NULL)
>   return;
>  
>   /* withdraw prefix */
>   pt_getaddr(old->re->prefix, &addr);
> + if (prefix_remove(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
> +    old->re->prefix->prefixlen) == 0) {
> + /* not in table, no need to send withdraw */
> + return;
> + }
>   up_generate(peer, NULL, &addr, old->re->prefix->prefixlen);
>   } else {
>   switch (up_test_update(peer, new)) {
> @@ -500,8 +435,12 @@ withdraw:
>   }
>  
>   pt_getaddr(new->re->prefix, &addr);
> - up_generate(peer, &state, &addr, new->re->prefix->prefixlen);
> - up_rib_add(peer, new->re);
> + if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
> +    new->re->prefix->prefixlen, prefix_vstate(new)) != 2) {
> + /* only send update if path changed */
> + up_generate(peer, &state, &addr,
> +    new->re->prefix->prefixlen);
> + }
>  
>   rde_filterstate_clean(&state);
>   }
>

Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

Sebastian Benoit-3
Denis Fondras([hidden email]) on 2018.10.31 21:02:17 +0100:

> On Wed, Oct 31, 2018 at 04:24:49PM +0100, Claudio Jeker wrote:
> > This diff introduces a real Adj-RIB-Out. It is the minimal change to
> > introduce the new RIB. This removes the update_rib introduced before 6.4
> > lock because that is now replaced with a real RIB.
> > The code used by bgpctl show rib is now a fair amount easier since now one
> > RIB runner can be used for all cases.
> > path_update() is now returning if a prefix was not modified which is
> > needed in the update path to suppress unneeded updates.
> > The softreconfig out case becomes simpler because there is no more the
> > need to run with both filters and check results.
> > Last the shutdown code of the RDE had to be adjusted a bit to still work
> > with the Adj-RIB-Out.
> >
> > This may cause memory consumption to go up but my testing has shown that
> > the result is actually not significant. Needs the commits from earlier
> > today to apply.
>
> On my Route Server, it is quite a big change (in percentage).
> * Before :
> RDE memory statistics
>      11561 IPv4 unicast network entries using 452K of memory
>        131 IPv6 unicast network entries using 7.2K of memory
>      23370 rib entries using 1.4M of memory
>      23517 prefix entries using 1.8M of memory
>       4894 BGP path attribute entries using 344K of memory
>            and holding 23517 references
>       1720 BGP AS-PATH attribute entries using 76.4K of memory
>            and holding 4894 references
>       1061 BGP attributes entries using 41.4K of memory
>            and holding 10565 references
>       1060 BGP attributes using 25.6K of memory
>     101809 as-set elements in 58041 tables using 2.1M of memory
>     114429 prefix-set elments using 4.7M of memory
> RIB using 4.1M of memory
> Sets using 6.9M of memory
>
> RDE hash statistics
>         path hash: size 131072, 4894 entires
>             min 0 max 3 avg/std-dev = 0.037/0.000
>         aspath hash: size 131072, 1720 entires
>             min 0 max 2 avg/std-dev = 0.013/0.000
>         attr hash: size 16384, 1061 entires
>             min 0 max 2 avg/std-dev = 0.065/0.000
>
> * After:
> RDE memory statistics
>      11560 IPv4 unicast network entries using 452K of memory
>        145 IPv6 unicast network entries using 7.9K of memory
>      34991 rib entries using 2.1M of memory
>      69844 prefix entries using 5.3M of memory
>       4929 BGP path attribute entries using 347K of memory
>            and holding 69844 references
>       1727 BGP AS-PATH attribute entries using 76.6K of memory
>            and holding 4929 references
>       1066 BGP attributes entries using 41.6K of memory
>            and holding 10643 references
>       1065 BGP attributes using 25.6K of memory
>     101809 as-set elements in 58041 tables using 2.1M of memory
>     114429 prefix-set elments using 4.7M of memory
> RIB using 8.4M of memory
> Sets using 6.9M of memory
>
> RDE hash statistics
>         path hash: size 131072, 4929 entires
>             min 0 max 3 avg/std-dev = 0.038/0.000
>         aspath hash: size 131072, 1727 entires
>             min 0 max 2 avg/std-dev = 0.013/0.000
>         attr hash: size 16384, 1066 entires
>             min 0 max 2 avg/std-dev = 0.065/0.000
>
> I need to test it on a router with a fullview.


two full views v4 + v6, the usual testbox:

before:

[benoit@fw-ipv6onlyorg:/usr/src/usr.sbin/bgpd]$ cat  /tmp/old  
RDE memory statistics
    714558 IPv4 unicast network entries using 27.3M of memory
     58836 IPv6 unicast network entries using 3.1M of memory
   1546598 rib entries using 94.4M of memory
   3093193 prefix entries using 236M of memory
    413229 BGP path attribute entries using 28.4M of memory
           and holding 3093193 references
    108607 BGP AS-PATH attribute entries using 4.3M of memory
           and holding 413229 references
     43526 BGP attributes entries using 1.7M of memory
           and holding 1563107 references
     43525 BGP attributes using 1.1M of memory
      1055 as-set elements in 1 tables using 32B of memory
        12 prefix-set elments using 504B of memory
RIB using 396M of memory
Sets using 536B of memory

RDE hash statistics
        path hash: size 131072, 413229 entires
            min 0 max 14 avg/std-dev = 3.153/1.749
        aspath hash: size 131072, 108607 entires
            min 0 max 7 avg/std-dev = 0.829/0.560
        attr hash: size 16384, 43526 entires
            min 0 max 11 avg/std-dev = 2.657/1.394


after:

[benoit@fw-ipv6onlyorg:/usr/src/usr.sbin/bgpd]$ cat  /tmp/new2
RDE memory statistics
    714521 IPv4 unicast network entries using 27.3M of memory
     58831 IPv6 unicast network entries using 3.1M of memory
   1546515 rib entries using 94.4M of memory
   3093028 prefix entries using 236M of memory
    391788 BGP path attribute entries using 26.9M of memory
           and holding 3093028 references
    111348 BGP AS-PATH attribute entries using 4.5M of memory
           and holding 391788 references
     43780 BGP attributes entries using 1.7M of memory
           and holding 1442079 references
     43779 BGP attributes using 1.2M of memory
      1055 as-set elements in 1 tables using 32B of memory
        12 prefix-set elments using 504B of memory
RIB using 395M of memory
Sets using 536B of memory

RDE hash statistics
        path hash: size 131072, 391788 entires
            min 0 max 14 avg/std-dev = 2.989/1.751
        aspath hash: size 131072, 111348 entires
            min 0 max 8 avg/std-dev = 0.850/0.528
        attr hash: size 16384, 43780 entires
            min 0 max 11 avg/std-dev = 2.672/1.364

Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

Sebastian Benoit-3
Sebastian Benoit([hidden email]) on 2018.10.31 22:26:51 +0100:
> two full views v4 + v6, the usual testbox:

forget this one, the machine is not really doing much with the Adj-RIB-Out.

Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

Sebastian Benoit-3
In reply to this post by Denis Fondras
Denis Fondras([hidden email]) on 2018.10.31 21:02:17 +0100:

> On Wed, Oct 31, 2018 at 04:24:49PM +0100, Claudio Jeker wrote:
> > This diff introduces a real Adj-RIB-Out. It is the minimal change to
> > introduce the new RIB. This removes the update_rib introduced before 6.4
> > lock because that is now replaced with a real RIB.
> > The code used by bgpctl show rib is now a fair amount easier since now one
> > RIB runner can be used for all cases.
> > path_update() is now returning if a prefix was not modified which is
> > needed in the update path to suppress unneeded updates.
> > The softreconfig out case becomes simpler because there is no more the
> > need to run with both filters and check results.
> > Last the shutdown code of the RDE had to be adjusted a bit to still work
> > with the Adj-RIB-Out.
> >
> > This may cause memory consumption to go up but my testing has shown that
> > the result is actually not significant. Needs the commits from earlier
> > today to apply.
>
> On my Route Server, it is quite a big change (in percentage).
> * Before :
> RDE memory statistics
>      11561 IPv4 unicast network entries using 452K of memory
>        131 IPv6 unicast network entries using 7.2K of memory
>      23370 rib entries using 1.4M of memory
>      23517 prefix entries using 1.8M of memory
>       4894 BGP path attribute entries using 344K of memory
>            and holding 23517 references
>       1720 BGP AS-PATH attribute entries using 76.4K of memory
>            and holding 4894 references
>       1061 BGP attributes entries using 41.4K of memory
>            and holding 10565 references
>       1060 BGP attributes using 25.6K of memory
>     101809 as-set elements in 58041 tables using 2.1M of memory
>     114429 prefix-set elments using 4.7M of memory
> RIB using 4.1M of memory
> Sets using 6.9M of memory
>
> RDE hash statistics
>         path hash: size 131072, 4894 entires
>             min 0 max 3 avg/std-dev = 0.037/0.000
>         aspath hash: size 131072, 1720 entires
>             min 0 max 2 avg/std-dev = 0.013/0.000
>         attr hash: size 16384, 1061 entires
>             min 0 max 2 avg/std-dev = 0.065/0.000
>
> * After:
> RDE memory statistics
>      11560 IPv4 unicast network entries using 452K of memory
>        145 IPv6 unicast network entries using 7.9K of memory
>      34991 rib entries using 2.1M of memory
>      69844 prefix entries using 5.3M of memory
>       4929 BGP path attribute entries using 347K of memory
>            and holding 69844 references
>       1727 BGP AS-PATH attribute entries using 76.6K of memory
>            and holding 4929 references
>       1066 BGP attributes entries using 41.6K of memory
>            and holding 10643 references
>       1065 BGP attributes using 25.6K of memory
>     101809 as-set elements in 58041 tables using 2.1M of memory
>     114429 prefix-set elments using 4.7M of memory
> RIB using 8.4M of memory
> Sets using 6.9M of memory
>
> RDE hash statistics
>         path hash: size 131072, 4929 entires
>             min 0 max 3 avg/std-dev = 0.038/0.000
>         aspath hash: size 131072, 1727 entires
>             min 0 max 2 avg/std-dev = 0.013/0.000
>         attr hash: size 16384, 1066 entires
>             min 0 max 2 avg/std-dev = 0.065/0.000
>

ok this is a more interesting router:

[benoit@border2:~]$ cat before  
RDE memory statistics
    715727 IPv4 unicast network entries using 27.3M of memory
     58953 IPv6 unicast network entries using 3.1M of memory
   1549171 rib entries using 94.6M of memory
   2897130 prefix entries using 265M of memory
    562423 BGP path attribute entries using 60.1M of memory
    149579 BGP AS-PATH attribute entries using 6.1M of memory,
           and holding 562423 references
     37007 BGP attributes entries using 1.4M of memory
           and holding 880668 references
     37006 BGP attributes using 912K of memory
     61964 as-set elements in 58064 tables using 950K of memory
    103150 prefix-set elments using 4.3M of memory
RIB using 459M of memory
Sets using 5.2M of memory

RDE hash statistics
        path hash: size 131072, 562423 entires
            min 0 max 20 avg/std-dev = 4.291/2.364
        aspath hash: size 131072, 149579 entires
            min 0 max 8 avg/std-dev = 1.141/0.835
        attr hash: size 16384, 37007 entires
            min 0 max 10 avg/std-dev = 2.259/1.378

[benoit@border2:~]$ cat now                                                            
RDE memory statistics
   2323535 unspec network entries using 0B of memory
    716246 IPv6 unicast network entries using 38.3M of memory
     58933 IPv4 vpn network entries using 4.0M of memory
   4806161 rib entries using 293M of memory
   4806161 prefix entries using 440M of memory
    529410 BGP path attribute entries using 56.5M of memory
      1945 BGP AS-PATH attribute entries using 134K of memory,
           and holding 5807738 references
    529410 BGP attributes entries using 20.2M of memory
           and holding 32706 references
    725558 BGP attributes using 870K of memory
   1426888 as-set elements in 32705 tables using 56.7K of memory
     61964 prefix-set elments using 101K of memory
RIB using 853M of memory
Sets using 157K of memory

RDE hash statistics
        path hash: size 131072, 529410 entires
            min 0 max 17 avg/std-dev = 4.039/1.920
        aspath hash: size 131072, 136842 entires
            min 0 max 8 avg/std-dev = 1.044/0.954
        attr hash: size 16384, 32706 entires
            min 0 max 10 avg/std-dev = 1.996/1.420

Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

Sebastian Benoit-3
In reply to this post by Claudio Jeker
Claudio Jeker([hidden email]) on 2018.10.31 16:24:49 +0100:

> This diff introduces a real Adj-RIB-Out. It is the minimal change to
> introduce the new RIB. This removes the update_rib introduced before 6.4
> lock because that is now replaced with a real RIB.
> The code used by bgpctl show rib is now a fair amount easier since now one
> RIB runner can be used for all cases.
> path_update() is now returning if a prefix was not modified which is
> needed in the update path to suppress unneeded updates.
> The softreconfig out case becomes simpler because there is no more the
> need to run with both filters and check results.
> Last the shutdown code of the RDE had to be adjusted a bit to still work
> with the Adj-RIB-Out.
>
> This may cause memory consumption to go up but my testing has shown that
> the result is actually not significant. Needs the commits from earlier
> today to apply.
> --
> :wq Claudio

i dont see a problem with the diff other than the memory usage (and denis
remarks).

so if we decide that the memory is worth it, ok benno@

not sure if double the usage is acceptable though.

 

> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.444
> diff -u -p -r1.444 rde.c
> --- rde.c 31 Oct 2018 14:50:07 -0000 1.444
> +++ rde.c 31 Oct 2018 15:09:39 -0000
> @@ -1395,7 +1395,7 @@ rde_update_update(struct rde_peer *peer,
>  
>   /* add original path to the Adj-RIB-In */
>   if (path_update(&ribs[RIB_ADJ_IN].rib, peer, in, prefix, prefixlen,
> -    vstate))
> +    vstate) == 1)
>   peer->prefix_cnt++;
>  
>   /* max prefix checker */
> @@ -2124,16 +2124,17 @@ rde_reflector(struct rde_peer *peer, str
>   * control specific functions
>   */
>  static void
> -rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp,
> -    struct nexthop *nexthop, pid_t pid, int flags)
> +rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
>  {
>   struct ctl_show_rib rib;
>   struct ibuf *wbuf;
>   struct attr *a;
> + struct nexthop *nexthop;
>   void *bp;
>   time_t staletime;
>   u_int8_t l;
>  
> + nexthop = prefix_nexthop(p);
>   bzero(&rib, sizeof(rib));
>   rib.lastchange = p->lastchange;
>   rib.local_pref = asp->lpref;
> @@ -2208,70 +2209,37 @@ rde_dump_rib_as(struct prefix *p, struct
>  }
>  
>  static void
> -rde_dump_filterout(struct rde_peer *peer, struct prefix *p,
> -    struct ctl_show_rib_request *req)
> -{
> - struct filterstate state;
> - enum filter_actions a;
> -
> - if (up_test_update(peer, p) != 1)
> - return;
> -
> - rde_filterstate_prep(&state, prefix_aspath(p), prefix_nexthop(p),
> -    prefix_nhflags(p));
> - a = rde_filter(out_rules, peer, p, &state);
> -
> - if (a == ACTION_ALLOW)
> - rde_dump_rib_as(p, &state.aspath, state.nexthop, req->pid,
> -    req->flags);
> -
> - rde_filterstate_clean(&state);
> -}
> -
> -static void
>  rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
>  {
> - struct rde_peer *peer;
>   struct rde_aspath *asp;
>  
> - if (req->flags & F_CTL_ADJ_OUT) {
> - if (p->re->active != p)
> - /* only consider active prefix */
> - return;
> - if (req->peerid) {
> - if ((peer = peer_get(req->peerid)) != NULL)
> - rde_dump_filterout(peer, p, req);
> - return;
> - }
> - } else {
> - asp = prefix_aspath(p);
> - if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
> - return;
> - if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
> - return;
> - if ((req->flags & F_CTL_INVALID) &&
> -    (asp->flags & F_ATTR_PARSE_ERR) == 0)
> - return;
> - if (req->type == IMSG_CTL_SHOW_RIB_AS &&
> -    !aspath_match(asp->aspath->data, asp->aspath->len,
> -    &req->as, 0))
> - return;
> - if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
> -    !community_match(asp, req->community.as,
> -    req->community.type))
> - return;
> - if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
> -    !community_ext_match(asp, &req->extcommunity, 0))
> - return;
> - if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
> -    !community_large_match(asp, req->large_community.as,
> -    req->large_community.ld1, req->large_community.ld2))
> - return;
> - if (!ovs_match(p, req->flags))
> - return;
> - rde_dump_rib_as(p, asp, prefix_nexthop(p), req->pid,
> -    req->flags);
> - }
> + if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
> + return;
> +
> + asp = prefix_aspath(p);
> + if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
> + return;
> + if ((req->flags & F_CTL_INVALID) &&
> +    (asp->flags & F_ATTR_PARSE_ERR) == 0)
> + return;
> + if (req->type == IMSG_CTL_SHOW_RIB_AS &&
> +    !aspath_match(asp->aspath->data, asp->aspath->len,
> +    &req->as, 0))
> + return;
> + if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
> +    !community_match(asp, req->community.as,
> +    req->community.type))
> + return;
> + if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
> +    !community_ext_match(asp, &req->extcommunity, 0))
> + return;
> + if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
> +    !community_large_match(asp, req->large_community.as,
> +    req->large_community.ld1, req->large_community.ld2))
> + return;
> + if (!ovs_match(p, req->flags))
> + return;
> + rde_dump_rib_as(p, asp, req->pid, req->flags);
>  }
>  
>  static void
> @@ -2342,6 +2310,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req
>   }
>   if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
>   rid = RIB_ADJ_IN;
> + } else if (req->flags & F_CTL_ADJ_OUT) {
> + rid = RIB_ADJ_OUT;
>   } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
>   log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
>   error = CTL_RES_NOSUCHPEER;
> @@ -2591,6 +2561,19 @@ rde_up_dump_upcall(struct rib_entry *re,
>  }
>  
>  static void
> +rde_up_flush_upcall(struct rib_entry *re, void *ptr)
> +{
> + struct rde_peer *peer = ptr;
> + struct prefix *p, *np;
> +
> + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> + if (peer != prefix_peer(p))
> + continue;
> + up_generate_updates(out_rules, peer, NULL, p);
> + }
> +}
> +
> +static void
>  rde_up_dump_done(void *ptr, u_int8_t aid)
>  {
>   struct rde_peer *peer = ptr;
> @@ -2805,6 +2788,8 @@ rde_reload_done(void)
>   u_int16_t rid;
>   int reload = 0;
>  
> + softreconfig = 0;
> +
>   /* first merge the main config */
>   if ((conf->flags & BGPD_FLAG_NO_EVALUATE) &&
>      (nconf->flags & BGPD_FLAG_NO_EVALUATE) == 0) {
> @@ -2887,11 +2872,16 @@ rde_reload_done(void)
>   char *p = log_fmt_peer(&peer->conf);
>   log_debug("rib change: reloading peer %s", p);
>   free(p);
> - up_withdraw_all(peer);
>   peer->loc_rib_id = rib_find(peer->conf.rib);
>   if (peer->loc_rib_id == RIB_NOTFOUND)
>   fatalx("King Bula's peer met an unknown RIB");
>   peer->reconf_rib = 1;
> + softreconfig++;
> + if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC,
> +    RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
> +    rde_softreconfig_in_done, NULL) == -1)
> + fatal("%s: rib_dump_new", __func__);
> + log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
>   continue;
>   }
>   if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) {
> @@ -2941,14 +2931,13 @@ rde_reload_done(void)
>   }
>   log_info("RDE reconfigured");
>  
> - softreconfig = 0;
>   if (reload > 0) {
> - log_info("running softreconfig in");
>   softreconfig++;
>   if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
>      RDE_RUNNER_ROUNDS, &ribs[RIB_ADJ_IN], rde_softreconfig_in,
>      rde_softreconfig_in_done, NULL) == -1)
>   fatal("%s: rib_dump_new", __func__);
> + log_info("running softreconfig in");
>   } else {
>   rde_softreconfig_in_done(NULL, AID_UNSPEC);
>   }
> @@ -3113,61 +3102,18 @@ rde_softreconfig_in(struct rib_entry *re
>  }
>  
>  static void
> -rde_softreconfig_out_peer(struct rib_entry *re, struct rde_peer *peer)
> -{
> - struct filterstate ostate, nstate;
> - struct bgpd_addr addr;
> - struct prefix *p = re->active;
> - struct pt_entry *pt;
> - enum filter_actions oa, na;
> -
> - pt = re->prefix;
> - pt_getaddr(pt, &addr);
> -
> - if (up_test_update(peer, p) != 1)
> - return;
> -
> - rde_filterstate_prep(&ostate, prefix_aspath(p), prefix_nexthop(p),
> -    prefix_nhflags(p));
> - rde_filterstate_prep(&nstate, prefix_aspath(p), prefix_nexthop(p),
> -    prefix_nhflags(p));
> - oa = rde_filter(out_rules_tmp, peer, p, &ostate);
> - na = rde_filter(out_rules, peer, p, &nstate);
> -
> - /* go through all 4 possible combinations */
> - /* if (oa == ACTION_DENY && na == ACTION_DENY) */
> - /* nothing todo */
> - if (oa == ACTION_DENY && na == ACTION_ALLOW) {
> - /* send update */
> - up_rib_add(peer, re);
> - up_generate(peer, &nstate, &addr, pt->prefixlen);
> - } else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
> - /* send withdraw */
> - up_rib_remove(peer, re);
> - up_generate(peer, NULL, &addr, pt->prefixlen);
> - } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
> - /* send update if anything changed */
> - if (nstate.nhflags != ostate.nhflags ||
> -    nstate.nexthop != ostate.nexthop ||
> -    path_compare(&nstate.aspath, &ostate.aspath) != 0)
> - up_generate(peer, &nstate, &addr, pt->prefixlen);
> - }
> -
> - rde_filterstate_clean(&ostate);
> - rde_filterstate_clean(&nstate);
> -}
> -
> -static void
>  rde_softreconfig_out(struct rib_entry *re, void *bula)
>  {
> + struct prefix *new = re->active;
>   struct rde_peer *peer;
>  
> - if (re->active == NULL)
> + if (new == NULL)
>   return;
>  
>   LIST_FOREACH(peer, &peerlist, peer_l) {
>   if (peer->loc_rib_id == re->rib_id && peer->reconf_out)
> - rde_softreconfig_out_peer(re, peer);
> + /* Regenerate all updates. */
> + up_generate_updates(out_rules, peer, new, new);
>   }
>  }
>  
> @@ -3687,7 +3633,7 @@ network_add(struct network_config *nc, i
>   vstate = rde_roa_validity(&conf->rde_roa, &nc->prefix,
>      nc->prefixlen, asp->source_as);
>   if (path_update(&ribs[RIB_ADJ_IN].rib, peerself, &state, &nc->prefix,
> -    nc->prefixlen, vstate))
> +    nc->prefixlen, vstate) == 1)
>   peerself->prefix_cnt++;
>   for (i = RIB_LOC_START; i < rib_size; i++) {
>   if (!rib_valid(i))
> @@ -3835,21 +3781,20 @@ rde_shutdown(void)
>   * rde_shutdown depends on this.
>   */
>  
> - /*
> - * All peers go down
> - */
> + /* Frist all peers go down */
>   for (i = 0; i <= peertable.peer_hashmask; i++)
>   while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
>   peer_down(p->conf.id);
>  
> + /* then since decision process is off, kill RIB_ADJ_OUT */
> + rib_free(rib_byid(RIB_ADJ_OUT));
> +
>   /* free filters */
>   filterlist_free(out_rules);
> - for (i = 0; i < rib_size; i++) {
> - if (!rib_valid(i))
> - continue;
> - filterlist_free(ribs[i].in_rules);
> - }
> + filterlist_free(out_rules_tmp);
>  
> + /* now check everything */
> + rib_shutdown();
>   nexthop_shutdown();
>   path_shutdown();
>   aspath_shutdown();
> Index: rde.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> retrieving revision 1.201
> diff -u -p -r1.201 rde.h
> --- rde.h 31 Oct 2018 14:50:07 -0000 1.201
> +++ rde.h 31 Oct 2018 15:09:39 -0000
> @@ -79,7 +79,6 @@ LIST_HEAD(attr_list, attr);
>  LIST_HEAD(aspath_head, rde_aspath);
>  RB_HEAD(uptree_prefix, update_prefix);
>  RB_HEAD(uptree_attr, update_attr);
> -RB_HEAD(uptree_rib, update_rib);
>  
>  TAILQ_HEAD(uplist_prefix, update_prefix);
>  TAILQ_HEAD(uplist_attr, update_attr);
> @@ -91,7 +90,6 @@ struct rde_peer {
>   struct bgpd_addr remote_addr;
>   struct bgpd_addr local_v4_addr;
>   struct bgpd_addr local_v6_addr;
> - struct uptree_rib up_rib;
>   struct uptree_prefix up_prefix;
>   struct uptree_attr up_attrs;
>   struct uplist_attr updates[AID_MAX];
> @@ -440,6 +438,7 @@ struct rib *rib_byid(u_int16_t);
>  u_int16_t rib_find(char *);
>  struct rib_desc *rib_desc(struct rib *);
>  void rib_free(struct rib *);
> +void rib_shutdown(void);
>  struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
>  struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *);
>  int rib_dump_pending(void);
> Index: rde_rib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> retrieving revision 1.184
> diff -u -p -r1.184 rde_rib.c
> --- rde_rib.c 31 Oct 2018 14:50:07 -0000 1.184
> +++ rde_rib.c 31 Oct 2018 15:09:39 -0000
> @@ -216,6 +216,21 @@ rib_free(struct rib *rib)
>   bzero(rd, sizeof(struct rib_desc));
>  }
>  
> +void
> +rib_shutdown(void)
> +{
> + u_int16_t id;
> +
> + for (id = 0; id < rib_size; id++) {
> + if (!rib_valid(id))
> + continue;
> + if (!RB_EMPTY(rib_tree(&ribs[id].rib)))
> + log_warnx("%s: rib %s is not empty", __func__,
> +    ribs[id].name);
> + rib_free(&ribs[id].rib);
> + }
> +}
> +
>  struct rib_entry *
>  rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
>  {
> @@ -553,6 +568,11 @@ path_hash_stats(struct rde_hashstats *hs
>   }
>  }
>  
> +/*
> + * Update a prefix belonging to a possible new aspath.
> + * Return 1 if prefix was newly added, 0 if it was just changed or 2 if no
> + * change happened at all.
> + */
>  int
>  path_update(struct rib *rib, struct rde_peer *peer, struct filterstate *state,
>      struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate)
> @@ -575,7 +595,7 @@ path_update(struct rib *rib, struct rde_
>   /* no change, update last change */
>   p->lastchange = time(NULL);
>   p->validation_state = vstate;
> - return (0);
> + return (2);
>   }
>   }
>  
> Index: rde_update.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 rde_update.c
> --- rde_update.c 24 Oct 2018 08:26:37 -0000 1.102
> +++ rde_update.c 31 Oct 2018 15:09:39 -0000
> @@ -53,15 +53,9 @@ struct update_attr {
>   u_int16_t mpattr_len;
>  };
>  
> -struct update_rib {
> - RB_ENTRY(update_rib) entry;
> - struct rib_entry *re;
> -};
> -
>  void up_clear(struct uplist_attr *, struct uplist_prefix *);
>  int up_prefix_cmp(struct update_prefix *, struct update_prefix *);
>  int up_attr_cmp(struct update_attr *, struct update_attr *);
> -int up_rib_cmp(struct update_rib *, struct update_rib *);
>  int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *);
>  
>  RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
> @@ -70,9 +64,6 @@ RB_GENERATE(uptree_prefix, update_prefix
>  RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp)
>  RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp)
>  
> -RB_PROTOTYPE(uptree_rib, update_rib, entry, up_rib_cmp)
> -RB_GENERATE(uptree_rib, update_rib, entry, up_rib_cmp)
> -
>  SIPHASH_KEY uptree_key;
>  
>  void
> @@ -86,7 +77,6 @@ up_init(struct rde_peer *peer)
>   }
>   RB_INIT(&peer->up_prefix);
>   RB_INIT(&peer->up_attrs);
> - RB_INIT(&peer->up_rib);
>   peer->up_pcnt = 0;
>   peer->up_acnt = 0;
>   peer->up_nlricnt = 0;
> @@ -120,7 +110,6 @@ up_clear(struct uplist_attr *updates, st
>  void
>  up_down(struct rde_peer *peer)
>  {
> - struct update_rib *ur, *nur;
>   u_int8_t i;
>  
>   for (i = 0; i < AID_MAX; i++)
> @@ -128,10 +117,6 @@ up_down(struct rde_peer *peer)
>  
>   RB_INIT(&peer->up_prefix);
>   RB_INIT(&peer->up_attrs);
> - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
> - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> - free(ur);
> - }
>  
>   peer->up_pcnt = 0;
>   peer->up_acnt = 0;
> @@ -210,58 +195,6 @@ up_attr_cmp(struct update_attr *a, struc
>  }
>  
>  int
> -up_rib_cmp(struct update_rib *a, struct update_rib *b)
> -{
> - if (a->re != b->re)
> - return (a->re > b->re ? 1 : -1);
> - return 0;
> -}
> -
> -int
> -up_rib_remove(struct rde_peer *peer, struct rib_entry *re)
> -{
> - struct update_rib *ur, u;
> - u.re = re;
> -
> - ur = RB_FIND(uptree_rib, &peer->up_rib, &u);
> - if (ur != NULL) {
> - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> - free(ur);
> - return 1;
> - } else
> - return 0;
> -}
> -
> -void
> -up_rib_add(struct rde_peer *peer, struct rib_entry *re)
> -{
> - struct update_rib *ur;
> -
> - if ((ur = calloc(1, sizeof(*ur))) == NULL)
> - fatal("%s", __func__);
> - ur->re = re;
> -
> - if (RB_INSERT(uptree_rib, &peer->up_rib, ur) != NULL)
> - free(ur);
> -}
> -
> -void
> -up_withdraw_all(struct rde_peer *peer)
> -{
> - struct bgpd_addr addr;
> - struct update_rib *ur, *nur;
> -
> - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
> - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> -
> - /* withdraw prefix */
> - pt_getaddr(ur->re->prefix, &addr);
> - up_generate(peer, NULL, &addr, ur->re->prefix->prefixlen);
> - free(ur);
> - }
> -}
> -
> -int
>  up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a)
>  {
>   struct update_attr *na = NULL;
> @@ -473,14 +406,16 @@ up_generate_updates(struct filter_head *
>  
>   if (new == NULL) {
>  withdraw:
> - if (up_test_update(peer, old) != 1)
> - return;
> -
> - if (!up_rib_remove(peer, old->re))
> + if (old == NULL)
>   return;
>  
>   /* withdraw prefix */
>   pt_getaddr(old->re->prefix, &addr);
> + if (prefix_remove(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
> +    old->re->prefix->prefixlen) == 0) {
> + /* not in table, no need to send withdraw */
> + return;
> + }
>   up_generate(peer, NULL, &addr, old->re->prefix->prefixlen);
>   } else {
>   switch (up_test_update(peer, new)) {
> @@ -500,8 +435,12 @@ withdraw:
>   }
>  
>   pt_getaddr(new->re->prefix, &addr);
> - up_generate(peer, &state, &addr, new->re->prefix->prefixlen);
> - up_rib_add(peer, new->re);
> + if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
> +    new->re->prefix->prefixlen, prefix_vstate(new)) != 2) {
> + /* only send update if path changed */
> + up_generate(peer, &state, &addr,
> +    new->re->prefix->prefixlen);
> + }
>  
>   rde_filterstate_clean(&state);
>   }
>

Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

David Higgs
In reply to this post by Sebastian Benoit-3
On Wed, Oct 31, 2018 at 6:58 PM Sebastian Benoit <[hidden email]> wrote:

<snip>

On a phone, saw some typos and such, sorry no diff.

[benoit@border2:~]$ cat before

> RDE memory statistics
>     715727 IPv4 unicast network entries using 27.3M of memory
>      58953 IPv6 unicast network entries using 3.1M of memory
>    1549171 rib entries using 94.6M of memory
>    2897130 prefix entries using 265M of memory
>     562423 BGP path attribute entries using 60.1M of memory
>     149579 BGP AS-PATH attribute entries using 6.1M of memory,
>            and holding 562423 references
>      37007 BGP attributes entries using 1.4M of memory
>            and holding 880668 references


Inconsistent comma usage above


>      37006 BGP attributes using 912K of memory
>      61964 as-set elements in 58064 tables using 950K of memory
>     103150 prefix-set elments using 4.3M of memory


elments => elements

RIB using 459M of memory

> Sets using 5.2M of memory
>
> RDE hash statistics
>         path hash: size 131072, 562423 entires
>             min 0 max 20 avg/std-dev = 4.291/2.364
>         aspath hash: size 131072, 149579 entires
>             min 0 max 8 avg/std-dev = 1.141/0.835
>         attr hash: size 16384, 37007 entires
>             min 0 max 10 avg/std-dev = 2.259/1.378
>

entires => entries

—david
Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

Denis Fondras
On Wed, Oct 31, 2018 at 10:02:05PM -0400, David Higgs wrote:

> On Wed, Oct 31, 2018 at 6:58 PM Sebastian Benoit <[hidden email]> wrote:
>
> <snip>
>
> On a phone, saw some typos and such, sorry no diff.
>
> [benoit@border2:~]$ cat before
> > RDE memory statistics
> >     715727 IPv4 unicast network entries using 27.3M of memory
> >      58953 IPv6 unicast network entries using 3.1M of memory
> >    1549171 rib entries using 94.6M of memory
> >    2897130 prefix entries using 265M of memory
> >     562423 BGP path attribute entries using 60.1M of memory
> >     149579 BGP AS-PATH attribute entries using 6.1M of memory,
> >            and holding 562423 references
> >      37007 BGP attributes entries using 1.4M of memory
> >            and holding 880668 references
>
>
> Inconsistent comma usage above
>

This one was fixed in a recent commit. It seems benno@ has not updated bgpctl :)


>
> >      37006 BGP attributes using 912K of memory
> >      61964 as-set elements in 58064 tables using 950K of memory
> >     103150 prefix-set elments using 4.3M of memory
>
>
> elments => elements
>
> RIB using 459M of memory
> > Sets using 5.2M of memory
> >
> > RDE hash statistics
> >         path hash: size 131072, 562423 entires
> >             min 0 max 20 avg/std-dev = 4.291/2.364
> >         aspath hash: size 131072, 149579 entires
> >             min 0 max 8 avg/std-dev = 1.141/0.835
> >         attr hash: size 16384, 37007 entires
> >             min 0 max 10 avg/std-dev = 2.259/1.378
> >
>
> entires => entries
>

Thanks for spotting.

> —david

Index: bgpctl.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
retrieving revision 1.222
diff -u -p -r1.222 bgpctl.c
--- bgpctl.c 31 Oct 2018 14:58:59 -0000 1.222
+++ bgpctl.c 1 Nov 2018 08:17:21 -0000
@@ -1880,7 +1880,7 @@ show_rib_memory_msg(struct imsg *imsg)
  printf("%10lld as-set elements in %lld tables using "
     "%s of memory\n", stats.aset_nmemb, stats.aset_cnt,
     fmt_mem(stats.aset_size));
- printf("%10lld prefix-set elments using %s of memory\n",
+ printf("%10lld prefix-set elements using %s of memory\n",
     stats.pset_cnt, fmt_mem(stats.pset_size));
  printf("RIB using %s of memory\n", fmt_mem(pts +
     stats.prefix_cnt * sizeof(struct prefix) +
@@ -1894,7 +1894,7 @@ show_rib_memory_msg(struct imsg *imsg)
  break;
  case IMSG_CTL_SHOW_RIB_HASH:
  memcpy(&hash, imsg->data, sizeof(hash));
- printf("\t%s: size %lld, %lld entires\n", hash.name, hash.num,
+ printf("\t%s: size %lld, %lld entries\n", hash.name, hash.num,
     hash.sum);
  avg = (double)hash.sum / (double)hash.num;
  dev = sqrt(fmax(0, hash.sumq / hash.num - avg * avg));

Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

Claudio Jeker
In reply to this post by Sebastian Benoit-3
On Wed, Oct 31, 2018 at 11:55:51PM +0100, Sebastian Benoit wrote:

> Denis Fondras([hidden email]) on 2018.10.31 21:02:17 +0100:
> > On Wed, Oct 31, 2018 at 04:24:49PM +0100, Claudio Jeker wrote:
> > > This diff introduces a real Adj-RIB-Out. It is the minimal change to
> > > introduce the new RIB. This removes the update_rib introduced before 6.4
> > > lock because that is now replaced with a real RIB.
> > > The code used by bgpctl show rib is now a fair amount easier since now one
> > > RIB runner can be used for all cases.
> > > path_update() is now returning if a prefix was not modified which is
> > > needed in the update path to suppress unneeded updates.
> > > The softreconfig out case becomes simpler because there is no more the
> > > need to run with both filters and check results.
> > > Last the shutdown code of the RDE had to be adjusted a bit to still work
> > > with the Adj-RIB-Out.
> > >
> > > This may cause memory consumption to go up but my testing has shown that
> > > the result is actually not significant. Needs the commits from earlier
> > > today to apply.
> >
> > On my Route Server, it is quite a big change (in percentage).
> > * Before :
> > RDE memory statistics
> >      11561 IPv4 unicast network entries using 452K of memory
> >        131 IPv6 unicast network entries using 7.2K of memory
> >      23370 rib entries using 1.4M of memory
> >      23517 prefix entries using 1.8M of memory
> >       4894 BGP path attribute entries using 344K of memory
> >            and holding 23517 references
> >       1720 BGP AS-PATH attribute entries using 76.4K of memory
> >            and holding 4894 references
> >       1061 BGP attributes entries using 41.4K of memory
> >            and holding 10565 references
> >       1060 BGP attributes using 25.6K of memory
> >     101809 as-set elements in 58041 tables using 2.1M of memory
> >     114429 prefix-set elments using 4.7M of memory
> > RIB using 4.1M of memory
> > Sets using 6.9M of memory
> >
> > RDE hash statistics
> >         path hash: size 131072, 4894 entires
> >             min 0 max 3 avg/std-dev = 0.037/0.000
> >         aspath hash: size 131072, 1720 entires
> >             min 0 max 2 avg/std-dev = 0.013/0.000
> >         attr hash: size 16384, 1061 entires
> >             min 0 max 2 avg/std-dev = 0.065/0.000
> >
> > * After:
> > RDE memory statistics
> >      11560 IPv4 unicast network entries using 452K of memory
> >        145 IPv6 unicast network entries using 7.9K of memory
> >      34991 rib entries using 2.1M of memory
> >      69844 prefix entries using 5.3M of memory
> >       4929 BGP path attribute entries using 347K of memory
> >            and holding 69844 references
> >       1727 BGP AS-PATH attribute entries using 76.6K of memory
> >            and holding 4929 references
> >       1066 BGP attributes entries using 41.6K of memory
> >            and holding 10643 references
> >       1065 BGP attributes using 25.6K of memory
> >     101809 as-set elements in 58041 tables using 2.1M of memory
> >     114429 prefix-set elments using 4.7M of memory
> > RIB using 8.4M of memory
> > Sets using 6.9M of memory
> >
> > RDE hash statistics
> >         path hash: size 131072, 4929 entires
> >             min 0 max 3 avg/std-dev = 0.038/0.000
> >         aspath hash: size 131072, 1727 entires
> >             min 0 max 2 avg/std-dev = 0.013/0.000
> >         attr hash: size 16384, 1066 entires
> >             min 0 max 2 avg/std-dev = 0.065/0.000
> >
>
> ok this is a more interesting router:
>
> [benoit@border2:~]$ cat before  
> RDE memory statistics
>     715727 IPv4 unicast network entries using 27.3M of memory
>      58953 IPv6 unicast network entries using 3.1M of memory
>    1549171 rib entries using 94.6M of memory
>    2897130 prefix entries using 265M of memory
>     562423 BGP path attribute entries using 60.1M of memory
>     149579 BGP AS-PATH attribute entries using 6.1M of memory,
>            and holding 562423 references
>      37007 BGP attributes entries using 1.4M of memory
>            and holding 880668 references
>      37006 BGP attributes using 912K of memory
>      61964 as-set elements in 58064 tables using 950K of memory
>     103150 prefix-set elments using 4.3M of memory
> RIB using 459M of memory
> Sets using 5.2M of memory
>
> RDE hash statistics
>         path hash: size 131072, 562423 entires
>             min 0 max 20 avg/std-dev = 4.291/2.364
>         aspath hash: size 131072, 149579 entires
>             min 0 max 8 avg/std-dev = 1.141/0.835
>         attr hash: size 16384, 37007 entires
>             min 0 max 10 avg/std-dev = 2.259/1.378
>
> [benoit@border2:~]$ cat now                                                            
> RDE memory statistics
>    2323535 unspec network entries using 0B of memory
>     716246 IPv6 unicast network entries using 38.3M of memory

This bgpctl is out of sync. So I don't trust the numbers too much.

Also check the ps alx or top output. Since malloc has a fair amount of
fragmentation.

>      58933 IPv4 vpn network entries using 4.0M of memory
>    4806161 rib entries using 293M of memory
>    4806161 prefix entries using 440M of memory
>     529410 BGP path attribute entries using 56.5M of memory
>       1945 BGP AS-PATH attribute entries using 134K of memory,
>            and holding 5807738 references
>     529410 BGP attributes entries using 20.2M of memory
>            and holding 32706 references
>     725558 BGP attributes using 870K of memory
>    1426888 as-set elements in 32705 tables using 56.7K of memory
>      61964 prefix-set elments using 101K of memory
> RIB using 853M of memory
> Sets using 157K of memory
>
> RDE hash statistics
>         path hash: size 131072, 529410 entires
>             min 0 max 17 avg/std-dev = 4.039/1.920
>         aspath hash: size 131072, 136842 entires
>             min 0 max 8 avg/std-dev = 1.044/0.954
>         attr hash: size 16384, 32706 entires
>             min 0 max 10 avg/std-dev = 1.996/1.420
>

--
:wq Claudio

Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

Sebastian Benoit-3
Claudio Jeker([hidden email]) on 2018.11.01 10:38:41 +0100:

> On Wed, Oct 31, 2018 at 11:55:51PM +0100, Sebastian Benoit wrote:
> > Denis Fondras([hidden email]) on 2018.10.31 21:02:17 +0100:
> > > On Wed, Oct 31, 2018 at 04:24:49PM +0100, Claudio Jeker wrote:
> > > > This diff introduces a real Adj-RIB-Out. It is the minimal change to
> > > > introduce the new RIB. This removes the update_rib introduced before 6.4
> > > > lock because that is now replaced with a real RIB.
> > > > The code used by bgpctl show rib is now a fair amount easier since now one
> > > > RIB runner can be used for all cases.
> > > > path_update() is now returning if a prefix was not modified which is
> > > > needed in the update path to suppress unneeded updates.
> > > > The softreconfig out case becomes simpler because there is no more the
> > > > need to run with both filters and check results.
> > > > Last the shutdown code of the RDE had to be adjusted a bit to still work
> > > > with the Adj-RIB-Out.
> > > >
> > > > This may cause memory consumption to go up but my testing has shown that
> > > > the result is actually not significant. Needs the commits from earlier
> > > > today to apply.
> > >
> > > On my Route Server, it is quite a big change (in percentage).
> > > * Before :
> > > RDE memory statistics
> > >      11561 IPv4 unicast network entries using 452K of memory
> > >        131 IPv6 unicast network entries using 7.2K of memory
> > >      23370 rib entries using 1.4M of memory
> > >      23517 prefix entries using 1.8M of memory
> > >       4894 BGP path attribute entries using 344K of memory
> > >            and holding 23517 references
> > >       1720 BGP AS-PATH attribute entries using 76.4K of memory
> > >            and holding 4894 references
> > >       1061 BGP attributes entries using 41.4K of memory
> > >            and holding 10565 references
> > >       1060 BGP attributes using 25.6K of memory
> > >     101809 as-set elements in 58041 tables using 2.1M of memory
> > >     114429 prefix-set elments using 4.7M of memory
> > > RIB using 4.1M of memory
> > > Sets using 6.9M of memory
> > >
> > > RDE hash statistics
> > >         path hash: size 131072, 4894 entires
> > >             min 0 max 3 avg/std-dev = 0.037/0.000
> > >         aspath hash: size 131072, 1720 entires
> > >             min 0 max 2 avg/std-dev = 0.013/0.000
> > >         attr hash: size 16384, 1061 entires
> > >             min 0 max 2 avg/std-dev = 0.065/0.000
> > >
> > > * After:
> > > RDE memory statistics
> > >      11560 IPv4 unicast network entries using 452K of memory
> > >        145 IPv6 unicast network entries using 7.9K of memory
> > >      34991 rib entries using 2.1M of memory
> > >      69844 prefix entries using 5.3M of memory
> > >       4929 BGP path attribute entries using 347K of memory
> > >            and holding 69844 references
> > >       1727 BGP AS-PATH attribute entries using 76.6K of memory
> > >            and holding 4929 references
> > >       1066 BGP attributes entries using 41.6K of memory
> > >            and holding 10643 references
> > >       1065 BGP attributes using 25.6K of memory
> > >     101809 as-set elements in 58041 tables using 2.1M of memory
> > >     114429 prefix-set elments using 4.7M of memory
> > > RIB using 8.4M of memory
> > > Sets using 6.9M of memory
> > >
> > > RDE hash statistics
> > >         path hash: size 131072, 4929 entires
> > >             min 0 max 3 avg/std-dev = 0.038/0.000
> > >         aspath hash: size 131072, 1727 entires
> > >             min 0 max 2 avg/std-dev = 0.013/0.000
> > >         attr hash: size 16384, 1066 entires
> > >             min 0 max 2 avg/std-dev = 0.065/0.000
> > >
> >
> > ok this is a more interesting router:
> >
> > [benoit@border2:~]$ cat before  
> > RDE memory statistics
> >     715727 IPv4 unicast network entries using 27.3M of memory
> >      58953 IPv6 unicast network entries using 3.1M of memory
> >    1549171 rib entries using 94.6M of memory
> >    2897130 prefix entries using 265M of memory
> >     562423 BGP path attribute entries using 60.1M of memory
> >     149579 BGP AS-PATH attribute entries using 6.1M of memory,
> >            and holding 562423 references
> >      37007 BGP attributes entries using 1.4M of memory
> >            and holding 880668 references
> >      37006 BGP attributes using 912K of memory
> >      61964 as-set elements in 58064 tables using 950K of memory
> >     103150 prefix-set elments using 4.3M of memory
> > RIB using 459M of memory
> > Sets using 5.2M of memory
> >
> > RDE hash statistics
> >         path hash: size 131072, 562423 entires
> >             min 0 max 20 avg/std-dev = 4.291/2.364
> >         aspath hash: size 131072, 149579 entires
> >             min 0 max 8 avg/std-dev = 1.141/0.835
> >         attr hash: size 16384, 37007 entires
> >             min 0 max 10 avg/std-dev = 2.259/1.378
> >
> > [benoit@border2:~]$ cat now                                                            
> > RDE memory statistics
> >    2323535 unspec network entries using 0B of memory
> >     716246 IPv6 unicast network entries using 38.3M of memory
>
> This bgpctl is out of sync. So I don't trust the numbers too much.
>
> Also check the ps alx or top output. Since malloc has a fair amount of
> fragmentation.
>
> >      58933 IPv4 vpn network entries using 4.0M of memory
> >    4806161 rib entries using 293M of memory
> >    4806161 prefix entries using 440M of memory
> >     529410 BGP path attribute entries using 56.5M of memory
> >       1945 BGP AS-PATH attribute entries using 134K of memory,
> >            and holding 5807738 references
> >     529410 BGP attributes entries using 20.2M of memory
> >            and holding 32706 references
> >     725558 BGP attributes using 870K of memory
> >    1426888 as-set elements in 32705 tables using 56.7K of memory
> >      61964 prefix-set elments using 101K of memory
> > RIB using 853M of memory
> > Sets using 157K of memory
> >
> > RDE hash statistics
> >         path hash: size 131072, 529410 entires
> >             min 0 max 17 avg/std-dev = 4.039/1.920
> >         aspath hash: size 131072, 136842 entires
> >             min 0 max 8 avg/std-dev = 1.044/0.954
> >         attr hash: size 16384, 32706 entires
> >             min 0 max 10 avg/std-dev = 1.996/1.420

sorry about that, i missed those the commits since the 26th

indeed its much less with an updated bgpctl

RDE memory statistics
    716318 IPv4 unicast network entries using 27.3M of memory
     59024 IPv6 unicast network entries using 3.2M of memory
   2323991 rib entries using 142M of memory
   4814319 prefix entries using 367M of memory
    530183 BGP path attribute entries using 36.4M of memory
           and holding 4814319 references
    137056 BGP AS-PATH attribute entries using 5.5M of memory
           and holding 530183 references
     32708 BGP attributes entries using 1.2M of memory
           and holding 891572 references
     32707 BGP attributes using 708K of memory
     61964 as-set elements in 58064 tables using 1.4M of memory
    103150 prefix-set elements using 4.3M of memory
RIB using 584M of memory
Sets using 5.6M of memory

RDE hash statistics
        path hash: size 131072, 530183 entries
            min 0 max 17 avg/std-dev = 4.045/1.907
        aspath hash: size 131072, 137056 entries
            min 0 max 8 avg/std-dev = 1.046/0.952
        attr hash: size 16384, 32708 entries
            min 0 max 10 avg/std-dev = 1.996/1.419

Reply | Threaded
Open this post in threaded view
|

Re: bgpd, Adj-RIB-Out support

Denis Fondras-3
In reply to this post by Denis Fondras
On Wed, Oct 31, 2018 at 09:02:16PM +0100, Denis Fondras wrote:

> On Wed, Oct 31, 2018 at 04:24:49PM +0100, Claudio Jeker wrote:
> > This diff introduces a real Adj-RIB-Out. It is the minimal change to
> > introduce the new RIB. This removes the update_rib introduced before 6.4
> > lock because that is now replaced with a real RIB.
> > The code used by bgpctl show rib is now a fair amount easier since now one
> > RIB runner can be used for all cases.
> > path_update() is now returning if a prefix was not modified which is
> > needed in the update path to suppress unneeded updates.
> > The softreconfig out case becomes simpler because there is no more the
> > need to run with both filters and check results.
> > Last the shutdown code of the RDE had to be adjusted a bit to still work
> > with the Adj-RIB-Out.
> >
> > This may cause memory consumption to go up but my testing has shown that
> > the result is actually not significant. Needs the commits from earlier
> > today to apply.
>

[zip]

> I need to test it on a router with a fullview.
>

OK denis@

> Comments below.
>
> > --
> > :wq Claudio
> >
> > Index: rde.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> > retrieving revision 1.444
> > diff -u -p -r1.444 rde.c
> > --- rde.c 31 Oct 2018 14:50:07 -0000 1.444
> > +++ rde.c 31 Oct 2018 15:09:39 -0000
> > @@ -1395,7 +1395,7 @@ rde_update_update(struct rde_peer *peer,
> >  
> >   /* add original path to the Adj-RIB-In */
> >   if (path_update(&ribs[RIB_ADJ_IN].rib, peer, in, prefix, prefixlen,
> > -    vstate))
> > +    vstate) == 1)
> >   peer->prefix_cnt++;
> >  
> >   /* max prefix checker */
> > @@ -2124,16 +2124,17 @@ rde_reflector(struct rde_peer *peer, str
> >   * control specific functions
> >   */
> >  static void
> > -rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp,
> > -    struct nexthop *nexthop, pid_t pid, int flags)
> > +rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
> >  {
> >   struct ctl_show_rib rib;
> >   struct ibuf *wbuf;
> >   struct attr *a;
> > + struct nexthop *nexthop;
> >   void *bp;
> >   time_t staletime;
> >   u_int8_t l;
> >  
> > + nexthop = prefix_nexthop(p);
> >   bzero(&rib, sizeof(rib));
> >   rib.lastchange = p->lastchange;
> >   rib.local_pref = asp->lpref;
> > @@ -2208,70 +2209,37 @@ rde_dump_rib_as(struct prefix *p, struct
> >  }
> >  
> >  static void
> > -rde_dump_filterout(struct rde_peer *peer, struct prefix *p,
> > -    struct ctl_show_rib_request *req)
> > -{
> > - struct filterstate state;
> > - enum filter_actions a;
> > -
> > - if (up_test_update(peer, p) != 1)
> > - return;
> > -
> > - rde_filterstate_prep(&state, prefix_aspath(p), prefix_nexthop(p),
> > -    prefix_nhflags(p));
> > - a = rde_filter(out_rules, peer, p, &state);
> > -
> > - if (a == ACTION_ALLOW)
> > - rde_dump_rib_as(p, &state.aspath, state.nexthop, req->pid,
> > -    req->flags);
> > -
> > - rde_filterstate_clean(&state);
> > -}
> > -
> > -static void
> >  rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
> >  {
> > - struct rde_peer *peer;
> >   struct rde_aspath *asp;
> >  
> > - if (req->flags & F_CTL_ADJ_OUT) {
> > - if (p->re->active != p)
> > - /* only consider active prefix */
> > - return;
> > - if (req->peerid) {
> > - if ((peer = peer_get(req->peerid)) != NULL)
> > - rde_dump_filterout(peer, p, req);
> > - return;
> > - }
> > - } else {
> > - asp = prefix_aspath(p);
> > - if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
> > - return;
> > - if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
> > - return;
> > - if ((req->flags & F_CTL_INVALID) &&
> > -    (asp->flags & F_ATTR_PARSE_ERR) == 0)
> > - return;
> > - if (req->type == IMSG_CTL_SHOW_RIB_AS &&
> > -    !aspath_match(asp->aspath->data, asp->aspath->len,
> > -    &req->as, 0))
> > - return;
> > - if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
> > -    !community_match(asp, req->community.as,
> > -    req->community.type))
> > - return;
> > - if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
> > -    !community_ext_match(asp, &req->extcommunity, 0))
> > - return;
> > - if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
> > -    !community_large_match(asp, req->large_community.as,
> > -    req->large_community.ld1, req->large_community.ld2))
> > - return;
> > - if (!ovs_match(p, req->flags))
> > - return;
> > - rde_dump_rib_as(p, asp, prefix_nexthop(p), req->pid,
> > -    req->flags);
> > - }
> > + if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
> > + return;
> > +
> > + asp = prefix_aspath(p);
> > + if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
> > + return;
>
> You can move it up one line. It only saves a call to prefix_aspath() in a few
> case though.
>
> > + if ((req->flags & F_CTL_INVALID) &&
> > +    (asp->flags & F_ATTR_PARSE_ERR) == 0)
> > + return;
> > + if (req->type == IMSG_CTL_SHOW_RIB_AS &&
> > +    !aspath_match(asp->aspath->data, asp->aspath->len,
> > +    &req->as, 0))
> > + return;
> > + if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
> > +    !community_match(asp, req->community.as,
> > +    req->community.type))
> > + return;
> > + if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
> > +    !community_ext_match(asp, &req->extcommunity, 0))
> > + return;
> > + if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
> > +    !community_large_match(asp, req->large_community.as,
> > +    req->large_community.ld1, req->large_community.ld2))
> > + return;
> > + if (!ovs_match(p, req->flags))
> > + return;
> > + rde_dump_rib_as(p, asp, req->pid, req->flags);
> >  }
> >  
> >  static void
> > @@ -2342,6 +2310,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req
> >   }
> >   if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
> >   rid = RIB_ADJ_IN;
> > + } else if (req->flags & F_CTL_ADJ_OUT) {
> > + rid = RIB_ADJ_OUT;
> >   } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
> >   log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
> >   error = CTL_RES_NOSUCHPEER;
> > @@ -2591,6 +2561,19 @@ rde_up_dump_upcall(struct rib_entry *re,
> >  }
> >  
> >  static void
> > +rde_up_flush_upcall(struct rib_entry *re, void *ptr)
> > +{
> > + struct rde_peer *peer = ptr;
> > + struct prefix *p, *np;
> > +
> > + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> > + if (peer != prefix_peer(p))
> > + continue;
> > + up_generate_updates(out_rules, peer, NULL, p);
> > + }
> > +}
> > +
> > +static void
> >  rde_up_dump_done(void *ptr, u_int8_t aid)
> >  {
> >   struct rde_peer *peer = ptr;
> > @@ -2805,6 +2788,8 @@ rde_reload_done(void)
> >   u_int16_t rid;
> >   int reload = 0;
> >  
> > + softreconfig = 0;
> > +
> >   /* first merge the main config */
> >   if ((conf->flags & BGPD_FLAG_NO_EVALUATE) &&
> >      (nconf->flags & BGPD_FLAG_NO_EVALUATE) == 0) {
> > @@ -2887,11 +2872,16 @@ rde_reload_done(void)
> >   char *p = log_fmt_peer(&peer->conf);
> >   log_debug("rib change: reloading peer %s", p);
> >   free(p);
> > - up_withdraw_all(peer);
> >   peer->loc_rib_id = rib_find(peer->conf.rib);
> >   if (peer->loc_rib_id == RIB_NOTFOUND)
> >   fatalx("King Bula's peer met an unknown RIB");
> >   peer->reconf_rib = 1;
> > + softreconfig++;
> > + if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC,
> > +    RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
> > +    rde_softreconfig_in_done, NULL) == -1)
> > + fatal("%s: rib_dump_new", __func__);
> > + log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
> >   continue;
> >   }
> >   if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) {
> > @@ -2941,14 +2931,13 @@ rde_reload_done(void)
> >   }
> >   log_info("RDE reconfigured");
> >  
> > - softreconfig = 0;
> >   if (reload > 0) {
> > - log_info("running softreconfig in");
> >   softreconfig++;
> >   if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
> >      RDE_RUNNER_ROUNDS, &ribs[RIB_ADJ_IN], rde_softreconfig_in,
> >      rde_softreconfig_in_done, NULL) == -1)
> >   fatal("%s: rib_dump_new", __func__);
> > + log_info("running softreconfig in");
> >   } else {
> >   rde_softreconfig_in_done(NULL, AID_UNSPEC);
> >   }
> > @@ -3113,61 +3102,18 @@ rde_softreconfig_in(struct rib_entry *re
> >  }
> >  
> >  static void
> > -rde_softreconfig_out_peer(struct rib_entry *re, struct rde_peer *peer)
> > -{
> > - struct filterstate ostate, nstate;
> > - struct bgpd_addr addr;
> > - struct prefix *p = re->active;
> > - struct pt_entry *pt;
> > - enum filter_actions oa, na;
> > -
> > - pt = re->prefix;
> > - pt_getaddr(pt, &addr);
> > -
> > - if (up_test_update(peer, p) != 1)
> > - return;
> > -
> > - rde_filterstate_prep(&ostate, prefix_aspath(p), prefix_nexthop(p),
> > -    prefix_nhflags(p));
> > - rde_filterstate_prep(&nstate, prefix_aspath(p), prefix_nexthop(p),
> > -    prefix_nhflags(p));
> > - oa = rde_filter(out_rules_tmp, peer, p, &ostate);
> > - na = rde_filter(out_rules, peer, p, &nstate);
> > -
> > - /* go through all 4 possible combinations */
> > - /* if (oa == ACTION_DENY && na == ACTION_DENY) */
> > - /* nothing todo */
> > - if (oa == ACTION_DENY && na == ACTION_ALLOW) {
> > - /* send update */
> > - up_rib_add(peer, re);
> > - up_generate(peer, &nstate, &addr, pt->prefixlen);
> > - } else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
> > - /* send withdraw */
> > - up_rib_remove(peer, re);
> > - up_generate(peer, NULL, &addr, pt->prefixlen);
> > - } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
> > - /* send update if anything changed */
> > - if (nstate.nhflags != ostate.nhflags ||
> > -    nstate.nexthop != ostate.nexthop ||
> > -    path_compare(&nstate.aspath, &ostate.aspath) != 0)
> > - up_generate(peer, &nstate, &addr, pt->prefixlen);
> > - }
> > -
> > - rde_filterstate_clean(&ostate);
> > - rde_filterstate_clean(&nstate);
> > -}
> > -
> > -static void
> >  rde_softreconfig_out(struct rib_entry *re, void *bula)
> >  {
> > + struct prefix *new = re->active;
> >   struct rde_peer *peer;
> >  
> > - if (re->active == NULL)
> > + if (new == NULL)
> >   return;
> >  
> >   LIST_FOREACH(peer, &peerlist, peer_l) {
> >   if (peer->loc_rib_id == re->rib_id && peer->reconf_out)
> > - rde_softreconfig_out_peer(re, peer);
> > + /* Regenerate all updates. */
> > + up_generate_updates(out_rules, peer, new, new);
> >   }
> >  }
> >  
> > @@ -3687,7 +3633,7 @@ network_add(struct network_config *nc, i
> >   vstate = rde_roa_validity(&conf->rde_roa, &nc->prefix,
> >      nc->prefixlen, asp->source_as);
> >   if (path_update(&ribs[RIB_ADJ_IN].rib, peerself, &state, &nc->prefix,
> > -    nc->prefixlen, vstate))
> > +    nc->prefixlen, vstate) == 1)
>
> Isn't there too many tabs here ?
>
> >   peerself->prefix_cnt++;
> >   for (i = RIB_LOC_START; i < rib_size; i++) {
> >   if (!rib_valid(i))
> > @@ -3835,21 +3781,20 @@ rde_shutdown(void)
> >   * rde_shutdown depends on this.
> >   */
> >  
> > - /*
> > - * All peers go down
> > - */
> > + /* Frist all peers go down */
>
> Frist ?! :)
>
> >   for (i = 0; i <= peertable.peer_hashmask; i++)
> >   while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
> >   peer_down(p->conf.id);
> >  
> > + /* then since decision process is off, kill RIB_ADJ_OUT */
> > + rib_free(rib_byid(RIB_ADJ_OUT));
> > +
> >   /* free filters */
> >   filterlist_free(out_rules);
> > - for (i = 0; i < rib_size; i++) {
> > - if (!rib_valid(i))
> > - continue;
> > - filterlist_free(ribs[i].in_rules);
> > - }
> > + filterlist_free(out_rules_tmp);
> >  
> > + /* now check everything */
> > + rib_shutdown();
> >   nexthop_shutdown();
> >   path_shutdown();
> >   aspath_shutdown();
> > Index: rde.h
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> > retrieving revision 1.201
> > diff -u -p -r1.201 rde.h
> > --- rde.h 31 Oct 2018 14:50:07 -0000 1.201
> > +++ rde.h 31 Oct 2018 15:09:39 -0000
> > @@ -79,7 +79,6 @@ LIST_HEAD(attr_list, attr);
> >  LIST_HEAD(aspath_head, rde_aspath);
> >  RB_HEAD(uptree_prefix, update_prefix);
> >  RB_HEAD(uptree_attr, update_attr);
> > -RB_HEAD(uptree_rib, update_rib);
> >  
> >  TAILQ_HEAD(uplist_prefix, update_prefix);
> >  TAILQ_HEAD(uplist_attr, update_attr);
> > @@ -91,7 +90,6 @@ struct rde_peer {
> >   struct bgpd_addr remote_addr;
> >   struct bgpd_addr local_v4_addr;
> >   struct bgpd_addr local_v6_addr;
> > - struct uptree_rib up_rib;
> >   struct uptree_prefix up_prefix;
> >   struct uptree_attr up_attrs;
> >   struct uplist_attr updates[AID_MAX];
> > @@ -440,6 +438,7 @@ struct rib *rib_byid(u_int16_t);
> >  u_int16_t rib_find(char *);
> >  struct rib_desc *rib_desc(struct rib *);
> >  void rib_free(struct rib *);
> > +void rib_shutdown(void);
> >  struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
> >  struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *);
> >  int rib_dump_pending(void);
> > Index: rde_rib.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> > retrieving revision 1.184
> > diff -u -p -r1.184 rde_rib.c
> > --- rde_rib.c 31 Oct 2018 14:50:07 -0000 1.184
> > +++ rde_rib.c 31 Oct 2018 15:09:39 -0000
> > @@ -216,6 +216,21 @@ rib_free(struct rib *rib)
> >   bzero(rd, sizeof(struct rib_desc));
> >  }
> >  
> > +void
> > +rib_shutdown(void)
> > +{
> > + u_int16_t id;
> > +
> > + for (id = 0; id < rib_size; id++) {
> > + if (!rib_valid(id))
> > + continue;
> > + if (!RB_EMPTY(rib_tree(&ribs[id].rib)))
> > + log_warnx("%s: rib %s is not empty", __func__,
> > +    ribs[id].name);
> > + rib_free(&ribs[id].rib);
> > + }
> > +}
> > +
> >  struct rib_entry *
> >  rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
> >  {
> > @@ -553,6 +568,11 @@ path_hash_stats(struct rde_hashstats *hs
> >   }
> >  }
> >  
> > +/*
> > + * Update a prefix belonging to a possible new aspath.
> > + * Return 1 if prefix was newly added, 0 if it was just changed or 2 if no
> > + * change happened at all.
> > + */
> >  int
> >  path_update(struct rib *rib, struct rde_peer *peer, struct filterstate *state,
> >      struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate)
> > @@ -575,7 +595,7 @@ path_update(struct rib *rib, struct rde_
> >   /* no change, update last change */
> >   p->lastchange = time(NULL);
> >   p->validation_state = vstate;
> > - return (0);
> > + return (2);
> >   }
> >   }
> >  
> > Index: rde_update.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> > retrieving revision 1.102
> > diff -u -p -r1.102 rde_update.c
> > --- rde_update.c 24 Oct 2018 08:26:37 -0000 1.102
> > +++ rde_update.c 31 Oct 2018 15:09:39 -0000
> > @@ -53,15 +53,9 @@ struct update_attr {
> >   u_int16_t mpattr_len;
> >  };
> >  
> > -struct update_rib {
> > - RB_ENTRY(update_rib) entry;
> > - struct rib_entry *re;
> > -};
> > -
> >  void up_clear(struct uplist_attr *, struct uplist_prefix *);
> >  int up_prefix_cmp(struct update_prefix *, struct update_prefix *);
> >  int up_attr_cmp(struct update_attr *, struct update_attr *);
> > -int up_rib_cmp(struct update_rib *, struct update_rib *);
> >  int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *);
> >  
> >  RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
> > @@ -70,9 +64,6 @@ RB_GENERATE(uptree_prefix, update_prefix
> >  RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp)
> >  RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp)
> >  
> > -RB_PROTOTYPE(uptree_rib, update_rib, entry, up_rib_cmp)
> > -RB_GENERATE(uptree_rib, update_rib, entry, up_rib_cmp)
> > -
> >  SIPHASH_KEY uptree_key;
> >  
> >  void
> > @@ -86,7 +77,6 @@ up_init(struct rde_peer *peer)
> >   }
> >   RB_INIT(&peer->up_prefix);
> >   RB_INIT(&peer->up_attrs);
> > - RB_INIT(&peer->up_rib);
> >   peer->up_pcnt = 0;
> >   peer->up_acnt = 0;
> >   peer->up_nlricnt = 0;
> > @@ -120,7 +110,6 @@ up_clear(struct uplist_attr *updates, st
> >  void
> >  up_down(struct rde_peer *peer)
> >  {
> > - struct update_rib *ur, *nur;
> >   u_int8_t i;
> >  
> >   for (i = 0; i < AID_MAX; i++)
> > @@ -128,10 +117,6 @@ up_down(struct rde_peer *peer)
> >  
> >   RB_INIT(&peer->up_prefix);
> >   RB_INIT(&peer->up_attrs);
> > - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
> > - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> > - free(ur);
> > - }
> >  
> >   peer->up_pcnt = 0;
> >   peer->up_acnt = 0;
> > @@ -210,58 +195,6 @@ up_attr_cmp(struct update_attr *a, struc
> >  }
> >  
> >  int
> > -up_rib_cmp(struct update_rib *a, struct update_rib *b)
> > -{
> > - if (a->re != b->re)
> > - return (a->re > b->re ? 1 : -1);
> > - return 0;
> > -}
> > -
> > -int
> > -up_rib_remove(struct rde_peer *peer, struct rib_entry *re)
> > -{
> > - struct update_rib *ur, u;
> > - u.re = re;
> > -
> > - ur = RB_FIND(uptree_rib, &peer->up_rib, &u);
> > - if (ur != NULL) {
> > - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> > - free(ur);
> > - return 1;
> > - } else
> > - return 0;
> > -}
> > -
> > -void
> > -up_rib_add(struct rde_peer *peer, struct rib_entry *re)
> > -{
> > - struct update_rib *ur;
> > -
> > - if ((ur = calloc(1, sizeof(*ur))) == NULL)
> > - fatal("%s", __func__);
> > - ur->re = re;
> > -
> > - if (RB_INSERT(uptree_rib, &peer->up_rib, ur) != NULL)
> > - free(ur);
> > -}
> > -
> > -void
> > -up_withdraw_all(struct rde_peer *peer)
> > -{
> > - struct bgpd_addr addr;
> > - struct update_rib *ur, *nur;
> > -
> > - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
> > - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> > -
> > - /* withdraw prefix */
> > - pt_getaddr(ur->re->prefix, &addr);
> > - up_generate(peer, NULL, &addr, ur->re->prefix->prefixlen);
> > - free(ur);
> > - }
> > -}
> > -
> > -int
> >  up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a)
> >  {
> >   struct update_attr *na = NULL;
> > @@ -473,14 +406,16 @@ up_generate_updates(struct filter_head *
> >  
> >   if (new == NULL) {
> >  withdraw:
> > - if (up_test_update(peer, old) != 1)
> > - return;
> > -
> > - if (!up_rib_remove(peer, old->re))
> > + if (old == NULL)
> >   return;
> >  
> >   /* withdraw prefix */
> >   pt_getaddr(old->re->prefix, &addr);
> > + if (prefix_remove(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
> > +    old->re->prefix->prefixlen) == 0) {
> > + /* not in table, no need to send withdraw */
> > + return;
> > + }
> >   up_generate(peer, NULL, &addr, old->re->prefix->prefixlen);
> >   } else {
> >   switch (up_test_update(peer, new)) {
> > @@ -500,8 +435,12 @@ withdraw:
> >   }
> >  
> >   pt_getaddr(new->re->prefix, &addr);
> > - up_generate(peer, &state, &addr, new->re->prefix->prefixlen);
> > - up_rib_add(peer, new->re);
> > + if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
> > +    new->re->prefix->prefixlen, prefix_vstate(new)) != 2) {
> > + /* only send update if path changed */
> > + up_generate(peer, &state, &addr,
> > +    new->re->prefix->prefixlen);
> > + }
> >  
> >   rde_filterstate_clean(&state);
> >   }
> >