On Wed, Oct 31, 2018 at 09:02:16PM +0100, Denis Fondras wrote:
> On Wed, Oct 31, 2018 at 04:24:49PM +0100, Claudio Jeker wrote:
> > This diff introduces a real Adj-RIB-Out. It is the minimal change to
> > introduce the new RIB. This removes the update_rib introduced before 6.4
> > lock because that is now replaced with a real RIB.
> > The code used by bgpctl show rib is now a fair amount easier since now one
> > RIB runner can be used for all cases.
> > path_update() is now returning if a prefix was not modified which is
> > needed in the update path to suppress unneeded updates.
> > The softreconfig out case becomes simpler because there is no more the
> > need to run with both filters and check results.
> > Last the shutdown code of the RDE had to be adjusted a bit to still work
> > with the Adj-RIB-Out.
> >
> > This may cause memory consumption to go up but my testing has shown that
> > the result is actually not significant. Needs the commits from earlier
> > today to apply.
>
[zip]
> I need to test it on a router with a fullview.
>
OK denis@
> Comments below.
>
> > --
> > :wq Claudio
> >
> > Index: rde.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> > retrieving revision 1.444
> > diff -u -p -r1.444 rde.c
> > --- rde.c 31 Oct 2018 14:50:07 -0000 1.444
> > +++ rde.c 31 Oct 2018 15:09:39 -0000
> > @@ -1395,7 +1395,7 @@ rde_update_update(struct rde_peer *peer,
> >
> > /* add original path to the Adj-RIB-In */
> > if (path_update(&ribs[RIB_ADJ_IN].rib, peer, in, prefix, prefixlen,
> > - vstate))
> > + vstate) == 1)
> > peer->prefix_cnt++;
> >
> > /* max prefix checker */
> > @@ -2124,16 +2124,17 @@ rde_reflector(struct rde_peer *peer, str
> > * control specific functions
> > */
> > static void
> > -rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp,
> > - struct nexthop *nexthop, pid_t pid, int flags)
> > +rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int
> > flags)
> > {
> > struct ctl_show_rib rib;
> > struct ibuf *wbuf;
> > struct attr *a;
> > + struct nexthop *nexthop;
> > void *bp;
> > time_t staletime;
> > u_int8_t l;
> >
> > + nexthop = prefix_nexthop(p);
> > bzero(&rib, sizeof(rib));
> > rib.lastchange = p->lastchange;
> > rib.local_pref = asp->lpref;
> > @@ -2208,70 +2209,37 @@ rde_dump_rib_as(struct prefix *p, struct
> > }
> >
> > static void
> > -rde_dump_filterout(struct rde_peer *peer, struct prefix *p,
> > - struct ctl_show_rib_request *req)
> > -{
> > - struct filterstate state;
> > - enum filter_actions a;
> > -
> > - if (up_test_update(peer, p) != 1)
> > - return;
> > -
> > - rde_filterstate_prep(&state, prefix_aspath(p), prefix_nexthop(p),
> > - prefix_nhflags(p));
> > - a = rde_filter(out_rules, peer, p, &state);
> > -
> > - if (a == ACTION_ALLOW)
> > - rde_dump_rib_as(p, &state.aspath, state.nexthop, req->pid,
> > - req->flags);
> > -
> > - rde_filterstate_clean(&state);
> > -}
> > -
> > -static void
> > rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
> > {
> > - struct rde_peer *peer;
> > struct rde_aspath *asp;
> >
> > - if (req->flags & F_CTL_ADJ_OUT) {
> > - if (p->re->active != p)
> > - /* only consider active prefix */
> > - return;
> > - if (req->peerid) {
> > - if ((peer = peer_get(req->peerid)) != NULL)
> > - rde_dump_filterout(peer, p, req);
> > - return;
> > - }
> > - } else {
> > - asp = prefix_aspath(p);
> > - if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
> > - return;
> > - if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
> > - return;
> > - if ((req->flags & F_CTL_INVALID) &&
> > - (asp->flags & F_ATTR_PARSE_ERR) == 0)
> > - return;
> > - if (req->type == IMSG_CTL_SHOW_RIB_AS &&
> > - !aspath_match(asp->aspath->data, asp->aspath->len,
> > - &req->as, 0))
> > - return;
> > - if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
> > - !community_match(asp, req->community.as,
> > - req->community.type))
> > - return;
> > - if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
> > - !community_ext_match(asp, &req->extcommunity, 0))
> > - return;
> > - if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
> > - !community_large_match(asp, req->large_community.as,
> > - req->large_community.ld1, req->large_community.ld2))
> > - return;
> > - if (!ovs_match(p, req->flags))
> > - return;
> > - rde_dump_rib_as(p, asp, prefix_nexthop(p), req->pid,
> > - req->flags);
> > - }
> > + if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
> > + return;
> > +
> > + asp = prefix_aspath(p);
> > + if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
> > + return;
>
> You can move it up one line. It only saves a call to prefix_aspath() in a few
> case though.
>
> > + if ((req->flags & F_CTL_INVALID) &&
> > + (asp->flags & F_ATTR_PARSE_ERR) == 0)
> > + return;
> > + if (req->type == IMSG_CTL_SHOW_RIB_AS &&
> > + !aspath_match(asp->aspath->data, asp->aspath->len,
> > + &req->as, 0))
> > + return;
> > + if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
> > + !community_match(asp, req->community.as,
> > + req->community.type))
> > + return;
> > + if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
> > + !community_ext_match(asp, &req->extcommunity, 0))
> > + return;
> > + if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
> > + !community_large_match(asp, req->large_community.as,
> > + req->large_community.ld1, req->large_community.ld2))
> > + return;
> > + if (!ovs_match(p, req->flags))
> > + return;
> > + rde_dump_rib_as(p, asp, req->pid, req->flags);
> > }
> >
> > static void
> > @@ -2342,6 +2310,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req
> > }
> > if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
> > rid = RIB_ADJ_IN;
> > + } else if (req->flags & F_CTL_ADJ_OUT) {
> > + rid = RIB_ADJ_OUT;
> > } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
> > log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
> > error = CTL_RES_NOSUCHPEER;
> > @@ -2591,6 +2561,19 @@ rde_up_dump_upcall(struct rib_entry *re,
> > }
> >
> > static void
> > +rde_up_flush_upcall(struct rib_entry *re, void *ptr)
> > +{
> > + struct rde_peer *peer = ptr;
> > + struct prefix *p, *np;
> > +
> > + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> > + if (peer != prefix_peer(p))
> > + continue;
> > + up_generate_updates(out_rules, peer, NULL, p);
> > + }
> > +}
> > +
> > +static void
> > rde_up_dump_done(void *ptr, u_int8_t aid)
> > {
> > struct rde_peer *peer = ptr;
> > @@ -2805,6 +2788,8 @@ rde_reload_done(void)
> > u_int16_t rid;
> > int reload = 0;
> >
> > + softreconfig = 0;
> > +
> > /* first merge the main config */
> > if ((conf->flags & BGPD_FLAG_NO_EVALUATE) &&
> > (nconf->flags & BGPD_FLAG_NO_EVALUATE) == 0) {
> > @@ -2887,11 +2872,16 @@ rde_reload_done(void)
> > char *p = log_fmt_peer(&peer->conf);
> > log_debug("rib change: reloading peer %s", p);
> > free(p);
> > - up_withdraw_all(peer);
> > peer->loc_rib_id = rib_find(peer->conf.rib);
> > if (peer->loc_rib_id == RIB_NOTFOUND)
> > fatalx("King Bula's peer met an unknown RIB");
> > peer->reconf_rib = 1;
> > + softreconfig++;
> > + if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC,
> > + RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
> > + rde_softreconfig_in_done, NULL) == -1)
> > + fatal("%s: rib_dump_new", __func__);
> > + log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
> > continue;
> > }
> > if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) {
> > @@ -2941,14 +2931,13 @@ rde_reload_done(void)
> > }
> > log_info("RDE reconfigured");
> >
> > - softreconfig = 0;
> > if (reload > 0) {
> > - log_info("running softreconfig in");
> > softreconfig++;
> > if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
> > RDE_RUNNER_ROUNDS, &ribs[RIB_ADJ_IN], rde_softreconfig_in,
> > rde_softreconfig_in_done, NULL) == -1)
> > fatal("%s: rib_dump_new", __func__);
> > + log_info("running softreconfig in");
> > } else {
> > rde_softreconfig_in_done(NULL, AID_UNSPEC);
> > }
> > @@ -3113,61 +3102,18 @@ rde_softreconfig_in(struct rib_entry *re
> > }
> >
> > static void
> > -rde_softreconfig_out_peer(struct rib_entry *re, struct rde_peer *peer)
> > -{
> > - struct filterstate ostate, nstate;
> > - struct bgpd_addr addr;
> > - struct prefix *p = re->active;
> > - struct pt_entry *pt;
> > - enum filter_actions oa, na;
> > -
> > - pt = re->prefix;
> > - pt_getaddr(pt, &addr);
> > -
> > - if (up_test_update(peer, p) != 1)
> > - return;
> > -
> > - rde_filterstate_prep(&ostate, prefix_aspath(p), prefix_nexthop(p),
> > - prefix_nhflags(p));
> > - rde_filterstate_prep(&nstate, prefix_aspath(p), prefix_nexthop(p),
> > - prefix_nhflags(p));
> > - oa = rde_filter(out_rules_tmp, peer, p, &ostate);
> > - na = rde_filter(out_rules, peer, p, &nstate);
> > -
> > - /* go through all 4 possible combinations */
> > - /* if (oa == ACTION_DENY && na == ACTION_DENY) */
> > - /* nothing todo */
> > - if (oa == ACTION_DENY && na == ACTION_ALLOW) {
> > - /* send update */
> > - up_rib_add(peer, re);
> > - up_generate(peer, &nstate, &addr, pt->prefixlen);
> > - } else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
> > - /* send withdraw */
> > - up_rib_remove(peer, re);
> > - up_generate(peer, NULL, &addr, pt->prefixlen);
> > - } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
> > - /* send update if anything changed */
> > - if (nstate.nhflags != ostate.nhflags ||
> > - nstate.nexthop != ostate.nexthop ||
> > - path_compare(&nstate.aspath, &ostate.aspath) != 0)
> > - up_generate(peer, &nstate, &addr, pt->prefixlen);
> > - }
> > -
> > - rde_filterstate_clean(&ostate);
> > - rde_filterstate_clean(&nstate);
> > -}
> > -
> > -static void
> > rde_softreconfig_out(struct rib_entry *re, void *bula)
> > {
> > + struct prefix *new = re->active;
> > struct rde_peer *peer;
> >
> > - if (re->active == NULL)
> > + if (new == NULL)
> > return;
> >
> > LIST_FOREACH(peer, &peerlist, peer_l) {
> > if (peer->loc_rib_id == re->rib_id && peer->reconf_out)
> > - rde_softreconfig_out_peer(re, peer);
> > + /* Regenerate all updates. */
> > + up_generate_updates(out_rules, peer, new, new);
> > }
> > }
> >
> > @@ -3687,7 +3633,7 @@ network_add(struct network_config *nc, i
> > vstate = rde_roa_validity(&conf->rde_roa, &nc->prefix,
> > nc->prefixlen, asp->source_as);
> > if (path_update(&ribs[RIB_ADJ_IN].rib, peerself, &state, &nc->prefix,
> > - nc->prefixlen, vstate))
> > + nc->prefixlen, vstate) == 1)
>
> Isn't there too many tabs here ?
>
> > peerself->prefix_cnt++;
> > for (i = RIB_LOC_START; i < rib_size; i++) {
> > if (!rib_valid(i))
> > @@ -3835,21 +3781,20 @@ rde_shutdown(void)
> > * rde_shutdown depends on this.
> > */
> >
> > - /*
> > - * All peers go down
> > - */
> > + /* Frist all peers go down */
>
> Frist ?! :)
>
> > for (i = 0; i <= peertable.peer_hashmask; i++)
> > while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
> > peer_down(p->conf.id);
> >
> > + /* then since decision process is off, kill RIB_ADJ_OUT */
> > + rib_free(rib_byid(RIB_ADJ_OUT));
> > +
> > /* free filters */
> > filterlist_free(out_rules);
> > - for (i = 0; i < rib_size; i++) {
> > - if (!rib_valid(i))
> > - continue;
> > - filterlist_free(ribs[i].in_rules);
> > - }
> > + filterlist_free(out_rules_tmp);
> >
> > + /* now check everything */
> > + rib_shutdown();
> > nexthop_shutdown();
> > path_shutdown();
> > aspath_shutdown();
> > Index: rde.h
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> > retrieving revision 1.201
> > diff -u -p -r1.201 rde.h
> > --- rde.h 31 Oct 2018 14:50:07 -0000 1.201
> > +++ rde.h 31 Oct 2018 15:09:39 -0000
> > @@ -79,7 +79,6 @@ LIST_HEAD(attr_list, attr);
> > LIST_HEAD(aspath_head, rde_aspath);
> > RB_HEAD(uptree_prefix, update_prefix);
> > RB_HEAD(uptree_attr, update_attr);
> > -RB_HEAD(uptree_rib, update_rib);
> >
> > TAILQ_HEAD(uplist_prefix, update_prefix);
> > TAILQ_HEAD(uplist_attr, update_attr);
> > @@ -91,7 +90,6 @@ struct rde_peer {
> > struct bgpd_addr remote_addr;
> > struct bgpd_addr local_v4_addr;
> > struct bgpd_addr local_v6_addr;
> > - struct uptree_rib up_rib;
> > struct uptree_prefix up_prefix;
> > struct uptree_attr up_attrs;
> > struct uplist_attr updates[AID_MAX];
> > @@ -440,6 +438,7 @@ struct rib *rib_byid(u_int16_t);
> > u_int16_t rib_find(char *);
> > struct rib_desc *rib_desc(struct rib *);
> > void rib_free(struct rib *);
> > +void rib_shutdown(void);
> > struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
> > struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *);
> > int rib_dump_pending(void);
> > Index: rde_rib.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> > retrieving revision 1.184
> > diff -u -p -r1.184 rde_rib.c
> > --- rde_rib.c 31 Oct 2018 14:50:07 -0000 1.184
> > +++ rde_rib.c 31 Oct 2018 15:09:39 -0000
> > @@ -216,6 +216,21 @@ rib_free(struct rib *rib)
> > bzero(rd, sizeof(struct rib_desc));
> > }
> >
> > +void
> > +rib_shutdown(void)
> > +{
> > + u_int16_t id;
> > +
> > + for (id = 0; id < rib_size; id++) {
> > + if (!rib_valid(id))
> > + continue;
> > + if (!RB_EMPTY(rib_tree(&ribs[id].rib)))
> > + log_warnx("%s: rib %s is not empty", __func__,
> > + ribs[id].name);
> > + rib_free(&ribs[id].rib);
> > + }
> > +}
> > +
> > struct rib_entry *
> > rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
> > {
> > @@ -553,6 +568,11 @@ path_hash_stats(struct rde_hashstats *hs
> > }
> > }
> >
> > +/*
> > + * Update a prefix belonging to a possible new aspath.
> > + * Return 1 if prefix was newly added, 0 if it was just changed or 2 if no
> > + * change happened at all.
> > + */
> > int
> > path_update(struct rib *rib, struct rde_peer *peer, struct filterstate
> > *state,
> > struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate)
> > @@ -575,7 +595,7 @@ path_update(struct rib *rib, struct rde_
> > /* no change, update last change */
> > p->lastchange = time(NULL);
> > p->validation_state = vstate;
> > - return (0);
> > + return (2);
> > }
> > }
> >
> > Index: rde_update.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> > retrieving revision 1.102
> > diff -u -p -r1.102 rde_update.c
> > --- rde_update.c 24 Oct 2018 08:26:37 -0000 1.102
> > +++ rde_update.c 31 Oct 2018 15:09:39 -0000
> > @@ -53,15 +53,9 @@ struct update_attr {
> > u_int16_t mpattr_len;
> > };
> >
> > -struct update_rib {
> > - RB_ENTRY(update_rib) entry;
> > - struct rib_entry *re;
> > -};
> > -
> > void up_clear(struct uplist_attr *, struct uplist_prefix *);
> > int up_prefix_cmp(struct update_prefix *, struct update_prefix *);
> > int up_attr_cmp(struct update_attr *, struct update_attr *);
> > -int up_rib_cmp(struct update_rib *, struct update_rib *);
> > int up_add(struct rde_peer *, struct update_prefix *, struct
> > update_attr *);
> >
> > RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
> > @@ -70,9 +64,6 @@ RB_GENERATE(uptree_prefix, update_prefix
> > RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp)
> > RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp)
> >
> > -RB_PROTOTYPE(uptree_rib, update_rib, entry, up_rib_cmp)
> > -RB_GENERATE(uptree_rib, update_rib, entry, up_rib_cmp)
> > -
> > SIPHASH_KEY uptree_key;
> >
> > void
> > @@ -86,7 +77,6 @@ up_init(struct rde_peer *peer)
> > }
> > RB_INIT(&peer->up_prefix);
> > RB_INIT(&peer->up_attrs);
> > - RB_INIT(&peer->up_rib);
> > peer->up_pcnt = 0;
> > peer->up_acnt = 0;
> > peer->up_nlricnt = 0;
> > @@ -120,7 +110,6 @@ up_clear(struct uplist_attr *updates, st
> > void
> > up_down(struct rde_peer *peer)
> > {
> > - struct update_rib *ur, *nur;
> > u_int8_t i;
> >
> > for (i = 0; i < AID_MAX; i++)
> > @@ -128,10 +117,6 @@ up_down(struct rde_peer *peer)
> >
> > RB_INIT(&peer->up_prefix);
> > RB_INIT(&peer->up_attrs);
> > - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
> > - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> > - free(ur);
> > - }
> >
> > peer->up_pcnt = 0;
> > peer->up_acnt = 0;
> > @@ -210,58 +195,6 @@ up_attr_cmp(struct update_attr *a, struc
> > }
> >
> > int
> > -up_rib_cmp(struct update_rib *a, struct update_rib *b)
> > -{
> > - if (a->re != b->re)
> > - return (a->re > b->re ? 1 : -1);
> > - return 0;
> > -}
> > -
> > -int
> > -up_rib_remove(struct rde_peer *peer, struct rib_entry *re)
> > -{
> > - struct update_rib *ur, u;
> > - u.re = re;
> > -
> > - ur = RB_FIND(uptree_rib, &peer->up_rib, &u);
> > - if (ur != NULL) {
> > - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> > - free(ur);
> > - return 1;
> > - } else
> > - return 0;
> > -}
> > -
> > -void
> > -up_rib_add(struct rde_peer *peer, struct rib_entry *re)
> > -{
> > - struct update_rib *ur;
> > -
> > - if ((ur = calloc(1, sizeof(*ur))) == NULL)
> > - fatal("%s", __func__);
> > - ur->re = re;
> > -
> > - if (RB_INSERT(uptree_rib, &peer->up_rib, ur) != NULL)
> > - free(ur);
> > -}
> > -
> > -void
> > -up_withdraw_all(struct rde_peer *peer)
> > -{
> > - struct bgpd_addr addr;
> > - struct update_rib *ur, *nur;
> > -
> > - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) {
> > - RB_REMOVE(uptree_rib, &peer->up_rib, ur);
> > -
> > - /* withdraw prefix */
> > - pt_getaddr(ur->re->prefix, &addr);
> > - up_generate(peer, NULL, &addr, ur->re->prefix->prefixlen);
> > - free(ur);
> > - }
> > -}
> > -
> > -int
> > up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr
> > *a)
> > {
> > struct update_attr *na = NULL;
> > @@ -473,14 +406,16 @@ up_generate_updates(struct filter_head *
> >
> > if (new == NULL) {
> > withdraw:
> > - if (up_test_update(peer, old) != 1)
> > - return;
> > -
> > - if (!up_rib_remove(peer, old->re))
> > + if (old == NULL)
> > return;
> >
> > /* withdraw prefix */
> > pt_getaddr(old->re->prefix, &addr);
> > + if (prefix_remove(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
> > + old->re->prefix->prefixlen) == 0) {
> > + /* not in table, no need to send withdraw */
> > + return;
> > + }
> > up_generate(peer, NULL, &addr, old->re->prefix->prefixlen);
> > } else {
> > switch (up_test_update(peer, new)) {
> > @@ -500,8 +435,12 @@ withdraw:
> > }
> >
> > pt_getaddr(new->re->prefix, &addr);
> > - up_generate(peer, &state, &addr, new->re->prefix->prefixlen);
> > - up_rib_add(peer, new->re);
> > + if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
> > + new->re->prefix->prefixlen, prefix_vstate(new)) != 2) {
> > + /* only send update if path changed */
> > + up_generate(peer, &state, &addr,
> > + new->re->prefix->prefixlen);
> > + }
> >
> > rde_filterstate_clean(&state);
> > }
> >