This diff introduces a real Adj-RIB-Out. It is the minimal change to introduce the new RIB. This removes the update_rib introduced before 6.4 lock because that is now replaced with a real RIB. The code used by bgpctl show rib is now a fair amount easier since now one RIB runner can be used for all cases. path_update() is now returning if a prefix was not modified which is needed in the update path to suppress unneeded updates. The softreconfig out case becomes simpler because there is no more the need to run with both filters and check results. Last the shutdown code of the RDE had to be adjusted a bit to still work with the Adj-RIB-Out.
This may cause memory consumption to go up but my testing has shown that the result is actually not significant. Needs the commits from earlier today to apply. -- :wq Claudio Index: rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v retrieving revision 1.444 diff -u -p -r1.444 rde.c --- rde.c 31 Oct 2018 14:50:07 -0000 1.444 +++ rde.c 31 Oct 2018 15:09:39 -0000 @@ -1395,7 +1395,7 @@ rde_update_update(struct rde_peer *peer, /* add original path to the Adj-RIB-In */ if (path_update(&ribs[RIB_ADJ_IN].rib, peer, in, prefix, prefixlen, - vstate)) + vstate) == 1) peer->prefix_cnt++; /* max prefix checker */ @@ -2124,16 +2124,17 @@ rde_reflector(struct rde_peer *peer, str * control specific functions */ static void -rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, - struct nexthop *nexthop, pid_t pid, int flags) +rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) { struct ctl_show_rib rib; struct ibuf *wbuf; struct attr *a; + struct nexthop *nexthop; void *bp; time_t staletime; u_int8_t l; + nexthop = prefix_nexthop(p); bzero(&rib, sizeof(rib)); rib.lastchange = p->lastchange; rib.local_pref = asp->lpref; @@ -2208,70 +2209,37 @@ rde_dump_rib_as(struct prefix *p, struct } static void -rde_dump_filterout(struct rde_peer *peer, struct prefix *p, - struct ctl_show_rib_request *req) -{ - struct filterstate state; - enum filter_actions a; - - if (up_test_update(peer, p) != 1) - return; - - rde_filterstate_prep(&state, prefix_aspath(p), prefix_nexthop(p), - prefix_nhflags(p)); - a = rde_filter(out_rules, peer, p, &state); - - if (a == ACTION_ALLOW) - rde_dump_rib_as(p, &state.aspath, state.nexthop, req->pid, - req->flags); - - rde_filterstate_clean(&state); -} - -static void rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) { - struct rde_peer *peer; struct rde_aspath *asp; - if (req->flags & F_CTL_ADJ_OUT) { - if (p->re->active != p) - /* only consider active prefix */ - return; - if (req->peerid) { - if ((peer = peer_get(req->peerid)) != NULL) - rde_dump_filterout(peer, p, req); - return; - } - } else { - asp = prefix_aspath(p); - if (req->peerid && req->peerid != prefix_peer(p)->conf.id) - return; - if ((req->flags & F_CTL_ACTIVE) && p->re->active != p) - return; - if ((req->flags & F_CTL_INVALID) && - (asp->flags & F_ATTR_PARSE_ERR) == 0) - return; - if (req->type == IMSG_CTL_SHOW_RIB_AS && - !aspath_match(asp->aspath->data, asp->aspath->len, - &req->as, 0)) - return; - if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY && - !community_match(asp, req->community.as, - req->community.type)) - return; - if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY && - !community_ext_match(asp, &req->extcommunity, 0)) - return; - if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY && - !community_large_match(asp, req->large_community.as, - req->large_community.ld1, req->large_community.ld2)) - return; - if (!ovs_match(p, req->flags)) - return; - rde_dump_rib_as(p, asp, prefix_nexthop(p), req->pid, - req->flags); - } + if (req->peerid && req->peerid != prefix_peer(p)->conf.id) + return; + + asp = prefix_aspath(p); + if ((req->flags & F_CTL_ACTIVE) && p->re->active != p) + return; + if ((req->flags & F_CTL_INVALID) && + (asp->flags & F_ATTR_PARSE_ERR) == 0) + return; + if (req->type == IMSG_CTL_SHOW_RIB_AS && + !aspath_match(asp->aspath->data, asp->aspath->len, + &req->as, 0)) + return; + if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY && + !community_match(asp, req->community.as, + req->community.type)) + return; + if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY && + !community_ext_match(asp, &req->extcommunity, 0)) + return; + if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY && + !community_large_match(asp, req->large_community.as, + req->large_community.ld1, req->large_community.ld2)) + return; + if (!ovs_match(p, req->flags)) + return; + rde_dump_rib_as(p, asp, req->pid, req->flags); } static void @@ -2342,6 +2310,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req } if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) { rid = RIB_ADJ_IN; + } else if (req->flags & F_CTL_ADJ_OUT) { + rid = RIB_ADJ_OUT; } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) { log_warnx("rde_dump_ctx_new: no such rib %s", req->rib); error = CTL_RES_NOSUCHPEER; @@ -2591,6 +2561,19 @@ rde_up_dump_upcall(struct rib_entry *re, } static void +rde_up_flush_upcall(struct rib_entry *re, void *ptr) +{ + struct rde_peer *peer = ptr; + struct prefix *p, *np; + + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { + if (peer != prefix_peer(p)) + continue; + up_generate_updates(out_rules, peer, NULL, p); + } +} + +static void rde_up_dump_done(void *ptr, u_int8_t aid) { struct rde_peer *peer = ptr; @@ -2805,6 +2788,8 @@ rde_reload_done(void) u_int16_t rid; int reload = 0; + softreconfig = 0; + /* first merge the main config */ if ((conf->flags & BGPD_FLAG_NO_EVALUATE) && (nconf->flags & BGPD_FLAG_NO_EVALUATE) == 0) { @@ -2887,11 +2872,16 @@ rde_reload_done(void) char *p = log_fmt_peer(&peer->conf); log_debug("rib change: reloading peer %s", p); free(p); - up_withdraw_all(peer); peer->loc_rib_id = rib_find(peer->conf.rib); if (peer->loc_rib_id == RIB_NOTFOUND) fatalx("King Bula's peer met an unknown RIB"); peer->reconf_rib = 1; + softreconfig++; + if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, + RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall, + rde_softreconfig_in_done, NULL) == -1) + fatal("%s: rib_dump_new", __func__); + log_peer_info(&peer->conf, "flushing Adj-RIB-Out"); continue; } if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) { @@ -2941,14 +2931,13 @@ rde_reload_done(void) } log_info("RDE reconfigured"); - softreconfig = 0; if (reload > 0) { - log_info("running softreconfig in"); softreconfig++; if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, RDE_RUNNER_ROUNDS, &ribs[RIB_ADJ_IN], rde_softreconfig_in, rde_softreconfig_in_done, NULL) == -1) fatal("%s: rib_dump_new", __func__); + log_info("running softreconfig in"); } else { rde_softreconfig_in_done(NULL, AID_UNSPEC); } @@ -3113,61 +3102,18 @@ rde_softreconfig_in(struct rib_entry *re } static void -rde_softreconfig_out_peer(struct rib_entry *re, struct rde_peer *peer) -{ - struct filterstate ostate, nstate; - struct bgpd_addr addr; - struct prefix *p = re->active; - struct pt_entry *pt; - enum filter_actions oa, na; - - pt = re->prefix; - pt_getaddr(pt, &addr); - - if (up_test_update(peer, p) != 1) - return; - - rde_filterstate_prep(&ostate, prefix_aspath(p), prefix_nexthop(p), - prefix_nhflags(p)); - rde_filterstate_prep(&nstate, prefix_aspath(p), prefix_nexthop(p), - prefix_nhflags(p)); - oa = rde_filter(out_rules_tmp, peer, p, &ostate); - na = rde_filter(out_rules, peer, p, &nstate); - - /* go through all 4 possible combinations */ - /* if (oa == ACTION_DENY && na == ACTION_DENY) */ - /* nothing todo */ - if (oa == ACTION_DENY && na == ACTION_ALLOW) { - /* send update */ - up_rib_add(peer, re); - up_generate(peer, &nstate, &addr, pt->prefixlen); - } else if (oa == ACTION_ALLOW && na == ACTION_DENY) { - /* send withdraw */ - up_rib_remove(peer, re); - up_generate(peer, NULL, &addr, pt->prefixlen); - } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) { - /* send update if anything changed */ - if (nstate.nhflags != ostate.nhflags || - nstate.nexthop != ostate.nexthop || - path_compare(&nstate.aspath, &ostate.aspath) != 0) - up_generate(peer, &nstate, &addr, pt->prefixlen); - } - - rde_filterstate_clean(&ostate); - rde_filterstate_clean(&nstate); -} - -static void rde_softreconfig_out(struct rib_entry *re, void *bula) { + struct prefix *new = re->active; struct rde_peer *peer; - if (re->active == NULL) + if (new == NULL) return; LIST_FOREACH(peer, &peerlist, peer_l) { if (peer->loc_rib_id == re->rib_id && peer->reconf_out) - rde_softreconfig_out_peer(re, peer); + /* Regenerate all updates. */ + up_generate_updates(out_rules, peer, new, new); } } @@ -3687,7 +3633,7 @@ network_add(struct network_config *nc, i vstate = rde_roa_validity(&conf->rde_roa, &nc->prefix, nc->prefixlen, asp->source_as); if (path_update(&ribs[RIB_ADJ_IN].rib, peerself, &state, &nc->prefix, - nc->prefixlen, vstate)) + nc->prefixlen, vstate) == 1) peerself->prefix_cnt++; for (i = RIB_LOC_START; i < rib_size; i++) { if (!rib_valid(i)) @@ -3835,21 +3781,20 @@ rde_shutdown(void) * rde_shutdown depends on this. */ - /* - * All peers go down - */ + /* Frist all peers go down */ for (i = 0; i <= peertable.peer_hashmask; i++) while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL) peer_down(p->conf.id); + /* then since decision process is off, kill RIB_ADJ_OUT */ + rib_free(rib_byid(RIB_ADJ_OUT)); + /* free filters */ filterlist_free(out_rules); - for (i = 0; i < rib_size; i++) { - if (!rib_valid(i)) - continue; - filterlist_free(ribs[i].in_rules); - } + filterlist_free(out_rules_tmp); + /* now check everything */ + rib_shutdown(); nexthop_shutdown(); path_shutdown(); aspath_shutdown(); Index: rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v retrieving revision 1.201 diff -u -p -r1.201 rde.h --- rde.h 31 Oct 2018 14:50:07 -0000 1.201 +++ rde.h 31 Oct 2018 15:09:39 -0000 @@ -79,7 +79,6 @@ LIST_HEAD(attr_list, attr); LIST_HEAD(aspath_head, rde_aspath); RB_HEAD(uptree_prefix, update_prefix); RB_HEAD(uptree_attr, update_attr); -RB_HEAD(uptree_rib, update_rib); TAILQ_HEAD(uplist_prefix, update_prefix); TAILQ_HEAD(uplist_attr, update_attr); @@ -91,7 +90,6 @@ struct rde_peer { struct bgpd_addr remote_addr; struct bgpd_addr local_v4_addr; struct bgpd_addr local_v6_addr; - struct uptree_rib up_rib; struct uptree_prefix up_prefix; struct uptree_attr up_attrs; struct uplist_attr updates[AID_MAX]; @@ -440,6 +438,7 @@ struct rib *rib_byid(u_int16_t); u_int16_t rib_find(char *); struct rib_desc *rib_desc(struct rib *); void rib_free(struct rib *); +void rib_shutdown(void); struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int); struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *); int rib_dump_pending(void); Index: rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v retrieving revision 1.184 diff -u -p -r1.184 rde_rib.c --- rde_rib.c 31 Oct 2018 14:50:07 -0000 1.184 +++ rde_rib.c 31 Oct 2018 15:09:39 -0000 @@ -216,6 +216,21 @@ rib_free(struct rib *rib) bzero(rd, sizeof(struct rib_desc)); } +void +rib_shutdown(void) +{ + u_int16_t id; + + for (id = 0; id < rib_size; id++) { + if (!rib_valid(id)) + continue; + if (!RB_EMPTY(rib_tree(&ribs[id].rib))) + log_warnx("%s: rib %s is not empty", __func__, + ribs[id].name); + rib_free(&ribs[id].rib); + } +} + struct rib_entry * rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) { @@ -553,6 +568,11 @@ path_hash_stats(struct rde_hashstats *hs } } +/* + * Update a prefix belonging to a possible new aspath. + * Return 1 if prefix was newly added, 0 if it was just changed or 2 if no + * change happened at all. + */ int path_update(struct rib *rib, struct rde_peer *peer, struct filterstate *state, struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate) @@ -575,7 +595,7 @@ path_update(struct rib *rib, struct rde_ /* no change, update last change */ p->lastchange = time(NULL); p->validation_state = vstate; - return (0); + return (2); } } Index: rde_update.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v retrieving revision 1.102 diff -u -p -r1.102 rde_update.c --- rde_update.c 24 Oct 2018 08:26:37 -0000 1.102 +++ rde_update.c 31 Oct 2018 15:09:39 -0000 @@ -53,15 +53,9 @@ struct update_attr { u_int16_t mpattr_len; }; -struct update_rib { - RB_ENTRY(update_rib) entry; - struct rib_entry *re; -}; - void up_clear(struct uplist_attr *, struct uplist_prefix *); int up_prefix_cmp(struct update_prefix *, struct update_prefix *); int up_attr_cmp(struct update_attr *, struct update_attr *); -int up_rib_cmp(struct update_rib *, struct update_rib *); int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *); RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp) @@ -70,9 +64,6 @@ RB_GENERATE(uptree_prefix, update_prefix RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp) RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp) -RB_PROTOTYPE(uptree_rib, update_rib, entry, up_rib_cmp) -RB_GENERATE(uptree_rib, update_rib, entry, up_rib_cmp) - SIPHASH_KEY uptree_key; void @@ -86,7 +77,6 @@ up_init(struct rde_peer *peer) } RB_INIT(&peer->up_prefix); RB_INIT(&peer->up_attrs); - RB_INIT(&peer->up_rib); peer->up_pcnt = 0; peer->up_acnt = 0; peer->up_nlricnt = 0; @@ -120,7 +110,6 @@ up_clear(struct uplist_attr *updates, st void up_down(struct rde_peer *peer) { - struct update_rib *ur, *nur; u_int8_t i; for (i = 0; i < AID_MAX; i++) @@ -128,10 +117,6 @@ up_down(struct rde_peer *peer) RB_INIT(&peer->up_prefix); RB_INIT(&peer->up_attrs); - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) { - RB_REMOVE(uptree_rib, &peer->up_rib, ur); - free(ur); - } peer->up_pcnt = 0; peer->up_acnt = 0; @@ -210,58 +195,6 @@ up_attr_cmp(struct update_attr *a, struc } int -up_rib_cmp(struct update_rib *a, struct update_rib *b) -{ - if (a->re != b->re) - return (a->re > b->re ? 1 : -1); - return 0; -} - -int -up_rib_remove(struct rde_peer *peer, struct rib_entry *re) -{ - struct update_rib *ur, u; - u.re = re; - - ur = RB_FIND(uptree_rib, &peer->up_rib, &u); - if (ur != NULL) { - RB_REMOVE(uptree_rib, &peer->up_rib, ur); - free(ur); - return 1; - } else - return 0; -} - -void -up_rib_add(struct rde_peer *peer, struct rib_entry *re) -{ - struct update_rib *ur; - - if ((ur = calloc(1, sizeof(*ur))) == NULL) - fatal("%s", __func__); - ur->re = re; - - if (RB_INSERT(uptree_rib, &peer->up_rib, ur) != NULL) - free(ur); -} - -void -up_withdraw_all(struct rde_peer *peer) -{ - struct bgpd_addr addr; - struct update_rib *ur, *nur; - - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) { - RB_REMOVE(uptree_rib, &peer->up_rib, ur); - - /* withdraw prefix */ - pt_getaddr(ur->re->prefix, &addr); - up_generate(peer, NULL, &addr, ur->re->prefix->prefixlen); - free(ur); - } -} - -int up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a) { struct update_attr *na = NULL; @@ -473,14 +406,16 @@ up_generate_updates(struct filter_head * if (new == NULL) { withdraw: - if (up_test_update(peer, old) != 1) - return; - - if (!up_rib_remove(peer, old->re)) + if (old == NULL) return; /* withdraw prefix */ pt_getaddr(old->re->prefix, &addr); + if (prefix_remove(&ribs[RIB_ADJ_OUT].rib, peer, &addr, + old->re->prefix->prefixlen) == 0) { + /* not in table, no need to send withdraw */ + return; + } up_generate(peer, NULL, &addr, old->re->prefix->prefixlen); } else { switch (up_test_update(peer, new)) { @@ -500,8 +435,12 @@ withdraw: } pt_getaddr(new->re->prefix, &addr); - up_generate(peer, &state, &addr, new->re->prefix->prefixlen); - up_rib_add(peer, new->re); + if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr, + new->re->prefix->prefixlen, prefix_vstate(new)) != 2) { + /* only send update if path changed */ + up_generate(peer, &state, &addr, + new->re->prefix->prefixlen); + } rde_filterstate_clean(&state); }