Claudio Jeker([email protected]) on 2019.07.10 22:08:38 +0200:
> This diff is a bit of a monster. It changes the Adj-RIB-Out to be a
> peer-specific set of RB trees instead of using a rib in the original
> sense. The reason for this is that the more peers a system has, the more
> elements end up being linked into the Adj-RIB-Out, and many operations
> do linear searches, which do not scale.
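For anyone not steeped in the RDE: here is a minimal, self-contained toy
(not the bgpd data structures, just the sys/tree.h pattern the diff
switches to) showing what is gained. RB_FIND is O(log n), whereas finding
one peer's prefix in the shared Adj-RIB-Out meant walking lists.

#include <sys/tree.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	RB_ENTRY(node)	 entry;
	int		 key;
};

static int
node_cmp(struct node *a, struct node *b)
{
	return (a->key < b->key ? -1 : a->key > b->key);
}

RB_HEAD(node_tree, node);
RB_GENERATE_STATIC(node_tree, node, entry, node_cmp)

int
main(void)
{
	struct node_tree	 t = RB_INITIALIZER(&t);
	struct node		*n, key;
	int			 i;

	for (i = 0; i < 4000; i++) {
		if ((n = calloc(1, sizeof(*n))) == NULL)
			err(1, NULL);
		n->key = i;
		RB_INSERT(node_tree, &t, n);
	}

	key.key = 2342;
	/* O(log n) lookup instead of a linear list walk */
	n = RB_FIND(node_tree, &t, &key);
	printf("found %d\n", n != NULL ? n->key : -1);
	return (0);
}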
>
> I did some testing with 4000 peers sending 1 prefix each, which are then
> sent back to all peers (resulting in 16 million updates being put in the
> Adj-RIB-Out). Without this diff the system takes about 1h to bring up
> all sessions. With the diff the system finishes in around 5min.
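(For the arithmetic: 4000 peers each announce 1 prefix, and each of the
4000 prefixes is redistributed to all 4000 peers, so the Adj-RIB-Out ends
up holding 4000 * 4000 = 16,000,000 entries.)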
>
> To avoid increasing the memory footprint, struct prefix now uses a union
> for either the list entries or the RB tree entries. Additionally the rib
> dump runner was adjusted so that it also works with the Adj-RIB-Out.
> "bgpctl show rib out" changed a bit since it now dumps one peer after
> the other; apart from that the behaviour should be the same.
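The union trick is easy to check in isolation. A stripped-down sketch
(hypothetical names mirroring the diff; exact sizes depend on the
platform and the queue/tree macro internals):

#include <sys/queue.h>
#include <sys/tree.h>
#include <stdio.h>

struct elem {
	union {
		struct {
			LIST_ENTRY(elem) rib, nexthop;	/* Adj-RIB-In/Loc-RIB */
		} list;
		struct {
			RB_ENTRY(elem) index, update;	/* Adj-RIB-Out */
		} tree;
	} entry;
};

int
main(void)
{
	/*
	 * A prefix is linked either into the rib/nexthop lists or into
	 * the per-peer index/update trees, never both, so the union
	 * costs max(lists, trees) instead of lists + trees.
	 */
	printf("linkage size: %zu bytes\n", sizeof(struct elem));
	return (0);
}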
ok benno@
you have to fix up the regress tests too.
>
> Please test
> --
> :wq Claudio
>
>
> Index: mrt.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
> retrieving revision 1.97
> diff -u -p -r1.97 mrt.c
> --- mrt.c 25 Jun 2019 21:33:55 -0000 1.97
> +++ mrt.c 26 Jun 2019 09:44:12 -0000
> @@ -512,15 +512,11 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
> goto fail;
> }
> nump = 0;
> - LIST_FOREACH(p, &re->prefix_h, rib_l) {
> + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
> struct nexthop *nexthop;
> struct bgpd_addr *nh;
> struct ibuf *tbuf;
>
> - /* skip pending withdraw in Adj-RIB-Out */
> - if (prefix_aspath(p) == NULL)
> - continue;
> -
> nexthop = prefix_nexthop(p);
> if (nexthop == NULL) {
> bzero(&addr, sizeof(struct bgpd_addr));
> @@ -683,10 +679,7 @@ mrt_dump_upcall(struct rib_entry *re, vo
> * dumps the table so we do the same. If only the active route should
> * be dumped p should be set to p = pt->active.
> */
> - LIST_FOREACH(p, &re->prefix_h, rib_l) {
> - /* skip pending withdraw in Adj-RIB-Out */
> - if (prefix_aspath(p) == NULL)
> - continue;
> + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
> if (mrtbuf->type == MRT_TABLE_DUMP)
> mrt_dump_entry(mrtbuf, p, mrtbuf->seqnum++,
> prefix_peer(p));
> Index: parse.y
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
> retrieving revision 1.392
> diff -u -p -r1.392 parse.y
> --- parse.y 22 Jun 2019 05:36:40 -0000 1.392
> +++ parse.y 22 Jun 2019 05:44:57 -0000
> @@ -3282,8 +3282,6 @@ parse_config(char *filename, struct peer
>
> add_rib("Adj-RIB-In", conf->default_tableid,
> F_RIB_NOFIB | F_RIB_NOEVALUATE);
> - add_rib("Adj-RIB-Out", conf->default_tableid,
> - F_RIB_NOFIB | F_RIB_NOEVALUATE);
> add_rib("Loc-RIB", conf->default_tableid, F_RIB_LOCAL);
>
> if ((file = pushfile(filename, 1)) == NULL)
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.475
> diff -u -p -r1.475 rde.c
> --- rde.c 1 Jul 2019 07:07:08 -0000 1.475
> +++ rde.c 1 Jul 2019 07:37:09 -0000
> @@ -94,8 +94,8 @@ u_int8_t rde_roa_validity(struct rde_pr
> void peer_init(u_int32_t);
> void peer_shutdown(void);
> int peer_localaddrs(struct rde_peer *, struct bgpd_addr *);
> +struct rde_peer *peer_match(struct ctl_neighbor *, u_int32_t);
> struct rde_peer *peer_add(u_int32_t, struct peer_config *);
> -struct rde_peer *peer_get(u_int32_t);
> void peer_up(u_int32_t, struct session_up *);
> void peer_down(u_int32_t);
> void peer_flush(struct rde_peer *, u_int8_t, time_t);
> @@ -133,7 +133,7 @@ int softreconfig;
> struct rde_dump_ctx {
> LIST_ENTRY(rde_dump_ctx) entry;
> struct ctl_show_rib_request req;
> - u_int16_t rid;
> + u_int32_t peerid;
> u_int8_t throttled;
> };
>
> @@ -220,7 +220,6 @@ rde_main(int debug, int verbose)
>
> /* make sure the default RIBs are setup */
> rib_new("Adj-RIB-In", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);
> - rib_new("Adj-RIB-Out", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);
>
> out_rules = calloc(1, sizeof(struct filter_head));
> if (out_rules == NULL)
> @@ -2242,7 +2241,7 @@ rde_dump_rib_as(struct prefix *p, struct
> rib.origin = asp->origin;
> rib.validation_state = p->validation_state;
> rib.flags = 0;
> - if (p->re->active == p)
> + if (p->re != NULL && p->re->active == p)
> rib.flags |= F_PREF_ACTIVE;
> if (!prefix_peer(p)->conf.ebgp)
> rib.flags |= F_PREF_INTERNAL;
> @@ -2305,7 +2304,7 @@ rde_dump_rib_as(struct prefix *p, struct
> }
>
> static int
> -rde_dump_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
> +rde_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
> {
> char *s;
>
> @@ -2326,7 +2325,7 @@ rde_dump_filter(struct prefix *p, struct
> {
> struct rde_aspath *asp;
>
> - if (!rde_dump_match_peer(prefix_peer(p), &req->neighbor))
> + if (!rde_match_peer(prefix_peer(p), &req->neighbor))
> return;
>
> asp = prefix_aspath(p);
> @@ -2353,10 +2352,10 @@ rde_dump_filter(struct prefix *p, struct
> static void
> rde_dump_upcall(struct rib_entry *re, void *ptr)
> {
> - struct prefix *p;
> struct rde_dump_ctx *ctx = ptr;
> + struct prefix *p;
>
> - LIST_FOREACH(p, &re->prefix_h, rib_l)
> + LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
> rde_dump_filter(p, &ctx->req);
> }
>
> @@ -2375,10 +2374,38 @@ rde_dump_prefix_upcall(struct rib_entry
> if (ctx->req.prefixlen > pt->prefixlen)
> return;
> if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
> - LIST_FOREACH(p, &re->prefix_h, rib_l)
> + LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
> rde_dump_filter(p, &ctx->req);
> }
>
> +static void
> +rde_dump_adjout_upcall(struct prefix *p, void *ptr)
> +{
> + struct rde_dump_ctx *ctx = ptr;
> +
> + if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
> + return;
> + rde_dump_filter(p, &ctx->req);
> +}
> +
> +static void
> +rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr)
> +{
> + struct rde_dump_ctx *ctx = ptr;
> + struct bgpd_addr addr;
> +
> + if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
> + return;
> +
> + pt_getaddr(p->pt, &addr);
> + if (addr.aid != ctx->req.prefix.aid)
> + return;
> + if (ctx->req.prefixlen > p->pt->prefixlen)
> + return;
> + if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
> + rde_dump_filter(p, &ctx->req);
> +}
> +
> static int
> rde_dump_throttled(void *arg)
> {
> @@ -2391,11 +2418,45 @@ static void
> rde_dump_done(void *arg, u_int8_t aid)
> {
> struct rde_dump_ctx *ctx = arg;
> + struct rde_peer *peer;
> + u_int error;
>
> - imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
> - -1, NULL, 0);
> + if (ctx->req.flags & F_CTL_ADJ_OUT) {
> + peer = peer_match(&ctx->req.neighbor, ctx->peerid);
> + if (peer == NULL)
> + goto done;
> + ctx->peerid = peer->conf.id;
> + switch (ctx->req.type) {
> + case IMSG_CTL_SHOW_RIB:
> + if (prefix_dump_new(peer, ctx->req.aid,
> + CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
> + rde_dump_done, rde_dump_throttled) == -1)
> + goto nomem;
> + break;
> + case IMSG_CTL_SHOW_RIB_PREFIX:
> + if (prefix_dump_new(peer, ctx->req.aid,
> + CTL_MSG_HIGH_MARK, ctx,
> + rde_dump_adjout_prefix_upcall,
> + rde_dump_done, rde_dump_throttled) == -1)
> + goto nomem;
> + break;
> + default:
> + fatalx("%s: unsupported imsg type", __func__);
> + }
> + return;
> + }
> +done:
> + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, -1, NULL, 0);
> LIST_REMOVE(ctx, entry);
> free(ctx);
> + return;
> +
> +nomem:
> + log_warn(__func__);
> + error = CTL_RES_NOMEM;
> + imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, ctx->req.pid, -1, &error,
> + sizeof(error));
> + return;
> }
>
> void
> @@ -2404,24 +2465,92 @@ rde_dump_ctx_new(struct ctl_show_rib_req
> {
> struct rde_dump_ctx *ctx;
> struct rib_entry *re;
> + struct prefix *p;
> u_int error;
> u_int8_t hostplen;
> u_int16_t rid;
>
> if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
> nomem:
> - log_warn("rde_dump_ctx_new");
> + log_warn(__func__);
> error = CTL_RES_NOMEM;
> imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
> sizeof(error));
> return;
> }
> +
> + memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
> + ctx->req.pid = pid;
> + ctx->req.type = type;
> +
> if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
> rid = RIB_ADJ_IN;
> } else if (req->flags & F_CTL_ADJ_OUT) {
> - rid = RIB_ADJ_OUT;
> + struct rde_peer *peer;
> +
> + peer = peer_match(&req->neighbor, 0);
> + if (peer == NULL) {
> + log_warnx("%s: no peer found for adj-rib-out",
> + __func__);
> + error = CTL_RES_NOSUCHPEER;
> + imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1,
> + &error, sizeof(error));
> + free(ctx);
> + return;
> + }
> + ctx->peerid = peer->conf.id;
> + switch (ctx->req.type) {
> + case IMSG_CTL_SHOW_RIB:
> + if (prefix_dump_new(peer, ctx->req.aid,
> + CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
> + rde_dump_done, rde_dump_throttled) == -1)
> + goto nomem;
> + break;
> + case IMSG_CTL_SHOW_RIB_PREFIX:
> + if (req->flags & F_LONGER) {
> + if (prefix_dump_new(peer, ctx->req.aid,
> + CTL_MSG_HIGH_MARK, ctx,
> + rde_dump_adjout_prefix_upcall,
> + rde_dump_done, rde_dump_throttled) == -1)
> + goto nomem;
> + break;
> + }
> + switch (req->prefix.aid) {
> + case AID_INET:
> + case AID_VPN_IPv4:
> + hostplen = 32;
> + break;
> + case AID_INET6:
> + case AID_VPN_IPv6:
> + hostplen = 128;
> + break;
> + default:
> + fatalx("%s: unknown af", __func__);
> + }
> +
> + do {
> + if (req->prefixlen == hostplen)
> + p = prefix_match(peer, &req->prefix);
> + else
> + p = prefix_lookup(peer, &req->prefix,
> + req->prefixlen);
> + if (p)
> + rde_dump_adjout_upcall(p, ctx);
> + } while ((peer = peer_match(&req->neighbor,
> + peer->conf.id)));
> +
> + imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
> + -1, NULL, 0);
> + free(ctx);
> + return;
> + default:
> + fatalx("%s: unsupported imsg type", __func__);
> + }
> +
> + LIST_INSERT_HEAD(&rde_dump_h, ctx, entry);
> + return;
> } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
> - log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
> + log_warnx("%s: no such rib %s", __func__, req->rib);
> error = CTL_RES_NOSUCHRIB;
> imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
> sizeof(error));
> @@ -2429,10 +2558,6 @@ rde_dump_ctx_new(struct ctl_show_rib_req
> return;
> }
>
> - memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
> - ctx->req.pid = pid;
> - ctx->req.type = type;
> - ctx->rid = rid;
> switch (ctx->req.type) {
> case IMSG_CTL_SHOW_NETWORK:
> if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx,
> @@ -2463,10 +2588,10 @@ rde_dump_ctx_new(struct ctl_show_rib_req
> hostplen = 128;
> break;
> default:
> - fatalx("rde_dump_ctx_new: unknown af");
> + fatalx("%s: unknown af", __func__);
> }
> if (req->prefixlen == hostplen)
> - re = rib_lookup(rib_byid(rid), &req->prefix);
> + re = rib_match(rib_byid(rid), &req->prefix);
> else
> re = rib_get(rib_byid(rid), &req->prefix,
> req->prefixlen);
> @@ -2502,21 +2627,7 @@ rde_dump_ctx_terminate(pid_t pid)
>
> LIST_FOREACH(ctx, &rde_dump_h, entry) {
> if (ctx->req.pid == pid) {
> - void (*upcall)(struct rib_entry *, void *);
> - switch (ctx->req.type) {
> - case IMSG_CTL_SHOW_NETWORK:
> - upcall = network_dump_upcall;
> - break;
> - case IMSG_CTL_SHOW_RIB:
> - upcall = rde_dump_upcall;
> - break;
> - case IMSG_CTL_SHOW_RIB_PREFIX:
> - upcall = rde_dump_prefix_upcall;
> - break;
> - default:
> - fatalx("%s: unsupported imsg type", __func__);
> - }
> - rib_dump_terminate(ctx->rid, ctx, upcall);
> + rib_dump_terminate(ctx);
> return;
> }
> }
> @@ -2697,16 +2808,9 @@ rde_up_dump_upcall(struct rib_entry *re,
> }
>
> static void
> -rde_up_flush_upcall(struct rib_entry *re, void *ptr)
> +rde_up_flush_upcall(struct prefix *p, void *ptr)
> {
> - struct rde_peer *peer = ptr;
> - struct prefix *p, *np;
> -
> - LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> - if (peer != prefix_peer(p))
> - continue;
> - up_generate_updates(out_rules, peer, NULL, p);
> - }
> + up_generate_updates(out_rules, prefix_peer(p), NULL, p);
> }
>
> static void
> @@ -3011,18 +3115,16 @@ rde_reload_done(void)
> peer->reconf_out = 0;
> peer->reconf_rib = 0;
> if (peer->loc_rib_id != rib_find(peer->conf.rib)) {
> - char *p = log_fmt_peer(&peer->conf);
> - log_debug("rib change: reloading peer %s", p);
> - free(p);
> + log_peer_info(&peer->conf, "rib change, reloading");
> peer->loc_rib_id = rib_find(peer->conf.rib);
> if (peer->loc_rib_id == RIB_NOTFOUND)
> fatalx("King Bula's peer met an unknown RIB");
> peer->reconf_rib = 1;
> softreconfig++;
> - if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC,
> - RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
> + if (prefix_dump_new(peer, AID_UNSPEC,
> + RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall,
> rde_softreconfig_in_done, NULL) == -1)
> - fatal("%s: rib_dump_new", __func__);
> + fatal("%s: prefix_dump_new", __func__);
> log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
> continue;
> }
> @@ -3197,7 +3299,7 @@ rde_softreconfig_in(struct rib_entry *re
>
> pt = re->prefix;
> pt_getaddr(pt, &prefix);
> - LIST_FOREACH(p, &re->prefix_h, rib_l) {
> + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
> asp = prefix_aspath(p);
> peer = prefix_peer(p);
> force_eval = 0;
> @@ -3358,6 +3460,35 @@ peer_get(u_int32_t id)
> }
>
> struct rde_peer *
> +peer_match(struct ctl_neighbor *n, u_int32_t peerid)
> +{
> + struct rde_peer_head *head;
> + struct rde_peer *peer;
> + u_int32_t i = 0;
> +
> + if (peerid != 0)
> + i = peerid & peertable.peer_hashmask;
> +
> + while (i <= peertable.peer_hashmask) {
> + head = &peertable.peer_hashtbl[i];
> + LIST_FOREACH(peer, head, hash_l) {
> + /* skip peers until peerid is found */
> + if (peerid == peer->conf.id) {
> + peerid = 0;
> + continue;
> + }
> + if (peerid != 0)
> + continue;
> +
> + if (rde_match_peer(peer, n))
> + return (peer);
> + }
> + i++;
> + }
> + return (NULL);
> +}
> +
> +struct rde_peer *
> peer_add(u_int32_t id, struct peer_config *p_conf)
> {
> struct rde_peer_head *head;
> @@ -3441,17 +3572,9 @@ peer_localaddrs(struct rde_peer *peer, s
> }
>
> static void
> -peer_adjout_flush_upcall(struct rib_entry *re, void *arg)
> +peer_adjout_clear_upcall(struct prefix *p, void *arg)
> {
> - struct rde_peer *peer = arg;
> - struct prefix *p, *np;
> -
> - LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> - if (peer != prefix_peer(p))
> - continue;
> - prefix_destroy(p);
> - break; /* optimization, only one match per peer possible */
> - }
> + prefix_adjout_destroy(p);
> }
>
> void
> @@ -3472,9 +3595,9 @@ peer_up(u_int32_t id, struct session_up
> * There is a race condition when doing PEER_ERR -> PEER_DOWN.
> * So just do a full reset of the peer here.
> */
> - if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer,
> - peer_adjout_flush_upcall, NULL, NULL) == -1)
> - fatal("%s: rib_dump_new", __func__);
> + if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
> + peer_adjout_clear_upcall, NULL, NULL) == -1)
> + fatal("%s: prefix_dump_new", __func__);
> peer_flush(peer, AID_UNSPEC, 0);
> peer->prefix_cnt = 0;
> peer->state = PEER_DOWN;
> @@ -3519,12 +3642,12 @@ peer_down(u_int32_t id)
> peer->remote_bgpid = 0;
> peer->state = PEER_DOWN;
> /* stop all pending dumps which may depend on this peer */
> - rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall);
> + rib_dump_terminate(peer);
>
> /* flush Adj-RIB-Out for this peer */
> - if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer,
> - peer_adjout_flush_upcall, NULL, NULL) == -1)
> - fatal("%s: rib_dump_new", __func__);
> + if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
> + peer_adjout_clear_upcall, NULL, NULL) == -1)
> + fatal("%s: prefix_dump_new", __func__);
>
> peer_flush(peer, AID_UNSPEC, 0);
>
> @@ -3553,7 +3676,7 @@ peer_flush_upcall(struct rib_entry *re,
>
> pt_getaddr(re->prefix, &addr);
> prefixlen = re->prefix->prefixlen;
> - LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> + LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
> if (peer != prefix_peer(p))
> continue;
> if (staletime && p->lastchange > staletime)
> @@ -3888,7 +4011,7 @@ network_dump_upcall(struct rib_entry *re
> struct bgpd_addr addr;
> struct rde_dump_ctx *ctx = ptr;
>
> - LIST_FOREACH(p, &re->prefix_h, rib_l) {
> + LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
> asp = prefix_aspath(p);
> if (!(asp->flags & F_PREFIX_ANNOUNCED))
> continue;
> @@ -3925,7 +4048,7 @@ network_flush_upcall(struct rib_entry *r
>
> pt_getaddr(re->prefix, &addr);
> prefixlen = re->prefix->prefixlen;
> - LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> + LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
> if (prefix_peer(p) != peer)
> continue;
> asp = prefix_aspath(p);
> @@ -3965,9 +4088,6 @@ rde_shutdown(void)
> for (i = 0; i <= peertable.peer_hashmask; i++)
> while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
> peer_down(p->conf.id);
> -
> - /* then since decision process is off, kill RIB_ADJ_OUT */
> - rib_free(rib_byid(RIB_ADJ_OUT));
>
> /* free filters */
> filterlist_free(out_rules);
> Index: rde.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> retrieving revision 1.219
> diff -u -p -r1.219 rde.h
> --- rde.h 1 Jul 2019 07:07:08 -0000 1.219
> +++ rde.h 10 Jul 2019 14:42:42 -0000
> @@ -57,8 +57,7 @@ struct rib {
> };
>
> #define RIB_ADJ_IN 0
> -#define RIB_ADJ_OUT 1
> -#define RIB_LOC_START 2
> +#define RIB_LOC_START 1
> #define RIB_NOTFOUND 0xffff
>
> struct rib_desc {
> @@ -78,6 +77,7 @@ LIST_HEAD(aspath_list, aspath);
> LIST_HEAD(attr_list, attr);
> LIST_HEAD(aspath_head, rde_aspath);
> RB_HEAD(prefix_tree, prefix);
> +RB_HEAD(prefix_index, prefix);
>
> struct rde_peer {
> LIST_ENTRY(rde_peer) hash_l; /* hash list over all peers */
> @@ -87,6 +87,7 @@ struct rde_peer {
> struct bgpd_addr local_v4_addr;
> struct bgpd_addr local_v6_addr;
> struct capabilities capa;
> + struct prefix_index adj_rib_out;
> struct prefix_tree updates[AID_MAX];
> struct prefix_tree withdraws[AID_MAX];
> time_t staletime[AID_MAX];
> @@ -306,8 +307,14 @@ struct pt_entry_vpn6 {
> };
>
> struct prefix {
> - LIST_ENTRY(prefix) rib_l, nexthop_l;
> - RB_ENTRY(prefix) entry;
> + union {
> + struct {
> + LIST_ENTRY(prefix) rib, nexthop;
> + } list;
> + struct {
> + RB_ENTRY(prefix) index, update;
> + } tree;
> + } entry;
> struct pt_entry *pt;
> struct rib_entry *re;
> struct rde_aspath *aspath;
> @@ -317,12 +324,17 @@ struct prefix {
> time_t lastchange;
> u_int8_t validation_state;
> u_int8_t nhflags;
> - u_int8_t flags;
> u_int8_t eor;
> -#define PREFIX_FLAG_WITHDRAW 0x01
> -#define PREFIX_FLAG_UPDATE 0x02
> + u_int8_t flags;
> +#define PREFIX_FLAG_WITHDRAW 0x01 /* queued for withdraw */
> +#define PREFIX_FLAG_UPDATE 0x02 /* queued for update */
> +#define PREFIX_FLAG_DEAD 0x04 /* locked but removed */
> +#define PREFIX_FLAG_MASK 0x07 /* mask for the three prefix types */
> +#define PREFIX_NEXTHOP_LINKED 0x40 /* prefix is linked onto nexthop list */
> +#define PREFIX_FLAG_LOCKED 0x80 /* locked by rib walker */
> };
>
> +/* possible states for nhflags */
> #define NEXTHOP_SELF 0x01
> #define NEXTHOP_REJECT 0x02
> #define NEXTHOP_BLACKHOLE 0x04
> @@ -356,6 +368,7 @@ u_int32_t rde_local_as(void);
> int rde_noevaluate(void);
> int rde_decisionflags(void);
> int rde_as4byte(struct rde_peer *);
> +struct rde_peer *peer_get(u_int32_t);
>
> /* rde_attr.c */
> int attr_write(void *, u_int16_t, u_int8_t, u_int8_t, void *,
> @@ -395,6 +408,7 @@ u_char *aspath_override(struct aspath *
> u_int16_t *);
> int aspath_lenmatch(struct aspath *, enum aslen_spec, u_int);
>
> +/* rde_community.c */
> int community_match(struct rde_community *, struct community *,
> struct rde_peer *);
> int community_set(struct rde_community *, struct community *,
> @@ -499,15 +513,14 @@ struct rib_desc *rib_desc(struct rib *);
> void rib_free(struct rib *);
> void rib_shutdown(void);
> struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
> -struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *);
> +struct rib_entry *rib_match(struct rib *, struct bgpd_addr *);
> int rib_dump_pending(void);
> void rib_dump_runner(void);
> int rib_dump_new(u_int16_t, u_int8_t, unsigned int, void *,
> void (*)(struct rib_entry *, void *),
> void (*)(void *, u_int8_t),
> int (*)(void *));
> -void rib_dump_terminate(u_int16_t, void *,
> - void (*)(struct rib_entry *, void *));
> +void rib_dump_terminate(void *);
>
> static inline struct rib *
> re_rib(struct rib_entry *re)
> @@ -540,13 +553,20 @@ void path_put(struct rde_aspath *);
> #define PREFIX_SIZE(x) (((x) + 7) / 8 + 1)
> struct prefix *prefix_get(struct rib *, struct rde_peer *,
> struct bgpd_addr *, int);
> +struct prefix *prefix_lookup(struct rde_peer *, struct bgpd_addr *, int);
> +struct prefix *prefix_match(struct rde_peer *, struct bgpd_addr *);
> int prefix_remove(struct rib *, struct rde_peer *,
> struct bgpd_addr *, int);
> void prefix_add_eor(struct rde_peer *, u_int8_t);
> -void prefix_update(struct rib *, struct rde_peer *,
> - struct bgpd_addr *, int);
> -int prefix_withdraw(struct rib *, struct rde_peer *,
> - struct bgpd_addr *, int);
> +int prefix_update(struct rde_peer *, struct filterstate *,
> + struct bgpd_addr *, int, u_int8_t);
> +int prefix_withdraw(struct rde_peer *, struct bgpd_addr *, int);
> +void prefix_adjout_destroy(struct prefix *p);
> +void prefix_adjout_dump(struct rde_peer *, void *,
> + void (*)(struct prefix *, void *));
> +int prefix_dump_new(struct rde_peer *, u_int8_t, unsigned int,
> + void *, void (*)(struct prefix *, void *),
> + void (*)(void *, u_int8_t), int (*)(void *));
> int prefix_write(u_char *, int, struct bgpd_addr *, u_int8_t, int);
> int prefix_writebuf(struct ibuf *, struct bgpd_addr *, u_int8_t);
> struct prefix *prefix_bypeer(struct rib_entry *, struct rde_peer *);
> Index: rde_decide.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> retrieving revision 1.74
> diff -u -p -r1.74 rde_decide.c
> --- rde_decide.c 21 Jan 2019 02:07:56 -0000 1.74
> +++ rde_decide.c 20 Jun 2019 19:50:25 -0000
> @@ -245,7 +245,7 @@ prefix_evaluate(struct prefix *p, struct
> if (re_rib(re)->flags & F_RIB_NOEVALUATE || rde_noevaluate()) {
> /* decision process is turned off */
> if (p != NULL)
> - LIST_INSERT_HEAD(&re->prefix_h, p, rib_l);
> + LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
> if (re->active != NULL)
> re->active = NULL;
> return;
> @@ -253,15 +253,18 @@ prefix_evaluate(struct prefix *p, struct
>
> if (p != NULL) {
> if (LIST_EMPTY(&re->prefix_h))
> - LIST_INSERT_HEAD(&re->prefix_h, p, rib_l);
> + LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
> else {
> - LIST_FOREACH(xp, &re->prefix_h, rib_l) {
> + LIST_FOREACH(xp, &re->prefix_h, entry.list.rib) {
> if (prefix_cmp(p, xp) > 0) {
> - LIST_INSERT_BEFORE(xp, p, rib_l);
> + LIST_INSERT_BEFORE(xp, p,
> + entry.list.rib);
> break;
> - } else if (LIST_NEXT(xp, rib_l) == NULL) {
> + } else if (LIST_NEXT(xp, entry.list.rib) ==
> + NULL) {
> /* if xp last element ... */
> - LIST_INSERT_AFTER(xp, p, rib_l);
> + LIST_INSERT_AFTER(xp, p,
> + entry.list.rib);
> break;
> }
> }
> Index: rde_rib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> retrieving revision 1.198
> diff -u -p -r1.198 rde_rib.c
> --- rde_rib.c 1 Jul 2019 14:47:56 -0000 1.198
> +++ rde_rib.c 10 Jul 2019 14:27:45 -0000
> @@ -52,8 +52,10 @@ RB_GENERATE(rib_tree, rib_entry, rib_e,
> struct rib_context {
> LIST_ENTRY(rib_context) entry;
> struct rib_entry *ctx_re;
> - u_int16_t ctx_rib_id;
> - void (*ctx_upcall)(struct rib_entry *, void *);
> + struct prefix *ctx_p;
> + u_int32_t ctx_id;
> + void (*ctx_rib_call)(struct rib_entry *, void *);
> + void (*ctx_prefix_call)(struct prefix *, void *);
> void (*ctx_done)(void *, u_int8_t);
> int (*ctx_throttle)(void *);
> void *ctx_arg;
> @@ -69,6 +71,7 @@ static int prefix_add(struct bgpd_addr *
> static int prefix_move(struct prefix *, struct rde_peer *,
> struct rde_aspath *, struct rde_community *,
> struct nexthop *, u_int8_t, u_int8_t);
> +static void prefix_dump_r(struct rib_context *);
>
> static inline struct rib_entry *
> re_lock(struct rib_entry *re)
> @@ -128,7 +131,7 @@ rib_new(char *name, u_int rtableid, u_in
> rib_size = id + 1;
> }
>
> - bzero(&ribs[id], sizeof(struct rib_desc));
> + memset(&ribs[id], 0, sizeof(struct rib_desc));
> strlcpy(ribs[id].name, name, sizeof(ribs[id].name));
> RB_INIT(rib_tree(&ribs[id].rib));
> ribs[id].state = RECONF_REINIT;
> @@ -196,7 +199,7 @@ rib_free(struct rib *rib)
> */
> while ((p = LIST_FIRST(&re->prefix_h))) {
> struct rde_aspath *asp = prefix_aspath(p);
> - np = LIST_NEXT(p, rib_l);
> + np = LIST_NEXT(p, entry.list.rib);
> if (asp && asp->pftableid) {
> struct bgpd_addr addr;
>
> @@ -215,7 +218,7 @@ rib_free(struct rib *rib)
> rd = &ribs[rib->id];
> filterlist_free(rd->in_rules_tmp);
> filterlist_free(rd->in_rules);
> - bzero(rd, sizeof(struct rib_desc));
> + memset(rd, 0, sizeof(struct rib_desc));
> }
>
> void
> @@ -235,7 +238,7 @@ rib_shutdown(void)
> struct rib_desc *rd = &ribs[id];
> filterlist_free(rd->in_rules_tmp);
> filterlist_free(rd->in_rules);
> - bzero(rd, sizeof(struct rib_desc));
> + memset(rd, 0, sizeof(struct rib_desc));
> }
> free(ribs);
> }
> @@ -247,7 +250,7 @@ rib_get(struct rib *rib, struct bgpd_add
> struct pt_entry *pte;
>
> pte = pt_fill(prefix, prefixlen);
> - bzero(&xre, sizeof(xre));
> + memset(&xre, 0, sizeof(xre));
> xre.prefix = pte;
>
> re = RB_FIND(rib_tree, rib_tree(rib), &xre);
> @@ -258,7 +261,7 @@ rib_get(struct rib *rib, struct bgpd_add
> }
>
> struct rib_entry *
> -rib_lookup(struct rib *rib, struct bgpd_addr *addr)
> +rib_match(struct rib *rib, struct bgpd_addr *addr)
> {
> struct rib_entry *re;
> int i;
> @@ -281,7 +284,7 @@ rib_lookup(struct rib *rib, struct bgpd_
> }
> break;
> default:
> - fatalx("rib_lookup: unknown af");
> + fatalx("%s: unknown af", __func__);
> }
> return (NULL);
> }
> @@ -367,9 +370,9 @@ rib_dump_r(struct rib_context *ctx)
> struct rib *rib;
> unsigned int i;
>
> - rib = rib_byid(ctx->ctx_rib_id);
> + rib = rib_byid(ctx->ctx_id);
> if (rib == NULL)
> - fatalx("%s: rib id %u gone", __func__, ctx->ctx_rib_id);
> + fatalx("%s: rib id %u gone", __func__, ctx->ctx_id);
>
> if (ctx->ctx_re == NULL)
> re = RB_MIN(rib_tree, rib_tree(rib));
> @@ -378,9 +381,9 @@ rib_dump_r(struct rib_context *ctx)
>
> for (i = 0; re != NULL; re = next) {
> next = RB_NEXT(rib_tree, unused, re);
> - if (re->rib_id != ctx->ctx_rib_id)
> + if (re->rib_id != ctx->ctx_id)
> fatalx("%s: Unexpected RIB %u != %u.", __func__,
> - re->rib_id, ctx->ctx_rib_id);
> + re->rib_id, ctx->ctx_id);
> if (ctx->ctx_aid != AID_UNSPEC &&
> ctx->ctx_aid != re->prefix->aid)
> continue;
> @@ -391,7 +394,7 @@ rib_dump_r(struct rib_context *ctx)
> re_lock(re);
> return;
> }
> - ctx->ctx_upcall(re, ctx->ctx_arg);
> + ctx->ctx_rib_call(re, ctx->ctx_arg);
> }
>
> if (ctx->ctx_done)
> @@ -422,7 +425,10 @@ rib_dump_runner(void)
> LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
> if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg))
> continue;
> - rib_dump_r(ctx);
> + if (ctx->ctx_rib_call != NULL)
> + rib_dump_r(ctx);
> + else
> + prefix_dump_r(ctx);
> }
> }
>
> @@ -432,7 +438,7 @@ rib_dump_abort(u_int16_t id)
> struct rib_context *ctx, *next;
>
> LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
> - if (id != ctx->ctx_rib_id)
> + if (id != ctx->ctx_id)
> continue;
> if (ctx->ctx_done)
> ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
> @@ -450,11 +456,11 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
>
> if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
> return -1;
> - ctx->ctx_rib_id = id;
> + ctx->ctx_id = id;
> ctx->ctx_aid = aid;
> ctx->ctx_count = count;
> ctx->ctx_arg = arg;
> - ctx->ctx_upcall = upcall;
> + ctx->ctx_rib_call = upcall;
> ctx->ctx_done = done;
> ctx->ctx_throttle = throttle;
>
> @@ -468,14 +474,12 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
> }
>
> void
> -rib_dump_terminate(u_int16_t id, void *arg,
> - void (*upcall)(struct rib_entry *, void *))
> +rib_dump_terminate(void *arg)
> {
> struct rib_context *ctx, *next;
>
> LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
> - if (id != ctx->ctx_rib_id || ctx->ctx_arg != arg ||
> - ctx->ctx_upcall != upcall)
> + if (ctx->ctx_arg != arg)
> continue;
> if (ctx->ctx_done)
> ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
> @@ -610,15 +614,6 @@ path_update(struct rib *rib, struct rde_
> p->validation_state = vstate;
> return (2);
> }
> - if (p->flags) {
> - struct prefix_tree *prefix_head;
> - /* prefix is a pending update */
> - prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
> - &peer->updates[prefix->aid] :
> - &peer->withdraws[prefix->aid];
> - RB_REMOVE(prefix_tree, prefix_head, p);
> - p->flags = 0;
> - }
> }
>
> /*
> @@ -881,7 +876,14 @@ prefix_cmp(struct prefix *a, struct pref
> return pt_prefix_cmp(a->pt, b->pt);
> }
>
> -RB_GENERATE(prefix_tree, prefix, entry, prefix_cmp)
> +static inline int
> +prefix_index_cmp(struct prefix *a, struct prefix *b)
> +{
> + return pt_prefix_cmp(a->pt, b->pt);
> +}
> +
> +RB_GENERATE(prefix_tree, prefix, entry.tree.update, prefix_cmp)
> +RB_GENERATE_STATIC(prefix_index, prefix, entry.tree.index, prefix_index_cmp)
>
> /*
> * search for specified prefix of a peer. Returns NULL if not found.
> @@ -899,6 +901,52 @@ prefix_get(struct rib *rib, struct rde_p
> }
>
> /*
> + * lookup prefix in the peer prefix_index. Returns NULL if not found.
> + */
> +struct prefix *
> +prefix_lookup(struct rde_peer *peer, struct bgpd_addr *prefix,
> + int prefixlen)
> +{
> + struct prefix xp;
> + struct pt_entry *pte;
> +
> + memset(&xp, 0, sizeof(xp));
> + pte = pt_fill(prefix, prefixlen);
> + xp.pt = pte;
> +
> + return RB_FIND(prefix_index, &peer->adj_rib_out, &xp);
> +}
> +
> +struct prefix *
> +prefix_match(struct rde_peer *peer, struct bgpd_addr *addr)
> +{
> + struct prefix *p;
> + int i;
> +
> + switch (addr->aid) {
> + case AID_INET:
> + case AID_VPN_IPv4:
> + for (i = 32; i >= 0; i--) {
> + p = prefix_lookup(peer, addr, i);
> + if (p != NULL)
> + return p;
> + }
> + break;
> + case AID_INET6:
> + case AID_VPN_IPv6:
> + for (i = 128; i >= 0; i--) {
> + p = prefix_lookup(peer, addr, i);
> + if (p != NULL)
> + return p;
> + }
> + break;
> + default:
> + fatalx("%s: unknown af", __func__);
> + }
> + return NULL;
> +}
> +
> +/*
> * Adds or updates a prefix.
> */
> static int
> @@ -936,8 +984,8 @@ prefix_move(struct prefix *p, struct rde
> np->aspath = path_ref(asp);
> np->communities = communities_ref(comm);
> np->peer = peer;
> - np->pt = p->pt; /* skip refcnt update since ref is moved */
> np->re = p->re;
> + np->pt = p->pt; /* skip refcnt update since ref is moved */
> np->validation_state = vstate;
> np->nhflags = nhflags;
> np->nexthop = nexthop_ref(nexthop);
> @@ -957,7 +1005,7 @@ prefix_move(struct prefix *p, struct rde
> * This is safe because we create a new prefix and so the change
> * is noticed by prefix_evaluate().
> */
> - LIST_REMOVE(p, rib_l);
> + LIST_REMOVE(p, entry.list.rib);
> prefix_evaluate(np, np->re);
>
> /* remove old prefix node */
> @@ -1020,26 +1068,97 @@ prefix_add_eor(struct rde_peer *peer, u_
> if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL)
> /* no need to add if EoR marker already present */
> prefix_free(p);
> + /* EOR marker is not inserted into the adj_rib_out index */
> }
>
> /*
> * Put a prefix from the Adj-RIB-Out onto the update queue.
> */
> -void
> -prefix_update(struct rib *rib, struct rde_peer *peer,
> - struct bgpd_addr *prefix, int prefixlen)
> +int
> +prefix_update(struct rde_peer *peer, struct filterstate *state,
> + struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate)
> {
> + struct prefix_tree *prefix_head = NULL;
> + struct rde_aspath *asp;
> + struct rde_community *comm;
> struct prefix *p;
> + int created = 0;
>
> - p = prefix_get(rib, peer, prefix, prefixlen);
> - if (p == NULL) /* Got a dummy withdrawn request. */
> - return;
> + if ((p = prefix_lookup(peer, prefix, prefixlen)) != NULL) {
> + /* prefix is already in the Adj-RIB-Out */
> + if (p->flags & PREFIX_FLAG_WITHDRAW) {
> + created = 1; /* consider this a new entry */
> + peer->up_wcnt--;
> + prefix_head = &peer->withdraws[prefix->aid];
> + RB_REMOVE(prefix_tree, prefix_head, p);
> + } else if (p->flags & PREFIX_FLAG_DEAD) {
> + created = 1; /* consider this a new entry */
> + } else {
> + if (prefix_nhflags(p) == state->nhflags &&
> + prefix_nexthop(p) == state->nexthop &&
> + communities_equal(&state->communities,
> + prefix_communities(p)) &&
> + path_compare(&state->aspath, prefix_aspath(p)) ==
> + 0) {
> + /* nothing changed */
> + p->validation_state = vstate;
> + p->lastchange = time(NULL);
> + return 0;
> + }
> +
> + if (p->flags & PREFIX_FLAG_UPDATE) {
> + /* created = 0 so up_nlricnt is not increased */
> + prefix_head = &peer->updates[prefix->aid];
> + RB_REMOVE(prefix_tree, prefix_head, p);
> + }
> + }
> + /* unlink from aspath and remove nexthop ref */
> + nexthop_unref(p->nexthop);
> + communities_unref(p->communities);
> + path_unref(p->aspath);
> + p->flags &= ~PREFIX_FLAG_MASK;
> +
> + /* peer and pt remain */
> + } else {
> + p = prefix_alloc();
> + created = 1;
> +
> + p->pt = pt_get(prefix, prefixlen);
> + if (p->pt == NULL)
> + fatalx("%s: update for non existing prefix", __func__);
> + pt_ref(p->pt);
> + p->peer = peer;
> +
> + if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
> + fatalx("%s: RB index invariant violated", __func__);
> + }
> +
> + if ((asp = path_lookup(&state->aspath)) == NULL) {
> + /* Path not available, create and link a new one. */
> + asp = path_copy(path_get(), &state->aspath);
> + path_link(asp);
> + }
> +
> + if ((comm = communities_lookup(&state->communities)) == NULL) {
> + /* Communities not available, create and link a new one. */
> + comm = communities_link(&state->communities);
> + }
> +
> + p->aspath = path_ref(asp);
> + p->communities = communities_ref(comm);
> + p->nexthop = nexthop_ref(state->nexthop);
> + p->nhflags = state->nhflags;
>
> - if (p->flags != 0)
> + p->validation_state = vstate;
> + p->lastchange = time(NULL);
> +
> + if (p->flags & PREFIX_FLAG_MASK)
> fatalx("%s: bad flags %x", __func__, p->flags);
> - p->flags = PREFIX_FLAG_UPDATE;
> + p->flags |= PREFIX_FLAG_UPDATE;
> if (RB_INSERT(prefix_tree, &peer->updates[prefix->aid], p) != NULL)
> fatalx("%s: RB tree invariant violated", __func__);
> +
> + return created;
> }
>
> /*
> @@ -1047,15 +1166,19 @@ prefix_update(struct rib *rib, struct rd
> * the prefix in the RIB linked to the peer withdraw list.
> */
> int
> -prefix_withdraw(struct rib *rib, struct rde_peer *peer,
> - struct bgpd_addr *prefix, int prefixlen)
> +prefix_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen)
> {
> struct prefix *p;
>
> - p = prefix_get(rib, peer, prefix, prefixlen);
> + p = prefix_lookup(peer, prefix, prefixlen);
> if (p == NULL) /* Got a dummy withdrawn request. */
> return (0);
>
> + /* remove nexthop ref ... */
> + nexthop_unref(p->nexthop);
> + p->nexthop = NULL;
> + p->nhflags = 0;
> +
> /* unlink from aspath ...*/
> path_unref(p->aspath);
> p->aspath = NULL;
> @@ -1063,29 +1186,181 @@ prefix_withdraw(struct rib *rib, struct
> /* ... communities ... */
> communities_unref(p->communities);
> p->communities = NULL;
> + /* re already NULL */
>
> - /* ... and nexthop but keep the re link */
> - nexthop_unlink(p);
> - nexthop_unref(p->nexthop);
> - p->nexthop = NULL;
> - p->nhflags = 0;
> - /* re link still exists */
> + p->lastchange = time(NULL);
>
> - if (p->flags) {
> + if (p->flags & PREFIX_FLAG_MASK) {
> struct prefix_tree *prefix_head;
> /* p is a pending update or withdraw, remove first */
> prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
> &peer->updates[prefix->aid] :
> &peer->withdraws[prefix->aid];
> RB_REMOVE(prefix_tree, prefix_head, p);
> - p->flags = 0;
> + p->flags &= ~PREFIX_FLAG_MASK;
> }
> - p->flags = PREFIX_FLAG_WITHDRAW;
> + p->flags |= PREFIX_FLAG_WITHDRAW;
> if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL)
> fatalx("%s: RB tree invariant violated", __func__);
> return (1);
> }
>
> +static inline void
> +prefix_lock(struct prefix *p)
> +{
> + if (p->flags & PREFIX_FLAG_LOCKED)
> + fatalx("%s: locking locked prefix", __func__);
> + p->flags |= PREFIX_FLAG_LOCKED;
> +}
> +
> +static inline void
> +prefix_unlock(struct prefix *p)
> +{
> + if ((p->flags & PREFIX_FLAG_LOCKED) == 0)
> + fatalx("%s: unlocking unlocked prefix", __func__);
> + p->flags &= ~PREFIX_FLAG_LOCKED;
> +}
> +
> +static inline int
> +prefix_is_locked(struct prefix *p)
> +{
> + return (p->flags & PREFIX_FLAG_LOCKED) != 0;
> +}
> +
> +static inline int
> +prefix_is_dead(struct prefix *p)
> +{
> + return (p->flags & PREFIX_FLAG_DEAD) != 0;
> +}
> +
> +static struct prefix *
> +prefix_restart(struct rib_context *ctx)
> +{
> + struct prefix *p;
> +
> + p = ctx->ctx_p;
> + prefix_unlock(p);
> +
> + if (prefix_is_dead(p)) {
> + struct prefix *next;
> +
> + next = RB_NEXT(prefix_index, unused, p);
> + prefix_adjout_destroy(p);
> + p = next;
> + }
> + return p;
> +}
> +
> +void
> +prefix_adjout_destroy(struct prefix *p)
> +{
> + struct rde_peer *peer = prefix_peer(p);
> +
> + if (p->eor) {
> + /* EOR marker is not linked in the index */
> + prefix_free(p);
> + return;
> + }
> +
> + if (p->flags & PREFIX_FLAG_WITHDRAW)
> + RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p);
> + else if (p->flags & PREFIX_FLAG_UPDATE)
> + RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
> + /* nothing needs to be done for PREFIX_FLAG_DEAD */
> + p->flags &= ~PREFIX_FLAG_MASK;
> +
> + if (prefix_is_locked(p)) {
> + /* remove nexthop ref ... */
> + nexthop_unref(p->nexthop);
> + p->nexthop = NULL;
> + /* ... communities ... */
> + communities_unref(p->communities);
> + p->communities = NULL;
> + /* and unlink from aspath */
> + path_unref(p->aspath);
> + p->aspath = NULL;
> + p->nhflags = 0;
> + /* re already NULL */
> +
> + /* finally mark prefix dead */
> + p->flags |= PREFIX_FLAG_DEAD;
> + return;
> + }
> +
> + RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
> +
> + prefix_unlink(p);
> + prefix_free(p);
> +}
> +
> +static void
> +prefix_dump_r(struct rib_context *ctx)
> +{
> + struct prefix *p, *next;
> + struct rde_peer *peer;
> + unsigned int i;
> +
> + if ((peer = peer_get(ctx->ctx_id)) == NULL)
> + goto done;
> +
> + if (ctx->ctx_p == NULL)
> + p = RB_MIN(prefix_index, &peer->adj_rib_out);
> + else
> + p = prefix_restart(ctx);
> +
> + for (i = 0; p != NULL; p = next) {
> + next = RB_NEXT(prefix_index, unused, p);
> + if (prefix_is_dead(p))
> + continue;
> + if (ctx->ctx_aid != AID_UNSPEC &&
> + ctx->ctx_aid != p->pt->aid)
> + continue;
> + if (ctx->ctx_count && i++ >= ctx->ctx_count &&
> + !prefix_is_locked(p)) {
> + /* store and lock last element */
> + ctx->ctx_p = p;
> + prefix_lock(p);
> + return;
> + }
> + ctx->ctx_prefix_call(p, ctx->ctx_arg);
> + }
> +
> +done:
> + if (ctx->ctx_done)
> + ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
> + LIST_REMOVE(ctx, entry);
> + free(ctx);
> +}
> +
> +int
> +prefix_dump_new(struct rde_peer *peer, u_int8_t aid, unsigned int count,
> + void *arg, void (*upcall)(struct prefix *, void *),
> + void (*done)(void *, u_int8_t), int (*throttle)(void *))
> +{
> + struct rib_context *ctx;
> +
> + if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
> + return -1;
> + ctx->ctx_id = peer->conf.id;
> + ctx->ctx_aid = aid;
> + ctx->ctx_count = count;
> + ctx->ctx_arg = arg;
> + ctx->ctx_prefix_call = upcall;
> + ctx->ctx_done = done;
> + ctx->ctx_throttle = throttle;
> +
> + LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
> +
> + /* requested a sync traversal */
> + if (count == 0)
> + prefix_dump_r(ctx);
> +
> + return 0;
> +}
>
> /* dump a prefix into specified buffer */
> int
> @@ -1205,7 +1480,7 @@ prefix_bypeer(struct rib_entry *re, stru
> {
> struct prefix *p;
>
> - LIST_FOREACH(p, &re->prefix_h, rib_l)
> + LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
> if (prefix_peer(p) == peer)
> return (p);
> return (NULL);
> @@ -1237,7 +1512,7 @@ prefix_updateall(struct prefix *p, enum
> }
>
> /* redo the route decision */
> - LIST_REMOVE(p, rib_l);
> + LIST_REMOVE(p, entry.list.rib);
> /*
> * If the prefix is the active one remove it first,
> * this has to be done because we can not detect when
> @@ -1255,6 +1530,10 @@ prefix_updateall(struct prefix *p, enum
> void
> prefix_destroy(struct prefix *p)
> {
> + /* make route decision */
> + LIST_REMOVE(p, entry.list.rib);
> + prefix_evaluate(NULL, p->re);
> +
> prefix_unlink(p);
> prefix_free(p);
> }
> @@ -1290,13 +1569,6 @@ prefix_unlink(struct prefix *p)
> {
> struct rib_entry *re = p->re;
>
> - if (p->eor) /* nothing to unlink for EoR markers */
> - return;
> -
> - /* make route decision */
> - LIST_REMOVE(p, rib_l);
> - prefix_evaluate(NULL, re);
> -
> /* destroy all references to other objects */
> nexthop_unlink(p);
> nexthop_unref(p->nexthop);
> @@ -1310,7 +1582,7 @@ prefix_unlink(struct prefix *p)
> p->re = NULL;
> p->pt = NULL;
>
> - if (rib_empty(re))
> + if (re && rib_empty(re))
> rib_remove(re);
>
> /*
> @@ -1319,7 +1591,7 @@ prefix_unlink(struct prefix *p)
> */
> }
>
> -/* alloc and bzero new entry. May not fail. */
> +/* alloc and zero new entry. May not fail. */
> static struct prefix *
> prefix_alloc(void)
> {
> @@ -1430,7 +1702,7 @@ nexthop_runner(void)
> p = nh->next_prefix;
> for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) {
> prefix_updateall(p, nh->state, nh->oldstate);
> - p = LIST_NEXT(p, nexthop_l);
> + p = LIST_NEXT(p, entry.list.nexthop);
> }
>
> /* prep for next run, if not finished readd to tail of queue */
> @@ -1540,22 +1812,21 @@ nexthop_link(struct prefix *p)
> if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
> return;
>
> - LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, nexthop_l);
> + p->flags |= PREFIX_NEXTHOP_LINKED;
> + LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, entry.list.nexthop);
> }
>
> void
> nexthop_unlink(struct prefix *p)
> {
> - if (p->nexthop == NULL)
> - return;
> -
> - if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
> + if (p->nexthop == NULL || (p->flags & PREFIX_NEXTHOP_LINKED) == 0)
> return;
>
> if (p == p->nexthop->next_prefix)
> - p->nexthop->next_prefix = LIST_NEXT(p, nexthop_l);
> + p->nexthop->next_prefix = LIST_NEXT(p, entry.list.nexthop);
>
> - LIST_REMOVE(p, nexthop_l);
> + p->flags &= ~PREFIX_NEXTHOP_LINKED;
> + LIST_REMOVE(p, entry.list.nexthop);
> }
>
> struct nexthop *
> Index: rde_update.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> retrieving revision 1.119
> diff -u -p -r1.119 rde_update.c
> --- rde_update.c 2 Jul 2019 12:07:00 -0000 1.119
> +++ rde_update.c 4 Jul 2019 08:59:13 -0000
> @@ -143,8 +143,7 @@ withdraw:
>
> /* withdraw prefix */
> pt_getaddr(old->pt, &addr);
> - if (prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
> - old->pt->prefixlen) == 1)
> + if (prefix_withdraw(peer, &addr, old->pt->prefixlen) == 1)
> peer->up_wcnt++;
> } else {
> switch (up_test_update(peer, new)) {
> @@ -165,13 +164,11 @@ withdraw:
> }
>
> pt_getaddr(new->pt, &addr);
> - if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
> - new->pt->prefixlen, prefix_vstate(new)) != 2) {
> - /* only send update if path changed */
> - prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
> - new->pt->prefixlen);
> +
> + /* only send update if path changed */
> + if (prefix_update(peer, &state, &addr, new->pt->prefixlen,
> + prefix_vstate(new)) == 1)
> peer->up_nlricnt++;
> - }
>
> rde_filterstate_clean(&state);
> }
> @@ -229,11 +226,8 @@ up_generate_default(struct filter_head *
> return;
> }
>
> - if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr, 0,
> - ROA_NOTFOUND) != 2) {
> - prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr, 0);
> + if (prefix_update(peer, &state, &addr, 0, ROA_NOTFOUND) == 1)
> peer->up_nlricnt++;
> - }
>
> /* no longer needed */
> rde_filterstate_clean(&state);
> @@ -576,8 +570,13 @@ up_is_eor(struct rde_peer *peer, u_int8_
>
> p = RB_MIN(prefix_tree, &peer->updates[aid]);
> if (p != NULL && p->eor) {
> + /*
> + * Need to remove eor from update tree because
> + * prefix_adjout_destroy() can't handle that.
> + */
> RB_REMOVE(prefix_tree, &peer->updates[aid], p);
> - prefix_destroy(p);
> + p->flags &= ~PREFIX_FLAG_MASK;
> + prefix_adjout_destroy(p);
> return 1;
> }
> return 0;
> @@ -616,11 +615,11 @@ up_dump_prefix(u_char *buf, int len, str
>
> /* prefix sent, remove from list and clear flag */
> RB_REMOVE(prefix_tree, prefix_head, p);
> - p->flags = 0;
> + p->flags &= ~PREFIX_FLAG_MASK;
>
> if (withdraw) {
> /* prefix no longer needed, remove it */
> - prefix_destroy(p);
> + prefix_adjout_destroy(p);
> peer->up_wcnt--;
> peer->prefix_sent_withdraw++;
> } else {
>
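A closing note for anyone reading prefix_update()/prefix_dump_r() above:
a prefix can sit in two RB trees at once -- the permanent per-peer
adj_rib_out index and the transient updates/withdraws queue -- because
each tree hangs off its own RB_ENTRY member (entry.tree.index vs.
entry.tree.update). A toy version of that pattern, with made-up names:

#include <sys/tree.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

struct pfx {
	RB_ENTRY(pfx)	 index_entry;	/* linkage for the index tree */
	RB_ENTRY(pfx)	 queue_entry;	/* linkage for the queue tree */
	int		 id;
};

static int
pfx_cmp(struct pfx *a, struct pfx *b)
{
	return (a->id < b->id ? -1 : a->id > b->id);
}

RB_HEAD(pfx_index, pfx);
RB_HEAD(pfx_queue, pfx);
RB_GENERATE_STATIC(pfx_index, pfx, index_entry, pfx_cmp)
RB_GENERATE_STATIC(pfx_queue, pfx, queue_entry, pfx_cmp)

int
main(void)
{
	struct pfx_index	 idx = RB_INITIALIZER(&idx);
	struct pfx_queue	 q = RB_INITIALIZER(&q);
	struct pfx		*p;

	if ((p = calloc(1, sizeof(*p))) == NULL)
		err(1, NULL);
	p->id = 1;

	/* the same node is linked into both trees at the same time */
	RB_INSERT(pfx_index, &idx, p);
	RB_INSERT(pfx_queue, &q, p);

	/* "sending" the update dequeues it but keeps it indexed */
	RB_REMOVE(pfx_queue, &q, p);
	printf("still indexed: %s\n",
	    RB_FIND(pfx_index, &idx, p) == p ? "yes" : "no");

	RB_REMOVE(pfx_index, &idx, p);
	free(p);
	return (0);
}

The PREFIX_FLAG_LOCKED/PREFIX_FLAG_DEAD dance exists for the same
reason: a suspended dump keeps a pointer into the index tree, so
prefix_adjout_destroy() only marks a locked prefix dead, and
prefix_restart() reaps it when the walk resumes.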