On Fri, Sep 09, 2022 at 05:50:17PM +0200, Claudio Jeker wrote:
> This diff optimizes subtree walks. In other words it specifies a subtree
> (as a prefix/prefixlen combo) and only walks the entries that are under
> this covering route.
>
> Instead of doing a full table walk this will only walk part of the tree
> and is therefore much faster if the subtree is small.
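
For anyone following along: the trick is to seed the walk with RB_NFIND()
at the covering prefix instead of starting from RB_MIN(), and to stop as
soon as prefix_compare() reports an entry that no longer lies under it.
A rough standalone sketch of the idea, simplified to bare 32-bit addresses
(the struct and function names are invented for illustration, this is not
the bgpd code):

#include <sys/tree.h>
#include <stdint.h>
#include <stdio.h>

struct node {
    RB_ENTRY(node)  entry;
    uint32_t        addr;           /* host byte order "address" */
};

static int
node_cmp(struct node *a, struct node *b)
{
    return (a->addr > b->addr) - (a->addr < b->addr);
}

RB_HEAD(nodetree, node);
RB_GENERATE(nodetree, node, entry, node_cmp)

static struct nodetree head = RB_INITIALIZER(&head);

/* visit only the entries covered by prefix/plen, not the whole tree */
static void
walk_subtree(uint32_t prefix, uint8_t plen)
{
    uint32_t mask = plen ? 0xffffffffU << (32 - plen) : 0;
    struct node key = { .addr = prefix & mask }, *n;

    /* jump straight to the first entry >= the covering prefix */
    for (n = RB_NFIND(nodetree, &head, &key); n != NULL;
        n = RB_NEXT(nodetree, &head, n)) {
        if ((n->addr & mask) != (prefix & mask))
            break;      /* left the subtree, walk is done */
        printf("visit 0x%08x\n", n->addr);
    }
}

int
main(void)
{
    static struct node nodes[] = {
        { .addr = 0x0a000001 },     /* 10.0.0.1 */
        { .addr = 0x0a000105 },     /* 10.0.1.5 */
        { .addr = 0x0b000001 },     /* 11.0.0.1 */
    };
    size_t i;

    for (i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++)
        RB_INSERT(nodetree, &head, &nodes[i]);

    walk_subtree(0x0a000000, 8);    /* prints only the two 10/8 entries */
    return 0;
}

Since the tree is sorted, everything under the covering prefix is
contiguous, so breaking at the first non-matching entry is all the
bookkeeping needed; that is exactly what the new ctx_subtree check in
rib_dump_r()/prefix_dump_r() does.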
The diff looks good. The two new dump_subtree() functions are currently
only called with a count of CTL_MSG_HIGH_MARK, so the two "if (count == 0)"
branches that call rib_dump_r(ctx) and prefix_dump_r(ctx) are dead code
for now. I assume you anticipate that this might change.

> --
> :wq Claudio
>
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.575
> diff -u -p -r1.575 rde.c
> --- rde.c       9 Sep 2022 13:33:24 -0000      1.575
> +++ rde.c       9 Sep 2022 14:46:03 -0000
> @@ -2648,35 +2648,6 @@ rde_dump_upcall(struct rib_entry *re, vo
>  }
>
>  static void
> -rde_dump_prefix_upcall(struct rib_entry *re, void *ptr)
> -{
> -    struct rde_dump_ctx *ctx = ptr;
> -    struct prefix       *p;
> -    struct pt_entry     *pt;
> -    struct bgpd_addr     addr;
> -
> -    pt = re->prefix;
> -    pt_getaddr(pt, &addr);
> -    if (addr.aid != ctx->req.prefix.aid)
> -        return;
> -    if (ctx->req.flags & F_LONGER) {
> -        if (ctx->req.prefixlen > pt->prefixlen)
> -            return;
> -        if (!prefix_compare(&ctx->req.prefix, &addr,
> -            ctx->req.prefixlen))
> -            TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
> -                rde_dump_filter(p, &ctx->req, 0);
> -    } else {
> -        if (ctx->req.prefixlen < pt->prefixlen)
> -            return;
> -        if (!prefix_compare(&addr, &ctx->req.prefix,
> -            pt->prefixlen))
> -            TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
> -                rde_dump_filter(p, &ctx->req, 0);
> -    }
> -}
> -
> -static void
>  rde_dump_adjout_upcall(struct prefix *p, void *ptr)
>  {
>      struct rde_dump_ctx *ctx = ptr;
> @@ -2688,35 +2659,6 @@ rde_dump_adjout_upcall(struct prefix *p,
>      rde_dump_filter(p, &ctx->req, 1);
>  }
>
> -static void
> -rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr)
> -{
> -    struct rde_dump_ctx *ctx = ptr;
> -    struct bgpd_addr     addr;
> -
> -    if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
> -        fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
> -    if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
> -        return;
> -
> -    pt_getaddr(p->pt, &addr);
> -    if (addr.aid != ctx->req.prefix.aid)
> -        return;
> -    if (ctx->req.flags & F_LONGER) {
> -        if (ctx->req.prefixlen > p->pt->prefixlen)
> -            return;
> -        if (!prefix_compare(&ctx->req.prefix, &addr,
> -            ctx->req.prefixlen))
> -            rde_dump_filter(p, &ctx->req, 1);
> -    } else {
> -        if (ctx->req.prefixlen < p->pt->prefixlen)
> -            return;
> -        if (!prefix_compare(&addr, &ctx->req.prefix,
> -            p->pt->prefixlen))
> -            rde_dump_filter(p, &ctx->req, 1);
> -    }
> -}
> -
>  static int
>  rde_dump_throttled(void *arg)
>  {
> @@ -2745,10 +2687,10 @@ rde_dump_done(void *arg, uint8_t aid)
>              goto nomem;
>          break;
>      case IMSG_CTL_SHOW_RIB_PREFIX:
> -        if (prefix_dump_new(peer, ctx->req.aid,
> -            CTL_MSG_HIGH_MARK, ctx,
> -            rde_dump_adjout_prefix_upcall,
> -            rde_dump_done, rde_dump_throttled) == -1)
> +        if (prefix_dump_subtree(peer, &ctx->req.prefix,
> +            ctx->req.prefixlen, CTL_MSG_HIGH_MARK, ctx,
> +            rde_dump_adjout_upcall, rde_dump_done,
> +            rde_dump_throttled) == -1)
>              goto nomem;
>          break;
>      default:
> @@ -2817,9 +2759,9 @@ rde_dump_ctx_new(struct ctl_show_rib_req
>          break;
>      case IMSG_CTL_SHOW_RIB_PREFIX:
>          if (req->flags & F_LONGER) {
> -            if (prefix_dump_new(peer, ctx->req.aid,
> -                CTL_MSG_HIGH_MARK, ctx,
> -                rde_dump_adjout_prefix_upcall,
> +            if (prefix_dump_subtree(peer, &req->prefix,
> +                req->prefixlen, CTL_MSG_HIGH_MARK, ctx,
> +                rde_dump_adjout_upcall,
>                  rde_dump_done, rde_dump_throttled) == -1)
>                  goto nomem;
>              break;
> @@ -2900,8 +2842,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req
>          break;
>      case IMSG_CTL_SHOW_RIB_PREFIX:
>          if (req->flags & F_LONGER) {
> -            if (rib_dump_new(rid, ctx->req.aid,
> -                CTL_MSG_HIGH_MARK, ctx, rde_dump_prefix_upcall,
> +            if (rib_dump_subtree(rid, &req->prefix, req->prefixlen,
> +                CTL_MSG_HIGH_MARK, ctx, rde_dump_upcall,
>                  rde_dump_done, rde_dump_throttled) == -1)
>                  goto nomem;
>              break;
> Index: rde.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> retrieving revision 1.270
> diff -u -p -r1.270 rde.h
> --- rde.h       1 Sep 2022 13:23:24 -0000      1.270
> +++ rde.h       9 Sep 2022 14:44:31 -0000
> @@ -570,6 +570,11 @@ int rib_dump_new(uint16_t, uint8_t, un
>          void (*)(struct rib_entry *, void *),
>          void (*)(void *, uint8_t),
>          int (*)(void *));
> +int rib_dump_subtree(uint16_t, struct bgpd_addr *, uint8_t,
> +        unsigned int count, void *arg,
> +        void (*)(struct rib_entry *, void *),
> +        void (*)(void *, uint8_t),
> +        int (*)(void *));
>  void rib_dump_terminate(void *);
>
>  static inline struct rib *
> @@ -611,6 +616,10 @@ void prefix_adjout_dump(struct rde_pee
>          void (*)(struct prefix *, void *));
>  int prefix_dump_new(struct rde_peer *, uint8_t, unsigned int,
>          void *, void (*)(struct prefix *, void *),
> +        void (*)(void *, uint8_t), int (*)(void *));
> +int prefix_dump_subtree(struct rde_peer *, struct bgpd_addr *,
> +        uint8_t, unsigned int, void *,
> +        void (*)(struct prefix *, void *),
>          void (*)(void *, uint8_t), int (*)(void *));
>  int prefix_write(u_char *, int, struct bgpd_addr *, uint8_t, int);
>  int prefix_writebuf(struct ibuf *, struct bgpd_addr *, uint8_t);
> Index: rde_rib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> retrieving revision 1.248
> diff -u -p -r1.248 rde_rib.c
> --- rde_rib.c   1 Sep 2022 13:19:11 -0000      1.248
> +++ rde_rib.c   9 Sep 2022 14:54:26 -0000
> @@ -58,8 +58,10 @@ struct rib_context {
>      void                (*ctx_done)(void *, uint8_t);
>      int                 (*ctx_throttle)(void *);
>      void                *ctx_arg;
> +    struct bgpd_addr     ctx_subtree;
>      unsigned int         ctx_count;
>      uint8_t              ctx_aid;
> +    uint8_t              ctx_subtreelen;
>  };
>  LIST_HEAD(, rib_context) rib_dumps = LIST_HEAD_INITIALIZER(rib_dumps);
>
> @@ -396,16 +398,17 @@ rib_empty(struct rib_entry *re)
>  static struct rib_entry *
>  rib_restart(struct rib_context *ctx)
>  {
> -    struct rib_entry *re;
> +    struct rib_entry *re = NULL;
>
> -    re = re_unlock(ctx->ctx_re);
> +    if (ctx->ctx_re)
> +        re = re_unlock(ctx->ctx_re);
>
>      /* find first non empty element */
>      while (re && rib_empty(re))
>          re = RB_NEXT(rib_tree, unused, re);
>
>      /* free the previously locked rib element if empty */
> -    if (rib_empty(ctx->ctx_re))
> +    if (ctx->ctx_re && rib_empty(ctx->ctx_re))
>          rib_remove(ctx->ctx_re);
>      ctx->ctx_re = NULL;
>      return (re);
> @@ -422,7 +425,7 @@ rib_dump_r(struct rib_context *ctx)
>      if (rib == NULL)
>          fatalx("%s: rib id %u gone", __func__, ctx->ctx_id);
>
> -    if (ctx->ctx_re == NULL)
> +    if (ctx->ctx_re == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
>          re = RB_MIN(rib_tree, rib_tree(rib));
>      else
>          re = rib_restart(ctx);
> @@ -435,6 +438,14 @@ rib_dump_r(struct rib_context *ctx)
>          if (ctx->ctx_aid != AID_UNSPEC &&
>              ctx->ctx_aid != re->prefix->aid)
>              continue;
> +        if (ctx->ctx_subtree.aid != AID_UNSPEC) {
> +            struct bgpd_addr addr;
> +            pt_getaddr(re->prefix, &addr);
> +            if (prefix_compare(&ctx->ctx_subtree, &addr,
> +                ctx->ctx_subtreelen) != 0)
> +                /* left subtree, walk is done */
> +                break;
> +        }
>          if (ctx->ctx_count && i++ >= ctx->ctx_count &&
>              !re_is_locked(re)) {
>              /* store and lock last element */
> @@ -543,6 +554,42 @@ rib_dump_new(uint16_t id, uint8_t aid, u
>      return 0;
>  }
>
> +int
> +rib_dump_subtree(uint16_t id, struct bgpd_addr *subtree, uint8_t subtreelen,
> +    unsigned int count, void *arg, void (*upcall)(struct rib_entry *, void *),
> +    void (*done)(void *, uint8_t), int (*throttle)(void *))
> +{
> +    struct rib_context *ctx;
> +    struct rib_entry xre;
> +
> +    if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
> +        return -1;
> +    ctx->ctx_id = id;
> +    ctx->ctx_aid = subtree->aid;
> +    ctx->ctx_count = count;
> +    ctx->ctx_arg = arg;
> +    ctx->ctx_rib_call = upcall;
> +    ctx->ctx_done = done;
> +    ctx->ctx_throttle = throttle;
> +    ctx->ctx_subtree = *subtree;
> +    ctx->ctx_subtreelen = subtreelen;
> +
> +    LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
> +
> +    /* lookup start of subtree */
> +    memset(&xre, 0, sizeof(xre));
> +    xre.prefix = pt_fill(subtree, subtreelen);
> +    ctx->ctx_re = RB_NFIND(rib_tree, rib_tree(rib_byid(id)), &xre);
> +    if (ctx->ctx_re)
> +        re_lock(ctx->ctx_re);
> +
> +    /* requested a sync traversal */
> +    if (count == 0)
> +        rib_dump_r(ctx);

This is currently never called with a count of 0, so this rib_dump_r()
call is not reachable.

> +
> +    return 0;
> +}
> +
>  /* path specific functions */
>
>  static struct rde_aspath *path_lookup(struct rde_aspath *);
> @@ -1253,11 +1300,12 @@ prefix_adjout_destroy(struct prefix *p)
>  static struct prefix *
>  prefix_restart(struct rib_context *ctx)
>  {
> -    struct prefix *p;
> +    struct prefix *p = NULL;
>
> -    p = prefix_unlock(ctx->ctx_p);
> +    if (ctx->ctx_p)
> +        p = prefix_unlock(ctx->ctx_p);
>
> -    if (prefix_is_dead(p)) {
> +    if (p && prefix_is_dead(p)) {
>          struct prefix *next;
>
>          next = RB_NEXT(prefix_index, unused, p);
> @@ -1278,7 +1326,7 @@ prefix_dump_r(struct rib_context *ctx)
>      if ((peer = peer_get(ctx->ctx_id)) == NULL)
>          goto done;
>
> -    if (ctx->ctx_p == NULL)
> +    if (ctx->ctx_p == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
>          p = RB_MIN(prefix_index, &peer->adj_rib_out);
>      else
>          p = prefix_restart(ctx);
> @@ -1290,6 +1338,14 @@ prefix_dump_r(struct rib_context *ctx)
>          if (ctx->ctx_aid != AID_UNSPEC &&
>              ctx->ctx_aid != p->pt->aid)
>              continue;
> +        if (ctx->ctx_subtree.aid != AID_UNSPEC) {
> +            struct bgpd_addr addr;
> +            pt_getaddr(p->pt, &addr);
> +            if (prefix_compare(&ctx->ctx_subtree, &addr,
> +                ctx->ctx_subtreelen) != 0)
> +                /* left subtree, walk is done */
> +                break;
> +        }
>          if (ctx->ctx_count && i++ >= ctx->ctx_count &&
>              !prefix_is_locked(p)) {
>              /* store and lock last element */
> @@ -1324,6 +1380,43 @@ prefix_dump_new(struct rde_peer *peer, u
>      ctx->ctx_throttle = throttle;
>
>      LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
> +
> +    /* requested a sync traversal */
> +    if (count == 0)
> +        prefix_dump_r(ctx);
> +
> +    return 0;
> +}
> +
> +int
> +prefix_dump_subtree(struct rde_peer *peer, struct bgpd_addr *subtree,
> +    uint8_t subtreelen, unsigned int count, void *arg,
> +    void (*upcall)(struct prefix *, void *), void (*done)(void *, uint8_t),
> +    int (*throttle)(void *))
> +{
> +    struct rib_context *ctx;
> +    struct prefix xp;
> +
> +    if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
> +        return -1;
> +    ctx->ctx_id = peer->conf.id;
> +    ctx->ctx_aid = subtree->aid;
> +    ctx->ctx_count = count;
> +    ctx->ctx_arg = arg;
> +    ctx->ctx_prefix_call = upcall;
> +    ctx->ctx_done = done;
> +    ctx->ctx_throttle = throttle;
> +    ctx->ctx_subtree = *subtree;
> +    ctx->ctx_subtreelen = subtreelen;
> +
> +    LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
> +
> +    /* lookup start of subtree */
> +    memset(&xp, 0, sizeof(xp));
> +    xp.pt = pt_fill(subtree, subtreelen);
> +    ctx->ctx_p = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
> +    if (ctx->ctx_p)
> +        prefix_lock(ctx->ctx_p);
>
>      /* requested a sync traversal */
>      if (count == 0)

Same here: prefix_dump_r() is not reachable through this call yet.
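
To make the dead-code remark concrete: the two branches would only fire
for a synchronous traversal, i.e. a caller along these lines (hypothetical,
not part of this diff, reusing the request fields already available in
rde_dump_ctx_new()):

    /* hypothetical synchronous dump: with count == 0,
     * prefix_dump_subtree() runs prefix_dump_r() to completion before
     * returning instead of batching CTL_MSG_HIGH_MARK entries per round */
    if (prefix_dump_subtree(peer, &req->prefix, req->prefixlen,
        0, ctx, rde_dump_adjout_upcall, rde_dump_done,
        rde_dump_throttled) == -1)
            goto nomem;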