On Fri, Sep 09, 2022 at 05:50:17PM +0200, Claudio Jeker wrote:
> This diff optimizes subtree walks. In other words, it specifies a subtree
> (as a prefix/prefixlen combo) and only walks the entries that are under
> this covering route.
> 
> Instead of doing a full table walk, this only walks part of the tree
> and is therefore much faster when the subtree is small.
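
For readers following along, the new API boils down to a call like the
one below (a sketch based on the rde_dump_ctx_new() hunk further down;
the address setup is my own illustration, not part of the diff):

        struct bgpd_addr subtree;

        memset(&subtree, 0, sizeof(subtree));
        subtree.aid = AID_INET6;
        /* assuming the v6 union member of struct bgpd_addr */
        inet_pton(AF_INET6, "2001:db8::", &subtree.v6);

        /* visit only RIB entries covered by 2001:db8::/32 */
        if (rib_dump_subtree(rid, &subtree, 32, CTL_MSG_HIGH_MARK, ctx,
            rde_dump_upcall, rde_dump_done, rde_dump_throttled) == -1)
                goto nomem;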

The diff looks good. The two new dump_subtree() functions are currently
only called with a count of CTL_MSG_HIGH_MARK, so the two sync-traversal
blocks of the form

        if (count == 0)
                prefix_dump_r(ctx);

are currently dead code. I assume you anticipate that this might change.
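
For concreteness, a caller that would make that path live might look
roughly like this (hypothetical; no such caller exists in this diff,
and I'm borrowing the adj-rib-out callbacks purely for illustration):

        /* count == 0 requests a synchronous, unchunked traversal */
        if (prefix_dump_subtree(peer, &req->prefix, req->prefixlen,
            0, ctx, rde_dump_adjout_upcall, rde_dump_done,
            rde_dump_throttled) == -1)
                goto nomem;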

> -- 
> :wq Claudio
> 
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.575
> diff -u -p -r1.575 rde.c
> --- rde.c     9 Sep 2022 13:33:24 -0000       1.575
> +++ rde.c     9 Sep 2022 14:46:03 -0000
> @@ -2648,35 +2648,6 @@ rde_dump_upcall(struct rib_entry *re, vo
>  }
>  
>  static void
> -rde_dump_prefix_upcall(struct rib_entry *re, void *ptr)
> -{
> -     struct rde_dump_ctx     *ctx = ptr;
> -     struct prefix           *p;
> -     struct pt_entry         *pt;
> -     struct bgpd_addr         addr;
> -
> -     pt = re->prefix;
> -     pt_getaddr(pt, &addr);
> -     if (addr.aid != ctx->req.prefix.aid)
> -             return;
> -     if (ctx->req.flags & F_LONGER) {
> -             if (ctx->req.prefixlen > pt->prefixlen)
> -                     return;
> -             if (!prefix_compare(&ctx->req.prefix, &addr,
> -                 ctx->req.prefixlen))
> -                     TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
> -                             rde_dump_filter(p, &ctx->req, 0);
> -     } else {
> -             if (ctx->req.prefixlen < pt->prefixlen)
> -                     return;
> -             if (!prefix_compare(&addr, &ctx->req.prefix,
> -                 pt->prefixlen))
> -                     TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
> -                             rde_dump_filter(p, &ctx->req, 0);
> -     }
> -}
> -
> -static void
>  rde_dump_adjout_upcall(struct prefix *p, void *ptr)
>  {
>       struct rde_dump_ctx     *ctx = ptr;
> @@ -2688,35 +2659,6 @@ rde_dump_adjout_upcall(struct prefix *p,
>       rde_dump_filter(p, &ctx->req, 1);
>  }
>  
> -static void
> -rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr)
> -{
> -     struct rde_dump_ctx     *ctx = ptr;
> -     struct bgpd_addr         addr;
> -
> -     if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
> -             fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
> -     if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
> -             return;
> -
> -     pt_getaddr(p->pt, &addr);
> -     if (addr.aid != ctx->req.prefix.aid)
> -             return;
> -     if (ctx->req.flags & F_LONGER) {
> -             if (ctx->req.prefixlen > p->pt->prefixlen)
> -                     return;
> -             if (!prefix_compare(&ctx->req.prefix, &addr,
> -                 ctx->req.prefixlen))
> -                     rde_dump_filter(p, &ctx->req, 1);
> -     } else {
> -             if (ctx->req.prefixlen < p->pt->prefixlen)
> -                     return;
> -             if (!prefix_compare(&addr, &ctx->req.prefix,
> -                 p->pt->prefixlen))
> -                     rde_dump_filter(p, &ctx->req, 1);
> -     }
> -}
> -
>  static int
>  rde_dump_throttled(void *arg)
>  {
> @@ -2745,10 +2687,10 @@ rde_dump_done(void *arg, uint8_t aid)
>                               goto nomem;
>                       break;
>               case IMSG_CTL_SHOW_RIB_PREFIX:
> -                     if (prefix_dump_new(peer, ctx->req.aid,
> -                         CTL_MSG_HIGH_MARK, ctx,
> -                         rde_dump_adjout_prefix_upcall,
> -                         rde_dump_done, rde_dump_throttled) == -1)
> +                     if (prefix_dump_subtree(peer, &ctx->req.prefix,
> +                         ctx->req.prefixlen, CTL_MSG_HIGH_MARK, ctx,
> +                         rde_dump_adjout_upcall, rde_dump_done,
> +                         rde_dump_throttled) == -1)
>                               goto nomem;
>                       break;
>               default:
> @@ -2817,9 +2759,9 @@ rde_dump_ctx_new(struct ctl_show_rib_req
>                       break;
>               case IMSG_CTL_SHOW_RIB_PREFIX:
>                       if (req->flags & F_LONGER) {
> -                             if (prefix_dump_new(peer, ctx->req.aid,
> -                                 CTL_MSG_HIGH_MARK, ctx,
> -                                 rde_dump_adjout_prefix_upcall,
> +                             if (prefix_dump_subtree(peer, &req->prefix,
> +                                 req->prefixlen, CTL_MSG_HIGH_MARK, ctx,
> +                                 rde_dump_adjout_upcall,
>                                   rde_dump_done, rde_dump_throttled) == -1)
>                                       goto nomem;
>                               break;
> @@ -2900,8 +2842,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req
>               break;
>       case IMSG_CTL_SHOW_RIB_PREFIX:
>               if (req->flags & F_LONGER) {
> -                     if (rib_dump_new(rid, ctx->req.aid,
> -                         CTL_MSG_HIGH_MARK, ctx, rde_dump_prefix_upcall,
> +                     if (rib_dump_subtree(rid, &req->prefix, req->prefixlen,
> +                         CTL_MSG_HIGH_MARK, ctx, rde_dump_upcall,
>                           rde_dump_done, rde_dump_throttled) == -1)
>                               goto nomem;
>                       break;
> Index: rde.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> retrieving revision 1.270
> diff -u -p -r1.270 rde.h
> --- rde.h     1 Sep 2022 13:23:24 -0000       1.270
> +++ rde.h     9 Sep 2022 14:44:31 -0000
> @@ -570,6 +570,11 @@ int               rib_dump_new(uint16_t, uint8_t, un
>                   void (*)(struct rib_entry *, void *),
>                   void (*)(void *, uint8_t),
>                   int (*)(void *));
> +int           rib_dump_subtree(uint16_t, struct bgpd_addr *, uint8_t,
> +                 unsigned int count, void *arg,
> +                 void (*)(struct rib_entry *, void *),
> +                 void (*)(void *, uint8_t),
> +                 int (*)(void *));
>  void          rib_dump_terminate(void *);
>  
>  static inline struct rib *
> @@ -611,6 +616,10 @@ void              prefix_adjout_dump(struct rde_pee
>                   void (*)(struct prefix *, void *));
>  int           prefix_dump_new(struct rde_peer *, uint8_t, unsigned int,
>                   void *, void (*)(struct prefix *, void *),
> +                 void (*)(void *, uint8_t), int (*)(void *));
> +int           prefix_dump_subtree(struct rde_peer *, struct bgpd_addr *,
> +                 uint8_t, unsigned int, void *,
> +                 void (*)(struct prefix *, void *),
>                   void (*)(void *, uint8_t), int (*)(void *));
>  int           prefix_write(u_char *, int, struct bgpd_addr *, uint8_t, int);
>  int           prefix_writebuf(struct ibuf *, struct bgpd_addr *, uint8_t);
> Index: rde_rib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> retrieving revision 1.248
> diff -u -p -r1.248 rde_rib.c
> --- rde_rib.c 1 Sep 2022 13:19:11 -0000       1.248
> +++ rde_rib.c 9 Sep 2022 14:54:26 -0000
> @@ -58,8 +58,10 @@ struct rib_context {
>       void            (*ctx_done)(void *, uint8_t);
>       int             (*ctx_throttle)(void *);
>       void                            *ctx_arg;
> +     struct bgpd_addr                 ctx_subtree;
>       unsigned int                     ctx_count;
>       uint8_t                          ctx_aid;
> +     uint8_t                          ctx_subtreelen;
>  };
>  LIST_HEAD(, rib_context) rib_dumps = LIST_HEAD_INITIALIZER(rib_dumps);
>  
> @@ -396,16 +398,17 @@ rib_empty(struct rib_entry *re)
>  static struct rib_entry *
>  rib_restart(struct rib_context *ctx)
>  {
> -     struct rib_entry *re;
> +     struct rib_entry *re = NULL;
>  
> -     re = re_unlock(ctx->ctx_re);
> +     if (ctx->ctx_re)
> +             re = re_unlock(ctx->ctx_re);
>  
>       /* find first non empty element */
>       while (re && rib_empty(re))
>               re = RB_NEXT(rib_tree, unused, re);
>  
>       /* free the previously locked rib element if empty */
> -     if (rib_empty(ctx->ctx_re))
> +     if (ctx->ctx_re && rib_empty(ctx->ctx_re))
>               rib_remove(ctx->ctx_re);
>       ctx->ctx_re = NULL;
>       return (re);
> @@ -422,7 +425,7 @@ rib_dump_r(struct rib_context *ctx)
>       if (rib == NULL)
>               fatalx("%s: rib id %u gone", __func__, ctx->ctx_id);
>  
> -     if (ctx->ctx_re == NULL)
> +     if (ctx->ctx_re == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
>               re = RB_MIN(rib_tree, rib_tree(rib));
>       else
>               re = rib_restart(ctx);
> @@ -435,6 +438,14 @@ rib_dump_r(struct rib_context *ctx)
>               if (ctx->ctx_aid != AID_UNSPEC &&
>                   ctx->ctx_aid != re->prefix->aid)
>                       continue;
> +             if (ctx->ctx_subtree.aid != AID_UNSPEC) {
> +                     struct bgpd_addr addr;
> +                     pt_getaddr(re->prefix, &addr);
> +                     if (prefix_compare(&ctx->ctx_subtree, &addr,
> +                        ctx->ctx_subtreelen) != 0)
> +                             /* left subtree, walk is done */
> +                             break;
> +             }
>               if (ctx->ctx_count && i++ >= ctx->ctx_count &&
>                   !re_is_locked(re)) {
>                       /* store and lock last element */
> @@ -543,6 +554,42 @@ rib_dump_new(uint16_t id, uint8_t aid, u
>       return 0;
>  }
>  
> +int
> +rib_dump_subtree(uint16_t id, struct bgpd_addr *subtree, uint8_t subtreelen,
> +    unsigned int count, void *arg, void (*upcall)(struct rib_entry *, void *),
> +    void (*done)(void *, uint8_t), int (*throttle)(void *))
> +{
> +     struct rib_context *ctx;
> +     struct rib_entry xre;
> +
> +     if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
> +             return -1;
> +     ctx->ctx_id = id;
> +     ctx->ctx_aid = subtree->aid;
> +     ctx->ctx_count = count;
> +     ctx->ctx_arg = arg;
> +     ctx->ctx_rib_call = upcall;
> +     ctx->ctx_done = done;
> +     ctx->ctx_throttle = throttle;
> +     ctx->ctx_subtree = *subtree;
> +     ctx->ctx_subtreelen = subtreelen;
> +
> +     LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
> +
> +     /* lookup start of subtree */
> +     memset(&xre, 0, sizeof(xre));
> +     xre.prefix = pt_fill(subtree, subtreelen);
> +     ctx->ctx_re = RB_NFIND(rib_tree, rib_tree(rib_byid(id)), &xre);
> +     if (ctx->ctx_re)
> +             re_lock(ctx->ctx_re);
> +
> +     /* requested a sync traversal */
> +     if (count == 0)
> +             rib_dump_r(ctx);

This is never called with a count of 0, so this rib_dump_r() call is
currently unreachable.

> +
> +     return 0;
> +}
> +
>  /* path specific functions */
>  
>  static struct rde_aspath *path_lookup(struct rde_aspath *);
> @@ -1253,11 +1300,12 @@ prefix_adjout_destroy(struct prefix *p)
>  static struct prefix *
>  prefix_restart(struct rib_context *ctx)
>  {
> -     struct prefix *p;
> +     struct prefix *p = NULL;
>  
> -     p = prefix_unlock(ctx->ctx_p);
> +     if (ctx->ctx_p)
> +             p = prefix_unlock(ctx->ctx_p);
>  
> -     if (prefix_is_dead(p)) {
> +     if (p && prefix_is_dead(p)) {
>               struct prefix *next;
>  
>               next = RB_NEXT(prefix_index, unused, p);
> @@ -1278,7 +1326,7 @@ prefix_dump_r(struct rib_context *ctx)
>       if ((peer = peer_get(ctx->ctx_id)) == NULL)
>               goto done;
>  
> -     if (ctx->ctx_p == NULL)
> +     if (ctx->ctx_p == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
>               p = RB_MIN(prefix_index, &peer->adj_rib_out);
>       else
>               p = prefix_restart(ctx);
> @@ -1290,6 +1338,14 @@ prefix_dump_r(struct rib_context *ctx)
>               if (ctx->ctx_aid != AID_UNSPEC &&
>                   ctx->ctx_aid != p->pt->aid)
>                       continue;
> +             if (ctx->ctx_subtree.aid != AID_UNSPEC) {
> +                     struct bgpd_addr addr;
> +                     pt_getaddr(p->pt, &addr);
> +                     if (prefix_compare(&ctx->ctx_subtree, &addr,
> +                        ctx->ctx_subtreelen) != 0)
> +                             /* left subtree, walk is done */
> +                             break;
> +             }
>               if (ctx->ctx_count && i++ >= ctx->ctx_count &&
>                   !prefix_is_locked(p)) {
>                       /* store and lock last element */
> @@ -1324,6 +1380,43 @@ prefix_dump_new(struct rde_peer *peer, u
>       ctx->ctx_throttle = throttle;
>  
>       LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
> +
> +     /* requested a sync traversal */
> +     if (count == 0)
> +             prefix_dump_r(ctx);
> +
> +     return 0;
> +}
> +
> +int
> +prefix_dump_subtree(struct rde_peer *peer, struct bgpd_addr *subtree,
> +    uint8_t subtreelen, unsigned int count, void *arg,
> +    void (*upcall)(struct prefix *, void *), void (*done)(void *, uint8_t),
> +    int (*throttle)(void *))
> +{
> +     struct rib_context *ctx;
> +     struct prefix xp;
> +
> +     if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
> +             return -1;
> +     ctx->ctx_id = peer->conf.id;
> +     ctx->ctx_aid = subtree->aid;
> +     ctx->ctx_count = count;
> +     ctx->ctx_arg = arg;
> +     ctx->ctx_prefix_call = upcall;
> +     ctx->ctx_done = done;
> +     ctx->ctx_throttle = throttle;
> +     ctx->ctx_subtree = *subtree;
> +     ctx->ctx_subtreelen = subtreelen;
> +
> +     LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
> +
> +     /* lookup start of subtree */
> +     memset(&xp, 0, sizeof(xp));
> +     xp.pt = pt_fill(subtree, subtreelen);
> +     ctx->ctx_p = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
> +     if (ctx->ctx_p)
> +             prefix_lock(ctx->ctx_p);
>  
>       /* requested a sync traversal */
>       if (count == 0)

Same here: this count == 0 path is currently unreachable too.

> 
