Claudio Jeker([email protected]) on 2019.07.10 22:08:38 +0200:
> This diff is a bit of a monster. It changes the Adj-RIB-Out to be a peer
> specific set of RB trees instead of using a rib in the original sense.
> The reason for this is that the more peers a system has the more elements
> end up being linked into the adj-rib-out and many operations do linear
> searches which does not scale.
> 
> I did some testing with 4000 peers sending 1 prefix each, which are then
> sent back to all peers (resulting in 16 million updates being put in the
> Adj-RIB-Out).
> Without this diff the system takes about 1h to bring up all sessions. With
> the diff the system finishes in around 5min.
> 
> To not increase the memory footprint, struct prefix is now using a union
> for the lists or RB trees. Additionally, the rib dump runner was adjusted
> so that it also works with the Adj-RIB-Out. "bgpctl show rib out" changed
> a bit, since it will now dump one peer after the other; apart from that,
> behaviour should be the same.

ok benno@

You have to fix up the regress tests too.

> 
> Please test
> -- 
> :wq Claudio
> 
> 
> Index: mrt.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
> retrieving revision 1.97
> diff -u -p -r1.97 mrt.c
> --- mrt.c     25 Jun 2019 21:33:55 -0000      1.97
> +++ mrt.c     26 Jun 2019 09:44:12 -0000
> @@ -512,15 +512,11 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
>               goto fail;
>       }
>       nump = 0;
> -     LIST_FOREACH(p, &re->prefix_h, rib_l) {
> +     LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
>               struct nexthop          *nexthop;
>               struct bgpd_addr        *nh;
>               struct ibuf             *tbuf;
>  
> -             /* skip pending withdraw in Adj-RIB-Out */
> -             if (prefix_aspath(p) == NULL)
> -                     continue;
> -
>               nexthop = prefix_nexthop(p);
>               if (nexthop == NULL) {
>                       bzero(&addr, sizeof(struct bgpd_addr));
> @@ -683,10 +679,7 @@ mrt_dump_upcall(struct rib_entry *re, vo
>        * dumps the table so we do the same. If only the active route should
>        * be dumped p should be set to p = pt->active.
>        */
> -     LIST_FOREACH(p, &re->prefix_h, rib_l) {
> -             /* skip pending withdraw in Adj-RIB-Out */
> -             if (prefix_aspath(p) == NULL)
> -                     continue;
> +     LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
>               if (mrtbuf->type == MRT_TABLE_DUMP)
>                       mrt_dump_entry(mrtbuf, p, mrtbuf->seqnum++,
>                           prefix_peer(p));
> Index: parse.y
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
> retrieving revision 1.392
> diff -u -p -r1.392 parse.y
> --- parse.y   22 Jun 2019 05:36:40 -0000      1.392
> +++ parse.y   22 Jun 2019 05:44:57 -0000
> @@ -3282,8 +3282,6 @@ parse_config(char *filename, struct peer
>  
>       add_rib("Adj-RIB-In", conf->default_tableid,
>           F_RIB_NOFIB | F_RIB_NOEVALUATE);
> -     add_rib("Adj-RIB-Out", conf->default_tableid,
> -         F_RIB_NOFIB | F_RIB_NOEVALUATE);
>       add_rib("Loc-RIB", conf->default_tableid, F_RIB_LOCAL);
>  
>       if ((file = pushfile(filename, 1)) == NULL)
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.475
> diff -u -p -r1.475 rde.c
> --- rde.c     1 Jul 2019 07:07:08 -0000       1.475
> +++ rde.c     1 Jul 2019 07:37:09 -0000
> @@ -94,8 +94,8 @@ u_int8_t     rde_roa_validity(struct rde_pr
>  void          peer_init(u_int32_t);
>  void          peer_shutdown(void);
>  int           peer_localaddrs(struct rde_peer *, struct bgpd_addr *);
> +struct rde_peer *peer_match(struct ctl_neighbor *, u_int32_t);
>  struct rde_peer      *peer_add(u_int32_t, struct peer_config *);
> -struct rde_peer      *peer_get(u_int32_t);
>  void          peer_up(u_int32_t, struct session_up *);
>  void          peer_down(u_int32_t);
>  void          peer_flush(struct rde_peer *, u_int8_t, time_t);
> @@ -133,7 +133,7 @@ int                        softreconfig;
>  struct rde_dump_ctx {
>       LIST_ENTRY(rde_dump_ctx)        entry;
>       struct ctl_show_rib_request     req;
> -     u_int16_t                       rid;
> +     u_int32_t                       peerid;
>       u_int8_t                        throttled;
>  };
>  
> @@ -220,7 +220,6 @@ rde_main(int debug, int verbose)
>  
>       /* make sure the default RIBs are setup */
>       rib_new("Adj-RIB-In", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);
> -     rib_new("Adj-RIB-Out", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);
>  
>       out_rules = calloc(1, sizeof(struct filter_head));
>       if (out_rules == NULL)
> @@ -2242,7 +2241,7 @@ rde_dump_rib_as(struct prefix *p, struct
>       rib.origin = asp->origin;
>       rib.validation_state = p->validation_state;
>       rib.flags = 0;
> -     if (p->re->active == p)
> +     if (p->re != NULL && p->re->active == p)
>               rib.flags |= F_PREF_ACTIVE;
>       if (!prefix_peer(p)->conf.ebgp)
>               rib.flags |= F_PREF_INTERNAL;
> @@ -2305,7 +2304,7 @@ rde_dump_rib_as(struct prefix *p, struct
>  }
>  
>  static int
> -rde_dump_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
> +rde_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
>  {
>       char *s;
>  
> @@ -2326,7 +2325,7 @@ rde_dump_filter(struct prefix *p, struct
>  {
>       struct rde_aspath       *asp;
>  
> -     if (!rde_dump_match_peer(prefix_peer(p), &req->neighbor))
> +     if (!rde_match_peer(prefix_peer(p), &req->neighbor))
>               return;
>  
>       asp = prefix_aspath(p);
> @@ -2353,10 +2352,10 @@ rde_dump_filter(struct prefix *p, struct
>  static void
>  rde_dump_upcall(struct rib_entry *re, void *ptr)
>  {
> -     struct prefix           *p;
>       struct rde_dump_ctx     *ctx = ptr;
> +     struct prefix           *p;
>  
> -     LIST_FOREACH(p, &re->prefix_h, rib_l)
> +     LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
>               rde_dump_filter(p, &ctx->req);
>  }
>  
> @@ -2375,10 +2374,38 @@ rde_dump_prefix_upcall(struct rib_entry 
>       if (ctx->req.prefixlen > pt->prefixlen)
>               return;
>       if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
> -             LIST_FOREACH(p, &re->prefix_h, rib_l)
> +             LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
>                       rde_dump_filter(p, &ctx->req);
>  }
>  
> +static void
> +rde_dump_adjout_upcall(struct prefix *p, void *ptr)
> +{
> +     struct rde_dump_ctx     *ctx = ptr;
> +
> +     if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
> +             return;
> +     rde_dump_filter(p, &ctx->req);
> +}
> +
> +static void
> +rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr)
> +{
> +     struct rde_dump_ctx     *ctx = ptr;
> +     struct bgpd_addr         addr;
> +
> +     if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
> +             return;
> +
> +     pt_getaddr(p->pt, &addr);
> +     if (addr.aid != ctx->req.prefix.aid)
> +             return;
> +     if (ctx->req.prefixlen > p->pt->prefixlen)
> +             return;
> +     if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
> +             rde_dump_filter(p, &ctx->req);
> +}
> +
>  static int
>  rde_dump_throttled(void *arg)
>  {
> @@ -2391,11 +2418,45 @@ static void
>  rde_dump_done(void *arg, u_int8_t aid)
>  {
>       struct rde_dump_ctx     *ctx = arg;
> +     struct rde_peer         *peer;
> +     u_int                    error;
>  
> -     imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
> -         -1, NULL, 0);
> +     if (ctx->req.flags & F_CTL_ADJ_OUT) {
> +             peer = peer_match(&ctx->req.neighbor, ctx->peerid);
> +             if (peer == NULL)
> +                     goto done;
> +             ctx->peerid = peer->conf.id;
> +             switch (ctx->req.type) {
> +             case IMSG_CTL_SHOW_RIB:
> +                     if (prefix_dump_new(peer, ctx->req.aid,
> +                         CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
> +                         rde_dump_done, rde_dump_throttled) == -1)
> +                             goto nomem;
> +                     break;
> +             case IMSG_CTL_SHOW_RIB_PREFIX:
> +                     if (prefix_dump_new(peer, ctx->req.aid,
> +                         CTL_MSG_HIGH_MARK, ctx,
> +                         rde_dump_adjout_prefix_upcall,
> +                         rde_dump_done, rde_dump_throttled) == -1)
> +                             goto nomem;
> +                     break;
> +             default:
> +                     fatalx("%s: unsupported imsg type", __func__);
> +             }
> +             return;
> +     }
> +done:
> +     imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, -1, NULL, 0);
>       LIST_REMOVE(ctx, entry);
>       free(ctx);
> +     return;
> +
> +nomem:
> +     log_warn(__func__);
> +     error = CTL_RES_NOMEM;
> +     imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, ctx->req.pid, -1, &error,
> +         sizeof(error));
> +     return;
>  }
>  
>  void
> @@ -2404,24 +2465,92 @@ rde_dump_ctx_new(struct ctl_show_rib_req
>  {
>       struct rde_dump_ctx     *ctx;
>       struct rib_entry        *re;
> +     struct prefix           *p;
>       u_int                    error;
>       u_int8_t                 hostplen;
>       u_int16_t                rid;
>  
>       if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
>   nomem:
> -             log_warn("rde_dump_ctx_new");
> +             log_warn(__func__);
>               error = CTL_RES_NOMEM;
>               imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
>                   sizeof(error));
>               return;
>       }
> +
> +     memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
> +     ctx->req.pid = pid;
> +     ctx->req.type = type;
> +
>       if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
>               rid = RIB_ADJ_IN;
>       } else if (req->flags & F_CTL_ADJ_OUT) {
> -             rid = RIB_ADJ_OUT;
> +             struct rde_peer *peer;
> +
> +             peer = peer_match(&req->neighbor, 0);
> +             if (peer == NULL) {
> +                     log_warnx("%s: no peer found for adj-rib-out",
> +                         __func__);
> +                     error = CTL_RES_NOSUCHPEER;
> +                     imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1,
> +                         &error, sizeof(error));
> +                     free(ctx);
> +                     return;
> +             }
> +             ctx->peerid = peer->conf.id;
> +             switch (ctx->req.type) {
> +             case IMSG_CTL_SHOW_RIB:
> +                     if (prefix_dump_new(peer, ctx->req.aid,
> +                         CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
> +                         rde_dump_done, rde_dump_throttled) == -1)
> +                             goto nomem;
> +                     break;
> +             case IMSG_CTL_SHOW_RIB_PREFIX:
> +                     if (req->flags & F_LONGER) {
> +                             if (prefix_dump_new(peer, ctx->req.aid,
> +                                 CTL_MSG_HIGH_MARK, ctx,
> +                                 rde_dump_adjout_prefix_upcall,
> +                                 rde_dump_done, rde_dump_throttled) == -1)
> +                                     goto nomem;
> +                             break;
> +                     }
> +                     switch (req->prefix.aid) {
> +                     case AID_INET:
> +                     case AID_VPN_IPv4:
> +                             hostplen = 32;
> +                             break;
> +                     case AID_INET6:
> +                     case AID_VPN_IPv6:
> +                             hostplen = 128;
> +                             break;
> +                     default:
> +                             fatalx("%s: unknown af", __func__);
> +                     }
> +
> +                     do {
> +                             if (req->prefixlen == hostplen)
> +                                     p = prefix_match(peer, &req->prefix);
> +                             else
> +                                     p = prefix_lookup(peer, &req->prefix,
> +                                         req->prefixlen);
> +                             if (p)
> +                                     rde_dump_adjout_upcall(p, ctx);
> +                     } while ((peer = peer_match(&req->neighbor,
> +                         peer->conf.id)));
> +
> +                     imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
> +                         -1, NULL, 0);
> +                     free(ctx);
> +                     return;
> +             default:
> +                     fatalx("%s: unsupported imsg type", __func__);
> +             }
> +     
> +             LIST_INSERT_HEAD(&rde_dump_h, ctx, entry);
> +             return;
>       } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
> -             log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
> +             log_warnx("%s: no such rib %s", __func__, req->rib);
>               error = CTL_RES_NOSUCHRIB;
>               imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
>                   sizeof(error));
> @@ -2429,10 +2558,6 @@ rde_dump_ctx_new(struct ctl_show_rib_req
>               return;
>       }
>  
> -     memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
> -     ctx->req.pid = pid;
> -     ctx->req.type = type;
> -     ctx->rid = rid;
>       switch (ctx->req.type) {
>       case IMSG_CTL_SHOW_NETWORK:
>               if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx,
> @@ -2463,10 +2588,10 @@ rde_dump_ctx_new(struct ctl_show_rib_req
>                       hostplen = 128;
>                       break;
>               default:
> -                     fatalx("rde_dump_ctx_new: unknown af");
> +                     fatalx("%s: unknown af", __func__);
>               }
>               if (req->prefixlen == hostplen)
> -                     re = rib_lookup(rib_byid(rid), &req->prefix);
> +                     re = rib_match(rib_byid(rid), &req->prefix);
>               else
>                       re = rib_get(rib_byid(rid), &req->prefix,
>                           req->prefixlen);
> @@ -2502,21 +2627,7 @@ rde_dump_ctx_terminate(pid_t pid)
>  
>       LIST_FOREACH(ctx, &rde_dump_h, entry) {
>               if (ctx->req.pid == pid) {
> -                     void (*upcall)(struct rib_entry *, void *);
> -                     switch (ctx->req.type) {
> -                     case IMSG_CTL_SHOW_NETWORK:
> -                             upcall = network_dump_upcall;
> -                             break;
> -                     case IMSG_CTL_SHOW_RIB:
> -                             upcall = rde_dump_upcall;
> -                             break;
> -                     case IMSG_CTL_SHOW_RIB_PREFIX:
> -                             upcall = rde_dump_prefix_upcall;
> -                             break;
> -                     default:
> -                             fatalx("%s: unsupported imsg type", __func__);
> -                     }
> -                     rib_dump_terminate(ctx->rid, ctx, upcall);
> +                     rib_dump_terminate(ctx);
>                       return;
>               }
>       }
> @@ -2697,16 +2808,9 @@ rde_up_dump_upcall(struct rib_entry *re,
>  }
>  
>  static void
> -rde_up_flush_upcall(struct rib_entry *re, void *ptr)
> +rde_up_flush_upcall(struct prefix *p, void *ptr)
>  {
> -     struct rde_peer *peer = ptr;
> -     struct prefix *p, *np;
> -
> -     LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> -             if (peer != prefix_peer(p))
> -                     continue;
> -             up_generate_updates(out_rules, peer, NULL, p);
> -     }
> +     up_generate_updates(out_rules, prefix_peer(p), NULL, p);
>  }
>  
>  static void
> @@ -3011,18 +3115,16 @@ rde_reload_done(void)
>               peer->reconf_out = 0;
>               peer->reconf_rib = 0;
>               if (peer->loc_rib_id != rib_find(peer->conf.rib)) {
> -                     char *p = log_fmt_peer(&peer->conf);
> -                     log_debug("rib change: reloading peer %s", p);
> -                     free(p);
> +                     log_peer_info(&peer->conf, "rib change, reloading");
>                       peer->loc_rib_id = rib_find(peer->conf.rib);
>                       if (peer->loc_rib_id == RIB_NOTFOUND)
>                               fatalx("King Bula's peer met an unknown RIB");
>                       peer->reconf_rib = 1;
>                       softreconfig++;
> -                     if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC,
> -                         RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
> +                     if (prefix_dump_new(peer, AID_UNSPEC,
> +                         RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall,
>                           rde_softreconfig_in_done, NULL) == -1)
> -                             fatal("%s: rib_dump_new", __func__);
> +                             fatal("%s: prefix_dump_new", __func__);
>                       log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
>                       continue;
>               }
> @@ -3197,7 +3299,7 @@ rde_softreconfig_in(struct rib_entry *re
>  
>       pt = re->prefix;
>       pt_getaddr(pt, &prefix);
> -     LIST_FOREACH(p, &re->prefix_h, rib_l) {
> +     LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
>               asp = prefix_aspath(p);
>               peer = prefix_peer(p);
>               force_eval = 0;
> @@ -3358,6 +3460,35 @@ peer_get(u_int32_t id)
>  }
>  
>  struct rde_peer *
> +peer_match(struct ctl_neighbor *n, u_int32_t peerid)
> +{
> +     struct rde_peer_head    *head;
> +     struct rde_peer         *peer;
> +     u_int32_t               i = 0;
> +
> +     if (peerid != 0)
> +             i = peerid & peertable.peer_hashmask;
> +
> +     while (i <= peertable.peer_hashmask) {
> +             head = &peertable.peer_hashtbl[i];
> +             LIST_FOREACH(peer, head, hash_l) {
> +                     /* skip peers until peerid is found */
> +                     if (peerid == peer->conf.id) {
> +                             peerid = 0;
> +                             continue;
> +                     }
> +                     if (peerid != 0)
> +                             continue;
> +
> +                     if (rde_match_peer(peer, n))
> +                             return (peer);
> +             }
> +             i++;
> +     }
> +     return (NULL);
> +}
> +
> +struct rde_peer *
>  peer_add(u_int32_t id, struct peer_config *p_conf)
>  {
>       struct rde_peer_head    *head;
> @@ -3441,17 +3572,9 @@ peer_localaddrs(struct rde_peer *peer, s
>  }
>  
>  static void
> -peer_adjout_flush_upcall(struct rib_entry *re, void *arg)
> +peer_adjout_clear_upcall(struct prefix *p, void *arg)
>  {
> -     struct rde_peer *peer = arg;
> -     struct prefix *p, *np;
> -
> -     LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> -             if (peer != prefix_peer(p))
> -                     continue;
> -             prefix_destroy(p);
> -             break;  /* optimization, only one match per peer possible */
> -     }
> +     prefix_adjout_destroy(p);
>  }
>  
>  void
> @@ -3472,9 +3595,9 @@ peer_up(u_int32_t id, struct session_up 
>                * There is a race condition when doing PEER_ERR -> PEER_DOWN.
>                * So just do a full reset of the peer here.
>                */
> -             if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer,
> -                 peer_adjout_flush_upcall, NULL, NULL) == -1)
> -                     fatal("%s: rib_dump_new", __func__);
> +             if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
> +                 peer_adjout_clear_upcall, NULL, NULL) == -1)
> +                     fatal("%s: prefix_dump_new", __func__);
>               peer_flush(peer, AID_UNSPEC, 0);
>               peer->prefix_cnt = 0;
>               peer->state = PEER_DOWN;
> @@ -3519,12 +3642,12 @@ peer_down(u_int32_t id)
>       peer->remote_bgpid = 0;
>       peer->state = PEER_DOWN;
>       /* stop all pending dumps which may depend on this peer */
> -     rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall);
> +     rib_dump_terminate(peer);
>  
>       /* flush Adj-RIB-Out for this peer */
> -     if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer,
> -         peer_adjout_flush_upcall, NULL, NULL) == -1)
> -             fatal("%s: rib_dump_new", __func__);
> +     if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
> +         peer_adjout_clear_upcall, NULL, NULL) == -1)
> +             fatal("%s: prefix_dump_new", __func__);
>  
>       peer_flush(peer, AID_UNSPEC, 0);
>  
> @@ -3553,7 +3676,7 @@ peer_flush_upcall(struct rib_entry *re, 
>  
>       pt_getaddr(re->prefix, &addr);
>       prefixlen = re->prefix->prefixlen;
> -     LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> +     LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
>               if (peer != prefix_peer(p))
>                       continue;
>               if (staletime && p->lastchange > staletime)
> @@ -3888,7 +4011,7 @@ network_dump_upcall(struct rib_entry *re
>       struct bgpd_addr         addr;
>       struct rde_dump_ctx     *ctx = ptr;
>  
> -     LIST_FOREACH(p, &re->prefix_h, rib_l) {
> +     LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
>               asp = prefix_aspath(p);
>               if (!(asp->flags & F_PREFIX_ANNOUNCED))
>                       continue;
> @@ -3925,7 +4048,7 @@ network_flush_upcall(struct rib_entry *r
>  
>       pt_getaddr(re->prefix, &addr);
>       prefixlen = re->prefix->prefixlen;
> -     LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
> +     LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
>               if (prefix_peer(p) != peer)
>                       continue;
>               asp = prefix_aspath(p);
> @@ -3965,9 +4088,6 @@ rde_shutdown(void)
>       for (i = 0; i <= peertable.peer_hashmask; i++)
>               while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
>                       peer_down(p->conf.id);
> -
> -     /* then since decision process is off, kill RIB_ADJ_OUT */
> -     rib_free(rib_byid(RIB_ADJ_OUT));
>  
>       /* free filters */
>       filterlist_free(out_rules);
> Index: rde.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> retrieving revision 1.219
> diff -u -p -r1.219 rde.h
> --- rde.h     1 Jul 2019 07:07:08 -0000       1.219
> +++ rde.h     10 Jul 2019 14:42:42 -0000
> @@ -57,8 +57,7 @@ struct rib {
>  };
>  
>  #define RIB_ADJ_IN   0
> -#define RIB_ADJ_OUT  1
> -#define RIB_LOC_START        2
> +#define RIB_LOC_START        1
>  #define RIB_NOTFOUND 0xffff
>  
>  struct rib_desc {
> @@ -78,6 +77,7 @@ LIST_HEAD(aspath_list, aspath);
>  LIST_HEAD(attr_list, attr);
>  LIST_HEAD(aspath_head, rde_aspath);
>  RB_HEAD(prefix_tree, prefix);
> +RB_HEAD(prefix_index, prefix);
>  
>  struct rde_peer {
>       LIST_ENTRY(rde_peer)             hash_l; /* hash list over all peers */
> @@ -87,6 +87,7 @@ struct rde_peer {
>       struct bgpd_addr                 local_v4_addr;
>       struct bgpd_addr                 local_v6_addr;
>       struct capabilities              capa;
> +     struct prefix_index              adj_rib_out;
>       struct prefix_tree               updates[AID_MAX];
>       struct prefix_tree               withdraws[AID_MAX];
>       time_t                           staletime[AID_MAX];
> @@ -306,8 +307,14 @@ struct pt_entry_vpn6 {
>  };
>  
>  struct prefix {
> -     LIST_ENTRY(prefix)               rib_l, nexthop_l;
> -     RB_ENTRY(prefix)                 entry;
> +     union {
> +             struct {
> +                     LIST_ENTRY(prefix)       rib, nexthop;
> +             } list;
> +             struct {
> +                     RB_ENTRY(prefix)         index, update;
> +             } tree;
> +     }                                entry;
>       struct pt_entry                 *pt;
>       struct rib_entry                *re;
>       struct rde_aspath               *aspath;
> @@ -317,12 +324,17 @@ struct prefix {
>       time_t                           lastchange;
>       u_int8_t                         validation_state;
>       u_int8_t                         nhflags;
> -     u_int8_t                         flags;
>       u_int8_t                         eor;
> -#define      PREFIX_FLAG_WITHDRAW    0x01
> -#define      PREFIX_FLAG_UPDATE      0x02
> +     u_int8_t                         flags;
> +#define      PREFIX_FLAG_WITHDRAW    0x01    /* queued for withdraw */
> +#define      PREFIX_FLAG_UPDATE      0x02    /* queued for update */
> +#define      PREFIX_FLAG_DEAD        0x04    /* locked but removed */
> +#define      PREFIX_FLAG_MASK        0x07    /* mask for the three prefix types */
> +#define      PREFIX_NEXTHOP_LINKED   0x40    /* prefix is linked onto nexthop list */
> +#define      PREFIX_FLAG_LOCKED      0x80    /* locked by rib walker */
>  };
>  
> +/* possible states for nhflags */
>  #define      NEXTHOP_SELF            0x01
>  #define      NEXTHOP_REJECT          0x02
>  #define      NEXTHOP_BLACKHOLE       0x04
> @@ -356,6 +368,7 @@ u_int32_t rde_local_as(void);
>  int          rde_noevaluate(void);
>  int          rde_decisionflags(void);
>  int          rde_as4byte(struct rde_peer *);
> +struct rde_peer      *peer_get(u_int32_t);
>  
>  /* rde_attr.c */
>  int           attr_write(void *, u_int16_t, u_int8_t, u_int8_t, void *,
> @@ -395,6 +408,7 @@ u_char            *aspath_override(struct aspath *
>                   u_int16_t *);
>  int           aspath_lenmatch(struct aspath *, enum aslen_spec, u_int);
>  
> +/* rde_community.c */
>  int  community_match(struct rde_community *, struct community *,
>           struct rde_peer *);
>  int  community_set(struct rde_community *, struct community *,
> @@ -499,15 +513,14 @@ struct rib_desc *rib_desc(struct rib *);
>  void          rib_free(struct rib *);
>  void          rib_shutdown(void);
>  struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
> -struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *);
> +struct rib_entry *rib_match(struct rib *, struct bgpd_addr *);
>  int           rib_dump_pending(void);
>  void          rib_dump_runner(void);
>  int           rib_dump_new(u_int16_t, u_int8_t, unsigned int, void *,
>                   void (*)(struct rib_entry *, void *),
>                   void (*)(void *, u_int8_t),
>                   int (*)(void *));
> -void          rib_dump_terminate(u_int16_t, void *,
> -                 void (*)(struct rib_entry *, void *));
> +void          rib_dump_terminate(void *);
>  
>  static inline struct rib *
>  re_rib(struct rib_entry *re)
> @@ -540,13 +553,20 @@ void             path_put(struct rde_aspath *);
>  #define      PREFIX_SIZE(x)  (((x) + 7) / 8 + 1)
>  struct prefix        *prefix_get(struct rib *, struct rde_peer *,
>                   struct bgpd_addr *, int);
> +struct prefix        *prefix_lookup(struct rde_peer *, struct bgpd_addr *, int);
> +struct prefix        *prefix_match(struct rde_peer *, struct bgpd_addr *);
>  int           prefix_remove(struct rib *, struct rde_peer *,
>                   struct bgpd_addr *, int);
>  void          prefix_add_eor(struct rde_peer *, u_int8_t);
> -void          prefix_update(struct rib *, struct rde_peer *,
> -                 struct bgpd_addr *, int);
> -int           prefix_withdraw(struct rib *, struct rde_peer *,
> -                 struct bgpd_addr *, int);
> +int           prefix_update(struct rde_peer *, struct filterstate *,
> +                 struct bgpd_addr *, int, u_int8_t);
> +int           prefix_withdraw(struct rde_peer *, struct bgpd_addr *, int);
> +void          prefix_adjout_destroy(struct prefix *p);
> +void          prefix_adjout_dump(struct rde_peer *, void *,
> +                 void (*)(struct prefix *, void *));
> +int           prefix_dump_new(struct rde_peer *, u_int8_t, unsigned int,
> +                 void *, void (*)(struct prefix *, void *),
> +                 void (*)(void *, u_int8_t), int (*)(void *));
>  int           prefix_write(u_char *, int, struct bgpd_addr *, u_int8_t, int);
>  int           prefix_writebuf(struct ibuf *, struct bgpd_addr *, u_int8_t);
>  struct prefix        *prefix_bypeer(struct rib_entry *, struct rde_peer *);
> Index: rde_decide.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> retrieving revision 1.74
> diff -u -p -r1.74 rde_decide.c
> --- rde_decide.c      21 Jan 2019 02:07:56 -0000      1.74
> +++ rde_decide.c      20 Jun 2019 19:50:25 -0000
> @@ -245,7 +245,7 @@ prefix_evaluate(struct prefix *p, struct
>       if (re_rib(re)->flags & F_RIB_NOEVALUATE || rde_noevaluate()) {
>               /* decision process is turned off */
>               if (p != NULL)
> -                     LIST_INSERT_HEAD(&re->prefix_h, p, rib_l);
> +                     LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
>               if (re->active != NULL)
>                       re->active = NULL;
>               return;
> @@ -253,15 +253,18 @@ prefix_evaluate(struct prefix *p, struct
>  
>       if (p != NULL) {
>               if (LIST_EMPTY(&re->prefix_h))
> -                     LIST_INSERT_HEAD(&re->prefix_h, p, rib_l);
> +                     LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
>               else {
> -                     LIST_FOREACH(xp, &re->prefix_h, rib_l) {
> +                     LIST_FOREACH(xp, &re->prefix_h, entry.list.rib) {
>                               if (prefix_cmp(p, xp) > 0) {
> -                                     LIST_INSERT_BEFORE(xp, p, rib_l);
> +                                     LIST_INSERT_BEFORE(xp, p,
> +                                         entry.list.rib);
>                                       break;
> -                             } else if (LIST_NEXT(xp, rib_l) == NULL) {
> +                             } else if (LIST_NEXT(xp, entry.list.rib) ==
> +                                 NULL) {
>                                       /* if xp last element ... */
> -                                     LIST_INSERT_AFTER(xp, p, rib_l);
> +                                     LIST_INSERT_AFTER(xp, p,
> +                                         entry.list.rib);
>                                       break;
>                               }
>                       }
> Index: rde_rib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> retrieving revision 1.198
> diff -u -p -r1.198 rde_rib.c
> --- rde_rib.c 1 Jul 2019 14:47:56 -0000       1.198
> +++ rde_rib.c 10 Jul 2019 14:27:45 -0000
> @@ -52,8 +52,10 @@ RB_GENERATE(rib_tree, rib_entry, rib_e, 
>  struct rib_context {
>       LIST_ENTRY(rib_context)          entry;
>       struct rib_entry                *ctx_re;
> -     u_int16_t                        ctx_rib_id;
> -     void            (*ctx_upcall)(struct rib_entry *, void *);
> +     struct prefix                   *ctx_p;
> +     u_int32_t                        ctx_id;
> +     void            (*ctx_rib_call)(struct rib_entry *, void *);
> +     void            (*ctx_prefix_call)(struct prefix *, void *);
>       void            (*ctx_done)(void *, u_int8_t);
>       int             (*ctx_throttle)(void *);
>       void                            *ctx_arg;
> @@ -69,6 +71,7 @@ static int  prefix_add(struct bgpd_addr *
>  static int   prefix_move(struct prefix *, struct rde_peer *,
>                   struct rde_aspath *, struct rde_community *,
>                   struct nexthop *, u_int8_t, u_int8_t);
> +static void  prefix_dump_r(struct rib_context *);
>  
>  static inline struct rib_entry *
>  re_lock(struct rib_entry *re)
> @@ -128,7 +131,7 @@ rib_new(char *name, u_int rtableid, u_in
>               rib_size = id + 1;
>       }
>  
> -     bzero(&ribs[id], sizeof(struct rib_desc));
> +     memset(&ribs[id], 0, sizeof(struct rib_desc));
>       strlcpy(ribs[id].name, name, sizeof(ribs[id].name));
>       RB_INIT(rib_tree(&ribs[id].rib));
>       ribs[id].state = RECONF_REINIT;
> @@ -196,7 +199,7 @@ rib_free(struct rib *rib)
>                */
>               while ((p = LIST_FIRST(&re->prefix_h))) {
>                       struct rde_aspath *asp = prefix_aspath(p);
> -                     np = LIST_NEXT(p, rib_l);
> +                     np = LIST_NEXT(p, entry.list.rib);
>                       if (asp && asp->pftableid) {
>                               struct bgpd_addr addr;
>  
> @@ -215,7 +218,7 @@ rib_free(struct rib *rib)
>       rd = &ribs[rib->id];
>       filterlist_free(rd->in_rules_tmp);
>       filterlist_free(rd->in_rules);
> -     bzero(rd, sizeof(struct rib_desc));
> +     memset(rd, 0, sizeof(struct rib_desc));
>  }
>  
>  void
> @@ -235,7 +238,7 @@ rib_shutdown(void)
>               struct rib_desc *rd = &ribs[id];
>               filterlist_free(rd->in_rules_tmp);
>               filterlist_free(rd->in_rules);
> -             bzero(rd, sizeof(struct rib_desc));
> +             memset(rd, 0, sizeof(struct rib_desc));
>       }
>       free(ribs);
>  }
> @@ -247,7 +250,7 @@ rib_get(struct rib *rib, struct bgpd_add
>       struct pt_entry *pte;
>  
>       pte = pt_fill(prefix, prefixlen);
> -     bzero(&xre, sizeof(xre));
> +     memset(&xre, 0, sizeof(xre));
>       xre.prefix = pte;
>  
>       re = RB_FIND(rib_tree, rib_tree(rib), &xre);
> @@ -258,7 +261,7 @@ rib_get(struct rib *rib, struct bgpd_add
>  }
>  
>  struct rib_entry *
> -rib_lookup(struct rib *rib, struct bgpd_addr *addr)
> +rib_match(struct rib *rib, struct bgpd_addr *addr)
>  {
>       struct rib_entry *re;
>       int              i;
> @@ -281,7 +284,7 @@ rib_lookup(struct rib *rib, struct bgpd_
>               }
>               break;
>       default:
> -             fatalx("rib_lookup: unknown af");
> +             fatalx("%s: unknown af", __func__);
>       }
>       return (NULL);
>  }
> @@ -367,9 +370,9 @@ rib_dump_r(struct rib_context *ctx)
>       struct rib              *rib;
>       unsigned int             i;
>  
> -     rib = rib_byid(ctx->ctx_rib_id);
> +     rib = rib_byid(ctx->ctx_id);
>       if (rib == NULL)
> -             fatalx("%s: rib id %u gone", __func__, ctx->ctx_rib_id);
> +             fatalx("%s: rib id %u gone", __func__, ctx->ctx_id);
>  
>       if (ctx->ctx_re == NULL)
>               re = RB_MIN(rib_tree, rib_tree(rib));
> @@ -378,9 +381,9 @@ rib_dump_r(struct rib_context *ctx)
>  
>       for (i = 0; re != NULL; re = next) {
>               next = RB_NEXT(rib_tree, unused, re);
> -             if (re->rib_id != ctx->ctx_rib_id)
> +             if (re->rib_id != ctx->ctx_id)
>                       fatalx("%s: Unexpected RIB %u != %u.", __func__,
> -                         re->rib_id, ctx->ctx_rib_id);
> +                         re->rib_id, ctx->ctx_id);
>               if (ctx->ctx_aid != AID_UNSPEC &&
>                   ctx->ctx_aid != re->prefix->aid)
>                       continue;
> @@ -391,7 +394,7 @@ rib_dump_r(struct rib_context *ctx)
>                       re_lock(re);
>                       return;
>               }
> -             ctx->ctx_upcall(re, ctx->ctx_arg);
> +             ctx->ctx_rib_call(re, ctx->ctx_arg);
>       }
>  
>       if (ctx->ctx_done)
> @@ -422,7 +425,10 @@ rib_dump_runner(void)
>       LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
>               if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg))
>                       continue;
> -             rib_dump_r(ctx);
> +             if (ctx->ctx_rib_call != NULL)
> +                     rib_dump_r(ctx);
> +             else
> +                     prefix_dump_r(ctx);
>       }
>  }
>  
> @@ -432,7 +438,7 @@ rib_dump_abort(u_int16_t id)
>       struct rib_context *ctx, *next;
>  
>       LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
> -             if (id != ctx->ctx_rib_id)
> +             if (id != ctx->ctx_id)
>                       continue;
>               if (ctx->ctx_done)
>                       ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
> @@ -450,11 +456,11 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
>  
>       if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
>               return -1;
> -     ctx->ctx_rib_id = id;
> +     ctx->ctx_id = id;
>       ctx->ctx_aid = aid;
>       ctx->ctx_count = count;
>       ctx->ctx_arg = arg;
> -     ctx->ctx_upcall = upcall;
> +     ctx->ctx_rib_call = upcall;
>       ctx->ctx_done = done;
>       ctx->ctx_throttle = throttle;
>  
> @@ -468,14 +474,12 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
>  }
>  
>  void
> -rib_dump_terminate(u_int16_t id, void *arg,
> -    void (*upcall)(struct rib_entry *, void *))
> +rib_dump_terminate(void *arg)
>  {
>       struct rib_context *ctx, *next;
>  
>       LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
> -             if (id != ctx->ctx_rib_id || ctx->ctx_arg != arg ||
> -                 ctx->ctx_upcall != upcall)
> +             if (ctx->ctx_arg != arg)
>                       continue;
>               if (ctx->ctx_done)
>                       ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
> @@ -610,15 +614,6 @@ path_update(struct rib *rib, struct rde_
>                       p->validation_state = vstate;
>                       return (2);
>               }
> -             if (p->flags) {
> -                     struct prefix_tree *prefix_head;
> -                     /* prefix is a pending update */
> -                     prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
> -                         &peer->updates[prefix->aid] :
> -                         &peer->withdraws[prefix->aid];
> -                     RB_REMOVE(prefix_tree, prefix_head, p);
> -                     p->flags = 0;
> -             }
>       }
>  
>       /*
> @@ -881,7 +876,14 @@ prefix_cmp(struct prefix *a, struct pref
>       return pt_prefix_cmp(a->pt, b->pt);
>  }
>  
> -RB_GENERATE(prefix_tree, prefix, entry, prefix_cmp)
> +static inline int
> +prefix_index_cmp(struct prefix *a, struct prefix *b)
> +{
> +     return pt_prefix_cmp(a->pt, b->pt);
> +}
> +
> +RB_GENERATE(prefix_tree, prefix, entry.tree.update, prefix_cmp)
> +RB_GENERATE_STATIC(prefix_index, prefix, entry.tree.index, prefix_index_cmp)
>  
>  /*
>   * search for specified prefix of a peer. Returns NULL if not found.
> @@ -899,6 +901,52 @@ prefix_get(struct rib *rib, struct rde_p
>  }
>  
>  /*
> + * lookup prefix in the peer prefix_index. Returns NULL if not found.
> + */
> +struct prefix *
> +prefix_lookup(struct rde_peer *peer, struct bgpd_addr *prefix,
> +    int prefixlen)
> +{
> +     struct prefix xp;
> +     struct pt_entry *pte;
> +
> +     memset(&xp, 0, sizeof(xp));
> +     pte = pt_fill(prefix, prefixlen);
> +     xp.pt = pte;
> +
> +     return RB_FIND(prefix_index, &peer->adj_rib_out, &xp);
> +}
> +
> +struct prefix *
> +prefix_match(struct rde_peer *peer, struct bgpd_addr *addr)
> +{
> +     struct prefix *p;
> +     int i;
> +
> +     switch (addr->aid) {
> +     case AID_INET:
> +     case AID_VPN_IPv4:
> +             for (i = 32; i >= 0; i--) {
> +                     p = prefix_lookup(peer, addr, i);
> +                     if (p != NULL)
> +                             return p;
> +             }
> +             break;
> +     case AID_INET6:
> +     case AID_VPN_IPv6:
> +             for (i = 128; i >= 0; i--) {
> +                     p = prefix_lookup(peer, addr, i);
> +                     if (p != NULL)
> +                             return p;
> +             }
> +             break;
> +     default:
> +             fatalx("%s: unknown af", __func__);
> +     }
> +     return NULL;
> +}
> +
> +/*
>   * Adds or updates a prefix.
>   */
>  static int
> @@ -936,8 +984,8 @@ prefix_move(struct prefix *p, struct rde
>       np->aspath = path_ref(asp);
>       np->communities = communities_ref(comm);
>       np->peer = peer;
> -     np->pt = p->pt; /* skip refcnt update since ref is moved */
>       np->re = p->re;
> +     np->pt = p->pt; /* skip refcnt update since ref is moved */
>       np->validation_state = vstate;
>       np->nhflags = nhflags;
>       np->nexthop = nexthop_ref(nexthop);
> @@ -957,7 +1005,7 @@ prefix_move(struct prefix *p, struct rde
>        * This is safe because we create a new prefix and so the change
>        * is noticed by prefix_evaluate().
>        */
> -     LIST_REMOVE(p, rib_l);
> +     LIST_REMOVE(p, entry.list.rib);
>       prefix_evaluate(np, np->re);
>  
>       /* remove old prefix node */
> @@ -1020,26 +1068,97 @@ prefix_add_eor(struct rde_peer *peer, u_
>       if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL)
>               /* no need to add if EoR marker already present */
>               prefix_free(p);
> +     /* EOR marker is not inserted into the adj_rib_out index */
>  }
>  
>  /*
>   * Put a prefix from the Adj-RIB-Out onto the update queue.
>   */
> -void
> -prefix_update(struct rib *rib, struct rde_peer *peer,
> -    struct bgpd_addr *prefix, int prefixlen)
> +int
> +prefix_update(struct rde_peer *peer, struct filterstate *state,
> +    struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate)
>  {
> +     struct prefix_tree *prefix_head = NULL;
> +     struct rde_aspath *asp;
> +     struct rde_community *comm;
>       struct prefix *p;
> +     int created = 0;
>  
> -     p = prefix_get(rib, peer, prefix, prefixlen);
> -     if (p == NULL)          /* Got a dummy withdrawn request. */
> -             return;
> +     if ((p = prefix_lookup(peer, prefix, prefixlen)) != NULL) {
> +             /* prefix is already in the Adj-RIB-Out */
> +             if (p->flags & PREFIX_FLAG_WITHDRAW) {
> +                     created = 1;    /* consider this a new entry */
> +                     peer->up_wcnt--;
> +                     prefix_head = &peer->withdraws[prefix->aid];
> +                     RB_REMOVE(prefix_tree, prefix_head, p);
> +             } else if (p->flags & PREFIX_FLAG_DEAD) {
> +                     created = 1;    /* consider this a new entry */
> +             } else {
> +                     if (prefix_nhflags(p) == state->nhflags &&
> +                         prefix_nexthop(p) == state->nexthop &&
> +                         communities_equal(&state->communities,
> +                         prefix_communities(p)) &&
> +                         path_compare(&state->aspath, prefix_aspath(p)) ==
> +                         0) {
> +                             /* nothing changed */
> +                             p->validation_state = vstate;
> +                             p->lastchange = time(NULL);
> +                             return 0;
> +                     }
> +
> +                     if (p->flags & PREFIX_FLAG_UPDATE) {
> +                             /* created = 0 so up_nlricnt is not increased */
> +                             prefix_head = &peer->updates[prefix->aid];
> +                             RB_REMOVE(prefix_tree, prefix_head, p);
> +                     }
> +             }
> +             /* unlink from aspath and remove nexthop ref */
> +             nexthop_unref(p->nexthop);
> +             communities_unref(p->communities);
> +             path_unref(p->aspath);
> +             p->flags &= ~PREFIX_FLAG_MASK;
> +
> +             /* peer and pt remain */
> +     } else {
> +             p = prefix_alloc();
> +             created = 1;
> +
> +             p->pt = pt_get(prefix, prefixlen);
> +             if (p->pt == NULL)
> +                     fatalx("%s: update for non existing prefix", __func__);
> +             pt_ref(p->pt);
> +             p->peer = peer;
> +
> +             if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
> +                     fatalx("%s: RB index invariant violated", __func__);
> +     }
> +
> +     if ((asp = path_lookup(&state->aspath)) == NULL) {
> +             /* Path not available, create and link a new one. */
> +             asp = path_copy(path_get(), &state->aspath);
> +             path_link(asp);
> +     }
> +
> +     if ((comm = communities_lookup(&state->communities)) == NULL) {
> +             /* Communities not available, create and link a new one. */
> +             comm = communities_link(&state->communities);
> +     }
> +
> +     p->aspath = path_ref(asp);
> +     p->communities = communities_ref(comm);
> +     p->nexthop = nexthop_ref(state->nexthop);
> +     p->nhflags = state->nhflags;
>  
> -     if (p->flags != 0)
> +     p->validation_state = vstate;
> +     p->lastchange = time(NULL);
> +
> +     if (p->flags & PREFIX_FLAG_MASK)
>               fatalx("%s: bad flags %x", __func__, p->flags);
> -     p->flags = PREFIX_FLAG_UPDATE;
> +     p->flags |= PREFIX_FLAG_UPDATE;
>       if (RB_INSERT(prefix_tree, &peer->updates[prefix->aid], p) != NULL)
>               fatalx("%s: RB tree invariant violated", __func__);
> +
> +     return created;
>  }
>  
>  /*
> @@ -1047,15 +1166,19 @@ prefix_update(struct rib *rib, struct rd
>   * the prefix in the RIB linked to the peer withdraw list.
>   */
>  int
> -prefix_withdraw(struct rib *rib, struct rde_peer *peer,
> -    struct bgpd_addr *prefix, int prefixlen)
> +prefix_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen)
>  {
>       struct prefix           *p;
>  
> -     p = prefix_get(rib, peer, prefix, prefixlen);
> +     p = prefix_lookup(peer, prefix, prefixlen);
>       if (p == NULL)          /* Got a dummy withdrawn request. */
>               return (0);
>  
> +     /* remove nexthop ref ... */
> +     nexthop_unref(p->nexthop);
> +     p->nexthop = NULL;
> +     p->nhflags = 0;
> +
>       /* unlink from aspath ...*/
>       path_unref(p->aspath);
>       p->aspath = NULL;
> @@ -1063,29 +1186,181 @@ prefix_withdraw(struct rib *rib, struct 
>       /* ... communities ... */
>       communities_unref(p->communities);
>       p->communities = NULL;
> +     /* and unlink from aspath */
> +     path_unref(p->aspath);
> +     p->aspath = NULL;
> +     /* re already NULL */
>  
> -     /* ... and nexthop but keep the re link */
> -     nexthop_unlink(p);
> -     nexthop_unref(p->nexthop);
> -     p->nexthop = NULL;
> -     p->nhflags = 0;
> -     /* re link still exists */
> +     p->lastchange = time(NULL);
>  
> -     if (p->flags) {
> +     if (p->flags & PREFIX_FLAG_MASK) {
>               struct prefix_tree *prefix_head;
>               /* p is a pending update or withdraw, remove first */
>               prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
>                   &peer->updates[prefix->aid] :
>                   &peer->withdraws[prefix->aid];
>               RB_REMOVE(prefix_tree, prefix_head, p);
> -             p->flags = 0;
> +             p->flags &= ~PREFIX_FLAG_MASK;
>       }
> -     p->flags = PREFIX_FLAG_WITHDRAW;
> +     p->flags |= PREFIX_FLAG_WITHDRAW;
>       if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL)
>               fatalx("%s: RB tree invariant violated", __func__);
>       return (1);
>  }
>  
> +static inline void
> +prefix_lock(struct prefix *p)
> +{
> +     if (p->flags & PREFIX_FLAG_LOCKED)
> +             fatalx("%s: locking locked prefix", __func__);
> +     p->flags |= PREFIX_FLAG_LOCKED;
> +}
> +
> +static inline void
> +prefix_unlock(struct prefix *p)
> +{
> +     if ((p->flags & PREFIX_FLAG_LOCKED) == 0)
> +             fatalx("%s: unlocking unlocked prefix", __func__);
> +     p->flags &= ~PREFIX_FLAG_LOCKED;
> +}
> +
> +static inline int
> +prefix_is_locked(struct prefix *p)
> +{
> +     return (p->flags & PREFIX_FLAG_LOCKED) != 0;
> +}
> +
> +static inline int
> +prefix_is_dead(struct prefix *p)
> +{
> +     return (p->flags & PREFIX_FLAG_DEAD) != 0;
> +}
> +
> +static struct prefix *
> +prefix_restart(struct rib_context *ctx)
> +{
> +     struct prefix *p;
> +
> +     p = ctx->ctx_p;
> +     prefix_unlock(p);
> +
> +     if (prefix_is_dead(p)) {
> +             struct prefix *next;
> +
> +             next = RB_NEXT(prefix_index, unused, p);
> +             prefix_adjout_destroy(p);
> +             p = next;
> +     }
> +     return p;
> +}
> +
> +void
> +prefix_adjout_destroy(struct prefix *p)
> +{
> +     struct rde_peer *peer = prefix_peer(p);
> +
> +     if (p->eor) {
> +             /* EOR marker is not linked in the index */
> +             prefix_free(p);
> +             return;
> +     }
> +
> +     if (p->flags & PREFIX_FLAG_WITHDRAW)
> +             RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p);
> +     else if (p->flags & PREFIX_FLAG_UPDATE)
> +             RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
> +     /* nothing needs to be done for PREFIX_FLAG_DEAD */
> +     p->flags &= ~PREFIX_FLAG_MASK;
> +
> +
> +     if (prefix_is_locked(p)) {
> +             /* remove nexthop ref ... */
> +             nexthop_unref(p->nexthop);
> +             p->nexthop = NULL;
> +             /* ... communities ... */
> +             communities_unref(p->communities);
> +             p->communities = NULL;
> +             /* and unlink from aspath */
> +             path_unref(p->aspath);
> +             p->aspath = NULL;
> +             p->nhflags = 0;
> +             /* re already NULL */
> +
> +             /* finally mark prefix dead */
> +             p->flags |= PREFIX_FLAG_DEAD;
> +             return;
> +     }
> +
> +     RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
> +     
> +     prefix_unlink(p);
> +     prefix_free(p);
> +}
> +
> +static void
> +prefix_dump_r(struct rib_context *ctx)
> +{
> +     struct prefix *p, *next;
> +     struct rde_peer *peer;
> +     unsigned int i;
> +
> +     if ((peer = peer_get(ctx->ctx_id)) == NULL)
> +             goto done;
> +
> +     if (ctx->ctx_p == NULL)
> +             p = RB_MIN(prefix_index, &peer->adj_rib_out);
> +     else
> +             p = prefix_restart(ctx);
> +
> +     for (i = 0; p != NULL; p = next) {
> +             next = RB_NEXT(prefix_index, unused, p);
> +             if (prefix_is_dead(p))
> +                     continue;
> +             if (ctx->ctx_aid != AID_UNSPEC &&
> +                 ctx->ctx_aid != p->pt->aid)
> +                     continue;
> +             if (ctx->ctx_count && i++ >= ctx->ctx_count &&
> +                 !prefix_is_locked(p)) {
> +                     /* store and lock last element */
> +                     ctx->ctx_p = p;
> +                     prefix_lock(p);
> +                     return;
> +             }
> +             ctx->ctx_prefix_call(p, ctx->ctx_arg);
> +     }
> +
> +done:
> +     if (ctx->ctx_done)
> +             ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
> +     LIST_REMOVE(ctx, entry);
> +     free(ctx);
> +}
> +
> +int
> +prefix_dump_new(struct rde_peer *peer, u_int8_t aid, unsigned int count,
> +    void *arg, void (*upcall)(struct prefix *, void *),
> +    void (*done)(void *, u_int8_t), int (*throttle)(void *))
> +{
> +     struct rib_context *ctx;
> +
> +     if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
> +             return -1;
> +     ctx->ctx_id = peer->conf.id;
> +     ctx->ctx_aid = aid;
> +     ctx->ctx_count = count;
> +     ctx->ctx_arg = arg;
> +     ctx->ctx_prefix_call = upcall;
> +     ctx->ctx_done = done;
> +     ctx->ctx_throttle = throttle;
> +
> +     LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
> +
> +     /* requested a sync traversal */
> +     if (count == 0)
> +             prefix_dump_r(ctx);
> +
> +     return 0;
> +}
>  
>  /* dump a prefix into specified buffer */
>  int
> @@ -1205,7 +1480,7 @@ prefix_bypeer(struct rib_entry *re, stru
>  {
>       struct prefix   *p;
>  
> -     LIST_FOREACH(p, &re->prefix_h, rib_l)
> +     LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
>               if (prefix_peer(p) == peer)
>                       return (p);
>       return (NULL);
> @@ -1237,7 +1512,7 @@ prefix_updateall(struct prefix *p, enum 
>       }
>  
>       /* redo the route decision */
> -     LIST_REMOVE(p, rib_l);
> +     LIST_REMOVE(p, entry.list.rib);
>       /*
>        * If the prefix is the active one remove it first,
>        * this has to be done because we can not detect when
> @@ -1255,6 +1530,10 @@ prefix_updateall(struct prefix *p, enum 
>  void
>  prefix_destroy(struct prefix *p)
>  {
> +     /* make route decision */
> +     LIST_REMOVE(p, entry.list.rib);
> +     prefix_evaluate(NULL, p->re);
> +
>       prefix_unlink(p);
>       prefix_free(p);
>  }
> @@ -1290,13 +1569,6 @@ prefix_unlink(struct prefix *p)
>  {
>       struct rib_entry        *re = p->re;
>  
> -     if (p->eor)     /* nothing to unlink for EoR markers */
> -             return;
> -
> -     /* make route decision */
> -     LIST_REMOVE(p, rib_l);
> -     prefix_evaluate(NULL, re);
> -
>       /* destroy all references to other objects */
>       nexthop_unlink(p);
>       nexthop_unref(p->nexthop);
> @@ -1310,7 +1582,7 @@ prefix_unlink(struct prefix *p)
>       p->re = NULL;
>       p->pt = NULL;
>  
> -     if (rib_empty(re))
> +     if (re && rib_empty(re))
>               rib_remove(re);
>  
>       /*
> @@ -1319,7 +1591,7 @@ prefix_unlink(struct prefix *p)
>        */
>  }
>  
> -/* alloc and bzero new entry. May not fail. */
> +/* alloc and zero new entry. May not fail. */
>  static struct prefix *
>  prefix_alloc(void)
>  {
> @@ -1430,7 +1702,7 @@ nexthop_runner(void)
>       p = nh->next_prefix;
>       for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) {
>               prefix_updateall(p, nh->state, nh->oldstate);
> -             p = LIST_NEXT(p, nexthop_l);
> +             p = LIST_NEXT(p, entry.list.nexthop);
>       }
>  
>       /* prep for next run, if not finished readd to tail of queue */
> @@ -1540,22 +1812,21 @@ nexthop_link(struct prefix *p)
>       if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
>               return;
>  
> -     LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, nexthop_l);
> +     p->flags |= PREFIX_NEXTHOP_LINKED;
> +     LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, entry.list.nexthop);
>  }
>  
>  void
>  nexthop_unlink(struct prefix *p)
>  {
> -     if (p->nexthop == NULL)
> -             return;
> -
> -     if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
> +     if (p->nexthop == NULL || (p->flags & PREFIX_NEXTHOP_LINKED) == 0)
>               return;
>  
>       if (p == p->nexthop->next_prefix)
> -             p->nexthop->next_prefix = LIST_NEXT(p, nexthop_l);
> +             p->nexthop->next_prefix = LIST_NEXT(p, entry.list.nexthop);
>  
> -     LIST_REMOVE(p, nexthop_l);
> +     p->flags &= ~PREFIX_NEXTHOP_LINKED;
> +     LIST_REMOVE(p, entry.list.nexthop);
>  }
>  
>  struct nexthop *
> Index: rde_update.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> retrieving revision 1.119
> diff -u -p -r1.119 rde_update.c
> --- rde_update.c      2 Jul 2019 12:07:00 -0000       1.119
> +++ rde_update.c      4 Jul 2019 08:59:13 -0000
> @@ -143,8 +143,7 @@ withdraw:
>  
>               /* withdraw prefix */
>               pt_getaddr(old->pt, &addr);
> -             if (prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
> -                 old->pt->prefixlen) == 1)
> +             if (prefix_withdraw(peer, &addr, old->pt->prefixlen) == 1)
>                       peer->up_wcnt++;
>       } else {
>               switch (up_test_update(peer, new)) {
> @@ -165,13 +164,11 @@ withdraw:
>               }
>  
>               pt_getaddr(new->pt, &addr);
> -             if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
> -                 new->pt->prefixlen, prefix_vstate(new)) != 2) {
> -                     /* only send update if path changed */
> -                     prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
> -                         new->pt->prefixlen);
> +
> +             /* only send update if path changed */
> +             if (prefix_update(peer, &state, &addr, new->pt->prefixlen,
> +                 prefix_vstate(new)) == 1)
>                       peer->up_nlricnt++;
> -             }
>  
>               rde_filterstate_clean(&state);
>       }
> @@ -229,11 +226,8 @@ up_generate_default(struct filter_head *
>               return;
>       }
>  
> -     if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr, 0,
> -         ROA_NOTFOUND) != 2) {
> -             prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr, 0);
> +     if (prefix_update(peer, &state, &addr, 0, ROA_NOTFOUND) == 1)
>               peer->up_nlricnt++;
> -     }
>  
>       /* no longer needed */
>       rde_filterstate_clean(&state);
> @@ -576,8 +570,13 @@ up_is_eor(struct rde_peer *peer, u_int8_
>  
>       p = RB_MIN(prefix_tree, &peer->updates[aid]);
>       if (p != NULL && p->eor) {
> +             /*
> +              * Need to remove eor from update tree because
> +              * prefix_adjout_destroy() can't handle that.
> +              */
>               RB_REMOVE(prefix_tree, &peer->updates[aid], p);
> -             prefix_destroy(p);
> +             p->flags &= ~PREFIX_FLAG_MASK;
> +             prefix_adjout_destroy(p);
>               return 1;
>       }
>       return 0;
> @@ -616,11 +615,11 @@ up_dump_prefix(u_char *buf, int len, str
>  
>               /* prefix sent, remove from list and clear flag */
>               RB_REMOVE(prefix_tree, prefix_head, p);
> -             p->flags = 0;
> +             p->flags &= ~PREFIX_FLAG_MASK;
>  
>               if (withdraw) {
>                       /* prefix no longer needed, remove it */
> -                     prefix_destroy(p);
> +                     prefix_adjout_destroy(p);
>                       peer->up_wcnt--;
>                       peer->prefix_sent_withdraw++;
>               } else {
> 

Reply via email to