Re: bgpd: replace some more walkers with rib_dump

2018-10-28 Thread Claudio Jeker
On Thu, Oct 25, 2018 at 08:51:30AM +0200, Claudio Jeker wrote:
> Next step on my quest to make the RIB code better.
> This changes the following things:
> - network_flush is now using rib_dump_new to walk the Adj-RIB-In and
>   remove all dynamically added announcements
> - peer_flush got generalized and is now used also in peer_down.
>   It also uses a rib_dump_new call to walk the Adj-RIB-In and remove
>   all prefixes from a peer but this is done synchronous for now.
> - peer_flush is now working correctly for AID_UNSPEC.
> - Change the rib_valid check to continue instead of break out of the loop.
>   The rib table can have holes so the loop needs to continue.
> 
> This removes the last three places that use the peer -> path -> prefix
> tailqs so the next step is to remove these and make rde_aspath just an
> object that is referenced by prefixes. After that adding a proper
> Adj-RIB-Out should be possible.
> 
> Running with this on production :)

I will commit this on Monday unless someone objects or I get some OKs :)

-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.440
diff -u -p -r1.440 rde.c
--- rde.c   24 Oct 2018 08:26:37 -  1.440
+++ rde.c   25 Oct 2018 06:34:32 -
@@ -97,7 +97,7 @@ struct rde_peer   *peer_add(u_int32_t, str
 struct rde_peer*peer_get(u_int32_t);
 voidpeer_up(u_int32_t, struct session_up *);
 voidpeer_down(u_int32_t);
-voidpeer_flush(struct rde_peer *, u_int8_t);
+voidpeer_flush(struct rde_peer *, u_int8_t, time_t);
 voidpeer_stale(u_int32_t, u_int8_t);
 voidpeer_dump(u_int32_t, u_int8_t);
 static void peer_recv_eor(struct rde_peer *, u_int8_t);
@@ -105,7 +105,8 @@ static void  peer_send_eor(struct rde_pe
 
 voidnetwork_add(struct network_config *, int);
 voidnetwork_delete(struct network_config *, int);
-voidnetwork_dump_upcall(struct rib_entry *, void *);
+static void network_dump_upcall(struct rib_entry *, void *);
+static void network_flush_upcall(struct rib_entry *, void *);
 
 voidrde_shutdown(void);
 int sa_cmp(struct bgpd_addr *, struct sockaddr *);
@@ -418,7 +419,7 @@ rde_dispatch_imsg_session(struct imsgbuf
imsg.hdr.peerid);
break;
}
-   peer_flush(peer, aid);
+   peer_flush(peer, aid, peer->staletime[aid]);
break;
case IMSG_SESSION_RESTARTED:
if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
@@ -434,7 +435,7 @@ rde_dispatch_imsg_session(struct imsgbuf
break;
}
if (peer->staletime[aid])
-   peer_flush(peer, aid);
+   peer_flush(peer, aid, peer->staletime[aid]);
break;
case IMSG_REFRESH:
if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
@@ -556,7 +557,10 @@ badnetdel:
log_warnx("rde_dispatch: wrong imsg len");
break;
}
-   prefix_network_clean(peerself);
+   if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
+   RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
+   NULL, NULL) == -1)
+   log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
break;
case IMSG_FILTER_SET:
if (imsg.hdr.len - IMSG_HEADER_SIZE !=
@@ -770,7 +774,7 @@ rde_dispatch_imsg_parent(struct imsgbuf 
memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
for (rid = 0; rid < rib_size; rid++) {
if (!rib_valid(rid))
-   break;
+   continue;
ribs[rid].state = RECONF_DELETE;
}
SIMPLEQ_INIT(>rde_prefixsets);
@@ -1411,7 +1415,7 @@ rde_update_update(struct rde_peer *peer,
 
for (i = RIB_LOC_START; i < rib_size; i++) {
if (!rib_valid(i))
-   break;
+   continue;
rde_filterstate_prep(, >aspath, in->nexthop,
in->nhflags);
/* input filter */
@@ -1443,7 +1447,7 @@ rde_update_withdraw(struct rde_peer *pee
 
for (i = RIB_LOC_START; i < rib_size; i++) {
if (!rib_valid(i))
-   break;
+   continue;
if (prefix_remove([i].rib, peer, prefix, prefixlen))

Re: bgpd: replace some more walkers with rib_dump

2018-10-25 Thread Claudio Jeker
On Thu, Oct 25, 2018 at 09:04:18PM +0200, Denis Fondras wrote:
> On Thu, Oct 25, 2018 at 08:51:30AM +0200, Claudio Jeker wrote:
> > Next step on my quest to make the RIB code better.
> > This changes the following things:
> > - network_flush is now using rib_dump_new to walk the Adj-RIB-In and
> >   remove all dynamically added announcements
> > - peer_flush got generalized and is now used also in peer_down.
> >   It also uses a rib_dump_new call to walk the Adj-RIB-In and remove
> >   all prefixes from a peer but this is done synchronous for now.
> > - peer_flush is now working correctly for AID_UNSPEC.
> > - Change the rib_valid check to continue instead of break out of the loop.
> >   The rib table can have holes so the loop needs to continue.
> > 
> > This removes the last three places that use the peer -> path -> prefix
> > tailqs so the next step is to remove these and make rde_aspath just an
> > object that is referenced by prefixes. After that adding a proper
> > Adj-RIB-Out should be possible.
> > 
> 
> Question below.
> Running on prod too :)
> 
> > Index: rde_rib.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> > retrieving revision 1.180
> > diff -u -p -r1.180 rde_rib.c
> > --- rde_rib.c   24 Oct 2018 08:26:37 -  1.180
> > +++ rde_rib.c   24 Oct 2018 08:44:54 -
> > @@ -339,8 +339,8 @@ rib_restart(struct rib_context *ctx)
> >  static void
> >  rib_dump_r(struct rib_context *ctx)
> >  {
> > +   struct rib_entry*re, *next;
> > struct rib  *rib;
> > -   struct rib_entry*re;
> > unsigned int i;
> >  
> > rib = rib_byid(ctx->ctx_rib_id);
> > @@ -352,7 +352,8 @@ rib_dump_r(struct rib_context *ctx)
> > else
> > re = rib_restart(ctx);
> >  
> > -   for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) {
> > +   for (i = 0; re != NULL; re = next) {
> > +   next = RB_NEXT(rib_tree, unused, re);
> 
> Why not RB_FOREACH_SAFE ?

Because we do not start with RB_MIN. The inital element is set above and
so we can't use a regular RB_FOREACH loop either.
 
> > if (re->rib_id != ctx->ctx_rib_id)
> > fatalx("%s: Unexpected RIB %u != %u.", __func__,
> > re->rib_id, ctx->ctx_rib_id);
> > @@ -435,6 +436,10 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
> >  
> > LIST_INSERT_HEAD(_dumps, ctx, entry);
> >  
> > +   /* requested a sync traversal */
> > +   if (count == 0)
> > +   rib_dump_r(ctx);
> > +
> > return 0;
> >  }
> >  
> > @@ -664,65 +669,6 @@ path_lookup(struct rde_aspath *aspath, s
> > return (NULL);
> >  }
> >  
> > -void
> > -path_remove(struct rde_aspath *asp)
> > -{
> > -   struct prefix   *p, *np;
> > -
> > -   for (p = TAILQ_FIRST(>prefixes); p != NULL; p = np) {
> > -   np = TAILQ_NEXT(p, path_l);
> > -   if (asp->pftableid) {
> > -   struct bgpd_addr addr;
> > -
> > -   pt_getaddr(p->re->prefix, );
> > -   /* Commit is done in peer_down() */
> > -   rde_send_pftable(prefix_aspath(p)->pftableid, ,
> > -   p->re->prefix->prefixlen, 1);
> > -   }
> > -   prefix_destroy(p);
> > -   }
> > -}
> > -
> > -/* remove all stale routes or if staletime is 0 remove all routes for
> > -   a specified AID. */
> > -u_int32_t
> > -path_remove_stale(struct rde_aspath *asp, u_int8_t aid, time_t staletime)
> > -{
> > -   struct prefix   *p, *np;
> > -   u_int32_trprefixes;
> > -
> > -   rprefixes=0;
> > -   /*
> > -* This is called when a session flapped and during that time
> > -* the pending updates for that peer are getting reset.
> > -*/
> > -   for (p = TAILQ_FIRST(>prefixes); p != NULL; p = np) {
> > -   np = TAILQ_NEXT(p, path_l);
> > -   if (p->re->prefix->aid != aid)
> > -   continue;
> > -
> > -   if (staletime && p->lastchange > staletime)
> > -   continue;
> > -
> > -   if (asp->pftableid) {
> > -   struct bgpd_addr addr;
> > -
> > -   pt_getaddr(p->re->prefix, );
> > -   /* Commit is done in peer_flush() */
> > -   rde_send_pftable(prefix_aspath(p)->pftableid, ,
> > -   p->re->prefix->prefixlen, 1);
> > -   }
> > -
> > -   /* only count Adj-RIB-In */
> > -   if (re_rib(p->re) == [RIB_ADJ_IN].rib)
> > -   rprefixes++;
> > -
> > -   prefix_destroy(p);
> > -   }
> > -   return (rprefixes);
> > -}
> > -
> > -
> >  /*
> >   * This function can only called when all prefix have been removed first.
> >   * Normally this happens directly out of the prefix removal functions.
> > @@ -1144,29 +1090,6 @@ prefix_destroy(struct prefix *p)
> >  
> > if (path_empty(asp))
> > path_destroy(asp);
> > -}
> > -
> > -/*
> > - * helper function 

Re: bgpd: replace some more walkers with rib_dump

2018-10-25 Thread Denis Fondras
On Thu, Oct 25, 2018 at 08:51:30AM +0200, Claudio Jeker wrote:
> Next step on my quest to make the RIB code better.
> This changes the following things:
> - network_flush is now using rib_dump_new to walk the Adj-RIB-In and
>   remove all dynamically added announcements
> - peer_flush got generalized and is now used also in peer_down.
>   It also uses a rib_dump_new call to walk the Adj-RIB-In and remove
>   all prefixes from a peer but this is done synchronous for now.
> - peer_flush is now working correctly for AID_UNSPEC.
> - Change the rib_valid check to continue instead of break out of the loop.
>   The rib table can have holes so the loop needs to continue.
> 
> This removes the last three places that use the peer -> path -> prefix
> tailqs so the next step is to remove these and make rde_aspath just an
> object that is referenced by prefixes. After that adding a proper
> Adj-RIB-Out should be possible.
> 

Question below.
Running on prod too :)

> Index: rde_rib.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> retrieving revision 1.180
> diff -u -p -r1.180 rde_rib.c
> --- rde_rib.c 24 Oct 2018 08:26:37 -  1.180
> +++ rde_rib.c 24 Oct 2018 08:44:54 -
> @@ -339,8 +339,8 @@ rib_restart(struct rib_context *ctx)
>  static void
>  rib_dump_r(struct rib_context *ctx)
>  {
> + struct rib_entry*re, *next;
>   struct rib  *rib;
> - struct rib_entry*re;
>   unsigned int i;
>  
>   rib = rib_byid(ctx->ctx_rib_id);
> @@ -352,7 +352,8 @@ rib_dump_r(struct rib_context *ctx)
>   else
>   re = rib_restart(ctx);
>  
> - for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) {
> + for (i = 0; re != NULL; re = next) {
> + next = RB_NEXT(rib_tree, unused, re);

Why not RB_FOREACH_SAFE ?

>   if (re->rib_id != ctx->ctx_rib_id)
>   fatalx("%s: Unexpected RIB %u != %u.", __func__,
>   re->rib_id, ctx->ctx_rib_id);
> @@ -435,6 +436,10 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
>  
>   LIST_INSERT_HEAD(_dumps, ctx, entry);
>  
> + /* requested a sync traversal */
> + if (count == 0)
> + rib_dump_r(ctx);
> +
>   return 0;
>  }
>  
> @@ -664,65 +669,6 @@ path_lookup(struct rde_aspath *aspath, s
>   return (NULL);
>  }
>  
> -void
> -path_remove(struct rde_aspath *asp)
> -{
> - struct prefix   *p, *np;
> -
> - for (p = TAILQ_FIRST(>prefixes); p != NULL; p = np) {
> - np = TAILQ_NEXT(p, path_l);
> - if (asp->pftableid) {
> - struct bgpd_addr addr;
> -
> - pt_getaddr(p->re->prefix, );
> - /* Commit is done in peer_down() */
> - rde_send_pftable(prefix_aspath(p)->pftableid, ,
> - p->re->prefix->prefixlen, 1);
> - }
> - prefix_destroy(p);
> - }
> -}
> -
> -/* remove all stale routes or if staletime is 0 remove all routes for
> -   a specified AID. */
> -u_int32_t
> -path_remove_stale(struct rde_aspath *asp, u_int8_t aid, time_t staletime)
> -{
> - struct prefix   *p, *np;
> - u_int32_trprefixes;
> -
> - rprefixes=0;
> - /*
> -  * This is called when a session flapped and during that time
> -  * the pending updates for that peer are getting reset.
> -  */
> - for (p = TAILQ_FIRST(>prefixes); p != NULL; p = np) {
> - np = TAILQ_NEXT(p, path_l);
> - if (p->re->prefix->aid != aid)
> - continue;
> -
> - if (staletime && p->lastchange > staletime)
> - continue;
> -
> - if (asp->pftableid) {
> - struct bgpd_addr addr;
> -
> - pt_getaddr(p->re->prefix, );
> - /* Commit is done in peer_flush() */
> - rde_send_pftable(prefix_aspath(p)->pftableid, ,
> - p->re->prefix->prefixlen, 1);
> - }
> -
> - /* only count Adj-RIB-In */
> - if (re_rib(p->re) == [RIB_ADJ_IN].rib)
> - rprefixes++;
> -
> - prefix_destroy(p);
> - }
> - return (rprefixes);
> -}
> -
> -
>  /*
>   * This function can only called when all prefix have been removed first.
>   * Normally this happens directly out of the prefix removal functions.
> @@ -1144,29 +1090,6 @@ prefix_destroy(struct prefix *p)
>  
>   if (path_empty(asp))
>   path_destroy(asp);
> -}
> -
> -/*
> - * helper function to clean up the dynamically added networks
> - */
> -void
> -prefix_network_clean(struct rde_peer *peer)
> -{
> - struct rde_aspath   *asp, *xasp;
> - struct prefix   *p, *xp;
> -
> - for (asp = TAILQ_FIRST(>path_h); asp != NULL; asp = xasp) {
> - xasp = TAILQ_NEXT(asp, peer_l);
> - if ((asp->flags & 

bgpd: replace some more walkers with rib_dump

2018-10-25 Thread Claudio Jeker
Next step on my quest to make the RIB code better.
This changes the following things:
- network_flush is now using rib_dump_new to walk the Adj-RIB-In and
  remove all dynamically added announcements
- peer_flush got generalized and is now used also in peer_down.
  It also uses a rib_dump_new call to walk the Adj-RIB-In and remove
  all prefixes from a peer but this is done synchronous for now.
- peer_flush is now working correctly for AID_UNSPEC.
- Change the rib_valid check to continue instead of break out of the loop.
  The rib table can have holes so the loop needs to continue.

This removes the last three places that use the peer -> path -> prefix
tailqs so the next step is to remove these and make rde_aspath just an
object that is referenced by prefixes. After that adding a proper
Adj-RIB-Out should be possible.

Running with this on production :)
-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.440
diff -u -p -r1.440 rde.c
--- rde.c   24 Oct 2018 08:26:37 -  1.440
+++ rde.c   25 Oct 2018 06:34:32 -
@@ -97,7 +97,7 @@ struct rde_peer   *peer_add(u_int32_t, str
 struct rde_peer*peer_get(u_int32_t);
 voidpeer_up(u_int32_t, struct session_up *);
 voidpeer_down(u_int32_t);
-voidpeer_flush(struct rde_peer *, u_int8_t);
+voidpeer_flush(struct rde_peer *, u_int8_t, time_t);
 voidpeer_stale(u_int32_t, u_int8_t);
 voidpeer_dump(u_int32_t, u_int8_t);
 static void peer_recv_eor(struct rde_peer *, u_int8_t);
@@ -105,7 +105,8 @@ static void  peer_send_eor(struct rde_pe
 
 voidnetwork_add(struct network_config *, int);
 voidnetwork_delete(struct network_config *, int);
-voidnetwork_dump_upcall(struct rib_entry *, void *);
+static void network_dump_upcall(struct rib_entry *, void *);
+static void network_flush_upcall(struct rib_entry *, void *);
 
 voidrde_shutdown(void);
 int sa_cmp(struct bgpd_addr *, struct sockaddr *);
@@ -418,7 +419,7 @@ rde_dispatch_imsg_session(struct imsgbuf
imsg.hdr.peerid);
break;
}
-   peer_flush(peer, aid);
+   peer_flush(peer, aid, peer->staletime[aid]);
break;
case IMSG_SESSION_RESTARTED:
if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
@@ -434,7 +435,7 @@ rde_dispatch_imsg_session(struct imsgbuf
break;
}
if (peer->staletime[aid])
-   peer_flush(peer, aid);
+   peer_flush(peer, aid, peer->staletime[aid]);
break;
case IMSG_REFRESH:
if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
@@ -556,7 +557,10 @@ badnetdel:
log_warnx("rde_dispatch: wrong imsg len");
break;
}
-   prefix_network_clean(peerself);
+   if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
+   RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
+   NULL, NULL) == -1)
+   log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
break;
case IMSG_FILTER_SET:
if (imsg.hdr.len - IMSG_HEADER_SIZE !=
@@ -770,7 +774,7 @@ rde_dispatch_imsg_parent(struct imsgbuf 
memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
for (rid = 0; rid < rib_size; rid++) {
if (!rib_valid(rid))
-   break;
+   continue;
ribs[rid].state = RECONF_DELETE;
}
SIMPLEQ_INIT(>rde_prefixsets);
@@ -1411,7 +1415,7 @@ rde_update_update(struct rde_peer *peer,
 
for (i = RIB_LOC_START; i < rib_size; i++) {
if (!rib_valid(i))
-   break;
+   continue;
rde_filterstate_prep(, >aspath, in->nexthop,
in->nhflags);
/* input filter */
@@ -1443,7 +1447,7 @@ rde_update_withdraw(struct rde_peer *pee
 
for (i = RIB_LOC_START; i < rib_size; i++) {
if (!rib_valid(i))
-   break;
+   continue;
if (prefix_remove([i].rib, peer, prefix, prefixlen))
rde_update_log("withdraw", i, peer, NULL, prefix,
prefixlen);
@@ -2957,11 +2961,10 @@ rde_reload_done(void)
 static void