On Thu, Oct 25, 2018 at 08:51:30AM +0200, Claudio Jeker wrote:
> Next step on my quest to make the RIB code better.
> This changes the following things:
> - network_flush is now using rib_dump_new to walk the Adj-RIB-In and
> remove all dynamically added announcements
> - peer_flush got generalized and is now used also in peer_down.
> It also uses a rib_dump_new call to walk the Adj-RIB-In and remove
> all prefixes from a peer but this is done synchronous for now.
> - peer_flush is now working correctly for AID_UNSPEC.
> - Change the rib_valid check to continue instead of break out of the loop.
> The rib table can have holes so the loop needs to continue.
>
> This removes the last three places that use the peer -> path -> prefix
> tailqs so the next step is to remove these and make rde_aspath just an
> object that is referenced by prefixes. After that adding a proper
> Adj-RIB-Out should be possible.
>
> Running with this on production :)
I will commit this on Monday unless someone objects or I get some OKs :)
--
:wq Claudio
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.440
diff -u -p -r1.440 rde.c
--- rde.c 24 Oct 2018 08:26:37 -0000 1.440
+++ rde.c 25 Oct 2018 06:34:32 -0000
@@ -97,7 +97,7 @@ struct rde_peer *peer_add(u_int32_t, str
struct rde_peer *peer_get(u_int32_t);
void peer_up(u_int32_t, struct session_up *);
void peer_down(u_int32_t);
-void peer_flush(struct rde_peer *, u_int8_t);
+void peer_flush(struct rde_peer *, u_int8_t, time_t);
void peer_stale(u_int32_t, u_int8_t);
void peer_dump(u_int32_t, u_int8_t);
static void peer_recv_eor(struct rde_peer *, u_int8_t);
@@ -105,7 +105,8 @@ static void peer_send_eor(struct rde_pe
void network_add(struct network_config *, int);
void network_delete(struct network_config *, int);
-void network_dump_upcall(struct rib_entry *, void *);
+static void network_dump_upcall(struct rib_entry *, void *);
+static void network_flush_upcall(struct rib_entry *, void *);
void rde_shutdown(void);
int sa_cmp(struct bgpd_addr *, struct sockaddr *);
@@ -418,7 +419,7 @@ rde_dispatch_imsg_session(struct imsgbuf
imsg.hdr.peerid);
break;
}
- peer_flush(peer, aid);
+ peer_flush(peer, aid, peer->staletime[aid]);
break;
case IMSG_SESSION_RESTARTED:
if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
@@ -434,7 +435,7 @@ rde_dispatch_imsg_session(struct imsgbuf
break;
}
if (peer->staletime[aid])
- peer_flush(peer, aid);
+ peer_flush(peer, aid, peer->staletime[aid]);
break;
case IMSG_REFRESH:
if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
@@ -556,7 +557,10 @@ badnetdel:
log_warnx("rde_dispatch: wrong imsg len");
break;
}
- prefix_network_clean(peerself);
+ if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
+ RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
+ NULL, NULL) == -1)
+ log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
break;
case IMSG_FILTER_SET:
if (imsg.hdr.len - IMSG_HEADER_SIZE !=
@@ -770,7 +774,7 @@ rde_dispatch_imsg_parent(struct imsgbuf
memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
for (rid = 0; rid < rib_size; rid++) {
if (!rib_valid(rid))
- break;
+ continue;
ribs[rid].state = RECONF_DELETE;
}
SIMPLEQ_INIT(&nconf->rde_prefixsets);
@@ -1411,7 +1415,7 @@ rde_update_update(struct rde_peer *peer,
for (i = RIB_LOC_START; i < rib_size; i++) {
if (!rib_valid(i))
- break;
+ continue;
rde_filterstate_prep(&state, &in->aspath, in->nexthop,
in->nhflags);
/* input filter */
@@ -1443,7 +1447,7 @@ rde_update_withdraw(struct rde_peer *pee
for (i = RIB_LOC_START; i < rib_size; i++) {
if (!rib_valid(i))
- break;
+ continue;
if (prefix_remove(&ribs[i].rib, peer, prefix, prefixlen))
rde_update_log("withdraw", i, peer, NULL, prefix,
prefixlen);
@@ -2957,11 +2961,10 @@ rde_reload_done(void)
static void
rde_softreconfig_in_done(void *arg, u_int8_t aid)
{
- struct rib_desc *rd = arg;
- struct rde_peer *peer;
- u_int16_t rid;
+ struct rde_peer *peer;
+ u_int16_t rid;
- if (rd != NULL) {
+ if (arg != NULL) {
softreconfig--;
/* one guy done but other dumps are still running */
if (softreconfig > 0)
@@ -3372,10 +3375,7 @@ peer_up(u_int32_t id, struct session_up
* There is a race condition when doing PEER_ERR -> PEER_DOWN.
* So just do a full reset of the peer here.
*/
- for (i = 0; i < AID_MAX; i++) {
- peer->staletime[i] = 0;
- peer_flush(peer, i);
- }
+ peer_flush(peer, AID_UNSPEC, 0);
up_down(peer);
peer->prefix_cnt = 0;
peer->state = PEER_DOWN;
@@ -3412,7 +3412,6 @@ void
peer_down(u_int32_t id)
{
struct rde_peer *peer;
- struct rde_aspath *asp, *nasp;
peer = peer_get(id);
if (peer == NULL) {
@@ -3425,49 +3424,85 @@ peer_down(u_int32_t id)
/* stop all pending dumps which may depend on this peer */
rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall);
- /* walk through per peer RIB list and remove all prefixes. */
- for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
- nasp = TAILQ_NEXT(asp, peer_l);
- path_remove(asp);
- }
- TAILQ_INIT(&peer->path_h);
- peer->prefix_cnt = 0;
+ peer_flush(peer, AID_UNSPEC, 0);
- /* Deletions are performed in path_remove() */
- rde_send_pftable_commit();
+ peer->prefix_cnt = 0;
LIST_REMOVE(peer, hash_l);
LIST_REMOVE(peer, peer_l);
free(peer);
}
+struct peer_flush {
+ struct rde_peer *peer;
+ time_t staletime;
+};
+
+static void
+peer_flush_upcall(struct rib_entry *re, void *arg)
+{
+ struct rde_peer *peer = ((struct peer_flush *)arg)->peer;
+ struct rde_aspath *asp;
+ struct bgpd_addr addr;
+ struct prefix *p, *np, *rp;
+ time_t staletime = ((struct peer_flush *)arg)->staletime;
+ u_int32_t i;
+ u_int8_t prefixlen;
+
+ pt_getaddr(re->prefix, &addr);
+ prefixlen = re->prefix->prefixlen;
+ LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+ if (peer != prefix_peer(p))
+ continue;
+ if (staletime && p->lastchange > staletime)
+ continue;
+
+ for (i = RIB_LOC_START; i < rib_size; i++) {
+ if (!rib_valid(i))
+ continue;
+ rp = prefix_get(&ribs[i].rib, peer, &addr, prefixlen);
+ if (rp) {
+ asp = prefix_aspath(rp);
+ if (asp->pftableid)
+ rde_send_pftable(asp->pftableid, &addr,
+ prefixlen, 1);
+
+ prefix_destroy(rp);
+ rde_update_log("flush", i, peer, NULL,
+ &addr, prefixlen);
+ }
+ }
+
+ prefix_destroy(p);
+ peer->prefix_cnt--;
+ }
+}
+
/*
* Flush all routes older then staletime. If staletime is 0 all routes will
* be flushed.
*/
void
-peer_flush(struct rde_peer *peer, u_int8_t aid)
+peer_flush(struct rde_peer *peer, u_int8_t aid, time_t staletime)
{
- struct rde_aspath *asp, *nasp;
- u_int32_t rprefixes;
+ struct peer_flush pf = { peer, staletime };
- rprefixes = 0;
- /* walk through per peer RIB list and remove all stale prefixes. */
- for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
- nasp = TAILQ_NEXT(asp, peer_l);
- rprefixes += path_remove_stale(asp, aid, peer->staletime[aid]);
- }
+ /* this dump must run synchronous, too much depends on that right now */
+ if (rib_dump_new(RIB_ADJ_IN, aid, 0, &pf, peer_flush_upcall,
+ NULL, NULL) == -1)
+ fatal("%s: rib_dump_new", __func__);
/* Deletions are performed in path_remove() */
rde_send_pftable_commit();
/* flushed no need to keep staletime */
- peer->staletime[aid] = 0;
-
- if (peer->prefix_cnt > rprefixes)
- peer->prefix_cnt -= rprefixes;
- else
- peer->prefix_cnt = 0;
+ if (aid == AID_UNSPEC) {
+ u_int8_t i;
+ for (i = 0; i < AID_MAX; i++)
+ peer->staletime[i] = 0;
+ } else {
+ peer->staletime[aid] = 0;
+ }
}
void
@@ -3484,7 +3519,7 @@ peer_stale(u_int32_t id, u_int8_t aid)
/* flush the now even staler routes out */
if (peer->staletime[aid])
- peer_flush(peer, aid);
+ peer_flush(peer, aid, peer->staletime[aid]);
peer->staletime[aid] = now = time(NULL);
/* make sure new prefixes start on a higher timestamp */
@@ -3661,7 +3696,7 @@ network_add(struct network_config *nc, i
peerself->prefix_cnt++;
for (i = RIB_LOC_START; i < rib_size; i++) {
if (!rib_valid(i))
- break;
+ continue;
rde_update_log("announce", i, peerself,
state.nexthop ? &state.nexthop->exit_nexthop : NULL,
&nc->prefix, nc->prefixlen);
@@ -3713,19 +3748,18 @@ network_delete(struct network_config *nc
for (i = RIB_LOC_START; i < rib_size; i++) {
if (!rib_valid(i))
- break;
+ continue;
if (prefix_remove(&ribs[i].rib, peerself, &nc->prefix,
nc->prefixlen))
rde_update_log("withdraw announce", i, peerself,
NULL, &nc->prefix, nc->prefixlen);
-
}
if (prefix_remove(&ribs[RIB_ADJ_IN].rib, peerself, &nc->prefix,
nc->prefixlen))
peerself->prefix_cnt--;
}
-void
+static void
network_dump_upcall(struct rib_entry *re, void *ptr)
{
struct prefix *p;
@@ -3759,6 +3793,41 @@ network_dump_upcall(struct rib_entry *re
}
}
+static void
+network_flush_upcall(struct rib_entry *re, void *ptr)
+{
+ struct rde_peer *peer = ptr;
+ struct rde_aspath *asp;
+ struct bgpd_addr addr;
+ struct prefix *p, *np, *rp;
+ u_int32_t i;
+ u_int8_t prefixlen;
+
+ pt_getaddr(re->prefix, &addr);
+ prefixlen = re->prefix->prefixlen;
+ LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+ if (prefix_peer(p) != peer)
+ continue;
+ asp = prefix_aspath(p);
+ if ((asp->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC)
+ continue;
+
+ for (i = RIB_LOC_START; i < rib_size; i++) {
+ if (!rib_valid(i))
+ continue;
+ rp = prefix_get(&ribs[i].rib, peer, &addr, prefixlen);
+ if (rp) {
+ prefix_destroy(rp);
+ rde_update_log("flush announce", i, peer,
+ NULL, &addr, prefixlen);
+ }
+ }
+
+ prefix_destroy(p);
+ peer->prefix_cnt--;
+ }
+}
+
/* clean up */
void
rde_shutdown(void)
@@ -3782,7 +3851,7 @@ rde_shutdown(void)
filterlist_free(out_rules);
for (i = 0; i < rib_size; i++) {
if (!rib_valid(i))
- break;
+ continue;
filterlist_free(ribs[i].in_rules);
}
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.198
diff -u -p -r1.198 rde.h
--- rde.h 24 Oct 2018 08:26:37 -0000 1.198
+++ rde.h 24 Oct 2018 21:27:25 -0000
@@ -477,7 +477,6 @@ void path_hash_stats(struct rde_hashst
int path_update(struct rib *, struct rde_peer *,
struct filterstate *, struct bgpd_addr *, int, u_int8_t);
int path_compare(struct rde_aspath *, struct rde_aspath *);
-void path_remove(struct rde_aspath *);
u_int32_t path_remove_stale(struct rde_aspath *, u_int8_t, time_t);
void path_destroy(struct rde_aspath *);
int path_empty(struct rde_aspath *);
@@ -498,7 +497,6 @@ struct prefix *prefix_bypeer(struct rib_
void prefix_updateall(struct prefix *, enum nexthop_state,
enum nexthop_state);
void prefix_destroy(struct prefix *);
-void prefix_network_clean(struct rde_peer *);
void prefix_relink(struct prefix *, struct rde_aspath *, int);
static inline struct rde_peer *
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.180
diff -u -p -r1.180 rde_rib.c
--- rde_rib.c 24 Oct 2018 08:26:37 -0000 1.180
+++ rde_rib.c 24 Oct 2018 08:44:54 -0000
@@ -339,8 +339,8 @@ rib_restart(struct rib_context *ctx)
static void
rib_dump_r(struct rib_context *ctx)
{
+ struct rib_entry *re, *next;
struct rib *rib;
- struct rib_entry *re;
unsigned int i;
rib = rib_byid(ctx->ctx_rib_id);
@@ -352,7 +352,8 @@ rib_dump_r(struct rib_context *ctx)
else
re = rib_restart(ctx);
- for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) {
+ for (i = 0; re != NULL; re = next) {
+ next = RB_NEXT(rib_tree, unused, re);
if (re->rib_id != ctx->ctx_rib_id)
fatalx("%s: Unexpected RIB %u != %u.", __func__,
re->rib_id, ctx->ctx_rib_id);
@@ -435,6 +436,10 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
+ /* requested a sync traversal */
+ if (count == 0)
+ rib_dump_r(ctx);
+
return 0;
}
@@ -664,65 +669,6 @@ path_lookup(struct rde_aspath *aspath, s
return (NULL);
}
-void
-path_remove(struct rde_aspath *asp)
-{
- struct prefix *p, *np;
-
- for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = np) {
- np = TAILQ_NEXT(p, path_l);
- if (asp->pftableid) {
- struct bgpd_addr addr;
-
- pt_getaddr(p->re->prefix, &addr);
- /* Commit is done in peer_down() */
- rde_send_pftable(prefix_aspath(p)->pftableid, &addr,
- p->re->prefix->prefixlen, 1);
- }
- prefix_destroy(p);
- }
-}
-
-/* remove all stale routes or if staletime is 0 remove all routes for
- a specified AID. */
-u_int32_t
-path_remove_stale(struct rde_aspath *asp, u_int8_t aid, time_t staletime)
-{
- struct prefix *p, *np;
- u_int32_t rprefixes;
-
- rprefixes=0;
- /*
- * This is called when a session flapped and during that time
- * the pending updates for that peer are getting reset.
- */
- for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = np) {
- np = TAILQ_NEXT(p, path_l);
- if (p->re->prefix->aid != aid)
- continue;
-
- if (staletime && p->lastchange > staletime)
- continue;
-
- if (asp->pftableid) {
- struct bgpd_addr addr;
-
- pt_getaddr(p->re->prefix, &addr);
- /* Commit is done in peer_flush() */
- rde_send_pftable(prefix_aspath(p)->pftableid, &addr,
- p->re->prefix->prefixlen, 1);
- }
-
- /* only count Adj-RIB-In */
- if (re_rib(p->re) == &ribs[RIB_ADJ_IN].rib)
- rprefixes++;
-
- prefix_destroy(p);
- }
- return (rprefixes);
-}
-
-
/*
* This function can only called when all prefix have been removed first.
* Normally this happens directly out of the prefix removal functions.
@@ -1144,29 +1090,6 @@ prefix_destroy(struct prefix *p)
if (path_empty(asp))
path_destroy(asp);
-}
-
-/*
- * helper function to clean up the dynamically added networks
- */
-void
-prefix_network_clean(struct rde_peer *peer)
-{
- struct rde_aspath *asp, *xasp;
- struct prefix *p, *xp;
-
- for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = xasp) {
- xasp = TAILQ_NEXT(asp, peer_l);
- if ((asp->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC)
- continue;
- for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = xp) {
- xp = TAILQ_NEXT(p, path_l);
- prefix_unlink(p);
- prefix_free(p);
- }
- if (path_empty(asp))
- path_destroy(asp);
- }
}
/*