While investigating the cause of bgpd crashing for some people I noticed that both graceful restart and route refresh were broken. This diff fixes this by a) marking the adj-rib-out as stale when a peer goes stale during graceful restart and b) changing peer_dump to force all entries in the Adj-RIB-Out into pending UPDATES. Stale entries will be removed in the latter case.
Additionally in prefix_adjout_withdraw() the check if an element is on the update or withdraw tree was not correct which could lead to a call to RB_REMOVE for an element that is not part of the RB tree. Please test and review -- :wq Claudio Index: rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v retrieving revision 1.489 diff -u -p -r1.489 rde.c --- rde.c 27 Sep 2019 14:50:39 -0000 1.489 +++ rde.c 29 Oct 2019 06:52:10 -0000 @@ -2828,15 +2828,44 @@ rde_up_flush_upcall(struct prefix *p, vo } static void -rde_up_dump_done(void *ptr, u_int8_t aid) +rde_up_adjout_force_upcall(struct prefix *p, void *ptr) +{ + if (p->flags & PREFIX_FLAG_STALE) { + /* remove stale entries */ + prefix_adjout_destroy(p); + } else if (p->flags & PREFIX_FLAG_DEAD) { + /* ignore dead prefixes, they will go away soon */ + } else if ((p->flags & PREFIX_FLAG_MASK) == 0) { + /* put entries on the update queue if not allready on a queue */ + p->flags |= PREFIX_FLAG_UPDATE; + if (RB_INSERT(prefix_tree, &prefix_peer(p)->updates[p->pt->aid], + p) != NULL) + fatalx("%s: RB tree invariant violated", __func__); + } +} + +static void +rde_up_adjout_force_done(void *ptr, u_int8_t aid) { struct rde_peer *peer = ptr; + /* Adj-RIB-Out ready, unthrottle peer and inject EOR */ peer->throttled = 0; if (peer->capa.grestart.restart) prefix_add_eor(peer, aid); } +static void +rde_up_dump_done(void *ptr, u_int8_t aid) +{ + struct rde_peer *peer = ptr; + + /* force out all updates of Adj-RIB-Out for this peer */ + if (prefix_dump_new(peer, aid, 0, peer, rde_up_adjout_force_upcall, + rde_up_adjout_force_done, NULL) == -1) + fatal("%s: prefix_dump_new", __func__); +} + u_char queue_buf[4096]; int @@ -3387,16 +3416,17 @@ rde_softreconfig_in(struct rib_entry *re static void rde_softreconfig_out(struct rib_entry *re, void *bula) { - struct prefix *new = re->active; + struct prefix *p = re->active; struct rde_peer *peer; - if (new == NULL) + if (p == NULL) + /* no valid path for prefix */ return; LIST_FOREACH(peer, &peerlist, peer_l) { if (peer->loc_rib_id == re->rib_id && peer->reconf_out) /* Regenerate all updates. */ - up_generate_updates(out_rules, peer, new, new); + up_generate_updates(out_rules, peer, p, p); } } @@ -3668,6 +3698,22 @@ peer_adjout_clear_upcall(struct prefix * prefix_adjout_destroy(p); } +static void +peer_adjout_stale_upcall(struct prefix *p, void *arg) +{ + if (p->flags & PREFIX_FLAG_DEAD) { + return; + } else if (p->flags & PREFIX_FLAG_WITHDRAW) { + /* no need to keep stale withdraws, they miss all attributes */ + prefix_adjout_destroy(p); + return; + } else if (p->flags & PREFIX_FLAG_UPDATE) { + RB_REMOVE(prefix_tree, &prefix_peer(p)->updates[p->pt->aid], p); + p->flags &= ~PREFIX_FLAG_UPDATE; + } + p->flags |= PREFIX_FLAG_STALE; +} + void peer_up(u_int32_t id, struct session_up *sup) { @@ -3680,8 +3726,7 @@ peer_up(u_int32_t id, struct session_up return; } - if (peer->state != PEER_DOWN && peer->state != PEER_NONE && - peer->state != PEER_UP) { + if (peer->state == PEER_ERR) { /* * There is a race condition when doing PEER_ERR -> PEER_DOWN. * So just do a full reset of the peer here. @@ -3831,12 +3876,18 @@ peer_stale(u_int32_t id, u_int8_t aid) /* flush the now even staler routes out */ if (peer->staletime[aid]) peer_flush(peer, aid, peer->staletime[aid]); + peer->staletime[aid] = now = time(NULL); + peer->state = PEER_DOWN; + + /* mark Adj-RIB-Out stale for this peer */ + if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL, + peer_adjout_stale_upcall, NULL, NULL) == -1) + fatal("%s: prefix_dump_new", __func__); /* make sure new prefixes start on a higher timestamp */ - do { + while (now >= time(NULL)) sleep(1); - } while (now >= time(NULL)); } void @@ -3856,13 +3907,13 @@ peer_dump(u_int32_t id, u_int8_t aid) prefix_add_eor(peer, aid); } else if (peer->conf.export_type == EXPORT_DEFAULT_ROUTE) { up_generate_default(out_rules, peer, aid); - if (peer->capa.grestart.restart) - prefix_add_eor(peer, aid); + rde_up_dump_done(peer, aid); } else { if (rib_dump_new(peer->loc_rib_id, aid, RDE_RUNNER_ROUNDS, peer, rde_up_dump_upcall, rde_up_dump_done, NULL) == -1) fatal("%s: rib_dump_new", __func__); - peer->throttled = 1; /* XXX throttle peer until dump is done */ + /* throttle peer until dump is done */ + peer->throttled = 1; } } Index: rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v retrieving revision 1.226 diff -u -p -r1.226 rde.h --- rde.h 14 Aug 2019 11:57:21 -0000 1.226 +++ rde.h 24 Oct 2019 08:43:50 -0000 @@ -324,10 +324,11 @@ struct prefix { u_int8_t nhflags; u_int8_t eor; u_int8_t flags; -#define PREFIX_FLAG_WITHDRAW 0x01 /* queued for withdraw */ -#define PREFIX_FLAG_UPDATE 0x02 /* queued for update */ +#define PREFIX_FLAG_WITHDRAW 0x01 /* enqueued on withdraw queue */ +#define PREFIX_FLAG_UPDATE 0x02 /* enqueued on update queue */ #define PREFIX_FLAG_DEAD 0x04 /* locked but removed */ -#define PREFIX_FLAG_MASK 0x07 /* mask for the three prefix types */ +#define PREFIX_FLAG_STALE 0x08 /* stale entry (graceful reload) */ +#define PREFIX_FLAG_MASK 0x0f /* mask for the prefix types */ #define PREFIX_NEXTHOP_LINKED 0x40 /* prefix is linked onto nexthop list */ #define PREFIX_FLAG_LOCKED 0x80 /* locked by rib walker */ }; Index: rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v retrieving revision 1.207 diff -u -p -r1.207 rde_rib.c --- rde_rib.c 27 Sep 2019 14:50:39 -0000 1.207 +++ rde_rib.c 24 Oct 2019 10:58:11 -0000 @@ -1169,6 +1169,7 @@ prefix_adjout_update(struct rde_peer *pe /* nothing changed */ p->validation_state = vstate; p->lastchange = time(NULL); + p->flags &= ~PREFIX_FLAG_STALE; return 0; } @@ -1235,12 +1236,24 @@ int prefix_adjout_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen) { - struct prefix *p; + struct prefix *p; p = prefix_lookup(peer, prefix, prefixlen); if (p == NULL) /* Got a dummy withdrawn request. */ return (0); + /* already a withdraw, shortcut */ + if (p->flags & PREFIX_FLAG_WITHDRAW) { + p->lastchange = time(NULL); + p->flags &= ~PREFIX_FLAG_STALE; + return (0); + } + /* pending update just got withdrawn */ + if (p->flags & PREFIX_FLAG_UPDATE) + RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p); + /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */ + p->flags &= ~PREFIX_FLAG_MASK; + /* remove nexthop ref ... */ nexthop_unref(p->nexthop); p->nexthop = NULL; @@ -1260,15 +1273,6 @@ prefix_adjout_withdraw(struct rde_peer * p->lastchange = time(NULL); - if (p->flags & PREFIX_FLAG_MASK) { - struct prefix_tree *prefix_head; - /* p is a pending update or withdraw, remove first */ - prefix_head = p->flags & PREFIX_FLAG_UPDATE ? - &peer->updates[prefix->aid] : - &peer->withdraws[prefix->aid]; - RB_REMOVE(prefix_tree, prefix_head, p); - p->flags &= ~PREFIX_FLAG_MASK; - } p->flags |= PREFIX_FLAG_WITHDRAW; if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL) fatalx("%s: RB tree invariant violated", __func__); @@ -1308,7 +1312,7 @@ prefix_adjout_destroy(struct prefix *p) RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p); else if (p->flags & PREFIX_FLAG_UPDATE) RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p); - /* nothing needs to be done for PREFIX_FLAG_DEAD */ + /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */ p->flags &= ~PREFIX_FLAG_MASK;