While investigating the cause of bgpd crashing for some people I noticed
that both graceful restart and route refresh were broken.
This diff fixes this by a) marking the adj-rib-out as stale when a peer
goes stale during graceful restart and b) changing peer_dump to force all
entries in the Adj-RIB-Out into pending UPDATES. Stale entries will be
removed in the latter case.

Additionally in prefix_adjout_withdraw() the check if an element is on the
update or withdraw tree was not correct which could lead to a call to
RB_REMOVE for an element that is not part of the RB tree.

Please test and review
-- 
:wq Claudio

Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.489
diff -u -p -r1.489 rde.c
--- rde.c       27 Sep 2019 14:50:39 -0000      1.489
+++ rde.c       29 Oct 2019 06:52:10 -0000
@@ -2828,15 +2828,44 @@ rde_up_flush_upcall(struct prefix *p, vo
 }
 
 static void
-rde_up_dump_done(void *ptr, u_int8_t aid)
+rde_up_adjout_force_upcall(struct prefix *p, void *ptr)
+{
+       if (p->flags & PREFIX_FLAG_STALE) {
+               /* remove stale entries */
+               prefix_adjout_destroy(p);
+       } else if (p->flags & PREFIX_FLAG_DEAD) {
+               /* ignore dead prefixes, they will go away soon */
+       } else if ((p->flags & PREFIX_FLAG_MASK) == 0) {
+               /* put entries on the update queue if not allready on a queue */
+               p->flags |= PREFIX_FLAG_UPDATE;
+               if (RB_INSERT(prefix_tree, &prefix_peer(p)->updates[p->pt->aid],
+                   p) != NULL)
+                       fatalx("%s: RB tree invariant violated", __func__);
+       }
+}
+
+static void
+rde_up_adjout_force_done(void *ptr, u_int8_t aid)
 {
        struct rde_peer         *peer = ptr;
 
+       /* Adj-RIB-Out ready, unthrottle peer and inject EOR */
        peer->throttled = 0;
        if (peer->capa.grestart.restart)
                prefix_add_eor(peer, aid);
 }
 
+static void
+rde_up_dump_done(void *ptr, u_int8_t aid)
+{
+       struct rde_peer         *peer = ptr;
+
+       /* force out all updates of Adj-RIB-Out for this peer */
+       if (prefix_dump_new(peer, aid, 0, peer, rde_up_adjout_force_upcall,
+           rde_up_adjout_force_done, NULL) == -1)
+               fatal("%s: prefix_dump_new", __func__);
+}
+
 u_char queue_buf[4096];
 
 int
@@ -3387,16 +3416,17 @@ rde_softreconfig_in(struct rib_entry *re
 static void
 rde_softreconfig_out(struct rib_entry *re, void *bula)
 {
-       struct prefix           *new = re->active;
+       struct prefix           *p = re->active;
        struct rde_peer         *peer;
 
-       if (new == NULL)
+       if (p == NULL)
+               /* no valid path for prefix */
                return;
 
        LIST_FOREACH(peer, &peerlist, peer_l) {
                if (peer->loc_rib_id == re->rib_id && peer->reconf_out)
                        /* Regenerate all updates. */
-                       up_generate_updates(out_rules, peer, new, new);
+                       up_generate_updates(out_rules, peer, p, p);
        }
 }
 
@@ -3668,6 +3698,22 @@ peer_adjout_clear_upcall(struct prefix *
        prefix_adjout_destroy(p);
 }
 
+static void
+peer_adjout_stale_upcall(struct prefix *p, void *arg)
+{
+       if (p->flags & PREFIX_FLAG_DEAD) {
+               return;
+       } else if (p->flags & PREFIX_FLAG_WITHDRAW) {
+               /* no need to keep stale withdraws, they miss all attributes */
+               prefix_adjout_destroy(p);
+               return;
+       } else if (p->flags & PREFIX_FLAG_UPDATE) {
+               RB_REMOVE(prefix_tree, &prefix_peer(p)->updates[p->pt->aid], p);
+               p->flags &= ~PREFIX_FLAG_UPDATE;
+       }
+       p->flags |= PREFIX_FLAG_STALE;
+}
+
 void
 peer_up(u_int32_t id, struct session_up *sup)
 {
@@ -3680,8 +3726,7 @@ peer_up(u_int32_t id, struct session_up 
                return;
        }
 
-       if (peer->state != PEER_DOWN && peer->state != PEER_NONE &&
-           peer->state != PEER_UP) {
+       if (peer->state == PEER_ERR) {
                /*
                 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
                 * So just do a full reset of the peer here.
@@ -3831,12 +3876,18 @@ peer_stale(u_int32_t id, u_int8_t aid)
        /* flush the now even staler routes out */
        if (peer->staletime[aid])
                peer_flush(peer, aid, peer->staletime[aid]);
+
        peer->staletime[aid] = now = time(NULL);
+       peer->state = PEER_DOWN;
+
+       /* mark Adj-RIB-Out stale for this peer */
+       if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
+           peer_adjout_stale_upcall, NULL, NULL) == -1)
+               fatal("%s: prefix_dump_new", __func__);
 
        /* make sure new prefixes start on a higher timestamp */
-       do {
+       while (now >= time(NULL))
                sleep(1);
-       } while (now >= time(NULL));
 }
 
 void
@@ -3856,13 +3907,13 @@ peer_dump(u_int32_t id, u_int8_t aid)
                        prefix_add_eor(peer, aid);
        } else if (peer->conf.export_type == EXPORT_DEFAULT_ROUTE) {
                up_generate_default(out_rules, peer, aid);
-               if (peer->capa.grestart.restart)
-                       prefix_add_eor(peer, aid);
+               rde_up_dump_done(peer, aid);
        } else {
                if (rib_dump_new(peer->loc_rib_id, aid, RDE_RUNNER_ROUNDS, peer,
                    rde_up_dump_upcall, rde_up_dump_done, NULL) == -1)
                        fatal("%s: rib_dump_new", __func__);
-               peer->throttled = 1; /* XXX throttle peer until dump is done */
+               /* throttle peer until dump is done */
+               peer->throttled = 1;
        }
 }
 
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.226
diff -u -p -r1.226 rde.h
--- rde.h       14 Aug 2019 11:57:21 -0000      1.226
+++ rde.h       24 Oct 2019 08:43:50 -0000
@@ -324,10 +324,11 @@ struct prefix {
        u_int8_t                         nhflags;
        u_int8_t                         eor;
        u_int8_t                         flags;
-#define        PREFIX_FLAG_WITHDRAW    0x01    /* queued for withdraw */
-#define        PREFIX_FLAG_UPDATE      0x02    /* queued for update */
+#define        PREFIX_FLAG_WITHDRAW    0x01    /* enqueued on withdraw queue */
+#define        PREFIX_FLAG_UPDATE      0x02    /* enqueued on update queue */
 #define        PREFIX_FLAG_DEAD        0x04    /* locked but removed */
-#define        PREFIX_FLAG_MASK        0x07    /* mask for the three prefix 
types */
+#define        PREFIX_FLAG_STALE       0x08    /* stale entry (graceful 
reload) */
+#define        PREFIX_FLAG_MASK        0x0f    /* mask for the prefix types */
 #define        PREFIX_NEXTHOP_LINKED   0x40    /* prefix is linked onto 
nexthop list */
 #define        PREFIX_FLAG_LOCKED      0x80    /* locked by rib walker */
 };
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.207
diff -u -p -r1.207 rde_rib.c
--- rde_rib.c   27 Sep 2019 14:50:39 -0000      1.207
+++ rde_rib.c   24 Oct 2019 10:58:11 -0000
@@ -1169,6 +1169,7 @@ prefix_adjout_update(struct rde_peer *pe
                                /* nothing changed */
                                p->validation_state = vstate;
                                p->lastchange = time(NULL);
+                               p->flags &= ~PREFIX_FLAG_STALE;
                                return 0;
                        }
 
@@ -1235,12 +1236,24 @@ int
 prefix_adjout_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
     int prefixlen)
 {
-       struct prefix           *p;
+       struct prefix *p;
 
        p = prefix_lookup(peer, prefix, prefixlen);
        if (p == NULL)          /* Got a dummy withdrawn request. */
                return (0);
 
+       /* already a withdraw, shortcut */
+       if (p->flags & PREFIX_FLAG_WITHDRAW) {
+               p->lastchange = time(NULL);
+               p->flags &= ~PREFIX_FLAG_STALE;
+               return (0);
+       }
+       /* pending update just got withdrawn */
+       if (p->flags & PREFIX_FLAG_UPDATE)
+               RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
+       /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */
+       p->flags &= ~PREFIX_FLAG_MASK;
+
        /* remove nexthop ref ... */
        nexthop_unref(p->nexthop);
        p->nexthop = NULL;
@@ -1260,15 +1273,6 @@ prefix_adjout_withdraw(struct rde_peer *
 
        p->lastchange = time(NULL);
 
-       if (p->flags & PREFIX_FLAG_MASK) {
-               struct prefix_tree *prefix_head;
-               /* p is a pending update or withdraw, remove first */
-               prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
-                   &peer->updates[prefix->aid] :
-                   &peer->withdraws[prefix->aid];
-               RB_REMOVE(prefix_tree, prefix_head, p);
-               p->flags &= ~PREFIX_FLAG_MASK;
-       }
        p->flags |= PREFIX_FLAG_WITHDRAW;
        if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL)
                fatalx("%s: RB tree invariant violated", __func__);
@@ -1308,7 +1312,7 @@ prefix_adjout_destroy(struct prefix *p)
                RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p);
        else if (p->flags & PREFIX_FLAG_UPDATE)
                RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
-       /* nothing needs to be done for PREFIX_FLAG_DEAD */
+       /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */
        p->flags &= ~PREFIX_FLAG_MASK;
 
 

Reply via email to