This is a major diff which replaces some old code in bgpd.
Using the Adj-RIB-Out it is possible to replace the old code, which generated
updates and queued them in two RB trees, with an RB tree of prefix
elements. The big benefit of this is a lot less memory pressure during
large operations (e.g. when starting the daemon).

I have been running this on some production system for a while with no
issues but since this change is rather big I would prefer more testing and
also review of the diff.

The change from before is the introduction of an RB tree element in struct
prefix which allows linking prefixes from the Adj-RIB-Out onto update or
withdraw trees. The trees are sorted by AS path, nexthop, flags and
finally prefix. This way the grouping of prefixes still works without
consuming any additional memory to get there. This is the reason why
the spikes of memory usage are now lower. Additionally, until now the
malloc bucket for struct prefix was also used by the update trees, which
caused a lot of fragmentation in them and resulted in excessive memory
usage (compared to the amount reported by show rib mem).

The behaviour should be the same and it should hopefully also solve the
issue with sending out all EoR records on MP sessions (IP + IPv6).

-- 
:wq Claudio

Index: mrt.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
retrieving revision 1.88
diff -u -p -r1.88 mrt.c
--- mrt.c       30 Dec 2018 13:53:07 -0000      1.88
+++ mrt.c       31 Dec 2018 15:20:03 -0000
@@ -510,6 +510,9 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
                struct bgpd_addr        *nh;
                struct ibuf             *tbuf;
 
+               if (prefix_aspath(p) == NULL)
+                       continue;
+
                nexthop = prefix_nexthop(p);
                if (nexthop == NULL) {
                        bzero(&addr, sizeof(struct bgpd_addr));
@@ -672,6 +675,8 @@ mrt_dump_upcall(struct rib_entry *re, vo
         * be dumped p should be set to p = pt->active.
         */
        LIST_FOREACH(p, &re->prefix_h, rib_l) {
+               if (prefix_aspath(p) == NULL)
+                       continue;
                if (mrtbuf->type == MRT_TABLE_DUMP)
                        mrt_dump_entry(mrtbuf, p, mrtbuf->seqnum++,
                            prefix_peer(p));
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.458
diff -u -p -r1.458 rde.c
--- rde.c       31 Dec 2018 08:53:09 -0000      1.458
+++ rde.c       31 Dec 2018 15:20:03 -0000
@@ -2260,6 +2260,8 @@ rde_dump_filter(struct prefix *p, struct
                return;
 
        asp = prefix_aspath(p);
+       if (asp == NULL)
+               return;
        if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
                return;
        if ((req->flags & F_CTL_INVALID) &&
@@ -2651,8 +2653,9 @@ rde_up_dump_done(void *ptr, u_int8_t aid
 {
        struct rde_peer         *peer = ptr;
 
+       peer->throttled = 0;
        if (peer->capa.grestart.restart)
-               up_generate_marker(peer, aid);
+               prefix_add_eor(peer, aid);
 }
 
 u_char queue_buf[4096];
@@ -2674,8 +2677,8 @@ rde_update_queue_pending(void)
                if (peer->throttled)
                        continue;
                for (aid = 0; aid < AID_MAX; aid++) {
-                       if (!TAILQ_EMPTY(&peer->updates[aid]) ||
-                           !TAILQ_EMPTY(&peer->withdraws[aid]))
+                       if (!RB_EMPTY(&peer->updates[aid]) ||
+                           !RB_EMPTY(&peer->withdraws[aid]))
                                return 1;
                }
        }
@@ -2687,7 +2690,7 @@ rde_update_queue_runner(void)
 {
        struct rde_peer         *peer;
        int                      r, sent, max = RDE_RUNNER_ROUNDS, eor;
-       u_int16_t                len, wd_len, wpos;
+       u_int16_t                len, wpos;
 
        len = sizeof(queue_buf) - MSGSIZE_HEADER;
        do {
@@ -2700,48 +2703,33 @@ rde_update_queue_runner(void)
                        if (peer->throttled)
                                continue;
                        eor = 0;
-                       /* first withdraws */
-                       wpos = 2; /* reserve space for the length field */
-                       r = up_dump_prefix(queue_buf + wpos, len - wpos - 2,
-                           &peer->withdraws[AID_INET], peer, 1);
-                       wd_len = r;
-                       /* write withdraws length filed */
-                       wd_len = htons(wd_len);
-                       memcpy(queue_buf, &wd_len, 2);
+                       wpos = 0;
+                       /* first withdraws, save 2 bytes for path attributes */
+                       if ((r = up_dump_withdraws(queue_buf, len - 2, peer,
+                           AID_INET)) == -1)
+                               continue;
                        wpos += r;
 
-                       /* now bgp path attributes */
-                       r = up_dump_attrnlri(queue_buf + wpos, len - wpos,
-                           peer);
-                       switch (r) {
-                       case -1:
+                       /* now bgp path attributes unless it is the EoR mark */
+                       if (up_is_eor(peer, AID_INET)) {
                                eor = 1;
-                               if (wd_len == 0) {
-                                       /* no withdraws queued just send EoR */
-                                       peer_send_eor(peer, AID_INET);
-                                       continue;
-                               }
-                               break;
-                       case 2:
-                               if (wd_len == 0) {
-                                       /*
-                                        * No packet to send. No withdraws and
-                                        * no path attributes. Skip.
-                                        */
-                                       continue;
-                               }
-                               /* FALLTHROUGH */
-                       default:
+                               bzero(queue_buf + wpos, 2);
+                               wpos += 2;
+                       } else {
+                               r = up_dump_attrnlri(queue_buf + wpos,
+                                   len - wpos, peer);
                                wpos += r;
-                               break;
                        }
 
                        /* finally send message to SE */
-                       if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
-                           0, -1, queue_buf, wpos) == -1)
-                               fatal("%s %d imsg_compose error", __func__,
-                                   __LINE__);
-                       sent++;
+                       if (wpos > 4) {
+                               if (imsg_compose(ibuf_se, IMSG_UPDATE,
+                                   peer->conf.id, 0, -1, queue_buf,
+                                   wpos) == -1)
+                                       fatal("%s %d imsg_compose error",
+                                           __func__, __LINE__);
+                               sent++;
+                       }
                        if (eor)
                                peer_send_eor(peer, AID_INET);
                }
@@ -2753,7 +2741,6 @@ void
 rde_update6_queue_runner(u_int8_t aid)
 {
        struct rde_peer         *peer;
-       u_char                  *b;
        int                      r, sent, max = RDE_RUNNER_ROUNDS / 2;
        u_int16_t                len;
 
@@ -2768,13 +2755,12 @@ rde_update6_queue_runner(u_int8_t aid)
                        if (peer->throttled)
                                continue;
                        len = sizeof(queue_buf) - MSGSIZE_HEADER;
-                       b = up_dump_mp_unreach(queue_buf, &len, peer, aid);
-
-                       if (b == NULL)
+                       r = up_dump_mp_unreach(queue_buf, len, peer, aid);
+                       if (r == -1)
                                continue;
                        /* finally send message to SE */
                        if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
-                           0, -1, b, len) == -1)
+                           0, -1, queue_buf, r) == -1)
                                fatal("%s %d imsg_compose error", __func__,
                                    __LINE__);
                        sent++;
@@ -2794,21 +2780,17 @@ rde_update6_queue_runner(u_int8_t aid)
                        if (peer->throttled)
                                continue;
                        len = sizeof(queue_buf) - MSGSIZE_HEADER;
-                       r = up_dump_mp_reach(queue_buf, &len, peer, aid);
-                       switch (r) {
-                       case -2:
-                               continue;
-                       case -1:
+                       if (up_is_eor(peer, aid)) {
                                peer_send_eor(peer, aid);
                                continue;
-                       default:
-                               b = queue_buf + r;
-                               break;
                        }
+                       r = up_dump_mp_reach(queue_buf, len, peer, aid);
+                       if (r == 0)
+                               continue;
 
                        /* finally send message to SE */
                        if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
-                           0, -1, b, len) == -1)
+                           0, -1, queue_buf, r) == -1)
                                fatal("%s %d imsg_compose error", __func__,
                                    __LINE__);
                        sent++;
@@ -3333,7 +3315,6 @@ peer_add(u_int32_t id, struct peer_confi
        if (peer->loc_rib_id == RIB_NOTFOUND)
                fatalx("King Bula's new peer met an unknown RIB");
        peer->state = PEER_NONE;
-       up_init(peer);
 
        head = PEER_HASH(id);
 
@@ -3396,6 +3377,20 @@ peer_localaddrs(struct rde_peer *peer, s
        return (0);
 }
 
+static void
+peer_adjout_flush_upcall(struct rib_entry *re, void *arg)
+{
+       struct rde_peer *peer = arg;
+       struct prefix *p, *np;
+
+       LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+               if (peer != prefix_peer(p))
+                       continue;
+               prefix_destroy(p);
+               break;  /* optimization, only one match per peer possible */
+       }
+}
+
 void
 peer_up(u_int32_t id, struct session_up *sup)
 {
@@ -3414,8 +3409,10 @@ peer_up(u_int32_t id, struct session_up 
                 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
                 * So just do a full reset of the peer here.
                 */
+               if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer,
+                   peer_adjout_flush_upcall, NULL, NULL) == -1)
+                       fatal("%s: rib_dump_new", __func__);
                peer_flush(peer, AID_UNSPEC, 0);
-               up_down(peer);
                peer->prefix_cnt = 0;
                peer->state = PEER_DOWN;
        }
@@ -3432,7 +3429,6 @@ peer_up(u_int32_t id, struct session_up 
        }
 
        peer->state = PEER_UP;
-       up_init(peer);
 
        if (rde_noevaluate())
                /*
@@ -3447,20 +3443,6 @@ peer_up(u_int32_t id, struct session_up 
        }
 }
 
-static void
-peer_adjout_flush_upcall(struct rib_entry *re, void *arg)
-{
-       struct rde_peer *peer = arg;
-       struct prefix *p, *np;
-
-       LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
-               if (peer != prefix_peer(p))
-                       continue;
-               prefix_destroy(p);
-               break;  /* optimization, only one match per peer possible */
-       }
-}
-
 void
 peer_down(u_int32_t id)
 {
@@ -3481,8 +3463,6 @@ peer_down(u_int32_t id)
            peer_adjout_flush_upcall, NULL, NULL) == -1)
                fatal("%s: rib_dump_new", __func__);
 
-       up_down(peer);
-
        peer_flush(peer, AID_UNSPEC, 0);
 
        peer->prefix_cnt = 0;
@@ -3602,15 +3582,16 @@ peer_dump(u_int32_t id, u_int8_t aid)
        if (peer->conf.export_type == EXPORT_NONE) {
                /* nothing to send apart from the marker */
                if (peer->capa.grestart.restart)
-                       up_generate_marker(peer, aid);
+                       prefix_add_eor(peer, aid);
        } else if (peer->conf.export_type == EXPORT_DEFAULT_ROUTE) {
                up_generate_default(out_rules, peer, aid);
                if (peer->capa.grestart.restart)
-                       up_generate_marker(peer, aid);
+                       prefix_add_eor(peer, aid);
        } else {
                if (rib_dump_new(peer->loc_rib_id, aid, RDE_RUNNER_ROUNDS, peer,
                    rde_up_dump_upcall, rde_up_dump_done, NULL) == -1)
                        fatal("%s: rib_dump_new", __func__);
+               peer->throttled = 1; /* XXX throttle peer until dump is done */
        }
 }
 
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.207
diff -u -p -r1.207 rde.h
--- rde.h       30 Dec 2018 13:53:07 -0000      1.207
+++ rde.h       31 Dec 2018 15:20:03 -0000
@@ -77,11 +77,7 @@ LIST_HEAD(rde_peer_head, rde_peer);
 LIST_HEAD(aspath_list, aspath);
 LIST_HEAD(attr_list, attr);
 LIST_HEAD(aspath_head, rde_aspath);
-RB_HEAD(uptree_prefix, update_prefix);
-RB_HEAD(uptree_attr, update_attr);
-
-TAILQ_HEAD(uplist_prefix, update_prefix);
-TAILQ_HEAD(uplist_attr, update_attr);
+RB_HEAD(prefix_tree, prefix);
 
 struct rde_peer {
        LIST_ENTRY(rde_peer)             hash_l; /* hash list over all peers */
@@ -90,12 +86,10 @@ struct rde_peer {
        struct bgpd_addr                 remote_addr;
        struct bgpd_addr                 local_v4_addr;
        struct bgpd_addr                 local_v6_addr;
-       struct uptree_prefix             up_prefix;
-       struct uptree_attr               up_attrs;
-       struct uplist_attr               updates[AID_MAX];
-       struct uplist_prefix             withdraws[AID_MAX];
-       time_t                           staletime[AID_MAX];
        struct capabilities              capa;
+       struct prefix_tree               updates[AID_MAX];
+       struct prefix_tree               withdraws[AID_MAX];
+       time_t                           staletime[AID_MAX];
        u_int64_t                        prefix_rcvd_update;
        u_int64_t                        prefix_rcvd_withdraw;
        u_int64_t                        prefix_rcvd_eor;
@@ -104,8 +98,6 @@ struct rde_peer {
        u_int64_t                        prefix_sent_eor;
        u_int32_t                        prefix_cnt; /* # of prefixes */
        u_int32_t                        remote_bgpid; /* host byte order! */
-       u_int32_t                        up_pcnt;
-       u_int32_t                        up_acnt;
        u_int32_t                        up_nlricnt;
        u_int32_t                        up_wcnt;
        enum peer_state                  state;
@@ -297,6 +289,7 @@ struct pt_entry_vpn6 {
 
 struct prefix {
        LIST_ENTRY(prefix)               rib_l, nexthop_l;
+       RB_ENTRY(prefix)                 entry;
        struct rib_entry                *re;
        struct rde_aspath               *aspath;
        struct rde_peer                 *peer;
@@ -304,6 +297,10 @@ struct prefix {
        time_t                           lastchange;
        u_int8_t                         validation_state;
        u_int8_t                         nhflags;
+       u_int8_t                         flags;
+       u_int8_t                         eor;
+#define        PREFIX_FLAG_WITHDRAW    0x01
+#define        PREFIX_FLAG_UPDATE      0x02
 };
 
 #define        NEXTHOP_SELF            0x01
@@ -499,6 +496,11 @@ struct prefix      *prefix_get(struct rib *, 
                    struct bgpd_addr *, int);
 int             prefix_remove(struct rib *, struct rde_peer *,
                    struct bgpd_addr *, int);
+void            prefix_add_eor(struct rde_peer *, u_int8_t);
+void            prefix_update(struct rib *, struct rde_peer *,
+                   struct bgpd_addr *, int);
+int             prefix_withdraw(struct rib *, struct rde_peer *,
+                   struct bgpd_addr *, int);
 int             prefix_write(u_char *, int, struct bgpd_addr *, u_int8_t, int);
 int             prefix_writebuf(struct ibuf *, struct bgpd_addr *, u_int8_t);
 struct prefix  *prefix_bypeer(struct rib_entry *, struct rde_peer *);
@@ -507,6 +509,8 @@ void                 prefix_updateall(struct prefix *,
 void            prefix_destroy(struct prefix *);
 void            prefix_relink(struct prefix *, struct rde_aspath *, int);
 
+RB_PROTOTYPE(prefix_tree, prefix, entry, prefix_cmp)
+
 static inline struct rde_peer *
 prefix_peer(struct prefix *p)
 {
@@ -551,25 +555,18 @@ int                nexthop_compare(struct nexthop *, 
 
 /* rde_update.c */
 void            up_init(struct rde_peer *);
-void            up_down(struct rde_peer *);
 int             up_rib_remove(struct rde_peer *, struct rib_entry *);
 void            up_rib_add(struct rde_peer *, struct rib_entry *);
 void            up_withdraw_all(struct rde_peer *);
-int             up_test_update(struct rde_peer *, struct prefix *);
-int             up_generate(struct rde_peer *, struct filterstate *,
-                    struct bgpd_addr *, u_int8_t);
 void            up_generate_updates(struct filter_head *, struct rde_peer *,
                     struct prefix *, struct prefix *);
 void            up_generate_default(struct filter_head *, struct rde_peer *,
                     u_int8_t);
-int             up_generate_marker(struct rde_peer *, u_int8_t);
-int             up_dump_prefix(u_char *, int, struct uplist_prefix *,
-                    struct rde_peer *, int);
+int             up_is_eor(struct rde_peer *, u_int8_t);
+int             up_dump_withdraws(u_char *, int, struct rde_peer *, u_int8_t);
+int             up_dump_mp_unreach(u_char *, int, struct rde_peer *, u_int8_t);
 int             up_dump_attrnlri(u_char *, int, struct rde_peer *);
-u_char         *up_dump_mp_unreach(u_char *, u_int16_t *, struct rde_peer *,
-                    u_int8_t);
-int             up_dump_mp_reach(u_char *, u_int16_t *, struct rde_peer *,
-                    u_int8_t);
+int             up_dump_mp_reach(u_char *, int, struct rde_peer *, u_int8_t);
 
 /* rde_trie.c */
 int    trie_add(struct trie_head *, struct bgpd_addr *, u_int8_t, u_int8_t,
Index: rde_decide.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.73
diff -u -p -r1.73 rde_decide.c
--- rde_decide.c        4 Dec 2018 14:13:40 -0000       1.73
+++ rde_decide.c        17 Dec 2018 14:05:31 -0000
@@ -126,9 +126,9 @@ prefix_cmp(struct prefix *p1, struct pre
        peer2 = prefix_peer(p2);
 
        /* pathes with errors are not eligible */
-       if (asp1->flags & F_ATTR_PARSE_ERR)
+       if (asp1 == NULL || asp1->flags & F_ATTR_PARSE_ERR)
                return (-1);
-       if (asp2->flags & F_ATTR_PARSE_ERR)
+       if (asp2 == NULL || asp2->flags & F_ATTR_PARSE_ERR)
                return (1);
 
        /* only loop free pathes are eligible */
@@ -271,9 +271,10 @@ prefix_evaluate(struct prefix *p, struct
        xp = LIST_FIRST(&re->prefix_h);
        if (xp != NULL) {
                struct rde_aspath *xasp = prefix_aspath(xp);
-               if (xasp->flags & (F_ATTR_LOOP|F_ATTR_PARSE_ERR) ||
-                   (prefix_nexthop(xp) != NULL &&
-                   prefix_nexthop(xp)->state != NEXTHOP_REACH))
+               if (xasp == NULL ||
+                   xasp->flags & (F_ATTR_LOOP|F_ATTR_PARSE_ERR) ||
+                   (prefix_nexthop(xp) != NULL && prefix_nexthop(xp)->state !=
+                   NEXTHOP_REACH))
                        /* xp is ineligible */
                        xp = NULL;
        }
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.188
diff -u -p -r1.188 rde_rib.c
--- rde_rib.c   30 Dec 2018 13:53:07 -0000      1.188
+++ rde_rib.c   3 Jan 2019 09:36:02 -0000
@@ -101,6 +101,13 @@ rib_tree(struct rib *rib)
 static inline int
 rib_compare(const struct rib_entry *a, const struct rib_entry *b)
 {
+       /* need to handle NULL entries because of EoR marker */
+       if (a == NULL && b == NULL)
+               return (0);
+       else if (b == NULL)
+               return (1);
+       else if (a == NULL)
+               return (-1);
        return (pt_prefix_cmp(a->prefix, b->prefix));
 }
 
@@ -195,7 +202,7 @@ rib_free(struct rib *rib)
                while ((p = LIST_FIRST(&re->prefix_h))) {
                        struct rde_aspath *asp = prefix_aspath(p);
                        np = LIST_NEXT(p, rib_l);
-                       if (asp->pftableid) {
+                       if (asp && asp->pftableid) {
                                struct bgpd_addr addr;
 
                                pt_getaddr(p->re->prefix, &addr);
@@ -494,7 +501,12 @@ SIPHASH_KEY pathtablekey;
 
 #define        PATH_HASH(x)    &pathtable.path_hashtbl[x & pathtable.path_hashmask]
 
-#define        path_empty(asp) ((asp)->refcnt <= 0)
+
+static inline int
+path_empty(struct rde_aspath *asp)
+{
+       return (asp == NULL || asp->refcnt <= 0);
+}
 
 static inline void
 path_ref(struct rde_aspath *asp)
@@ -598,6 +610,15 @@ path_update(struct rib *rib, struct rde_
                        p->validation_state = vstate;
                        return (2);
                }
+               if (p->flags) {
+                       struct prefix_tree *prefix_head;
+                       /* prefix is a pending update */
+                       prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
+                           &peer->updates[prefix->aid] :
+                           &peer->withdraws[prefix->aid];
+                       RB_REMOVE(prefix_tree, prefix_head, p);
+                       p->flags = 0;
+               }
        }
 
        /*
@@ -731,6 +752,9 @@ path_link(struct rde_aspath *asp)
 static void
 path_unlink(struct rde_aspath *asp)
 {
+       if (asp == NULL)
+               return;
+
        /* make sure no reference is hold for this rde_aspath */
        if (!path_empty(asp))
                fatalx("%s: still has prefixes", __func__);
@@ -822,12 +846,29 @@ path_put(struct rde_aspath *asp)
 
 /* prefix specific functions */
 
-static struct prefix   *prefix_alloc(void);
-static void             prefix_free(struct prefix *);
 static void             prefix_link(struct prefix *, struct rib_entry *,
                             struct rde_peer *, struct rde_aspath *,
                             struct filterstate *, u_int8_t);
 static void             prefix_unlink(struct prefix *);
+static struct prefix   *prefix_alloc(void);
+static void             prefix_free(struct prefix *);
+
+/* RB tree comparison function */
+static inline int
+prefix_cmp(struct prefix *a, struct prefix *b)
+{
+       if (a->eor != b->eor)
+               return a->eor - b->eor;
+       if (a->aspath != b->aspath)
+               return (a->aspath > b->aspath ? 1 : -1);
+       if (a->nexthop != b->nexthop)
+               return (a->nexthop > b->nexthop ? 1 : -1);
+       if (a->nhflags != b->nhflags)
+               return (a->nhflags > b->nhflags ? 1 : -1);
+       return rib_compare(a->re, b->re);
+}
+
+RB_GENERATE(prefix_tree, prefix, entry, prefix_cmp)
 
 /*
  * search for specified prefix of a peer. Returns NULL if not found.
@@ -920,9 +961,10 @@ prefix_move(struct prefix *p, struct rde
        prefix_evaluate(np, np->re);
 
        /* remove old prefix node */
-       oasp = prefix_aspath(p);
-       path_unref(oasp);
        /* as before peer count needs no update because of move */
+       oasp = p->aspath;
+       if (oasp)
+               path_unref(oasp);
 
        /* destroy all references to other objects and free the old prefix */
        p->aspath = NULL;
@@ -949,19 +991,14 @@ prefix_remove(struct rib *rib, struct rd
     int prefixlen)
 {
        struct prefix           *p;
-       struct rib_entry        *re;
        struct rde_aspath       *asp;
 
-       re = rib_get(rib, prefix, prefixlen);
-       if (re == NULL)         /* Got a dummy withdrawn request */
-               return (0);
-
-       p = prefix_bypeer(re, peer);
+       p = prefix_get(rib, peer, prefix, prefixlen);
        if (p == NULL)          /* Got a dummy withdrawn request. */
                return (0);
 
        asp = prefix_aspath(p);
-       if (asp->pftableid) {
+       if (asp && asp->pftableid) {
                /* only prefixes in the local RIB were pushed into pf */
                rde_send_pftable(asp->pftableid, prefix, prefixlen, 1);
                rde_send_pftable_commit();
@@ -972,6 +1009,89 @@ prefix_remove(struct rib *rib, struct rd
        return (1);
 }
 
+/*
+ * Insert an End-of-RIB marker into the update queue.
+ */
+void
+prefix_add_eor(struct rde_peer *peer, u_int8_t aid)
+{
+       struct prefix *p;
+
+       p = prefix_alloc();
+       p->eor = 1;
+       p->flags = PREFIX_FLAG_UPDATE;
+       if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL)
+               /* no need to add if EoR marker already present */
+               prefix_free(p);
+}
+
+/*
+ * Put a prefix from the Adj-RIB-Out onto the update queue.
+ */
+void
+prefix_update(struct rib *rib, struct rde_peer *peer,
+    struct bgpd_addr *prefix, int prefixlen)
+{
+       struct prefix *p;
+
+       p = prefix_get(rib, peer, prefix, prefixlen);
+       if (p == NULL)          /* Got a dummy withdrawn request. */
+               return;
+
+       if (p->flags != 0)
+               fatalx("%s: bad flags %x", __func__, p->flags);
+       p->flags = PREFIX_FLAG_UPDATE;
+       if (RB_INSERT(prefix_tree, &peer->updates[prefix->aid], p) != NULL)
+               fatalx("%s: RB tree invariant violated", __func__);
+}
+
+/*
+ * Withdraw a prefix from the Adj-RIB-Out, this unlinks the aspath but leaves
+ * the prefix in the RIB linked to the peer withdraw list.
+ */
+int
+prefix_withdraw(struct rib *rib, struct rde_peer *peer,
+    struct bgpd_addr *prefix, int prefixlen)
+{
+       struct prefix           *p;
+       struct rde_aspath       *asp;
+
+       p = prefix_get(rib, peer, prefix, prefixlen);
+       if (p == NULL)          /* Got a dummy withdrawn request. */
+               return (0);
+
+       /* unlink from aspath ...*/
+       asp = p->aspath;
+       if (asp != NULL) {
+               path_unref(asp);
+               p->aspath = NULL;
+               if (path_empty(asp))
+                       path_unlink(asp);
+       }
+
+       /* ... and nexthop but keep the re link */
+       nexthop_unlink(p);
+       nexthop_put(p->nexthop);
+       p->nexthop = NULL;
+       p->nhflags = 0;
+       /* re link still exists */
+
+       if (p->flags) {
+               struct prefix_tree *prefix_head;
+               /* p is a pending update or withdraw, remove first */
+               prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
+                   &peer->updates[prefix->aid] :
+                   &peer->withdraws[prefix->aid];
+               RB_REMOVE(prefix_tree, prefix_head, p);
+               p->flags = 0;
+       }
+       p->flags = PREFIX_FLAG_WITHDRAW;
+       if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL)
+               fatalx("%s: RB tree invariant violated", __func__);
+       return (1);
+}
+
+
 /* dump a prefix into specified buffer */
 int
 prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen,
@@ -1139,7 +1259,6 @@ prefix_destroy(struct prefix *p)
        struct rde_aspath       *asp;
 
        asp = prefix_aspath(p);
-
        prefix_unlink(p);
        prefix_free(p);
 
@@ -1151,48 +1270,52 @@ prefix_destroy(struct prefix *p)
  * Link a prefix into the different parent objects.
  */
 static void
-prefix_link(struct prefix *pref, struct rib_entry *re, struct rde_peer *peer,
+prefix_link(struct prefix *p, struct rib_entry *re, struct rde_peer *peer,
     struct rde_aspath *asp, struct filterstate *state, u_int8_t vstate)
 {
        path_ref(asp);
 
-       pref->aspath = asp;
-       pref->peer = peer;
-       pref->nexthop = nexthop_ref(state->nexthop);
-       nexthop_link(pref);
-       pref->re = re;
-       pref->lastchange = time(NULL);
-       pref->nhflags = state->nhflags;
-       pref->validation_state = vstate;
+       p->aspath = asp;
+       p->peer = peer;
+       p->nexthop = nexthop_ref(state->nexthop);
+       nexthop_link(p);
+       p->re = re;
+       p->lastchange = time(NULL);
+       p->nhflags = state->nhflags;
+       p->validation_state = vstate;
 
        /* make route decision */
-       prefix_evaluate(pref, re);
+       prefix_evaluate(p, re);
 }
 
 /*
  * Unlink a prefix from the different parent objects.
  */
 static void
-prefix_unlink(struct prefix *pref)
+prefix_unlink(struct prefix *p)
 {
-       struct rib_entry        *re = pref->re;
+       struct rib_entry        *re = p->re;
+
+       if (p->eor)     /* nothing to unlink for EoR markers */
+               return;
 
        /* make route decision */
-       LIST_REMOVE(pref, rib_l);
+       LIST_REMOVE(p, rib_l);
        prefix_evaluate(NULL, re);
 
-       path_unref(prefix_aspath(pref));
+       if (p->aspath)
+               path_unref(p->aspath);
 
        if (rib_empty(re))
                rib_remove(re);
 
        /* destroy all references to other objects */
-       nexthop_unlink(pref);
-       nexthop_put(pref->nexthop);
-       pref->nexthop = NULL;
-       pref->aspath = NULL;
-       pref->peer = NULL;
-       pref->re = NULL;
+       nexthop_unlink(p);
+       nexthop_put(p->nexthop);
+       p->nexthop = NULL;
+       p->aspath = NULL;
+       p->peer = NULL;
+       p->re = NULL;
 
        /*
         * It's the caller's duty to do accounting and remove empty aspath
@@ -1215,10 +1338,10 @@ prefix_alloc(void)
 
 /* free a unlinked entry */
 static void
-prefix_free(struct prefix *pref)
+prefix_free(struct prefix *p)
 {
        rdemem.prefix_cnt--;
-       free(pref);
+       free(p);
 }
 
 /*
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.107
diff -u -p -r1.107 rde_update.c
--- rde_update.c        30 Dec 2018 13:53:07 -0000      1.107
+++ rde_update.c        3 Jan 2019 09:36:10 -0000
@@ -17,6 +17,7 @@
  */
 #include <sys/types.h>
 #include <sys/queue.h>
+#include <sys/tree.h>
 
 #include <limits.h>
 #include <stdlib.h>
@@ -27,45 +28,6 @@
 #include "rde.h"
 #include "log.h"
 
-in_addr_t      up_get_nexthop(struct rde_peer *, struct filterstate *);
-int            up_generate_mp_reach(struct rde_peer *, struct update_attr *,
-                   struct filterstate *, u_int8_t);
-int            up_generate_attr(struct rde_peer *, struct update_attr *,
-                   struct filterstate *, u_int8_t);
-
-/* update stuff. */
-struct update_prefix {
-       TAILQ_ENTRY(update_prefix)       prefix_l;
-       RB_ENTRY(update_prefix)          entry;
-       struct uplist_prefix            *prefix_h;
-       struct bgpd_addr                 prefix;
-       int                              prefixlen;
-};
-
-struct update_attr {
-       TAILQ_ENTRY(update_attr)         attr_l;
-       RB_ENTRY(update_attr)            entry;
-       struct uplist_prefix             prefix_h;
-       u_char                          *attr;
-       u_char                          *mpattr;
-       u_int32_t                        attr_hash;
-       u_int16_t                        attr_len;
-       u_int16_t                        mpattr_len;
-};
-
-void   up_clear(struct uplist_attr *, struct uplist_prefix *);
-int    up_prefix_cmp(struct update_prefix *, struct update_prefix *);
-int    up_attr_cmp(struct update_attr *, struct update_attr *);
-int    up_add(struct rde_peer *, struct update_prefix *, struct update_attr *);
-
-RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
-RB_GENERATE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
-
-RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp)
-RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp)
-
-SIPHASH_KEY uptree_key;
-
 static struct filter_community comm_no_advertise = {
        .type = COMMUNITY_TYPE_BASIC,
        .c.b.data1 = COMMUNITY_WELLKNOWN,
@@ -82,233 +44,7 @@ static struct filter_community      comm_no_e
        .c.b.data2 = COMMUNITY_NO_EXPSUBCONFED
 };
 
-void
-up_init(struct rde_peer *peer)
-{
-       u_int8_t        i;
-
-       for (i = 0; i < AID_MAX; i++) {
-               TAILQ_INIT(&peer->updates[i]);
-               TAILQ_INIT(&peer->withdraws[i]);
-       }
-       RB_INIT(&peer->up_prefix);
-       RB_INIT(&peer->up_attrs);
-       peer->up_pcnt = 0;
-       peer->up_acnt = 0;
-       peer->up_nlricnt = 0;
-       peer->up_wcnt = 0;
-       arc4random_buf(&uptree_key, sizeof(uptree_key));
-}
-
-void
-up_clear(struct uplist_attr *updates, struct uplist_prefix *withdraws)
-{
-       struct update_attr      *ua;
-       struct update_prefix    *up;
-
-       while ((ua = TAILQ_FIRST(updates)) != NULL) {
-               TAILQ_REMOVE(updates, ua, attr_l);
-               while ((up = TAILQ_FIRST(&ua->prefix_h)) != NULL) {
-                       TAILQ_REMOVE(&ua->prefix_h, up, prefix_l);
-                       free(up);
-               }
-               free(ua->attr);
-               free(ua->mpattr);
-               free(ua);
-       }
-
-       while ((up = TAILQ_FIRST(withdraws)) != NULL) {
-               TAILQ_REMOVE(withdraws, up, prefix_l);
-               free(up);
-       }
-}
-
-void
-up_down(struct rde_peer *peer)
-{
-       u_int8_t                i;
-
-       for (i = 0; i < AID_MAX; i++)
-               up_clear(&peer->updates[i], &peer->withdraws[i]);
-
-       RB_INIT(&peer->up_prefix);
-       RB_INIT(&peer->up_attrs);
-
-       peer->up_pcnt = 0;
-       peer->up_acnt = 0;
-       peer->up_nlricnt = 0;
-       peer->up_wcnt = 0;
-}
-
-int
-up_prefix_cmp(struct update_prefix *a, struct update_prefix *b)
-{
-       int     i;
-
-       if (a->prefix.aid < b->prefix.aid)
-               return (-1);
-       if (a->prefix.aid > b->prefix.aid)
-               return (1);
-
-       switch (a->prefix.aid) {
-       case AID_INET:
-               if (ntohl(a->prefix.v4.s_addr) < ntohl(b->prefix.v4.s_addr))
-                       return (-1);
-               if (ntohl(a->prefix.v4.s_addr) > ntohl(b->prefix.v4.s_addr))
-                       return (1);
-               break;
-       case AID_INET6:
-               i = memcmp(&a->prefix.v6, &b->prefix.v6,
-                   sizeof(struct in6_addr));
-               if (i > 0)
-                       return (1);
-               if (i < 0)
-                       return (-1);
-               break;
-       case AID_VPN_IPv4:
-               if (betoh64(a->prefix.vpn4.rd) < betoh64(b->prefix.vpn4.rd))
-                       return (-1);
-               if (betoh64(a->prefix.vpn4.rd) > betoh64(b->prefix.vpn4.rd))
-                       return (1);
-               if (ntohl(a->prefix.v4.s_addr) < ntohl(b->prefix.v4.s_addr))
-                       return (-1);
-               if (ntohl(a->prefix.v4.s_addr) > ntohl(b->prefix.v4.s_addr))
-                       return (1);
-               if (a->prefixlen < b->prefixlen)
-                       return (-1);
-               if (a->prefixlen > b->prefixlen)
-                       return (1);
-               if (a->prefix.vpn4.labellen < b->prefix.vpn4.labellen)
-                       return (-1);
-               if (a->prefix.vpn4.labellen > b->prefix.vpn4.labellen)
-                       return (1);
-               return (memcmp(a->prefix.vpn4.labelstack,
-                   b->prefix.vpn4.labelstack, a->prefix.vpn4.labellen));
-       case AID_VPN_IPv6:
-               if (betoh64(a->prefix.vpn6.rd) < betoh64(b->prefix.vpn6.rd))
-                       return (-1);
-               if (betoh64(a->prefix.vpn6.rd) > betoh64(b->prefix.vpn6.rd))
-                       return (1);
-               i = memcmp(&a->prefix.vpn6.addr, &b->prefix.vpn6.addr,
-                   sizeof(struct in6_addr));
-               if (i > 0)
-                       return (1);
-               if (i < 0)
-                       return (-1);
-               if (a->prefixlen < b->prefixlen)
-                       return (-1);
-               if (a->prefixlen > b->prefixlen)
-                       return (1);
-               if (a->prefix.vpn6.labellen < b->prefix.vpn6.labellen)
-                       return (-1);
-               if (a->prefix.vpn6.labellen > b->prefix.vpn6.labellen)
-                       return (1);
-               return (memcmp(a->prefix.vpn6.labelstack,
-                   b->prefix.vpn6.labelstack, a->prefix.vpn6.labellen));
-       default:
-               fatalx("up_prefix_cmp: unknown af");
-       }
-       if (a->prefixlen < b->prefixlen)
-               return (-1);
-       if (a->prefixlen > b->prefixlen)
-               return (1);
-       return (0);
-}
-
-int
-up_attr_cmp(struct update_attr *a, struct update_attr *b)
-{
-       int     r;
-
-       if ((r = a->attr_hash - b->attr_hash) != 0)
-               return (r);
-       if ((r = a->attr_len - b->attr_len) != 0)
-               return (r);
-       if ((r = a->mpattr_len - b->mpattr_len) != 0)
-               return (r);
-       if ((r = memcmp(a->mpattr, b->mpattr, a->mpattr_len)) != 0)
-               return (r);
-       return (memcmp(a->attr, b->attr, a->attr_len));
-}
-
-int
-up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a)
-{
-       struct update_attr      *na = NULL;
-       struct update_prefix    *np;
-       struct uplist_attr      *upl = NULL;
-       struct uplist_prefix    *wdl = NULL;
-
-       upl = &peer->updates[p->prefix.aid];
-       wdl = &peer->withdraws[p->prefix.aid];
-
-       /* 1. search for attr */
-       if (a != NULL && (na = RB_FIND(uptree_attr, &peer->up_attrs, a)) ==
-           NULL) {
-               /* 1.1 if not found -> add */
-               TAILQ_INIT(&a->prefix_h);
-               if (RB_INSERT(uptree_attr, &peer->up_attrs, a) != NULL) {
-                       log_warnx("uptree_attr insert failed");
-                       /* cleanup */
-                       free(a->attr);
-                       free(a->mpattr);
-                       free(a);
-                       free(p);
-                       return (-1);
-               }
-               TAILQ_INSERT_TAIL(upl, a, attr_l);
-               peer->up_acnt++;
-       } else {
-               /* 1.2 if found -> use that, free a */
-               if (a != NULL) {
-                       free(a->attr);
-                       free(a->mpattr);
-                       free(a);
-                       a = na;
-                       /* move to end of update queue */
-                       TAILQ_REMOVE(upl, a, attr_l);
-                       TAILQ_INSERT_TAIL(upl, a, attr_l);
-               }
-       }
-
-       /* 2. search for prefix */
-       if ((np = RB_FIND(uptree_prefix, &peer->up_prefix, p)) == NULL) {
-               /* 2.1 if not found -> add */
-               if (RB_INSERT(uptree_prefix, &peer->up_prefix, p) != NULL) {
-                       log_warnx("uptree_prefix insert failed");
-                       /*
-                        * cleanup. But do not free a because it is already
-                        * linked or NULL. up_dump_attrnlri() will remove and
-                        * free the empty attribute later.
-                        */
-                       free(p);
-                       return (-1);
-               }
-               peer->up_pcnt++;
-       } else {
-               /* 2.2 if found -> use that and free p */
-               TAILQ_REMOVE(np->prefix_h, np, prefix_l);
-               free(p);
-               p = np;
-               if (p->prefix_h == wdl)
-                       peer->up_wcnt--;
-               else
-                       peer->up_nlricnt--;
-       }
-       /* 3. link prefix to attr */
-       if (a == NULL) {
-               TAILQ_INSERT_TAIL(wdl, p, prefix_l);
-               p->prefix_h = wdl;
-               peer->up_wcnt++;
-       } else {
-               TAILQ_INSERT_TAIL(&a->prefix_h, p, prefix_l);
-               p->prefix_h = &a->prefix_h;
-               peer->up_nlricnt++;
-       }
-       return (0);
-}
-
-int
+static int
 up_test_update(struct rde_peer *peer, struct prefix *p)
 {
        struct bgpd_addr         addr;
@@ -316,9 +52,6 @@ up_test_update(struct rde_peer *peer, st
        struct rde_peer         *prefp;
        struct attr             *attr;
 
-       if (peer->state != PEER_UP)
-               return (-1);
-
        if (p == NULL)
                /* no prefix available */
                return (0);
@@ -330,7 +63,7 @@ up_test_update(struct rde_peer *peer, st
                /* Do not send routes back to sender */
                return (0);
 
-       if (asp->flags & F_ATTR_PARSE_ERR)
+       if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
                fatalx("try to send out a botched path");
        if (asp->flags & F_ATTR_LOOP)
                fatalx("try to send out a looped path");
@@ -390,47 +123,6 @@ up_test_update(struct rde_peer *peer, st
        return (1);
 }
 
-int
-up_generate(struct rde_peer *peer, struct filterstate *state,
-    struct bgpd_addr *addr, u_int8_t prefixlen)
-{
-       struct update_attr              *ua = NULL;
-       struct update_prefix            *up;
-       SIPHASH_CTX                     ctx;
-
-       if (state) {
-               ua = calloc(1, sizeof(struct update_attr));
-               if (ua == NULL)
-                       fatal("up_generate");
-
-               if (up_generate_attr(peer, ua, state, addr->aid) == -1) {
-                       log_warnx("generation of bgp path attributes failed");
-                       free(ua);
-                       return (-1);
-               }
-               /*
-                * use aspath_hash as attr_hash, this may be unoptimal
-                * but currently I don't care.
-                */
-               SipHash24_Init(&ctx, &uptree_key);
-               SipHash24_Update(&ctx, ua->attr, ua->attr_len);
-               if (ua->mpattr)
-                       SipHash24_Update(&ctx, ua->mpattr, ua->mpattr_len);
-               ua->attr_hash = SipHash24_End(&ctx);
-       }
-
-       up = calloc(1, sizeof(struct update_prefix));
-       if (up == NULL)
-               fatal("up_generate");
-       up->prefix = *addr;
-       up->prefixlen = prefixlen;
-
-       if (up_add(peer, up, ua) == -1)
-               return (-1);
-
-       return (0);
-}
-
 void
 up_generate_updates(struct filter_head *rules, struct rde_peer *peer,
     struct prefix *new, struct prefix *old)
@@ -444,16 +136,14 @@ up_generate_updates(struct filter_head *
        if (new == NULL) {
 withdraw:
                if (old == NULL)
+                       /* no prefix to withdraw */
                        return;
 
                /* withdraw prefix */
                pt_getaddr(old->re->prefix, &addr);
-               if (prefix_remove(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
-                   old->re->prefix->prefixlen) == 0) {
-                       /* not in table, no need to send withdraw */
-                       return;
-               }
-               up_generate(peer, NULL, &addr, old->re->prefix->prefixlen);
+               if (prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
+                   old->re->prefix->prefixlen) == 1)
+                       peer->up_wcnt++;
        } else {
                switch (up_test_update(peer, new)) {
                case 1:
@@ -475,8 +165,9 @@ withdraw:
                if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
                    new->re->prefix->prefixlen, prefix_vstate(new)) != 2) {
                        /* only send update if path changed */
-                       up_generate(peer, &state, &addr,
+                       prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
                            new->re->prefix->prefixlen);
+                       peer->up_nlricnt++;
                }
 
                rde_filterstate_clean(&state);
@@ -501,7 +192,8 @@ up_generate_default(struct filter_head *
        if (peer->capa.mp[aid] == 0)
                return;
 
-       asp = path_get();
+       rde_filterstate_prep(&state, NULL, NULL, 0);
+       asp = &state.aspath;
        asp->aspath = aspath_get(NULL, 0);
        asp->origin = ORIGIN_IGP;
        /* the other default values are OK, nexthop is once again NULL */
@@ -513,8 +205,8 @@ up_generate_default(struct filter_head *
        /* rde_apply_set(asp, set, af, NULL ???, DIR_IN); */
 
        /*
-        * XXX this is ugly but it will get better once we have a proper
-        * Adj-RIB-Out. Since then this will be just inserted there.
+        * XXX this is ugly: rde_filter() needs a prefix to work on, but the
+        * real one is only added after filtering. So fake it till we make it.
         */
        bzero(&p, sizeof(p));
        bzero(&addr, sizeof(addr));
@@ -524,66 +216,26 @@ up_generate_default(struct filter_head *
                re = rib_add(rib_byid(peer->loc_rib_id), &addr, 0);
        p.re = re;
        p.aspath = asp;
-       p.peer = peer;
+       p.peer = peer; /* XXX should be peerself */
 
        /* filter as usual */
-       rde_filterstate_prep(&state, asp, NULL, 0);
        if (rde_filter(rules, peer, &p, &state) == ACTION_DENY) {
                rde_filterstate_clean(&state);
                return;
        }
 
-       up_generate(peer, &state, &addr, 0);
+       path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr, 0,
+           ROA_NOTFOUND);
 
        /* no longer needed */
        rde_filterstate_clean(&state);
-       path_put(asp);
 
        if (rib_empty(re))
                rib_remove(re);
 }
 
-/* generate a EoR marker in the update list. This is a horrible hack. */
-int
-up_generate_marker(struct rde_peer *peer, u_int8_t aid)
-{
-       struct update_attr      *ua;
-       struct update_attr      *na = NULL;
-       struct uplist_attr      *upl = NULL;
-
-       ua = calloc(1, sizeof(struct update_attr));
-       if (ua == NULL)
-               fatal("up_generate_marker");
-
-       upl = &peer->updates[aid];
-
-       /* 1. search for attr */
-       if ((na = RB_FIND(uptree_attr, &peer->up_attrs, ua)) == NULL) {
-               /* 1.1 if not found -> add */
-               TAILQ_INIT(&ua->prefix_h);
-               if (RB_INSERT(uptree_attr, &peer->up_attrs, ua) != NULL) {
-                       log_warnx("uptree_attr insert failed");
-                       /* cleanup */
-                       free(ua);
-                       return (-1);
-               }
-               TAILQ_INSERT_TAIL(upl, ua, attr_l);
-               peer->up_acnt++;
-       } else {
-               /* 1.2 if found -> use that, free ua */
-               free(ua);
-               ua = na;
-               /* move to end of update queue */
-               TAILQ_REMOVE(upl, ua, attr_l);
-               TAILQ_INSERT_TAIL(upl, ua, attr_l);
-       }
-       return (0);
-}
-
-u_char up_attr_buf[4096];
-
 /* only for IPv4 */
-in_addr_t
+static in_addr_t
 up_get_nexthop(struct rde_peer *peer, struct filterstate *state)
 {
        in_addr_t       mask;
@@ -638,205 +290,8 @@ up_get_nexthop(struct rde_peer *peer, st
                return (peer->local_v4_addr.v4.s_addr);
 }
 
-int
-up_generate_mp_reach(struct rde_peer *peer, struct update_attr *upa,
-    struct filterstate *state, u_int8_t aid)
-{
-       u_int16_t       tmp;
-
-       switch (aid) {
-       case AID_INET6:
-               upa->mpattr_len = 21; /* AFI + SAFI + NH LEN + NH + Reserved */
-               upa->mpattr = malloc(upa->mpattr_len);
-               if (upa->mpattr == NULL)
-                       fatal("up_generate_mp_reach");
-               if (aid2afi(aid, &tmp, &upa->mpattr[2]))
-                       fatalx("up_generate_mp_reachi: bad AID");
-               tmp = htons(tmp);
-               memcpy(upa->mpattr, &tmp, sizeof(tmp));
-               upa->mpattr[3] = sizeof(struct in6_addr);
-               upa->mpattr[20] = 0; /* Reserved must be 0 */
-
-               /* nexthop dance see also up_get_nexthop() */
-               if (state->nhflags & NEXTHOP_NOMODIFY) {
-                       /* no modify flag set */
-                       if (state->nexthop == NULL)
-                               memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
-                                   sizeof(struct in6_addr));
-                       else
-                               memcpy(&upa->mpattr[4],
-                                   &state->nexthop->exit_nexthop.v6,
-                                   sizeof(struct in6_addr));
-               } else if (state->nhflags & NEXTHOP_SELF)
-                       memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
-                           sizeof(struct in6_addr));
-               else if (!peer->conf.ebgp) {
-                       /* ibgp */
-                       if (state->nexthop == NULL ||
-                           (state->nexthop->exit_nexthop.aid == AID_INET6 &&
-                           !memcmp(&state->nexthop->exit_nexthop.v6,
-                           &peer->remote_addr.v6, sizeof(struct in6_addr))))
-                               memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
-                                   sizeof(struct in6_addr));
-                       else
-                               memcpy(&upa->mpattr[4],
-                                   &state->nexthop->exit_nexthop.v6,
-                                   sizeof(struct in6_addr));
-               } else if (peer->conf.distance == 1) {
-                       /* ebgp directly connected */
-                       if (state->nexthop != NULL &&
-                           state->nexthop->flags & NEXTHOP_CONNECTED)
-                               if (prefix_compare(&peer->remote_addr,
-                                   &state->nexthop->nexthop_net,
-                                   state->nexthop->nexthop_netlen) == 0) {
-                                       /*
-                                        * nexthop and peer are in the same
-                                        * subnet
-                                        */
-                                       memcpy(&upa->mpattr[4],
-                                           &state->nexthop->exit_nexthop.v6,
-                                           sizeof(struct in6_addr));
-                                       return (0);
-                               }
-                       memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
-                           sizeof(struct in6_addr));
-               } else
-                       /* ebgp multihop */
-                       memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
-                           sizeof(struct in6_addr));
-               return (0);
-       case AID_VPN_IPv4:
-               upa->mpattr_len = 17; /* AFI + SAFI + NH LEN + NH + Reserved */
-               upa->mpattr = calloc(upa->mpattr_len, 1);
-               if (upa->mpattr == NULL)
-                       fatal("up_generate_mp_reach");
-               if (aid2afi(aid, &tmp, &upa->mpattr[2]))
-                       fatalx("up_generate_mp_reachi: bad AID");
-               tmp = htons(tmp);
-               memcpy(upa->mpattr, &tmp, sizeof(tmp));
-               upa->mpattr[3] = sizeof(u_int64_t) + sizeof(struct in_addr);
-
-               /* nexthop dance see also up_get_nexthop() */
-               if (state->nhflags & NEXTHOP_NOMODIFY) {
-                       /* no modify flag set */
-                       if (state->nexthop == NULL)
-                               memcpy(&upa->mpattr[12],
-                                   &peer->local_v4_addr.v4,
-                                   sizeof(struct in_addr));
-                       else
-                               /* nexthops are stored as IPv4 addrs */
-                               memcpy(&upa->mpattr[12],
-                                   &state->nexthop->exit_nexthop.v4,
-                                   sizeof(struct in_addr));
-               } else if (state->nhflags & NEXTHOP_SELF)
-                       memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4,
-                           sizeof(struct in_addr));
-               else if (!peer->conf.ebgp) {
-                       /* ibgp */
-                       if (state->nexthop == NULL ||
-                           (state->nexthop->exit_nexthop.aid == AID_INET &&
-                           !memcmp(&state->nexthop->exit_nexthop.v4,
-                           &peer->remote_addr.v4, sizeof(struct in_addr))))
-                               memcpy(&upa->mpattr[12],
-                                   &peer->local_v4_addr.v4,
-                                   sizeof(struct in_addr));
-                       else
-                               memcpy(&upa->mpattr[12],
-                                   &state->nexthop->exit_nexthop.v4,
-                                   sizeof(struct in_addr));
-               } else if (peer->conf.distance == 1) {
-                       /* ebgp directly connected */
-                       if (state->nexthop != NULL &&
-                           state->nexthop->flags & NEXTHOP_CONNECTED)
-                               if (prefix_compare(&peer->remote_addr,
-                                   &state->nexthop->nexthop_net,
-                                   state->nexthop->nexthop_netlen) == 0) {
-                                       /*
-                                        * nexthop and peer are in the same
-                                        * subnet
-                                        */
-                                       memcpy(&upa->mpattr[12],
-                                           &state->nexthop->exit_nexthop.v4,
-                                           sizeof(struct in_addr));
-                                       return (0);
-                               }
-                       memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4,
-                           sizeof(struct in_addr));
-               } else
-                       /* ebgp multihop */
-                       memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4,
-                           sizeof(struct in_addr));
-               return (0);
-       case AID_VPN_IPv6:
-               upa->mpattr_len = 29; /* AFI + SAFI + NH LEN + NH + Reserved */
-               upa->mpattr = calloc(upa->mpattr_len, 1);
-               if (upa->mpattr == NULL)
-                       fatal("up_generate_mp_reach");
-               if (aid2afi(aid, &tmp, &upa->mpattr[2]))
-                       fatalx("up_generate_mp_reachi: bad AID");
-               tmp = htons(tmp);
-               memcpy(upa->mpattr, &tmp, sizeof(tmp));
-               upa->mpattr[3] = sizeof(u_int64_t) + sizeof(struct in6_addr);
-               upa->mpattr[28] = 0; /* Reserved must be 0 */
-
-               /* nexthop dance see also up_get_nexthop() */
-               if (state->nhflags & NEXTHOP_NOMODIFY) {
-                       /* no modify flag set */
-                       if (state->nexthop == NULL)
-                               memcpy(&upa->mpattr[12],
-                                   &peer->local_v6_addr.v6,
-                                   sizeof(struct in6_addr));
-                       else
-                               memcpy(&upa->mpattr[12],
-                                   &state->nexthop->exit_nexthop.v6,
-                                   sizeof(struct in6_addr));
-               } else if (state->nhflags & NEXTHOP_SELF)
-                       memcpy(&upa->mpattr[12], &peer->local_v6_addr.v6,
-                           sizeof(struct in6_addr));
-               else if (!peer->conf.ebgp) {
-                       /* ibgp */
-                       if (state->nexthop == NULL ||
-                           (state->nexthop->exit_nexthop.aid == AID_INET6 &&
-                           !memcmp(&state->nexthop->exit_nexthop.v6,
-                           &peer->remote_addr.v6, sizeof(struct in6_addr))))
-                               memcpy(&upa->mpattr[12],
-                                   &peer->local_v6_addr.v6,
-                                   sizeof(struct in6_addr));
-                       else
-                               memcpy(&upa->mpattr[12],
-                                   &state->nexthop->exit_nexthop.v6,
-                                   sizeof(struct in6_addr));
-               } else if (peer->conf.distance == 1) {
-                       /* ebgp directly connected */
-                       if (state->nexthop != NULL &&
-                           state->nexthop->flags & NEXTHOP_CONNECTED)
-                               if (prefix_compare(&peer->remote_addr,
-                                   &state->nexthop->nexthop_net,
-                                   state->nexthop->nexthop_netlen) == 0) {
-                                       /*
-                                       * nexthop and peer are in the same
-                                       * subnet
-                                       */
-                                       memcpy(&upa->mpattr[12],
-                                           &state->nexthop->exit_nexthop.v6,
-                                           sizeof(struct in6_addr));
-                                       return (0);
-                               }
-                       memcpy(&upa->mpattr[12], &peer->local_v6_addr.v6,
-                           sizeof(struct in6_addr));
-               } else
-                       /* ebgp multihop */
-                       memcpy(&upa->mpattr[12], &peer->local_v6_addr.v6,
-                           sizeof(struct in6_addr));
-               return (0);
-       default:
-               break;
-       }
-       return (-1);
-}
-
-int
-up_generate_attr(struct rde_peer *peer, struct update_attr *upa,
+static int
+up_generate_attr(u_char *buf, int len, struct rde_peer *peer,
     struct filterstate *state, u_int8_t aid)
 {
        struct rde_aspath *asp = &state->aspath;
@@ -844,14 +299,14 @@ up_generate_attr(struct rde_peer *peer, 
        u_char          *pdata;
        u_int32_t        tmp32;
        in_addr_t        nexthop;
-       int              flags, r, ismp = 0, neednewpath = 0;
-       u_int16_t        len = sizeof(up_attr_buf), wlen = 0, plen;
+       int              flags, r, neednewpath = 0;
+       u_int16_t        wlen = 0, plen;
        u_int8_t         l;
        u_int16_t        nlen = 0;
        u_char          *ndata;
 
        /* origin */
-       if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+       if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
            ATTR_ORIGIN, &asp->origin, 1)) == -1)
                return (-1);
        wlen += r; len -= r;
@@ -868,7 +323,7 @@ up_generate_attr(struct rde_peer *peer, 
        if (!rde_as4byte(peer))
                pdata = aspath_deflate(pdata, &plen, &neednewpath);
 
-       if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+       if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
            ATTR_ASPATH, pdata, plen)) == -1)
                return (-1);
        wlen += r; len -= r;
@@ -877,13 +332,12 @@ up_generate_attr(struct rde_peer *peer, 
        switch (aid) {
        case AID_INET:
                nexthop = up_get_nexthop(peer, state);
-               if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+               if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
                    ATTR_NEXTHOP, &nexthop, 4)) == -1)
                        return (-1);
                wlen += r; len -= r;
                break;
        default:
-               ismp = 1;
                break;
        }
 
@@ -896,7 +350,7 @@ up_generate_attr(struct rde_peer *peer, 
            asp->flags & F_ATTR_MED_ANNOUNCE ||
            peer->conf.flags & PEERFLAG_TRANS_AS)) {
                tmp32 = htonl(asp->med);
-               if ((r = attr_write(up_attr_buf + wlen, len, ATTR_OPTIONAL,
+               if ((r = attr_write(buf + wlen, len, ATTR_OPTIONAL,
                    ATTR_MED, &tmp32, 4)) == -1)
                        return (-1);
                wlen += r; len -= r;
@@ -905,7 +359,7 @@ up_generate_attr(struct rde_peer *peer, 
        if (!peer->conf.ebgp) {
                /* local preference, only valid for ibgp */
                tmp32 = htonl(asp->lpref);
-               if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+               if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
                    ATTR_LOCALPREF, &tmp32, 4)) == -1)
                        return (-1);
                wlen += r; len -= r;
@@ -925,7 +379,7 @@ up_generate_attr(struct rde_peer *peer, 
                        break;
                switch (oa->type) {
                case ATTR_ATOMIC_AGGREGATE:
-                       if ((r = attr_write(up_attr_buf + wlen, len,
+                       if ((r = attr_write(buf + wlen, len,
                            ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE,
                            NULL, 0)) == -1)
                                return (-1);
@@ -953,7 +407,7 @@ up_generate_attr(struct rde_peer *peer, 
                                memcpy(t + sizeof(tas),
                                    oa->data + sizeof(tmp32),
                                    oa->len - sizeof(tmp32));
-                               if ((r = attr_write(up_attr_buf + wlen, len,
+                               if ((r = attr_write(buf + wlen, len,
                                    oa->flags, oa->type, &t, sizeof(t))) == -1)
                                        return (-1);
                                break;
@@ -968,7 +422,7 @@ up_generate_attr(struct rde_peer *peer, 
                                r = 0;
                                break;
                        }
-                       if ((r = attr_write(up_attr_buf + wlen, len,
+                       if ((r = attr_write(buf + wlen, len,
                            oa->flags, oa->type, oa->data, oa->len)) == -1)
                                return (-1);
                        break;
@@ -979,8 +433,8 @@ up_generate_attr(struct rde_peer *peer, 
                                    oa->len, &nlen);
 
                                if (nlen > 0) {
-                                       if ((r = attr_write(up_attr_buf + wlen,
-                                           len, oa->flags, oa->type, ndata,
+                                       if ((r = attr_write(buf + wlen, len,
+                                           oa->flags, oa->type, ndata,
                                            nlen)) == -1) {
                                                free(ndata);
                                                return (-1);
@@ -991,9 +445,8 @@ up_generate_attr(struct rde_peer *peer, 
                                        r = 0;
                                }
                        } else {
-                               if ((r = attr_write(up_attr_buf + wlen,
-                                   len, oa->flags, oa->type, oa->data,
-                                   oa->len)) == -1)
+                               if ((r = attr_write(buf + wlen, len, oa->flags,
+                                   oa->type, oa->data, oa->len)) == -1)
                                        return (-1);
                        }
                        break;
@@ -1009,7 +462,7 @@ up_generate_attr(struct rde_peer *peer, 
                                r = 0;
                                break;
                        }
-                       if ((r = attr_write(up_attr_buf + wlen, len,
+                       if ((r = attr_write(buf + wlen, len,
                            oa->flags | ATTR_PARTIAL, oa->type,
                            oa->data, oa->len)) == -1)
                                return (-1);
@@ -1032,7 +485,7 @@ up_generate_attr(struct rde_peer *peer, 
                        flags |= ATTR_PARTIAL;
                if (plen == 0)
                        r = 0;
-               else if ((r = attr_write(up_attr_buf + wlen, len, flags,
+               else if ((r = attr_write(buf + wlen, len, flags,
                    ATTR_AS4_PATH, pdata, plen)) == -1)
                        return (-1);
                wlen += r; len -= r;
@@ -1042,281 +495,456 @@ up_generate_attr(struct rde_peer *peer, 
                flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
                if (!(asp->flags & F_PREFIX_ANNOUNCED))
                        flags |= ATTR_PARTIAL;
-               if ((r = attr_write(up_attr_buf + wlen, len, flags,
+               if ((r = attr_write(buf + wlen, len, flags,
                    ATTR_AS4_AGGREGATOR, newaggr->data, newaggr->len)) == -1)
                        return (-1);
                wlen += r; len -= r;
        }
 
-       /* write mp attribute to different buffer */
-       if (ismp)
-               if (up_generate_mp_reach(peer, upa, state, aid) == -1)
-                       return (-1);
-
-       /* the bgp path attributes are now stored in the global buf */
-       upa->attr = malloc(wlen);
-       if (upa->attr == NULL)
-               fatal("up_generate_attr");
-       memcpy(upa->attr, up_attr_buf, wlen);
-       upa->attr_len = wlen;
        return (wlen);
 }
 
-#define MIN_PREFIX_LEN 5       /* 1 byte prefix length + 4 bytes addr */
+/*
+ * Check if the first pending update of this AID is an EoR marker. If so,
+ * remove it from the update tree, destroy it and return 1, else return 0.
+ */
 int
-up_dump_prefix(u_char *buf, int len, struct uplist_prefix *prefix_head,
+up_is_eor(struct rde_peer *peer, u_int8_t aid)
+{
+       struct prefix *p;
+
+       p = RB_MIN(prefix_tree, &peer->updates[aid]);
+       if (p != NULL && p->eor) {
+               RB_REMOVE(prefix_tree, &peer->updates[aid], p);
+               prefix_destroy(p);
+               return 1;
+       }
+       return 0;
+}
+
+/* min buffer: > withdraw len(2) + attr len(2) + attr hdr(4) + afi/safi(3) */
+#define MIN_UPDATE_LEN 16
+
+/*
+ * Write prefixes to buf until it is full or the next prefix is an EoR marker
+ * or differs in aspath, nexthop or nhflags. Returns the bytes written.
+ */
+static int
+up_dump_prefix(u_char *buf, int len, struct prefix_tree *prefix_head,
     struct rde_peer *peer, int withdraw)
 {
-       struct update_prefix    *upp;
-       int                      r, wpos = 0;
+       struct prefix   *p, *np;
+       struct bgpd_addr addr;
+       int              r, wpos = 0, done = 0;
 
-       while ((upp = TAILQ_FIRST(prefix_head)) != NULL) {
+       RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
+               pt_getaddr(p->re->prefix, &addr);
                if ((r = prefix_write(buf + wpos, len - wpos,
-                   &upp->prefix, upp->prefixlen, withdraw)) == -1)
+                   &addr, p->re->prefix->prefixlen, withdraw)) == -1)
                        break;
                wpos += r;
-               if (RB_REMOVE(uptree_prefix, &peer->up_prefix, upp) == NULL)
-                       log_warnx("dequeuing update failed.");
-               TAILQ_REMOVE(upp->prefix_h, upp, prefix_l);
-               peer->up_pcnt--;
+
+               /* make sure we only dump prefixes which belong together */
+               if (np == NULL || np->aspath != p->aspath ||
+                   np->nexthop != p->nexthop || np->nhflags != p->nhflags ||
+                   np->eor)
+                       done = 1;
+
+               /* prefix sent, remove from tree and clear its flags */
+               RB_REMOVE(prefix_tree, prefix_head, p);
+               p->flags = 0;
+
                if (withdraw) {
+                       /* prefix no longer needed, remove it */
+                       prefix_destroy(p);
                        peer->up_wcnt--;
                        peer->prefix_sent_withdraw++;
                } else {
+                       /* prefix still in Adj-RIB-Out, keep it */
                        peer->up_nlricnt--;
                        peer->prefix_sent_update++;
                }
-               free(upp);
+               if (done)
+                       break;
        }
        return (wpos);
 }
 
 int
+up_dump_withdraws(u_char *buf, int len, struct rde_peer *peer, u_int8_t aid)
+{
+       u_int16_t wpos, wd_len;
+       int r;
+
+       if (len < MIN_UPDATE_LEN)
+               return (-1);
+
+       /* reserve space for the 2-byte withdrawn routes length field */
+       wpos = 2;
+       r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid],
+           peer, 1);
+       wd_len = htons(r);
+       memcpy(buf, &wd_len, 2);
+
+       return (wpos + r);
+}
+
+int
+up_dump_mp_unreach(u_char *buf, int len, struct rde_peer *peer, u_int8_t aid)
+{
+       u_char          *attrbuf;
+       int              wpos, r;
+       u_int16_t        attr_len, tmp;
+
+       if (len < MIN_UPDATE_LEN || RB_EMPTY(&peer->withdraws[aid]))
+               return (-1);
+
+       /* reserve space for withdraw len, attr len */
+       wpos = 2 + 2;
+       attrbuf = buf + wpos;
+
+       /* attribute header, defaulting to extended length one */
+       attrbuf[0] = ATTR_OPTIONAL | ATTR_EXTLEN;
+       attrbuf[1] = ATTR_MP_UNREACH_NLRI;
+       wpos += 4;
+
+       /* afi & safi */
+       if (aid2afi(aid, &tmp, buf + wpos + 2))
+               fatalx("up_dump_mp_unreach: bad AID");
+       tmp = htons(tmp);
+       memcpy(buf + wpos, &tmp, sizeof(u_int16_t));
+       wpos += 3;
+
+       r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid],
+           peer, 1);
+       if (r == 0)
+               return (-1);
+       wpos += r;
+       attr_len = r + 3;       /* prefixes + afi & safi */
+
+       /* fill in the MP_UNREACH_NLRI attribute length */
+       attr_len = htons(attr_len);
+       memcpy(attrbuf + 2, &attr_len, sizeof(attr_len));
+
+       /* write length fields */
+       bzero(buf, sizeof(u_int16_t));  /* withdrawn routes len */
+       attr_len = htons(wpos - 4);
+       memcpy(buf + 2, &attr_len, sizeof(attr_len));
+
+       return (wpos);
+}
+
+int
 up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer)
 {
-       struct update_attr      *upa;
+       struct filterstate       state;
+       struct prefix           *p;
        int                      r, wpos;
        u_int16_t                attr_len;
 
-       /*
-        * It is possible that a queued path attribute has no nlri prefix.
-        * Ignore and remove those path attributes.
-        */
-       while ((upa = TAILQ_FIRST(&peer->updates[AID_INET])) != NULL)
-               if (TAILQ_EMPTY(&upa->prefix_h)) {
-                       attr_len = upa->attr_len;
-                       if (RB_REMOVE(uptree_attr, &peer->up_attrs,
-                           upa) == NULL)
-                               log_warnx("dequeuing update failed.");
-                       TAILQ_REMOVE(&peer->updates[AID_INET], upa, attr_l);
-                       free(upa->attr);
-                       free(upa->mpattr);
-                       free(upa);
-                       peer->up_acnt--;
-                       /* XXX horrible hack,
-                        * if attr_len is 0, it is a EoR marker */
-                       if (attr_len == 0)
-                               return (-1);
-               } else
-                       break;
+       if (len < 2)
+               fatalx("up_dump_attrnlri: buffer way too small");
+       if (len < MIN_UPDATE_LEN)
+               goto done;
+
+       p = RB_MIN(prefix_tree, &peer->updates[AID_INET]);
+       if (p == NULL)
+               goto done;
 
-       if (upa == NULL || upa->attr_len + MIN_PREFIX_LEN > len) {
+       rde_filterstate_prep(&state, prefix_aspath(p), prefix_nexthop(p),
+           prefix_nhflags(p));
+
+       r = up_generate_attr(buf + 2, len - 2, peer, &state, AID_INET);
+       rde_filterstate_clean(&state);
+       if (r == -1) {
                /*
                 * either no packet or not enough space.
                 * The length field needs to be set to zero else it would be
                 * an invalid bgp update.
                 */
+done:
                bzero(buf, 2);
                return (2);
        }
 
        /* first dump the 2-byte path attribute length */
-       attr_len = htons(upa->attr_len);
+       attr_len = htons(r);
        memcpy(buf, &attr_len, 2);
        wpos = 2;
-
-       /* then the path attributes themselves */
-       memcpy(buf + wpos, upa->attr, upa->attr_len);
-       wpos += upa->attr_len;
+       /* up_generate_attr() wrote the attributes in place, skip over them */
+       wpos += r;
 
        /* last but not least dump the nlri */
-       r = up_dump_prefix(buf + wpos, len - wpos, &upa->prefix_h, peer, 0);
+       r = up_dump_prefix(buf + wpos, len - wpos, &peer->updates[AID_INET],
+           peer, 0);
        wpos += r;
 
-       /* now check if all prefixes were written */
-       if (TAILQ_EMPTY(&upa->prefix_h)) {
-               if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL)
-                       log_warnx("dequeuing update failed.");
-               TAILQ_REMOVE(&peer->updates[AID_INET], upa, attr_l);
-               free(upa->attr);
-               free(upa->mpattr);
-               free(upa);
-               peer->up_acnt--;
-       }
-
        return (wpos);
 }
 
-u_char *
-up_dump_mp_unreach(u_char *buf, u_int16_t *len, struct rde_peer *peer,
-    u_int8_t aid)
+static int
+up_generate_mp_reach(u_char *buf, int len, struct rde_peer *peer,
+    struct filterstate *state, u_int8_t aid)
 {
-       int             wpos;
-       u_int16_t       datalen, tmp;
-       u_int16_t       attrlen = 2;    /* attribute header (without len) */
-       u_int8_t        flags = ATTR_OPTIONAL, safi;
+       u_char          *attrbuf;
+       int              r;
+       int              wpos, attrlen;
+       u_int16_t        tmp;
 
-       /*
-        * reserve space for withdraw len, attr len, the attribute header
-        * and the mp attribute header
-        */
-       wpos = 2 + 2 + 4 + 3;
-
-       if (*len < wpos)
-               return (NULL);
+       if (len < 4)
+               return (-1);
+       /* attribute header, defaulting to extended length one */
+       buf[0] = ATTR_OPTIONAL | ATTR_EXTLEN;
+       buf[1] = ATTR_MP_REACH_NLRI;
+       wpos = 4;
+       attrbuf = buf + wpos;
 
-       datalen = up_dump_prefix(buf + wpos, *len - wpos,
-           &peer->withdraws[aid], peer, 1);
-       if (datalen == 0)
-               return (NULL);
+       switch (aid) {
+       case AID_INET6:
+               attrlen = 21; /* AFI + SAFI + NH LEN + NH + Reserved */
+               if (len < wpos + attrlen)
+                       return (-1);
+               wpos += attrlen;
+               if (aid2afi(aid, &tmp, &attrbuf[2]))
+                       fatalx("up_generate_mp_reach: bad AID");
+               tmp = htons(tmp);
+               memcpy(attrbuf, &tmp, sizeof(tmp));
+               attrbuf[3] = sizeof(struct in6_addr);
+               attrbuf[20] = 0; /* Reserved must be 0 */
 
-       datalen += 3;   /* afi + safi */
+               /* nexthop dance see also up_get_nexthop() */
+               attrbuf += 4;
+               if (state->nhflags & NEXTHOP_NOMODIFY) {
+                       /* no modify flag set */
+                       if (state->nexthop == NULL)
+                               memcpy(attrbuf, &peer->local_v6_addr.v6,
+                                   sizeof(struct in6_addr));
+                       else
+                               memcpy(attrbuf,
+                                   &state->nexthop->exit_nexthop.v6,
+                                   sizeof(struct in6_addr));
+               } else if (state->nhflags & NEXTHOP_SELF)
+                       memcpy(attrbuf, &peer->local_v6_addr.v6,
+                           sizeof(struct in6_addr));
+               else if (!peer->conf.ebgp) {
+                       /* ibgp */
+                       if (state->nexthop == NULL ||
+                           (state->nexthop->exit_nexthop.aid == AID_INET6 &&
+                           !memcmp(&state->nexthop->exit_nexthop.v6,
+                           &peer->remote_addr.v6, sizeof(struct in6_addr))))
+                               memcpy(attrbuf, &peer->local_v6_addr.v6,
+                                   sizeof(struct in6_addr));
+                       else
+                               memcpy(attrbuf,
+                                   &state->nexthop->exit_nexthop.v6,
+                                   sizeof(struct in6_addr));
+               } else if (peer->conf.distance == 1) {
+                       /* ebgp directly connected */
+                       if (state->nexthop != NULL &&
+                           state->nexthop->flags & NEXTHOP_CONNECTED)
+                               if (prefix_compare(&peer->remote_addr,
+                                   &state->nexthop->nexthop_net,
+                                   state->nexthop->nexthop_netlen) == 0) {
+                                       /*
+                                        * nexthop and peer are in the same
+                                        * subnet
+                                        */
+                                       memcpy(attrbuf,
+                                           &state->nexthop->exit_nexthop.v6,
+                                           sizeof(struct in6_addr));
+                                       break;
+                               }
+                       memcpy(attrbuf, &peer->local_v6_addr.v6,
+                           sizeof(struct in6_addr));
+               } else
+                       /* ebgp multihop */
+                       memcpy(attrbuf, &peer->local_v6_addr.v6,
+                           sizeof(struct in6_addr));
+               break;
+       case AID_VPN_IPv4:
+               attrlen = 17; /* AFI + SAFI + NH LEN + NH + Reserved */
+               if (len < wpos + attrlen)
+                       return (-1);
+               wpos += attrlen;
+               if (aid2afi(aid, &tmp, &attrbuf[2]))
+                       fatalx("up_generate_mp_reach: bad AID");
+               tmp = htons(tmp);
+               memcpy(attrbuf, &tmp, sizeof(tmp));
+               attrbuf[3] = sizeof(u_int64_t) + sizeof(struct in_addr);
+               bzero(attrbuf + 4, sizeof(u_int64_t));
+               attrbuf[16] = 0; /* Reserved must be 0 */
 
-       /* prepend header, need to do it reverse */
-       /* safi & afi */
-       if (aid2afi(aid, &tmp, &safi))
-               fatalx("up_dump_mp_unreach: bad AID");
-       buf[--wpos] = safi;
-       wpos -= sizeof(u_int16_t);
-       tmp = htons(tmp);
-       memcpy(buf + wpos, &tmp, sizeof(u_int16_t));
+               /* nexthop dance see also up_get_nexthop() */
+               attrbuf += 12;
+               if (state->nhflags & NEXTHOP_NOMODIFY) {
+                       /* no modify flag set */
+                       if (state->nexthop == NULL)
+                               memcpy(attrbuf, &peer->local_v4_addr.v4,
+                                   sizeof(struct in_addr));
+                       else
+                               /* nexthops are stored as IPv4 addrs */
+                               memcpy(attrbuf,
+                                   &state->nexthop->exit_nexthop.v4,
+                                   sizeof(struct in_addr));
+               } else if (state->nhflags & NEXTHOP_SELF) {
+                       memcpy(attrbuf, &peer->local_v4_addr.v4,
+                           sizeof(struct in_addr));
+               } else if (!peer->conf.ebgp) {
+                       /* ibgp */
+                       if (state->nexthop == NULL ||
+                           (state->nexthop->exit_nexthop.aid == AID_INET &&
+                           !memcmp(&state->nexthop->exit_nexthop.v4,
+                           &peer->remote_addr.v4, sizeof(struct in_addr))))
+                               memcpy(attrbuf, &peer->local_v4_addr.v4,
+                                   sizeof(struct in_addr));
+                       else
+                               memcpy(attrbuf,
+                                   &state->nexthop->exit_nexthop.v4,
+                                   sizeof(struct in_addr));
+               } else if (peer->conf.distance == 1) {
+                       /* ebgp directly connected */
+                       if (state->nexthop != NULL &&
+                           state->nexthop->flags & NEXTHOP_CONNECTED)
+                               if (prefix_compare(&peer->remote_addr,
+                                   &state->nexthop->nexthop_net,
+                                   state->nexthop->nexthop_netlen) == 0) {
+                                       /*
+                                        * nexthop and peer are in the same
+                                        * subnet
+                                        */
+                                       memcpy(attrbuf,
+                                           &state->nexthop->exit_nexthop.v4,
+                                           sizeof(struct in_addr));
+                                       break;
+                               }
+                       memcpy(attrbuf, &peer->local_v4_addr.v4,
+                           sizeof(struct in_addr));
+               } else
+                       /* ebgp multihop */
+                       memcpy(attrbuf, &peer->local_v4_addr.v4,
+                           sizeof(struct in_addr));
+               break;
+       case AID_VPN_IPv6:
+               attrlen = 29; /* AFI + SAFI + NH LEN + NH + Reserved */
+               if (len < wpos + attrlen)
+                       return (-1);
+               wpos += attrlen;
+               if (aid2afi(aid, &tmp, &attrbuf[2]))
+                       fatalx("up_generate_mp_reach: bad AID");
+               tmp = htons(tmp);
+               memcpy(attrbuf, &tmp, sizeof(tmp));
+               attrbuf[3] = sizeof(u_int64_t) + sizeof(struct in6_addr);
+               bzero(attrbuf + 4, sizeof(u_int64_t));
+               attrbuf[28] = 0; /* Reserved must be 0 */
 
-       /* attribute length */
-       if (datalen > 255) {
-               attrlen += 2 + datalen;
-               flags |= ATTR_EXTLEN;
-               wpos -= sizeof(u_int16_t);
-               tmp = htons(datalen);
-               memcpy(buf + wpos, &tmp, sizeof(u_int16_t));
-       } else {
-               attrlen += 1 + datalen;
-               buf[--wpos] = (u_char)datalen;
+               /* nexthop dance see also up_get_nexthop() */
+               attrbuf += 12;
+               if (state->nhflags & NEXTHOP_NOMODIFY) {
+                       /* no modify flag set */
+                       if (state->nexthop == NULL)
+                               memcpy(attrbuf, &peer->local_v6_addr.v6,
+                                   sizeof(struct in6_addr));
+                       else
+                               memcpy(attrbuf,
+                                   &state->nexthop->exit_nexthop.v6,
+                                   sizeof(struct in6_addr));
+               } else if (state->nhflags & NEXTHOP_SELF)
+                       memcpy(attrbuf, &peer->local_v6_addr.v6,
+                           sizeof(struct in6_addr));
+               else if (!peer->conf.ebgp) {
+                       /* ibgp */
+                       if (state->nexthop == NULL ||
+                           (state->nexthop->exit_nexthop.aid == AID_INET6 &&
+                           !memcmp(&state->nexthop->exit_nexthop.v6,
+                           &peer->remote_addr.v6, sizeof(struct in6_addr))))
+                               memcpy(attrbuf, &peer->local_v6_addr.v6,
+                                   sizeof(struct in6_addr));
+                       else
+                               memcpy(attrbuf,
+                                   &state->nexthop->exit_nexthop.v6,
+                                   sizeof(struct in6_addr));
+               } else if (peer->conf.distance == 1) {
+                       /* ebgp directly connected */
+                       if (state->nexthop != NULL &&
+                           state->nexthop->flags & NEXTHOP_CONNECTED)
+                               if (prefix_compare(&peer->remote_addr,
+                                   &state->nexthop->nexthop_net,
+                                   state->nexthop->nexthop_netlen) == 0) {
+                                       /*
+                                        * nexthop and peer are in the same
+                                        * subnet
+                                        */
+                                       memcpy(attrbuf,
+                                           &state->nexthop->exit_nexthop.v6,
+                                           sizeof(struct in6_addr));
+                                       break;
+                               }
+                       memcpy(attrbuf, &peer->local_v6_addr.v6,
+                           sizeof(struct in6_addr));
+               } else
+                       /* ebgp multihop */
+                       memcpy(attrbuf, &peer->local_v6_addr.v6,
+                           sizeof(struct in6_addr));
+               break;
+       default:
+               fatalx("up_generate_mp_reach: unknown AID");
        }
 
-       /* mp attribute */
-       buf[--wpos] = (u_char)ATTR_MP_UNREACH_NLRI;
-       buf[--wpos] = flags;
-
-       /* attribute length */
-       wpos -= sizeof(u_int16_t);
+       r = up_dump_prefix(buf + wpos, len - wpos, &peer->updates[aid],
+           peer, 0);
+       if (r == 0) {
+               /* no prefixes written ... */
+               return (-1);
+       }
+       attrlen += r;
+       wpos += r;
+       /* update attribute length field */
        tmp = htons(attrlen);
-       memcpy(buf + wpos, &tmp, sizeof(u_int16_t));
-
-       /* no IPv4 withdraws */
-       wpos -= sizeof(u_int16_t);
-       bzero(buf + wpos, sizeof(u_int16_t));
-
-       if (wpos < 0)
-               fatalx("up_dump_mp_unreach: buffer underflow");
+       memcpy(buf + 2, &tmp, sizeof(tmp));
 
-       /* total length includes the two 2-bytes length fields. */
-       *len = attrlen + 2 * sizeof(u_int16_t);
-
-       return (buf + wpos);
+       return (wpos);
 }
 
 int
-up_dump_mp_reach(u_char *buf, u_int16_t *len, struct rde_peer *peer,
-    u_int8_t aid)
+up_dump_mp_reach(u_char *buf, int len, struct rde_peer *peer, u_int8_t aid)
 {
-       struct update_attr      *upa;
-       int                     wpos;
-       u_int16_t               attr_len, datalen, tmp;
-       u_int8_t                flags = ATTR_OPTIONAL;
+       struct filterstate       state;
+       struct prefix           *p;
+       int                     r, wpos;
+       u_int16_t               attr_len;
 
-       /*
-        * It is possible that a queued path attribute has no nlri prefix.
-        * Ignore and remove those path attributes.
-        */
-       while ((upa = TAILQ_FIRST(&peer->updates[aid])) != NULL)
-               if (TAILQ_EMPTY(&upa->prefix_h)) {
-                       attr_len = upa->attr_len;
-                       if (RB_REMOVE(uptree_attr, &peer->up_attrs,
-                           upa) == NULL)
-                               log_warnx("dequeuing update failed.");
-                       TAILQ_REMOVE(&peer->updates[aid], upa, attr_l);
-                       free(upa->attr);
-                       free(upa->mpattr);
-                       free(upa);
-                       peer->up_acnt--;
-                       /* XXX horrible hack,
-                        * if attr_len is 0, it is a EoR marker */
-                       if (attr_len == 0)
-                               return (-1);
-               } else
-                       break;
+       if (len < MIN_UPDATE_LEN)
+               return 0;
 
-       if (upa == NULL)
-               return (-2);
+       /* the first queued prefix supplies the attributes for this update */
+       p = RB_MIN(prefix_tree, &peer->updates[aid]);
+       if (p == NULL)
+               return 0;
 
-       /*
-        * reserve space for attr len, the attributes, the
-        * mp attribute and the attribute header
-        */
-       wpos = 2 + 2 + upa->attr_len + 4 + upa->mpattr_len;
-       if (*len < wpos)
-               return (-2);
-
-       datalen = up_dump_prefix(buf + wpos, *len - wpos,
-           &upa->prefix_h, peer, 0);
-       if (datalen == 0)
-               return (-2);
-
-       if (upa->mpattr_len == 0 || upa->mpattr == NULL)
-               fatalx("multiprotocol update without MP attrs");
-
-       datalen += upa->mpattr_len;
-       wpos -= upa->mpattr_len;
-       memcpy(buf + wpos, upa->mpattr, upa->mpattr_len);
-
-       if (datalen > 255) {
-               wpos -= 2;
-               tmp = htons(datalen);
-               memcpy(buf + wpos, &tmp, sizeof(tmp));
-               datalen += 4;
-               flags |= ATTR_EXTLEN;
-       } else {
-               buf[--wpos] = (u_char)datalen;
-               datalen += 3;
-       }
-       buf[--wpos] = (u_char)ATTR_MP_REACH_NLRI;
-       buf[--wpos] = flags;
+       wpos = 4;       /* reserve space for length fields */
 
-       datalen += upa->attr_len;
-       wpos -= upa->attr_len;
-       memcpy(buf + wpos, upa->attr, upa->attr_len);
-
-       if (wpos < 4)
-               fatalx("Grrr, mp_reach buffer fucked up");
-
-       wpos -= 2;
-       tmp = htons(datalen);
-       memcpy(buf + wpos, &tmp, sizeof(tmp));
-
-       wpos -= 2;
-       bzero(buf + wpos, 2);
-
-       /* now check if all prefixes were written */
-       if (TAILQ_EMPTY(&upa->prefix_h)) {
-               if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL)
-                       log_warnx("dequeuing update failed.");
-               TAILQ_REMOVE(&peer->updates[aid], upa, attr_l);
-               free(upa->attr);
-               free(upa->mpattr);
-               free(upa);
-               peer->up_acnt--;
+       rde_filterstate_prep(&state, prefix_aspath(p), prefix_nexthop(p),
+           prefix_nhflags(p));
+
+       /* write regular path attributes */
+       r = up_generate_attr(buf + wpos, len - wpos, peer, &state, aid);
+       if (r == -1) {
+               rde_filterstate_clean(&state);
+               return 0;
        }
+       wpos += r;
+
+       /* write mp attribute */
+       r = up_generate_mp_reach(buf + wpos, len - wpos, peer, &state, aid);
+       rde_filterstate_clean(&state);
+       if (r == -1)
+               return 0;
+       wpos += r;
+
+       /* write length fields */
+       bzero(buf, sizeof(u_int16_t));  /* withdrawn routes len */
+       attr_len = htons(wpos - 4);
+       memcpy(buf + 2, &attr_len, sizeof(attr_len));
 
-       *len = datalen + 4;
        return (wpos);
 }

Reply via email to