This diff adds the bits needed to support add-path in MRT dumps.
The problem here is that MRT as a stateless protocol has no chance
to know what kind of encoding (add-path or not) is used for the NLRI in
message dumps. And for table dumps there is a need to add an extra field
to the dumps to show the path-id.

There are two issues:
- for message dumps: it is necessary to peek into UPDATE messages to
  figure out if that update is using add-path or not. This comes from the
  fact that the add-path RFC allows to set the option per AFI/SAFI and
  also per direction. This is a major pain in bgpd since UPDATE messages
  are actually parsed in the RDE and not in the SE. The SE just does the
  basic lenght checks (header size, total length). So this peak into the
  packet needs to be done with some care (especialy for MP encoded
  UPDATEs).

- for table dumps the RFC did a major fobar and defined a extra special
  encoding for non-IPv4/IPv6 prefixes. In the general encoding the path-id
  is not part of the rib entries sub-struct but is instead part of the
  NLRI. This encoding is to complex to build into the bgpd codebase and it
  seems the only other BGP implementation supporting RIB_GENERIC_ADDPATH,
  gobgp, is also not implementing it according to the RFC but instead is
  using the same encoding as for the other _ADDPATH types. OpenBGPD will
  do the same.

This seems to work for me and results in the right output in bgpctl.

Please review mrt_bgp_guess_aid() and check if all checks are correct to
ensure we don't run off the rails.
-- 
:wq Claudio

Index: mrt.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
retrieving revision 1.104
diff -u -p -r1.104 mrt.c
--- mrt.c       24 Jun 2021 10:04:05 -0000      1.104
+++ mrt.c       9 Aug 2021 10:13:43 -0000
@@ -91,23 +91,128 @@ int mrt_open(struct mrt *, time_t);
                            x == MRT_TABLE_DUMP_V2) ? RDEIDX : SEIDX    \
                        )
 
-void
-mrt_dump_bgp_msg(struct mrt *mrt, void *pkg, u_int16_t pkglen,
-    struct peer *peer)
+static u_int8_t
+mrt_bgp_guess_aid(u_int8_t *pkg, u_int16_t pkglen)
+{
+       u_int16_t wlen, alen, len, afi;
+       u_int8_t type, aid;
+
+       pkg += MSGSIZE_HEADER;
+       pkglen -= MSGSIZE_HEADER;
+
+       if (pkglen < 4)
+               goto bad;
+
+       memcpy(&wlen, pkg, 2);
+       wlen = ntohs(wlen);
+       pkg += 2;
+       pkglen -= 2;
+
+       memcpy(&alen, pkg, 2);
+       alen = ntohs(alen);
+       pkg += 2;
+       pkglen -= 2;
+
+       if (wlen > 0 || alen < pkglen || (wlen == 0 && alen == 0)) {
+               /* UPDATE has withdraw or NLRI prefixes or IPv4 EoR */
+               return AID_INET;
+       }
+
+       /* bad attribute length */
+       if (alen > pkglen)
+               goto bad;
+
+       /* try to extract AFI/SAFI from the MP attributes */
+       while (alen > 0) {
+               if (alen < 3)
+                       goto bad;
+               type = pkg[1];
+               if (pkg[0] & ATTR_EXTLEN) {
+                       if (alen < 4)
+                               goto bad;
+                       memcpy(&len, pkg + 2, 2);
+                       len = ntohs(len);
+                       pkg += 4;
+                       alen -= 4;
+               } else {
+                       len = pkg[2];
+                       pkg += 3;
+                       alen -= 3;
+               }
+               if (len > alen)
+                       goto bad;
+
+               if (type == ATTR_MP_REACH_NLRI ||
+                   type == ATTR_MP_UNREACH_NLRI) {
+                       if (alen < 3)
+                               goto bad;
+                       memcpy(&afi, pkg, 2);
+                       afi = ntohs(afi);
+                       if (afi2aid(afi, pkg[2], &aid) == -1)
+                               goto bad;
+                       return aid;
+               }
+
+               pkg += len;
+               alen -= len;
+       }
+
+bad:
+       return AID_UNSPEC;
+}
+
+static u_int16_t
+mrt_bgp_msg_subtype(struct mrt *mrt, void *pkg, u_int16_t pkglen,
+    struct peer *peer, enum msg_type msgtype, int in)
 {
-       struct ibuf     *buf;
-       int              incoming = 0;
        u_int16_t        subtype = BGP4MP_MESSAGE;
+       u_int8_t         aid, mask;
 
        if (peer->capa.neg.as4byte)
                subtype = BGP4MP_MESSAGE_AS4;
 
+       if (msgtype != UPDATE)
+               return subtype;
+
+       /*
+        * RFC8050 adjust types for add-path enabled sessions.
+        * It is necessary to extract the AID from UPDATES to decide
+        * if the add-path types are needed or not. The ADDPATH
+        * subtypes only matter for BGP UPDATES.
+        */
+
+       mask = in ? CAPA_AP_RECV : CAPA_AP_SEND;
+       /* only guess if add-path could be active */
+       if (peer->capa.neg.add_path[0] & mask) {
+               aid = mrt_bgp_guess_aid(pkg, pkglen);
+               if (aid != AID_UNSPEC &&
+                   (peer->capa.neg.add_path[aid] & mask)) {
+                       if (peer->capa.neg.as4byte)
+                               subtype = BGP4MP_MESSAGE_AS4_ADDPATH;
+                       else
+                               subtype = BGP4MP_MESSAGE_ADDPATH;
+               }
+       }
+
+       return subtype;
+}
+
+void
+mrt_dump_bgp_msg(struct mrt *mrt, void *pkg, u_int16_t pkglen,
+    struct peer *peer, enum msg_type msgtype)
+{
+       struct ibuf     *buf;
+       int              in = 0;
+       u_int16_t        subtype = BGP4MP_MESSAGE;
+
        /* get the direction of the message to swap address and AS fields */
        if (mrt->type == MRT_ALL_IN || mrt->type == MRT_UPDATE_IN)
-               incoming = 1;
+               in = 1;
+
+       subtype = mrt_bgp_msg_subtype(mrt, pkg, pkglen, peer, msgtype, in);
 
        if (mrt_dump_hdr_se(&buf, peer, MSG_PROTOCOL_BGP4MP_ET, subtype,
-           pkglen, incoming) == -1)
+           pkglen, in) == -1)
                return;
 
        if (ibuf_add(buf, pkg, pkglen) == -1) {
@@ -492,58 +597,42 @@ fail:
        return (-1);
 }
 
-int
-mrt_dump_entry_v2(struct mrt *mrt, struct rib_entry *re, u_int32_t snum)
+static int
+mrt_dump_entry_v2_rib(struct rib_entry *re, struct ibuf **nb, struct ibuf 
**apb,
+    uint16_t *np, uint16_t *app)
 {
-       struct ibuf     *buf, *hbuf = NULL;
-       struct prefix   *p;
        struct bgpd_addr addr;
-       size_t           len, off;
-       u_int16_t        subtype, nump;
-
-       switch (re->prefix->aid) {
-       case AID_INET:
-               subtype = MRT_DUMP_V2_RIB_IPV4_UNICAST;
-               break;
-       case AID_INET6:
-               subtype = MRT_DUMP_V2_RIB_IPV6_UNICAST;
-               break;
-       default:
-               subtype = MRT_DUMP_V2_RIB_GENERIC;
-               break;
-       }
+       struct ibuf *buf, **bp;
+       struct prefix *p;
+       size_t len;
+       int addpath;
 
-       if ((buf = ibuf_dynamic(0, UINT_MAX)) == NULL) {
-               log_warn("%s: ibuf_dynamic", __func__);
-               return (-1);
-       }
+       *np = 0;
+       *app = 0;
 
-       DUMP_LONG(buf, snum);
-       pt_getaddr(re->prefix, &addr);
-       if (subtype == MRT_DUMP_V2_RIB_GENERIC) {
-               u_int16_t afi;
-               u_int8_t safi;
-
-               aid2afi(re->prefix->aid, &afi, &safi);
-               DUMP_SHORT(buf, afi);
-               DUMP_BYTE(buf, safi);
-       }
-       if (prefix_writebuf(buf, &addr, re->prefix->prefixlen) == -1) {
-               log_warnx("%s: prefix_writebuf error", __func__);
-               goto fail;
-       }
-
-       off = ibuf_size(buf);
-       if (ibuf_reserve(buf, sizeof(nump)) == NULL) {
-               log_warn("%s: ibuf_reserve error", __func__);
-               goto fail;
-       }
-       nump = 0;
        LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
                struct nexthop          *nexthop;
                struct bgpd_addr        *nh;
                struct ibuf             *tbuf;
 
+               addpath = peer_has_add_path(prefix_peer(p), re->prefix->aid,
+                   CAPA_AP_RECV);
+
+               if (addpath) {
+                       bp = apb;
+                       *app += 1;
+               } else {
+                       bp = nb;
+                       *np += 1;
+               }
+               if ((buf = *bp) == NULL) {
+                       if ((buf = ibuf_dynamic(0, UINT_MAX)) == NULL) {
+                               log_warn("%s: ibuf_dynamic", __func__);
+                               goto fail;
+                       }
+                       *bp = buf;
+               }
+
                nexthop = prefix_nexthop(p);
                if (nexthop == NULL) {
                        bzero(&addr, sizeof(struct bgpd_addr));
@@ -556,6 +645,10 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
                /* originated timestamp */
                DUMP_LONG(buf, time(NULL) - (getmonotime() - p->lastchange));
 
+               /* RFC8050: path-id if add-path is used */
+               if (addpath)
+                       DUMP_LONG(buf, p->path_id);
+
                if ((tbuf = ibuf_dynamic(0, MAX_PKTSIZE)) == NULL) {
                        log_warn("%s: ibuf_dynamic", __func__);
                        goto fail;
@@ -574,24 +667,109 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
                        goto fail;
                }
                ibuf_free(tbuf);
-               nump++;
        }
-       nump = htons(nump);
-       memcpy(ibuf_seek(buf, off, sizeof(nump)), &nump, sizeof(nump));
 
-       len = ibuf_size(buf);
-       if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP_V2, subtype, len) == -1) {
-               ibuf_free(buf);
-               return (-1);
+       return 0;
+
+fail:
+       return -1;
+}
+
+int
+mrt_dump_entry_v2(struct mrt *mrt, struct rib_entry *re, u_int32_t snum)
+{
+       char             pbuf[260];
+       struct ibuf     *hbuf = NULL, *nbuf = NULL, *apbuf = NULL;
+       struct bgpd_addr addr;
+       size_t           hlen, len;
+       u_int16_t        subtype, apsubtype, nump, apnump, afi;
+       u_int8_t         safi;
+       int              plen;
+
+       pt_getaddr(re->prefix, &addr);
+       plen = prefix_write(pbuf, sizeof(pbuf), &addr, re->prefix->prefixlen,
+           0);
+       if (plen == -1) {
+               log_warnx("%s: prefix_write error", __func__);
+               return -1;
        }
 
-       ibuf_close(&mrt->wbuf, hbuf);
-       ibuf_close(&mrt->wbuf, buf);
+       switch (re->prefix->aid) {
+       case AID_INET:
+               subtype = MRT_DUMP_V2_RIB_IPV4_UNICAST;
+               apsubtype = MRT_DUMP_V2_RIB_IPV4_UNICAST_ADDPATH;
+               break;
+       case AID_INET6:
+               subtype = MRT_DUMP_V2_RIB_IPV6_UNICAST;
+               apsubtype = MRT_DUMP_V2_RIB_IPV6_UNICAST_ADDPATH;
+               break;
+       default:
+               /*
+                * XXX The RFC defined the format for this type differently
+                * and it is prohibitly expensive to implement that format.
+                * Instead do what gobgp does and encode it like the other
+                * types.
+                */
+               subtype = MRT_DUMP_V2_RIB_GENERIC;
+               apsubtype = MRT_DUMP_V2_RIB_GENERIC_ADDPATH;
+               aid2afi(re->prefix->aid, &afi, &safi);
+
+               /* prepend 3-bytes AFI/SAFI */
+               memmove(pbuf + 3, pbuf, plen);
+               plen += 3;
+               afi = ntohs(afi);
+               memcpy(pbuf, &afi, sizeof(afi));
+               pbuf[2] = safi;
+               break;
+       }
+       hlen = sizeof(snum) + sizeof(nump) + plen;
+
+       if (mrt_dump_entry_v2_rib(re, &nbuf, &apbuf, &nump, &apnump))
+               goto fail;
+
+       if (nump > 0) {
+               len = ibuf_size(nbuf) + hlen;
+               if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP_V2, subtype,
+                   len) == -1)
+                       goto fail;
+
+               DUMP_LONG(hbuf, snum);
+               if (ibuf_add(hbuf, pbuf, plen) == -1) {
+                       log_warn("%s: ibuf_add error", __func__);
+                       goto fail;
+               }
+               DUMP_SHORT(hbuf, nump);
+
+               ibuf_close(&mrt->wbuf, hbuf);
+               ibuf_close(&mrt->wbuf, nbuf);
+               hbuf = NULL;
+               nbuf = NULL;
+       }
+
+       if (apnump > 0) {
+               len = ibuf_size(apbuf) + hlen;
+               if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP_V2, apsubtype,
+                   len) == -1)
+                       goto fail;
+
+               DUMP_LONG(hbuf, snum);
+               if (ibuf_add(hbuf, pbuf, plen) == -1) {
+                       log_warn("%s: ibuf_add error", __func__);
+                       goto fail;
+               }
+               DUMP_SHORT(hbuf, apnump);
+
+               ibuf_close(&mrt->wbuf, hbuf);
+               ibuf_close(&mrt->wbuf, apbuf);
+               hbuf = NULL;
+               apbuf = NULL;
+       }
 
        return (0);
 fail:
+       ibuf_free(apbuf);
+       ibuf_free(nbuf);
        ibuf_free(hbuf);
-       ibuf_free(buf);
        return (-1);
 }
 
@@ -744,14 +922,16 @@ mrt_dump_hdr_se(struct ibuf ** bp, struc
        switch (peer->local.aid) {
        case AID_INET:
                if (subtype == BGP4MP_STATE_CHANGE_AS4 ||
-                   subtype == BGP4MP_MESSAGE_AS4)
+                   subtype == BGP4MP_MESSAGE_AS4 ||
+                   subtype == BGP4MP_MESSAGE_AS4_ADDPATH)
                        len += MRT_BGP4MP_ET_AS4_IPv4_HEADER_SIZE;
                else
                        len += MRT_BGP4MP_ET_IPv4_HEADER_SIZE;
                break;
        case AID_INET6:
                if (subtype == BGP4MP_STATE_CHANGE_AS4 ||
-                   subtype == BGP4MP_MESSAGE_AS4)
+                   subtype == BGP4MP_MESSAGE_AS4 ||
+                   subtype == BGP4MP_MESSAGE_AS4_ADDPATH)
                        len += MRT_BGP4MP_ET_AS4_IPv6_HEADER_SIZE;
                else
                        len += MRT_BGP4MP_ET_IPv6_HEADER_SIZE;
@@ -768,7 +948,8 @@ mrt_dump_hdr_se(struct ibuf ** bp, struc
        DUMP_LONG(*bp, time.tv_nsec / 1000);
 
        if (subtype == BGP4MP_STATE_CHANGE_AS4 ||
-           subtype == BGP4MP_MESSAGE_AS4) {
+           subtype == BGP4MP_MESSAGE_AS4 ||
+           subtype == BGP4MP_MESSAGE_AS4_ADDPATH) {
                if (!swap)
                        DUMP_LONG(*bp, peer->conf.local_as);
                DUMP_LONG(*bp, peer->conf.remote_as);
Index: session.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.423
diff -u -p -r1.423 session.c
--- session.c   27 Jul 2021 07:14:31 -0000      1.423
+++ session.c   30 Jul 2021 09:16:56 -0000
@@ -1392,7 +1392,8 @@ session_sendmsg(struct bgp_msg *msg, str
                if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
                    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
                    mrt->group_id == p->conf.groupid))
-                       mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
+                       mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p,
+                           msg->type);
        }
 
        ibuf_close(&p->wbuf, msg->buf);
@@ -1915,7 +1916,8 @@ session_process_msg(struct peer *p)
                        if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
                            mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
                            mrt->group_id == p->conf.groupid))
-                               mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p);
+                               mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p,
+                                   msgtype);
                }
 
                switch (msgtype) {
@@ -2794,11 +2796,15 @@ capa_neg_calc(struct peer *p)
        if (p->capa.ann.add_path[0]) {
                for (i = AID_MIN; i < AID_MAX; i++) {
                        if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) &&
-                           (p->capa.peer.add_path[i] & CAPA_AP_SEND))
+                           (p->capa.peer.add_path[i] & CAPA_AP_SEND)) {
                                p->capa.neg.add_path[i] |= CAPA_AP_RECV;
+                               p->capa.neg.add_path[0] |= CAPA_AP_RECV;
+                       }
                        if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) &&
-                           (p->capa.peer.add_path[i] & CAPA_AP_RECV))
+                           (p->capa.peer.add_path[i] & CAPA_AP_RECV)) {
                                p->capa.neg.add_path[i] |= CAPA_AP_SEND;
+                               p->capa.neg.add_path[0] |= CAPA_AP_SEND;
+                       }
                }
        }
 
Index: session.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
retrieving revision 1.152
diff -u -p -r1.152 session.h
--- session.h   27 Jul 2021 07:14:31 -0000      1.152
+++ session.h   30 Jul 2021 09:16:56 -0000
@@ -289,7 +289,7 @@ void                 log_conn_attempt(const struct pee
 
 /* mrt.c */
 void            mrt_dump_bgp_msg(struct mrt *, void *, u_int16_t,
-                    struct peer *);
+                    struct peer *, enum msg_type);
 void            mrt_dump_state(struct mrt *, u_int16_t, u_int16_t,
                     struct peer *);
 void            mrt_done(struct mrt *);

Reply via email to