The branch main has been updated by melifaro:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=36e15b717eec80047fe7442898b5752101f2fbca

commit 36e15b717eec80047fe7442898b5752101f2fbca
Author:     Alexander V. Chernikov <[email protected]>
AuthorDate: 2021-08-15 22:25:21 +0000
Commit:     Alexander V. Chernikov <[email protected]>
CommitDate: 2021-08-17 20:46:22 +0000

    routing: Fix crashes with dpdk_lpm[46] algo.
    
    When a prefix gets deleted from the RIB, dpdk_lpm algo needs to know
     the nexthop of the "parent" prefix to update its internal state.
    The glue code, which utilises RIB as a backing route store, uses
     fib[46]_lookup_rt() for the prefix destination after its deletion
     to fetch the desired nexthop.
    This approach does not work when deleting less-specific prefixes
     with most-specific ones are still present. For example, if
     10.0.0.0/24, 10.0.0.0/23 and 10.0.0.0/22 exist in RIB, deleting
     10.0.0.0/23 would result in 10.0.0.0/24 being returned as a search
     result instead of 10.0.0.0/22. This, in turn, results in the failed
     datastructure update: part of the deleted /23 prefix will still
     contain the reference to an old nexthop. This leads to the
     use-after-free behaviour, ending with the eventual crashes.
    
    Fix the logic flaw by properly fetching the prefix "parent" via
     newly-created rt_get_inet[6]_parent() helpers.
    
    Differential Revision: https://reviews.freebsd.org/D31546
    PR:     256882,256833
    MFC after:      1 week
---
 sys/contrib/dpdk_rte_lpm/dpdk_lpm.c  |  32 ++++----
 sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c |  42 +++++-----
 sys/net/radix.c                      |  14 ++++
 sys/net/radix.h                      |   1 +
 sys/net/route/route_ctl.h            |   3 +
 sys/net/route/route_helpers.c        | 150 +++++++++++++++++++++++++++++++++++
 6 files changed, 208 insertions(+), 34 deletions(-)

diff --git a/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c 
b/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
index af145997c4d6..51cd134132f6 100644
--- a/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
+++ b/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
@@ -134,26 +134,27 @@ handle_default_change(struct dpdk_lpm_data *dd, struct 
rib_cmd_info *rc)
 }
 
 static void
-get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, uint8_t *plen, 
uint32_t *nhop_idx)
+get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, int plen,
+    uint8_t *pplen, uint32_t *nhop_idx)
 {
-       struct route_nhop_data rnd;
        struct rtentry *rt;
 
-       rt = fib4_lookup_rt(dd->fibnum, addr, 0, NHR_UNLOCKED, &rnd);
+       rt = rt_get_inet_parent(dd->fibnum, addr, plen);
        if (rt != NULL) {
                struct in_addr addr4;
                uint32_t scopeid;
-               int inet_plen;
-               rt_get_inet_prefix_plen(rt, &addr4, &inet_plen, &scopeid);
-               if (inet_plen > 0) {
-                       *plen = inet_plen;
-                       *nhop_idx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
+               int parent_plen;
+
+               rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
+               if (parent_plen > 0) {
+                       *pplen = parent_plen;
+                       *nhop_idx = fib_get_nhop_idx(dd->fd, 
rt_get_raw_nhop(rt));
                        return;
                }
        }
 
        *nhop_idx = 0;
-       *plen = 0;
+       *pplen = 0;
 }
 
 static enum flm_op_result
@@ -181,20 +182,23 @@ handle_gu_change(struct dpdk_lpm_data *dd, const struct 
rib_cmd_info *rc,
                }
 
                ret = rte_lpm_add(dd->lpm, ip, plen, nhidx);
-               FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
+               FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u 
ret: %d",
                    (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
-                   abuf, plen, nhidx, ret);
+                   abuf, plen,
+                   rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, 
rc->rc_nh_old) : 0,
+                   nhidx, ret);
        } else {
                /*
                 * Need to lookup parent. Assume deletion happened already
                 */
                uint8_t parent_plen;
                uint32_t parent_nhop_idx;
-               get_parent_rule(dd, addr, &parent_plen, &parent_nhop_idx);
+               get_parent_rule(dd, addr, plen, &parent_plen, &parent_nhop_idx);
 
                ret = rte_lpm_delete(dd->lpm, ip, plen, parent_plen, 
parent_nhop_idx);
-               FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d nhop %u = %d",
-                   "DEL", abuf, plen, nhidx, ret);
+               FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d -> /%d nhop %u -> 
%u ret: %d",
+                   "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, 
rc->rc_nh_old),
+                   parent_nhop_idx, ret);
        }
 
        if (ret != 0) {
diff --git a/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c 
b/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
index 17d35c16346d..ec1a3228d42b 100644
--- a/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
+++ b/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
@@ -165,30 +165,26 @@ handle_ll_change(struct dpdk_lpm6_data *dd, struct 
rib_cmd_info *rc,
 }
 
 static struct rte_lpm6_rule *
-pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6,
-    char *buffer)
+pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6, int 
plen,
+    int *pplen, uint32_t *pnhop_idx, char *buffer)
 {
        struct rte_lpm6_rule *lsp_rule = NULL;
-       struct route_nhop_data rnd;
        struct rtentry *rt;
-       int plen;
 
-       rt = fib6_lookup_rt(dd->fibnum, addr6, 0, NHR_UNLOCKED, &rnd);
+       *pnhop_idx = 0;
+       *pplen = 0;
+
+       rt = rt_get_inet6_parent(dd->fibnum, addr6, plen);
        /* plen = 0 means default route and it's out of scope */
        if (rt != NULL) {
-               uint32_t scopeid;
+               uint32_t nhop_idx, scopeid;
                struct in6_addr new_addr6;
                rt_get_inet6_prefix_plen(rt, &new_addr6, &plen, &scopeid);
                if (plen > 0) {
-                       uint32_t nhidx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
-                       if (nhidx == 0) {
-                               /*
-                                * shouldn't happen as we already have parent 
route.
-                                * It will trigger rebuild automatically.
-                                */
-                               return (NULL);
-                       }
-                       lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, 
plen, nhidx);
+                       nhop_idx = fib_get_nhop_idx(dd->fd, 
rt_get_raw_nhop(rt));
+                       lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, 
plen, nhop_idx);
+                       *pnhop_idx = nhop_idx;
+                       *pplen = plen;
                }
        }
 
@@ -217,20 +213,26 @@ handle_gu_change(struct dpdk_lpm6_data *dd, const struct 
rib_cmd_info *rc,
 
                ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6,
                                   plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 
0);
-               FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
+               FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u 
ret: %d",
                    (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
-                   abuf, plen, nhidx, ret);
+                   abuf, plen,
+                   rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, 
rc->rc_nh_old) : 0,
+                   nhidx, ret);
        } else {
                /*
                 * Need to lookup parent. Assume deletion happened already
                 */
                char buffer[RTE_LPM6_RULE_SIZE];
                struct rte_lpm6_rule *lsp_rule = NULL;
-               lsp_rule = pack_parent_rule(dd, addr6, buffer);
+               int parent_plen;
+               uint32_t parent_nhop_idx;
+               lsp_rule = pack_parent_rule(dd, addr6, plen, &parent_plen,
+                   &parent_nhop_idx, buffer);
 
                ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, 
lsp_rule);
-               FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop ? = %d",
-                   "DEL", abuf, plen, ret);
+               FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d -> /%d nhop %u 
-> %u ret: %d",
+                   "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, 
rc->rc_nh_old),
+                   parent_nhop_idx, ret);
        }
 
        if (ret != 0) {
diff --git a/sys/net/radix.c b/sys/net/radix.c
index 931bf6db871b..2169361c7229 100644
--- a/sys/net/radix.c
+++ b/sys/net/radix.c
@@ -371,6 +371,20 @@ on1:
        return (0);
 }
 
+/*
+ * Returns the next (wider) prefix for the key defined by @rn
+ *  if exists.
+ */
+struct radix_node *
+rn_nextprefix(struct radix_node *rn)
+{
+       for (rn = rn->rn_dupedkey; rn != NULL; rn = rn->rn_dupedkey) {
+               if (!(rn->rn_flags & RNF_ROOT))
+                       return (rn);
+       }
+       return (NULL);
+}
+
 #ifdef RN_DEBUG
 int    rn_nodenum;
 struct radix_node *rn_clist;
diff --git a/sys/net/radix.h b/sys/net/radix.h
index a0e5e5c5aa3f..97555ee9e16d 100644
--- a/sys/net/radix.h
+++ b/sys/net/radix.h
@@ -119,6 +119,7 @@ typedef int rn_walktree_t(struct radix_head *head, 
walktree_f_t *f,
 typedef int rn_walktree_from_t(struct radix_head *head,
     void *a, void *m, walktree_f_t *f, void *w);
 typedef void rn_close_t(struct radix_node *rn, struct radix_head *head);
+struct radix_node *rn_nextprefix(struct radix_node *rn);
 
 struct radix_mask_head;
 
diff --git a/sys/net/route/route_ctl.h b/sys/net/route/route_ctl.h
index 229c762d4a73..a670979df8c9 100644
--- a/sys/net/route/route_ctl.h
+++ b/sys/net/route/route_ctl.h
@@ -117,6 +117,7 @@ void rt_get_inet_prefix_plen(const struct rtentry *rt, 
struct in_addr *paddr,
     int *plen, uint32_t *pscopeid);
 void rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
     struct in_addr *pmask, uint32_t *pscopeid);
+struct rtentry *rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int 
plen);
 #endif
 #ifdef INET6
 struct in6_addr;
@@ -124,6 +125,8 @@ void rt_get_inet6_prefix_plen(const struct rtentry *rt, 
struct in6_addr *paddr,
     int *plen, uint32_t *pscopeid);
 void rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr 
*paddr,
     struct in6_addr *pmask, uint32_t *pscopeid);
+struct rtentry *rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr 
*paddr,
+    int plen);
 #endif
 
 /* Nexthops */
diff --git a/sys/net/route/route_helpers.c b/sys/net/route/route_helpers.c
index 5d29197cc4fb..569e13a308eb 100644
--- a/sys/net/route/route_helpers.c
+++ b/sys/net/route/route_helpers.c
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
 #endif
 #ifdef INET6
 #include <netinet6/in6_fib.h>
+#include <netinet6/in6_var.h>
 #endif
 #include <net/vnet.h>
 
@@ -415,3 +416,152 @@ rib_decompose_notification(struct rib_cmd_info *rc, 
route_notification_t *cb,
        }
 }
 #endif
+
+#ifdef INET
+/*
+ * Checks if the found key in the trie contains (<=) a prefix covering
+ *  @paddr/@plen.
+ * Returns the most specific rtentry matching the condition or NULL.
+ */
+static struct rtentry *
+get_inet_parent_prefix(uint32_t fibnum, struct in_addr addr, int plen)
+{
+       struct route_nhop_data rnd;
+       struct rtentry *rt;
+       struct in_addr addr4;
+       uint32_t scopeid;
+       int parent_plen;
+       struct radix_node *rn;
+
+       rt = fib4_lookup_rt(fibnum, addr, 0, NHR_UNLOCKED, &rnd);
+       rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
+       if (parent_plen <= plen)
+               return (rt);
+
+       /*
+        * There can be multiple prefixes associated with the found key:
+        * 10.0.0.0 -> 10.0.0.0/24, 10.0.0.0/23, 10.0.0.0/22, etc.
+        * All such prefixes are linked via rn_dupedkey, from most specific
+        *  to least specific. Iterate over them to check if any of these
+        *  prefixes are wider than desired plen.
+        */
+       rn = (struct radix_node *)rt;
+       while ((rn = rn_nextprefix(rn)) != NULL) {
+               rt = RNTORT(rn);
+               rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
+               if (parent_plen <= plen)
+                       return (rt);
+       }
+
+       return (NULL);
+}
+
+/*
+ * Returns the most specific prefix containing (>) @paddr/plen.
+ */
+struct rtentry *
+rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen)
+{
+       struct in_addr lookup_addr = { .s_addr = INADDR_BROADCAST };
+       struct in_addr addr4 = addr;
+       struct in_addr mask4;
+       struct rtentry *rt;
+
+       while (plen-- > 0) {
+               /* Calculate wider mask & new key to lookup */
+               mask4.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
+               addr4.s_addr = htonl(ntohl(addr4.s_addr) & ntohl(mask4.s_addr));
+               if (addr4.s_addr == lookup_addr.s_addr) {
+                       /* Skip lookup if the key is the same */
+                       continue;
+               }
+               lookup_addr = addr4;
+
+               rt = get_inet_parent_prefix(fibnum, lookup_addr, plen);
+               if (rt != NULL)
+                       return (rt);
+       }
+
+       return (NULL);
+}
+#endif
+
+#ifdef INET6
+/*
+ * Checks if the found key in the trie contains (<=) a prefix covering
+ *  @paddr/@plen.
+ * Returns the most specific rtentry matching the condition or NULL.
+ */
+static struct rtentry *
+get_inet6_parent_prefix(uint32_t fibnum, const struct in6_addr *paddr, int 
plen)
+{
+       struct route_nhop_data rnd;
+       struct rtentry *rt;
+       struct in6_addr addr6;
+       uint32_t scopeid;
+       int parent_plen;
+       struct radix_node *rn;
+
+       rt = fib6_lookup_rt(fibnum, paddr, 0, NHR_UNLOCKED, &rnd);
+       rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
+       if (parent_plen <= plen)
+               return (rt);
+
+       /*
+        * There can be multiple prefixes associated with the found key:
+        * 2001:db8:1::/64 -> 2001:db8:1::/56, 2001:db8:1::/48, etc.
+        * All such prefixes are linked via rn_dupedkey, from most specific
+        *  to least specific. Iterate over them to check if any of these
+        *  prefixes are wider than desired plen.
+        */
+       rn = (struct radix_node *)rt;
+       while ((rn = rn_nextprefix(rn)) != NULL) {
+               rt = RNTORT(rn);
+               rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
+               if (parent_plen <= plen)
+                       return (rt);
+       }
+
+       return (NULL);
+}
+
+static void
+ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
+{
+       uint32_t *cp;
+
+       for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
+               *cp++ = 0xFFFFFFFF;
+       if (mask > 0)
+               *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
+}
+
+/*
+ * Returns the most specific prefix containing (>) @paddr/plen.
+ */
+struct rtentry *
+rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr, int plen)
+{
+       struct in6_addr lookup_addr = in6mask128;
+       struct in6_addr addr6 = *paddr;
+       struct in6_addr mask6;
+       struct rtentry *rt;
+
+       while (plen-- > 0) {
+               /* Calculate wider mask & new key to lookup */
+               ipv6_writemask(&mask6, plen);
+               IN6_MASK_ADDR(&addr6, &mask6);
+               if (IN6_ARE_ADDR_EQUAL(&addr6, &lookup_addr)) {
+                       /* Skip lookup if the key is the same */
+                       continue;
+               }
+               lookup_addr = addr6;
+
+               rt = get_inet6_parent_prefix(fibnum, &lookup_addr, plen);
+               if (rt != NULL)
+                       return (rt);
+       }
+
+       return (NULL);
+}
+#endif
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main
To unsubscribe, send any mail to "[email protected]"

Reply via email to