Hi,

When forwaring in packets parallel while removing the corresponding
ARP entry in a loop like this
while :; do arp -nd 10.10.21.1; arp -nd 10.10.22.4; done
it was quite easy to crash the kernel.

The race is exposed by putting a delay() here:

Index: net/rtable.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtable.c,v
retrieving revision 1.82
diff -u -p -r1.82 rtable.c
--- net/rtable.c        19 Apr 2023 17:42:47 -0000      1.82
+++ net/rtable.c        5 Jul 2023 19:59:04 -0000
@@ -489,6 +489,8 @@ rtable_match(unsigned int rtableid, stru
        if (an == NULL)
                goto out;
 
+       delay(10);  /* triggers crash */
+
        rt = SRPL_FIRST(&sr, &an->an_rtlist);
        if (rt == NULL) {
                SRPL_LEAVE(&sr);

Then expecting the struct rtentry rt_ifa is bogous and rt_refcnt
is 0.  So it looks like a refcounting bug.

While an_rtlist is protected by SRP, the route entries are not.  I
increased the refcount while the entry is held the an_rtlist.  Then
the crash goes away.

ok?

bluhm

Index: net/rtable.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtable.c,v
retrieving revision 1.82
diff -u -p -r1.82 rtable.c
--- net/rtable.c        19 Apr 2023 17:42:47 -0000      1.82
+++ net/rtable.c        5 Jul 2023 20:05:26 -0000
@@ -604,6 +604,11 @@ rtable_insert(unsigned int rtableid, str
        SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
 
        prev = art_insert(ar, an, addr, plen);
+       if (prev == an) {
+               rw_exit_write(&ar->ar_lock);
+               /* keep the refcount for rt while it is in an_rtlist */
+               return (0);
+       }
        if (prev != an) {
                SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
                    rt_next);
@@ -689,9 +694,10 @@ rtable_delete(unsigned int rtableid, str
                npaths++;
 
        if (npaths > 1) {
-               KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
+               KASSERT(refcnt_read(&rt->rt_refcnt) >= 2);
                SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
                    rt_next);
+               rtfree(rt);
 
                mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
                if (npaths == 2)
@@ -703,8 +709,9 @@ rtable_delete(unsigned int rtableid, str
        if (art_delete(ar, an, addr, plen) == NULL)
                panic("art_delete failed to find node %p", an);
 
-       KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
+       KASSERT(refcnt_read(&rt->rt_refcnt) >= 2);
        SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
+       rtfree(rt);
        art_put(an);
 
 leave:
@@ -821,12 +828,11 @@ rtable_mpath_reprio(unsigned int rtablei
                 */
                rt->rt_priority = prio;
        } else {
-               rtref(rt); /* keep rt alive in between remove and insert */
+               KASSERT(refcnt_read(&rt->rt_refcnt) >= 2);
                SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
                    rt, rtentry, rt_next);
                rt->rt_priority = prio;
                rtable_mpath_insert(an, rt);
-               rtfree(rt);
                error = EAGAIN;
        }
        rw_exit_write(&ar->ar_lock);
@@ -839,6 +845,9 @@ rtable_mpath_insert(struct art_node *an,
 {
        struct rtentry                  *mrt, *prt = NULL;
        uint8_t                          prio = rt->rt_priority;
+
+       /* increment the refcount for rt while it is in an_rtlist */
+       rtref(rt);
 
        if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
                SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);

Reply via email to