On Fri, Apr 28, 2023 at 02:13:15PM +0200, Alexander Bluhm wrote:
> After running stress test successfully with this diff, next day
> machine crashed while compiling a new kernel.  It is unclear whether
> it is related to the diff.  The softdep in ps is probably processing
> make output via ssh.  Looks like recursive kernel stack overflow.
> 
> [-- MARK -- Fri Apr 28 13:25:00 2023]
> kernel: protection fault trap, code=0
> Stopped at      rt_if_linkstate_change+0x21:    movl    0x58(%rdi),%eax
> ddb{3}> 
> 
> 8a0) at art_table_walk+0x26c
> art_table_walk(ffff800000390900,fffffd8746decdc0,ffffffff813bde40,ffff8000247c9
> 8a0) at art_table_walk+0x26c
> art_table_walk(ffff800000390900,fffffd8746decea0,ffffffff813bde40,ffff8000247c9
> 8a0) at art_table_walk+0x26c
> art_walk(ffff800000390900,ffffffff813bde40,ffff8000247c98a0) at art_walk+0xd1
> rtable_walk(0,18,ffff8000247c9938,ffffffff813c2b70,ffff800000784050) at rtable_walk+0xa4
> art_walk(ffff800000390900,ffffffff813bde40,ffff8000247c98a0) at art_walk+0xd1
> rtable_walk(0,18,ffff8000247c9938,ffffffff813c2b70,ffff800000784050) at 
> rtable_

This is not rtfree() related. I was surprised, but the reference counters
used by art tables are not MP safe. I propose to use the refcnt API.


Index: sys/net/art.c
===================================================================
RCS file: /cvs/src/sys/net/art.c,v
retrieving revision 1.29
diff -u -p -r1.29 art.c
--- sys/net/art.c       12 Nov 2020 15:25:28 -0000      1.29
+++ sys/net/art.c       28 Apr 2023 12:53:28 -0000
@@ -535,7 +535,7 @@ art_table_delete(struct art_root *ar, st
 struct art_table *
 art_table_ref(struct art_root *ar, struct art_table *at)
 {
-       at->at_refcnt++;
+       refcnt_take(&at->at_refcnt);
        return (at);
 }
 
@@ -545,7 +545,7 @@ art_table_rele(struct art_table *at)
        if (at == NULL)
                return (0);
 
-       return (--at->at_refcnt == 0);
+       return (refcnt_rele(&at->at_refcnt) != 0);
 }
 
 int
@@ -729,7 +729,7 @@ art_table_get(struct art_root *ar, struc
        at->at_level = lvl;
        at->at_bits = ar->ar_bits[lvl];
        at->at_heap = at_heap;
-       at->at_refcnt = 0;
+       refcnt_init(&at->at_refcnt);
 
        if (parent != NULL) {
                node = srp_get_locked(&parent->at_heap[j].node);
@@ -754,13 +754,13 @@ art_table_put(struct art_root *ar, struc
        struct art_node         *node;
        uint32_t                 j = at->at_index;
 
-       KASSERT(at->at_refcnt == 0);
+       KASSERT(refcnt_read(&at->at_refcnt) == 0);
        KASSERT(j != 0 && j != 1);
 
        if (parent != NULL) {
                KASSERT(j != -1);
                KASSERT(at->at_level == parent->at_level + 1);
-               KASSERT(parent->at_refcnt >= 1);
+               KASSERT(refcnt_read(&parent->at_refcnt) >= 1);
 
                /* Give the route back to its parent. */
                node = srp_get_locked(&at->at_default);
Index: sys/net/art.h
===================================================================
RCS file: /cvs/src/sys/net/art.h,v
retrieving revision 1.23
diff -u -p -r1.23 art.h
--- sys/net/art.h       19 Apr 2023 17:42:47 -0000      1.23
+++ sys/net/art.h       28 Apr 2023 12:53:28 -0000
@@ -21,6 +21,7 @@
 
 #include <sys/rwlock.h>
 #include <sys/srp.h>
+#include <sys/refcnt.h>
 
 #define ART_MAXLVL     32      /* We currently use 32 levels for IPv6. */
 
@@ -66,10 +67,11 @@ struct art_table {
         */
        union {
                struct srp               node;
-               unsigned long            count;
+               struct refcnt            refcnt;
        } *at_heap;                             /* Array of 2^(slen+1) items */
 };
-#define        at_refcnt       at_heap[0].count/* Refcounter (1 per different route) */
+#define        at_refcnt       at_heap[0].refcnt /* Refcounter (1 per
+                                               different route) */
 #define        at_default      at_heap[1].node /* Default route (was in parent heap) */
 
 /* Heap size for an ART table of stride length ``slen''. */

Reply via email to