On Mon, Nov 18, 2024 at 03:26:21PM -0800, Andrew Hewus Fresh wrote:
> On Sun, Nov 17, 2024 at 04:16:17PM +1000, David Gwynne wrote:
> > On Sat, Nov 16, 2024 at 07:36:37PM -0800, Andrew Hewus Fresh wrote:
> > > I finally got around to fixing my alpha, which involved replacing the
> > > disk.  That meant I have to scp some stuff over to to it and after a bit
> > > of time it panics:
> > > 
> > > panic: mtx 0xfffffe000002a628: locking against myself
> > > Stopped at      db_enter+0x8:   lda     sp,10(sp)
> > >     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
> > > *204699  79774      0     0x14000      0x200    0  softnet0
> > > db_enter(0, 7ffffe00e0003f8, 1, 8, 3, 8) at db_enter+0x8
> > > panic(?, fffffe000002a628, 1b0, 10, a, 1) at panic+0xe8
> > > mtx_enter(?, ?, 1b0, 10, a, 1) at mtx_enter+0xb4
> > > ifq_set_oactive(?, ?, 1b0, 10, a, 1) at ifq_set_oactive+0x50
> > 
> > this is from src/sys/net/ifq.c r1.50 where i added a counter for the
> > number of times oactive gets set. because there's checks and multiple
> > things being tweaked i used the ifq mutex to serialise the updates.
> > 
> > de(4) uses ifq_deq_begin to try and shove an mbuf onto the hardware,
> > which takes but doesnt release the ifq mutex until ifq_deq_commit or
> > ifq_deq_rollback is called. so while it's holding the mutex is calls
> > ifq_set_oactive, which also tries to take the mutex.
> > 
> > i honestly don't understand what de(4) is doing with the hardware and
> > packet setup, so i dont feel confident changing the driver to avoid
> > this. the least worst alternative i could think of is to provide an
> > alternative set_oactive it can call.
> > 
> > the diff below should fix this.
> 
> I mean, it "fixed" the locking against myself error :-)

nice work getting a kernel build going.

> de0 at pci0 dev 11 function 0 "DEC 21040" rev 0x23, DEC 21040 pass 2.3: isa 
> irq 5, address ...
> panic: mutex 0xfffffe000002a628 not held in ifq_deq_set_oactive
> Stopped at      db_enter+0x8:   lda     sp,10(sp)
>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
> *     0      0      0     0x10000      0x200    0  swapper
> db_enter(0, 7ffffe00e0003f8, 1, 8, 3, fffffc0000000008) at db_enter+0x8
> panic(?, fffffe000002a628, fffffc0000bc9718, 1, 5, 1) at panic+0xe8
> ifq_deq_set_oactive(?, ?, fffffc0000bc9718, 1, 5, 1) at 
> ifq_deq_set_oactive+0x8
> 8
> tulip_txput(?, ?, fffffe000002a000, ?, 5, 1) at tulip_txput+0x5e8

hrm.

this might be better. or it might not even compile.

Index: net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
diff -u -p -r1.54 ifq.c
--- net/ifq.c   9 Nov 2024 04:09:56 -0000       1.54
+++ net/ifq.c   19 Nov 2024 01:58:05 -0000
@@ -156,6 +156,17 @@ ifq_set_oactive(struct ifqueue *ifq)
 }
 
 void
+ifq_deq_set_oactive(struct ifqueue *ifq)
+{
+       MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);
+
+       if (!ifq->ifq_oactive) {
+               ifq->ifq_oactive = 1;
+               ifq->ifq_oactives++;
+       }
+}
+
+void
 ifq_restart_task(void *p)
 {
        struct ifqueue *ifq = p;
Index: net/ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
diff -u -p -r1.41 ifq.h
--- net/ifq.h   10 Nov 2023 15:51:24 -0000      1.41
+++ net/ifq.h   19 Nov 2024 01:58:05 -0000
@@ -444,6 +444,7 @@ void                 ifq_q_leave(struct ifqueue *, voi
 void            ifq_serialize(struct ifqueue *, struct task *);
 void            ifq_barrier(struct ifqueue *);
 void            ifq_set_oactive(struct ifqueue *);
+void            ifq_deq_set_oactive(struct ifqueue *);
 
 int             ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
                     const char *, volatile unsigned int *,
Index: dev/pci/if_de.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_de.c,v
diff -u -p -r1.143 if_de.c
--- dev/pci/if_de.c     24 May 2024 06:02:53 -0000      1.143
+++ dev/pci/if_de.c     19 Nov 2024 01:58:05 -0000
@@ -3728,6 +3728,8 @@ tulip_txput(tulip_softc_t * const sc, st
     u_int32_t d_status;
     bus_dmamap_t map;
     struct ifnet *ifp = &sc->tulip_if;
+    void (*set_oactive)(struct ifqueue *) =
+      notonqueue ? ifq_set_oactive : ifq_deq_set_oactive;
 
 #if defined(TULIP_DEBUG)
     if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) {
@@ -3846,8 +3848,10 @@ tulip_txput(tulip_softc_t * const sc, st
      * The descriptors have been filled in.  Now get ready
      * to transmit.
      */
-    if (!notonqueue)
+    if (!notonqueue) {
        ifq_deq_commit(&ifp->if_snd, m);
+       set_oactive = ifq_set_oactive;
+    }
 
     TULIP_SETCTX(m, map);
     map = NULL;
@@ -3897,7 +3901,7 @@ tulip_txput(tulip_softc_t * const sc, st
 
     if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) {
        TULIP_CSR_WRITE(sc, csr_txpoll, 1);
-       ifq_set_oactive(&sc->tulip_if.if_snd);
+       (*set_oactive)(&sc->tulip_if.if_snd);
        TULIP_PERFEND(txput);
        return (NULL);
     }
@@ -3926,7 +3930,7 @@ tulip_txput(tulip_softc_t * const sc, st
     sc->tulip_dbg.dbg_txput_finishes[6]++;
 #endif
     if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_DOINGSETUP)) {
-       ifq_set_oactive(&sc->tulip_if.if_snd);
+       (*set_oactive)(&sc->tulip_if.if_snd);
        if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) {
            sc->tulip_intrmask |= TULIP_STS_TXINTR;
            TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask);

Reply via email to