On Mon, Nov 18, 2024 at 03:26:21PM -0800, Andrew Hewus Fresh wrote: > On Sun, Nov 17, 2024 at 04:16:17PM +1000, David Gwynne wrote: > > On Sat, Nov 16, 2024 at 07:36:37PM -0800, Andrew Hewus Fresh wrote: > > > I finally got around to fixing my alpha, which involved replacing the > > > disk. That meant I have to scp some stuff over to to it and after a bit > > > of time it panics: > > > > > > panic: mtx 0xfffffe000002a628: locking against myself > > > Stopped at db_enter+0x8: lda sp,10(sp) > > > TID PID UID PRFLAGS PFLAGS CPU COMMAND > > > *204699 79774 0 0x14000 0x200 0 softnet0 > > > db_enter(0, 7ffffe00e0003f8, 1, 8, 3, 8) at db_enter+0x8 > > > panic(?, fffffe000002a628, 1b0, 10, a, 1) at panic+0xe8 > > > mtx_enter(?, ?, 1b0, 10, a, 1) at mtx_enter+0xb4 > > > ifq_set_oactive(?, ?, 1b0, 10, a, 1) at ifq_set_oactive+0x50 > > > > this is from src/sys/net/ifq.c r1.50 where i added a counter for the > > number of times oactive gets set. because there's checks and multiple > > things being tweaked i used the ifq mutex to serialise the updates. > > > > de(4) uses ifq_deq_begin to try and shove an mbuf onto the hardware, > > which takes but doesnt release the ifq mutex until ifq_deq_commit or > > ifq_deq_rollback is called. so while it's holding the mutex is calls > > ifq_set_oactive, which also tries to take the mutex. > > > > i honestly don't understand what de(4) is doing with the hardware and > > packet setup, so i dont feel confident changing the driver to avoid > > this. the least worst alternative i could think of is to provide an > > alternative set_oactive it can call. > > > > the diff below should fix this. > > I mean, it "fixed" the locking against myself error :-)
nice work getting a kernel build going. > de0 at pci0 dev 11 function 0 "DEC 21040" rev 0x23, DEC 21040 pass 2.3: isa > irq 5, address ... > panic: mutex 0xfffffe000002a628 not held in ifq_deq_set_oactive > Stopped at db_enter+0x8: lda sp,10(sp) > TID PID UID PRFLAGS PFLAGS CPU COMMAND > * 0 0 0 0x10000 0x200 0 swapper > db_enter(0, 7ffffe00e0003f8, 1, 8, 3, fffffc0000000008) at db_enter+0x8 > panic(?, fffffe000002a628, fffffc0000bc9718, 1, 5, 1) at panic+0xe8 > ifq_deq_set_oactive(?, ?, fffffc0000bc9718, 1, 5, 1) at > ifq_deq_set_oactive+0x8 > 8 > tulip_txput(?, ?, fffffe000002a000, ?, 5, 1) at tulip_txput+0x5e8 hrm. this might be better. or it might not even compile. Index: net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v diff -u -p -r1.54 ifq.c --- net/ifq.c 9 Nov 2024 04:09:56 -0000 1.54 +++ net/ifq.c 19 Nov 2024 01:58:05 -0000 @@ -156,6 +156,17 @@ ifq_set_oactive(struct ifqueue *ifq) } void +ifq_deq_set_oactive(struct ifqueue *ifq) +{ + MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); + + if (!ifq->ifq_oactive) { + ifq->ifq_oactive = 1; + ifq->ifq_oactives++; + } +} + +void ifq_restart_task(void *p) { struct ifqueue *ifq = p; Index: net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v diff -u -p -r1.41 ifq.h --- net/ifq.h 10 Nov 2023 15:51:24 -0000 1.41 +++ net/ifq.h 19 Nov 2024 01:58:05 -0000 @@ -444,6 +444,7 @@ void ifq_q_leave(struct ifqueue *, voi void ifq_serialize(struct ifqueue *, struct task *); void ifq_barrier(struct ifqueue *); void ifq_set_oactive(struct ifqueue *); +void ifq_deq_set_oactive(struct ifqueue *); int ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int, const char *, volatile unsigned int *, Index: dev/pci/if_de.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_de.c,v diff -u -p -r1.143 if_de.c --- dev/pci/if_de.c 24 May 2024 06:02:53 -0000 1.143 +++ dev/pci/if_de.c 19 Nov 2024 01:58:05 -0000 @@ -3728,6 +3728,8 @@ tulip_txput(tulip_softc_t * const sc, st u_int32_t d_status; bus_dmamap_t map; struct ifnet *ifp = &sc->tulip_if; + void (*set_oactive)(struct ifqueue *) = + notonqueue ? ifq_set_oactive : ifq_deq_set_oactive; #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { @@ -3846,8 +3848,10 @@ tulip_txput(tulip_softc_t * const sc, st * The descriptors have been filled in. Now get ready * to transmit. */ - if (!notonqueue) + if (!notonqueue) { ifq_deq_commit(&ifp->if_snd, m); + set_oactive = ifq_set_oactive; + } TULIP_SETCTX(m, map); map = NULL; @@ -3897,7 +3901,7 @@ tulip_txput(tulip_softc_t * const sc, st if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { TULIP_CSR_WRITE(sc, csr_txpoll, 1); - ifq_set_oactive(&sc->tulip_if.if_snd); + (*set_oactive)(&sc->tulip_if.if_snd); TULIP_PERFEND(txput); return (NULL); } @@ -3926,7 +3930,7 @@ tulip_txput(tulip_softc_t * const sc, st sc->tulip_dbg.dbg_txput_finishes[6]++; #endif if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_DOINGSETUP)) { - ifq_set_oactive(&sc->tulip_if.if_snd); + (*set_oactive)(&sc->tulip_if.if_snd); if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask);