Hi, there is an issue with the admin queue of ixl(4) which leads to the following panic when the link state changes:
uvm_fault(0xffffffff818005f8, 0x18, 0, 2) -> e kernel: page fault trap, code=0 Stopped at ixl_intr0+0xca: movq %rdx,0x18(%rax) TID PID UID PRFLAGS PFLAGS CPU COMMAND 392823 13219 0 0x100040 0 2 ifstated 444681 94950 90 0x1100010 0 6 ospf6d 428704 9496 90 0x1100010 0 9 ospf6d 106020 59273 85 0x1100010 0 1 ospfd 420435 72114 85 0x1100010 0 5 ospfd 295821 93368 73 0x1100010 0 3 syslogd 367116 56598 0 0x14000 0x200 7 zerothread 275385 57815 0 0x14000 0x200 4 softnet ixl_intr0(ffff800004509000) at ixl_intr0+0xca intr_handler(0,ffff8000044b0b80) at intr_handler+0x5b Xintr_ioapic_edge25_untramp() at Xintr_ioapic_edge25_untramp+0x18f acpicpu_idle() at acpicpu_idle+0x1f6 sched_idle(0) at sched_idle+0x280 end trace frame: 0x0, count: 10 https://www.openbsd.org/ddb.html describes the minimum info required in bug reports. Insufficient info makes it difficult to find and fix bugs. ddb{0}> The queue is corrupted in such a way that slot->iaq_cookie is 0, which causes the uvm fault when iatq is dereferenced. The following diff uses a mutex to protect the admin queue and avoids the issue above. ok?
bye, Jan Index: dev/pci/if_ixl.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.87 diff -u -p -r1.87 if_ixl.c --- dev/pci/if_ixl.c 6 Feb 2023 20:27:45 -0000 1.87 +++ dev/pci/if_ixl.c 19 Jul 2023 07:05:40 -0000 @@ -1274,6 +1274,7 @@ struct ixl_softc { unsigned int sc_atq_prod; unsigned int sc_atq_cons; + struct mutex sc_atq_mtx; struct ixl_dmamem sc_arq; struct task sc_arq_task; struct ixl_aq_bufs sc_arq_idle; @@ -1723,6 +1724,8 @@ ixl_attach(struct device *parent, struct /* initialise the adminq */ + mtx_init(&sc->sc_atq_mtx, IPL_NET); + if (ixl_dmamem_alloc(sc, &sc->sc_atq, sizeof(struct ixl_aq_desc) * IXL_AQ_NUM, IXL_AQ_ALIGN) != 0) { printf("\n" "%s: unable to allocate atq\n", DEVNAME(sc)); @@ -3599,6 +3602,8 @@ ixl_atq_post(struct ixl_softc *sc, struc struct ixl_aq_desc *atq, *slot; unsigned int prod; + mtx_enter(&sc->sc_atq_mtx); + /* assert locked */ atq = IXL_DMA_KVA(&sc->sc_atq); @@ -3618,6 +3623,8 @@ ixl_atq_post(struct ixl_softc *sc, struc prod &= IXL_AQ_MASK; sc->sc_atq_prod = prod; ixl_wr(sc, sc->sc_aq_regs->atq_tail, prod); + + mtx_leave(&sc->sc_atq_mtx); } static void @@ -3628,11 +3635,15 @@ ixl_atq_done(struct ixl_softc *sc) unsigned int cons; unsigned int prod; + mtx_enter(&sc->sc_atq_mtx); + prod = sc->sc_atq_prod; cons = sc->sc_atq_cons; - if (prod == cons) + if (prod == cons) { + mtx_leave(&sc->sc_atq_mtx); return; + } atq = IXL_DMA_KVA(&sc->sc_atq); @@ -3645,6 +3656,7 @@ ixl_atq_done(struct ixl_softc *sc) if (!ISSET(slot->iaq_flags, htole16(IXL_AQ_DD))) break; + KASSERT(slot->iaq_cookie != 0); iatq = (struct ixl_atq *)slot->iaq_cookie; iatq->iatq_desc = *slot; @@ -3661,6 +3673,8 @@ ixl_atq_done(struct ixl_softc *sc) BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); sc->sc_atq_cons = cons; + + mtx_leave(&sc->sc_atq_mtx); } static void @@ -3691,6 +3705,8 @@ ixl_atq_poll(struct ixl_softc *sc, struc unsigned int prod; unsigned int t = 0; + mtx_enter(&sc->sc_atq_mtx); + atq 
= IXL_DMA_KVA(&sc->sc_atq); prod = sc->sc_atq_prod; slot = atq + prod; @@ -3712,8 +3728,10 @@ ixl_atq_poll(struct ixl_softc *sc, struc while (ixl_rd(sc, sc->sc_aq_regs->atq_head) != prod) { delaymsec(1); - if (t++ > tm) + if (t++ > tm) { + mtx_leave(&sc->sc_atq_mtx); return (ETIMEDOUT); + } } bus_dmamap_sync(sc->sc_dmat, IXL_DMA_MAP(&sc->sc_atq), @@ -3724,6 +3742,7 @@ ixl_atq_poll(struct ixl_softc *sc, struc sc->sc_atq_cons = prod; + mtx_leave(&sc->sc_atq_mtx); return (0); }