Hi,

There is an issue with the admin queue of ixl(4) which leads to the
following panic when the link state changes:

uvm_fault(0xffffffff818005f8, 0x18, 0, 2) -> e
kernel: page fault trap, code=0
Stopped at      ixl_intr0+0xca: movq    %rdx,0x18(%rax)
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
 392823  13219      0    0x100040          0    2  ifstated
 444681  94950     90   0x1100010          0    6  ospf6d
 428704   9496     90   0x1100010          0    9  ospf6d
 106020  59273     85   0x1100010          0    1  ospfd
 420435  72114     85   0x1100010          0    5  ospfd
 295821  93368     73   0x1100010          0    3  syslogd
 367116  56598      0     0x14000      0x200    7  zerothread
 275385  57815      0     0x14000      0x200    4  softnet
ixl_intr0(ffff800004509000) at ixl_intr0+0xca
intr_handler(0,ffff8000044b0b80) at intr_handler+0x5b
Xintr_ioapic_edge25_untramp() at Xintr_ioapic_edge25_untramp+0x18f
acpicpu_idle() at acpicpu_idle+0x1f6
sched_idle(0) at sched_idle+0x280
end trace frame: 0x0, count: 10
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{0}>

The queue is corrupted in such a way that slot->iaq_cookie is 0, which
causes the uvm fault when iatq is dereferenced.

The following diff uses a mutex to protect the admin queue and avoids
the issue above.

ok?

bye,
Jan

Index: dev/pci/if_ixl.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.87
diff -u -p -r1.87 if_ixl.c
--- dev/pci/if_ixl.c    6 Feb 2023 20:27:45 -0000       1.87
+++ dev/pci/if_ixl.c    19 Jul 2023 07:05:40 -0000
@@ -1274,6 +1274,7 @@ struct ixl_softc {
        unsigned int             sc_atq_prod;
        unsigned int             sc_atq_cons;
 
+       struct mutex             sc_atq_mtx;
        struct ixl_dmamem        sc_arq;
        struct task              sc_arq_task;
        struct ixl_aq_bufs       sc_arq_idle;
@@ -1723,6 +1724,8 @@ ixl_attach(struct device *parent, struct
 
        /* initialise the adminq */
 
+       mtx_init(&sc->sc_atq_mtx, IPL_NET);
+
        if (ixl_dmamem_alloc(sc, &sc->sc_atq,
            sizeof(struct ixl_aq_desc) * IXL_AQ_NUM, IXL_AQ_ALIGN) != 0) {
                printf("\n" "%s: unable to allocate atq\n", DEVNAME(sc));
@@ -3599,6 +3602,8 @@ ixl_atq_post(struct ixl_softc *sc, struc
        struct ixl_aq_desc *atq, *slot;
        unsigned int prod;
 
+       mtx_enter(&sc->sc_atq_mtx);
+
        /* assert locked */
 
        atq = IXL_DMA_KVA(&sc->sc_atq);
@@ -3618,6 +3623,8 @@ ixl_atq_post(struct ixl_softc *sc, struc
        prod &= IXL_AQ_MASK;
        sc->sc_atq_prod = prod;
        ixl_wr(sc, sc->sc_aq_regs->atq_tail, prod);
+
+       mtx_leave(&sc->sc_atq_mtx);
 }
 
 static void
@@ -3628,11 +3635,15 @@ ixl_atq_done(struct ixl_softc *sc)
        unsigned int cons;
        unsigned int prod;
 
+       mtx_enter(&sc->sc_atq_mtx);
+
        prod = sc->sc_atq_prod;
        cons = sc->sc_atq_cons;
 
-       if (prod == cons)
+       if (prod == cons) {
+               mtx_leave(&sc->sc_atq_mtx);
                return;
+       }
 
        atq = IXL_DMA_KVA(&sc->sc_atq);
 
@@ -3645,6 +3656,7 @@ ixl_atq_done(struct ixl_softc *sc)
                if (!ISSET(slot->iaq_flags, htole16(IXL_AQ_DD)))
                        break;
 
+               KASSERT(slot->iaq_cookie != 0);
                iatq = (struct ixl_atq *)slot->iaq_cookie;
                iatq->iatq_desc = *slot;
 
@@ -3661,6 +3673,8 @@ ixl_atq_done(struct ixl_softc *sc)
            BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
 
        sc->sc_atq_cons = cons;
+
+       mtx_leave(&sc->sc_atq_mtx);
 }
 
 static void
@@ -3691,6 +3705,8 @@ ixl_atq_poll(struct ixl_softc *sc, struc
        unsigned int prod;
        unsigned int t = 0;
 
+       mtx_enter(&sc->sc_atq_mtx);
+
        atq = IXL_DMA_KVA(&sc->sc_atq);
        prod = sc->sc_atq_prod;
        slot = atq + prod;
@@ -3712,8 +3728,10 @@ ixl_atq_poll(struct ixl_softc *sc, struc
        while (ixl_rd(sc, sc->sc_aq_regs->atq_head) != prod) {
                delaymsec(1);
 
-               if (t++ > tm)
+               if (t++ > tm) {
+                       mtx_leave(&sc->sc_atq_mtx);
                        return (ETIMEDOUT);
+               }
        }
 
        bus_dmamap_sync(sc->sc_dmat, IXL_DMA_MAP(&sc->sc_atq),
@@ -3724,6 +3742,7 @@ ixl_atq_poll(struct ixl_softc *sc, struc
 
        sc->sc_atq_cons = prod;
 
+       mtx_leave(&sc->sc_atq_mtx);
        return (0);
 }
 

Reply via email to