On Mon, Sep 01, 2025 at 07:23:58PM +0200, BESSOT Jean-Michel wrote:
> Hello
> 
> I have the lines :
> 
> qwx0: failed to setup rxd tid queue for tid 10: 12
> qwx0: failed to setup dp for peer 18:df:26:7f:50:75 on vdev 0 (12)
> 
> on my dmesg but the network works.
> 
> I attach the dmesg .
> 
> I hope it can help, bye

This means your system was too low on unfragmented DMA memory to
allocate a ring with descriptors and buffers for Rx aggregation.
If the network worked anyway then it was probably just a bit slower
than it could have been.

If this happens often or causes issues, the driver could be modified
to allocate this memory upfront when the system is booting.

Or we could allow the driver to use DMA memory above the 4GB boundary
for memory related to Tx/Rx rings. The patch below does that and so far
works without issues for me on amd64.

I recall patrick@ clamping this driver to 4GB early on to fix some
problem related to loading the firmware. This was done before qwx even
provided a working network interface. So perhaps using 64-bit DMA for
packet memory is fine even on arm64?

Could you try this diff on your machine?

commit 64c534826179113d40f77dfa20af572146d9926b (qwx64)
from: Stefan Sperling <s...@stsp.name>
date: Tue Sep  2 08:31:17 2025 UTC

make qwx(4) use 64-bit DMA allocations for Tx/Rx rings and related memory

M  sys/dev/ic/qwx.c          |  23+  13-
M  sys/dev/ic/qwxvar.h       |   1+   1-
M  sys/dev/pci/if_qwx_pci.c  |  11+  11-

3 files changed, 35 insertions(+), 25 deletions(-)

commit - d5c7514e92137dc0e160a8f4f8ce9935d1c6968a
commit + 64c534826179113d40f77dfa20af572146d9926b
blob - 83d90d8a26cf1678ca1847b39a6269275b8b5333
blob + 5abea3afe50987a9b5c3ee065114267698ce0c92
--- sys/dev/ic/qwx.c
+++ sys/dev/ic/qwx.c
@@ -8431,7 +8431,7 @@ qwx_qmi_mem_seg_send(struct qwx_softc *sc)
        } else if (sc->fwmem == NULL || QWX_DMA_LEN(sc->fwmem) < total_size) {
                if (sc->fwmem != NULL) 
                        qwx_dmamem_free(sc->sc_dmat, sc->fwmem);
-               sc->fwmem = qwx_dmamem_alloc(sc->sc_dmat, total_size, 65536);
+               sc->fwmem = qwx_dmamem_alloc(sc->sc_dmat, total_size, 65536, 0);
                if (sc->fwmem == NULL) {
                        printf("%s: failed to allocate %zu bytes of DMA "
                            "memory for firmware\n", sc->sc_dev.dv_xname,
@@ -9287,7 +9287,7 @@ qwx_qmi_m3_load(struct qwx_softc *sc)
        if (sc->m3_mem == NULL || QWX_DMA_LEN(sc->m3_mem) < len) {
                if (sc->m3_mem)
                        qwx_dmamem_free(sc->sc_dmat, sc->m3_mem);
-               sc->m3_mem = qwx_dmamem_alloc(sc->sc_dmat, len, 65536);
+               sc->m3_mem = qwx_dmamem_alloc(sc->sc_dmat, len, 65536, 0);
                if (sc->m3_mem == NULL) {
                        printf("%s: failed to allocate %zu bytes of DMA "
                            "memory for M3 firmware\n", sc->sc_dev.dv_xname,
@@ -9604,7 +9604,7 @@ qwx_dp_srng_setup(struct qwx_softc *sc, struct dp_srng
 #endif
        if (!cached) {
                ring->mem = qwx_dmamem_alloc(sc->sc_dmat, ring->size,
-                   PAGE_SIZE);
+                   PAGE_SIZE, BUS_DMA_64BIT);
                if (ring->mem == NULL) {
                        printf("%s: could not allocate DP SRNG DMA memory\n",
                            sc->sc_dev.dv_xname);
@@ -9804,7 +9804,7 @@ qwx_dp_link_desc_bank_alloc(struct qwx_softc *sc,
                        desc_sz = last_bank_sz;
 
                desc_bank[i].mem = qwx_dmamem_alloc(sc->sc_dmat, desc_sz,
-                   PAGE_SIZE);
+                   PAGE_SIZE, BUS_DMA_64BIT);
                if (!desc_bank[i].mem) {
                        ret = ENOMEM;
                        goto err;
@@ -9955,7 +9955,8 @@ qwx_dp_scatter_idle_link_desc_setup(struct qwx_softc *
 
        for (i = 0; i < num_scatter_buf; i++) {
                slist[i].mem = qwx_dmamem_alloc(sc->sc_dmat,
-                   HAL_WBM_IDLE_SCATTER_BUF_SIZE_MAX, PAGE_SIZE);
+                   HAL_WBM_IDLE_SCATTER_BUF_SIZE_MAX, PAGE_SIZE,
+                   BUS_DMA_64BIT);
                if (slist[i].mem == NULL) {
                        ret = ENOMEM;
                        goto err;
@@ -21022,7 +21023,8 @@ qwx_hal_alloc_cont_rdp(struct qwx_softc *sc)
        size_t size = sizeof(uint32_t) * HAL_SRNG_RING_ID_MAX;
 
        if (hal->rdpmem == NULL) {
-               hal->rdpmem = qwx_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE);
+               hal->rdpmem = qwx_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE,
+                   BUS_DMA_64BIT);
                if (hal->rdpmem == NULL) {
                        printf("%s: could not allocate RDP DMA memory\n",
                            sc->sc_dev.dv_xname);
@@ -21057,7 +21059,8 @@ qwx_hal_alloc_cont_wrp(struct qwx_softc *sc)
        size_t size = sizeof(uint32_t) * HAL_SRNG_NUM_LMAC_RINGS;
 
        if (hal->wrpmem == NULL) {
-               hal->wrpmem = qwx_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE);
+               hal->wrpmem = qwx_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE,
+                   BUS_DMA_64BIT);
                if (hal->wrpmem == NULL) {
                        printf("%s: could not allocate WDP DMA memory\n",
                            sc->sc_dev.dv_xname);
@@ -24677,7 +24680,7 @@ qwx_peer_rx_tid_setup(struct qwx_softc *sc, struct iee
                hw_desc_sz = qwx_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid);
 
        rx_tid->mem = qwx_dmamem_alloc(sc->sc_dmat, hw_desc_sz,
-           HAL_LINK_DESC_ALIGN);
+           HAL_LINK_DESC_ALIGN, BUS_DMA_64BIT);
        if (rx_tid->mem == NULL) {
 #ifdef notyet
                spin_unlock_bh(&ab->base_lock);
@@ -26523,7 +26526,8 @@ qwx_detach(struct qwx_softc *sc)
 }
 
 struct qwx_dmamem *
-qwx_dmamem_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_size_t align)
+qwx_dmamem_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_size_t align,
+    int flags)
 {
        struct qwx_dmamem *adm;
        int nsegs;
@@ -26534,12 +26538,18 @@ qwx_dmamem_alloc(bus_dma_tag_t dmat, bus_size_t size, 
        adm->size = size;
 
        if (bus_dmamap_create(dmat, size, 1, size, 0,
-           BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &adm->map) != 0)
+           BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | flags, &adm->map) != 0)
                goto admfree;
 
-       if (bus_dmamem_alloc_range(dmat, size, align, 0, &adm->seg, 1,
-           &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO, 0, 0xffffffff) != 0)
-               goto destroy;
+       if (flags & BUS_DMA_64BIT) {
+               if (bus_dmamem_alloc(dmat, size, align, 0, &adm->seg, 1,
+                   &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO | flags) != 0)
+                       goto destroy;
+       } else {
+               if (bus_dmamem_alloc_range(dmat, size, align, 0, &adm->seg, 1,
+                   &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO, 0, 0xffffffff) != 0)
+                       goto destroy;
+       }
 
        if (bus_dmamem_map(dmat, &adm->seg, nsegs, size,
            &adm->kva, BUS_DMA_NOWAIT | BUS_DMA_COHERENT) != 0)
blob - a45e474c188469d9c0375dd883db99993a0ce36f
blob + 3bfc0e0d70417239a76a794fcf5d38451c1b78fb
--- sys/dev/ic/qwxvar.h
+++ sys/dev/ic/qwxvar.h
@@ -473,7 +473,7 @@ struct qwx_dmamem {
        caddr_t                 kva;
 };
 
-struct qwx_dmamem *qwx_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t);
+struct qwx_dmamem *qwx_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, int);
 void qwx_dmamem_free(bus_dma_tag_t, struct qwx_dmamem *);
 
 #define QWX_DMA_MAP(_adm)      ((_adm)->map)
blob - 0dadf173db4401d6d7ad4ed761b5eeab92e95932
blob + 1f21c39bd2b601f91342c2ad4219fc8774d28d11
--- sys/dev/pci/if_qwx_pci.c
+++ sys/dev/pci/if_qwx_pci.c
@@ -978,7 +978,7 @@ unsupported_wcn6855_soc:
                goto err_pci_disable_msi;
 
        psc->chan_ctxt = qwx_dmamem_alloc(sc->sc_dmat,
-           sizeof(struct qwx_mhi_chan_ctxt) * psc->max_chan, 0);
+           sizeof(struct qwx_mhi_chan_ctxt) * psc->max_chan, 0, 0);
        if (psc->chan_ctxt == NULL) {
                printf("%s: could not allocate channel context array\n",
                    sc->sc_dev.dv_xname);
@@ -992,7 +992,7 @@ unsupported_wcn6855_soc:
        }
 
        psc->event_ctxt = qwx_dmamem_alloc(sc->sc_dmat,
-           sizeof(struct qwx_mhi_event_ctxt) * QWX_NUM_EVENT_CTX, 0);
+           sizeof(struct qwx_mhi_event_ctxt) * QWX_NUM_EVENT_CTX, 0, 0);
        if (psc->event_ctxt == NULL) {
                printf("%s: could not allocate event context array\n",
                    sc->sc_dev.dv_xname);
@@ -1006,7 +1006,7 @@ unsupported_wcn6855_soc:
        }
 
        psc->cmd_ctxt = qwx_dmamem_alloc(sc->sc_dmat,
-           sizeof(struct qwx_mhi_cmd_ctxt), 0);
+           sizeof(struct qwx_mhi_cmd_ctxt), 0, 0);
        if (psc->cmd_ctxt == NULL) {
                printf("%s: could not allocate command context array\n",
                    sc->sc_dev.dv_xname);
@@ -1245,7 +1245,7 @@ qwx_pci_alloc_xfer_ring(struct qwx_softc *sc, struct q
 
        size = sizeof(struct qwx_mhi_ring_element) * num_elements;
        /* Hardware requires that rings are aligned to ring size. */
-       ring->dmamem = qwx_dmamem_alloc(sc->sc_dmat, size, size);
+       ring->dmamem = qwx_dmamem_alloc(sc->sc_dmat, size, size, 0);
        if (ring->dmamem == NULL)
                return ENOMEM;
 
@@ -1396,7 +1396,7 @@ qwx_pci_alloc_event_ring(struct qwx_softc *sc, struct 
 
        size = sizeof(struct qwx_mhi_ring_element) * num_elements;
        /* Hardware requires that rings are aligned to ring size. */
-       ring->dmamem = qwx_dmamem_alloc(sc->sc_dmat, size, size);
+       ring->dmamem = qwx_dmamem_alloc(sc->sc_dmat, size, size, 0);
        if (ring->dmamem == NULL)
                return ENOMEM;
 
@@ -1451,7 +1451,7 @@ qwx_pci_init_cmd_ring(struct qwx_softc *sc, struct qwx
        ring->size = sizeof(struct qwx_mhi_ring_element) * ring->num_elements;
 
        /* Hardware requires that rings are aligned to ring size. */
-       ring->dmamem = qwx_dmamem_alloc(sc->sc_dmat, ring->size, ring->size);
+       ring->dmamem = qwx_dmamem_alloc(sc->sc_dmat, ring->size, ring->size, 0);
        if (ring->dmamem == NULL)
                return ENOMEM;
 
@@ -3263,7 +3263,7 @@ qwx_mhi_fw_load_bhi(struct qwx_pci_softc *psc, uint8_t
        uint64_t paddr;
        int ret;
 
-       data_adm = qwx_dmamem_alloc(sc->sc_dmat, len, 0);
+       data_adm = qwx_dmamem_alloc(sc->sc_dmat, len, 0, 0);
        if (data_adm == NULL) {
                printf("%s: could not allocate BHI DMA data buffer\n",
                    sc->sc_dev.dv_xname);
@@ -3331,7 +3331,7 @@ qwx_mhi_fw_load_bhie(struct qwx_pci_softc *psc, uint8_
        if (psc->amss_data == NULL || QWX_DMA_LEN(psc->amss_data) < len) {
                if (psc->amss_data)
                        qwx_dmamem_free(sc->sc_dmat, psc->amss_data);
-               psc->amss_data = qwx_dmamem_alloc(sc->sc_dmat, len, 0);
+               psc->amss_data = qwx_dmamem_alloc(sc->sc_dmat, len, 0, 0);
                if (psc->amss_data == NULL) {
                        printf("%s: could not allocate BHIE DMA data buffer\n",
                            sc->sc_dev.dv_xname);
@@ -3343,7 +3343,7 @@ qwx_mhi_fw_load_bhie(struct qwx_pci_softc *psc, uint8_
        if (psc->amss_vec == NULL || QWX_DMA_LEN(psc->amss_vec) < vec_size) {
                if (psc->amss_vec)
                        qwx_dmamem_free(sc->sc_dmat, psc->amss_vec);
-               psc->amss_vec = qwx_dmamem_alloc(sc->sc_dmat, vec_size, 0);
+               psc->amss_vec = qwx_dmamem_alloc(sc->sc_dmat, vec_size, 0, 0);
                if (psc->amss_vec == NULL) {
                        printf("%s: could not allocate BHIE DMA vec buffer\n",
                            sc->sc_dev.dv_xname);
@@ -3429,7 +3429,7 @@ qwx_rddm_prepare(struct qwx_pci_softc *psc)
                return;
        }
 
-       data_adm = qwx_dmamem_alloc(sc->sc_dmat, len, 0);
+       data_adm = qwx_dmamem_alloc(sc->sc_dmat, len, 0, 0);
        if (data_adm == NULL) {
                printf("%s: could not allocate BHIE DMA data buffer\n",
                    sc->sc_dev.dv_xname);
@@ -3437,7 +3437,7 @@ qwx_rddm_prepare(struct qwx_pci_softc *psc)
        }
 
        vec_size = nseg * sizeof(*vec);
-       vec_adm = qwx_dmamem_alloc(sc->sc_dmat, vec_size, 0);
+       vec_adm = qwx_dmamem_alloc(sc->sc_dmat, vec_size, 0, 0);
        if (vec_adm == NULL) {
                printf("%s: could not allocate BHIE DMA vector buffer\n",
                    sc->sc_dev.dv_xname);

Reply via email to