Hi, mcx(4) seems to sync the whole DMA map size when processing a received packet. As far as I know, we usually only sync the number of bytes that were actually received. I noticed this while doing bounce buffer tests, where it copied a lot more data than necessary.
That's because the RX buffer size is the maximum supported MTU, which is about 9500 bytes or so. For small packets, or even regular 1500-byte packets, syncing the whole buffer adds overhead. This change should make no difference on x86 or on ARM machines that have a cache-coherent PCIe bus. ok? Patrick diff --git a/sys/dev/pci/if_mcx.c b/sys/dev/pci/if_mcx.c index 38437e54897..065855d46d3 100644 --- a/sys/dev/pci/if_mcx.c +++ b/sys/dev/pci/if_mcx.c @@ -6800,20 +6800,20 @@ mcx_process_rx(struct mcx_softc *sc, struct mcx_rx *rx, { struct mcx_slot *ms; struct mbuf *m; - uint32_t flags; + uint32_t flags, len; int slot; + len = bemtoh32(&cqe->cq_byte_cnt); slot = betoh16(cqe->cq_wqe_count) % (1 << MCX_LOG_RQ_SIZE); ms = &rx->rx_slots[slot]; - bus_dmamap_sync(sc->sc_dmat, ms->ms_map, 0, ms->ms_map->dm_mapsize, - BUS_DMASYNC_POSTREAD); + bus_dmamap_sync(sc->sc_dmat, ms->ms_map, 0, len, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, ms->ms_map); m = ms->ms_m; ms->ms_m = NULL; - m->m_pkthdr.len = m->m_len = bemtoh32(&cqe->cq_byte_cnt); + m->m_pkthdr.len = m->m_len = len; if (cqe->cq_rx_hash_type) { m->m_pkthdr.ph_flowid = betoh32(cqe->cq_rx_hash);