On Fri, Jan 23, 2026 at 01:05:10PM +0100, Morten Brørup wrote:
> I haven't looked into the details yet, but have a quick question inline below.
>
> > @@ -345,12 +345,20 @@ ci_txq_release_all_mbufs(struct ci_tx_queue *txq, bool use_ctx)
> > return;
> >
> > if (!txq->vector_tx) {
> > - for (uint16_t i = 0; i < txq->nb_tx_desc; i++) {
> > - if (txq->sw_ring[i].mbuf != NULL) {
>
> You changed this loop to only operate on not-yet-cleaned descriptors.
>
> Here comes the first part of my question:
> You removed the NULL check for txq->sw_ring[i].mbuf, thereby assuming that it
> is never NULL for not-yet-cleaned descriptors.
>
Good point. I was so focused on making this block and the vector block the
same that I forgot we can have NULL pointers for context descriptors.
That was a silly mistake (and the AI never caught it for me either).
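Something like the below should fix it, keeping the bounded walk from the
patch but restoring the NULL check for context-descriptor slots (untested
sketch, same names as in the hunk you quoted):

	if (!txq->vector_tx) {
		/* Free mbufs from (last_desc_cleaned + 1) to (tx_tail - 1),
		 * skipping NULL entries left behind by context descriptors.
		 */
		const uint16_t nb_desc = txq->nb_tx_desc;
		const uint16_t end = txq->tx_tail;
		uint16_t i = (txq->last_desc_cleaned + 1) % nb_desc;

		if (end < i) {
			for (; i < nb_desc; i++)
				if (txq->sw_ring[i].mbuf != NULL)
					rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
			i = 0;
		}
		for (; i < end; i++)
			if (txq->sw_ring[i].mbuf != NULL)
				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
		memset(txq->sw_ring, 0, sizeof(txq->sw_ring[0]) * nb_desc);
		return;
	}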
> > + /* Free mbufs from (last_desc_cleaned + 1) to (tx_tail - 1). */
> > + const uint16_t start = (txq->last_desc_cleaned + 1) % txq->nb_tx_desc;
> > + const uint16_t nb_desc = txq->nb_tx_desc;
> > + const uint16_t end = txq->tx_tail;
> > +
> > + uint16_t i = start;
> > + if (end < i) {
> > + for (; i < nb_desc; i++)
> > rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
> > - txq->sw_ring[i].mbuf = NULL;
> > - }
> > + i = 0;
> > }
> > + for (; i < end; i++)
> > + rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
> > + memset(txq->sw_ring, 0, sizeof(txq->sw_ring[0]) * nb_desc);
> > return;
> > }
> >
> > diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
> > index 82dc54438f..47ddcf411b 100644
> > --- a/drivers/net/intel/common/tx_scalar_fns.h
> > +++ b/drivers/net/intel/common/tx_scalar_fns.h
> > @@ -30,16 +30,60 @@ ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
> > const uint16_t rs_idx = (last_desc_cleaned == nb_tx_desc - 1) ? 0 :
> > (last_desc_cleaned + 1) >> txq->log2_rs_thresh;
> > - uint16_t desc_to_clean_to = (rs_idx << txq->log2_rs_thresh) + (txq->tx_rs_thresh - 1);
> > + const uint16_t dd_idx = txq->rs_last_id[rs_idx];
> > + const uint16_t first_to_clean = rs_idx << txq->log2_rs_thresh;
> >
> > /* Check if descriptor is done - all drivers use 0xF as done value in bits 3:0 */
> > - if ((txd[txq->rs_last_id[rs_idx]].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
> > + if ((txd[dd_idx].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
> > rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
> > /* Descriptor not yet processed by hardware */
> > return -1;
> >
> > + /* DD bit is set, descriptors are done. Now free the mbufs. */
> > + /* Note: nb_tx_desc is guaranteed to be a multiple of tx_rs_thresh,
> > + * validated during queue setup. This means cleanup never wraps around
> > + * the ring within a single burst (e.g., ring=256, rs_thresh=32 gives
> > + * bursts of 0-31, 32-63, ..., 224-255).
> > + */
> > + const uint16_t nb_to_clean = txq->tx_rs_thresh;
> > + struct ci_tx_entry *sw_ring = txq->sw_ring;
> > +
> > + if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > + /* FAST_FREE path: mbufs are already reset, just return to pool */
> > + uint16_t nb_free = 0;
> > +
> > + /* Get cached mempool pointer, or cache it on first use */
> > + struct rte_mempool *mp =
> > + likely(txq->fast_free_mp != (void *)UINTPTR_MAX) ?
> > + txq->fast_free_mp :
> > + (txq->fast_free_mp = sw_ring[dd_idx].mbuf->pool);
> > +
> > + /* Pack non-NULL mbufs in-place at start of sw_ring range.
>
> Here is the second part of my question:
> How can they (sw_ring[X].mbuf) be NULL here, when they cannot be NULL in
> ci_txq_release_all_mbufs()?
Because the latter function is wrong! :-)
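To spell it out: a context descriptor occupies a slot in the ring but has no
mbuf attached, so its sw_ring entry stays NULL until the slot is reused. The
cleanup loop below therefore has to skip NULLs, and ci_txq_release_all_mbufs()
needs the same check, which the first hunk dropped.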
>
> > + * No modulo needed in loop since we're guaranteed not to wrap.
> > + */
> > + for (uint16_t i = 0; i < nb_to_clean; i++) {
> > + struct rte_mbuf *m = sw_ring[first_to_clean + i].mbuf;
> > + if (m != NULL) {
> > + /* Pack into sw_ring at packed position */
> > + sw_ring[first_to_clean + nb_free].mbuf = m;
> > + nb_free++;
> > + }
> > + }
> > +
> > + /* Bulk return to mempool using packed sw_ring entries directly */
> > + if (nb_free > 0)
> > + rte_mempool_put_bulk(mp, (void **)&sw_ring[first_to_clean].mbuf, nb_free);
> > + } else {
> > + /* Non-FAST_FREE path: use prefree_seg for refcount checks */
> > + for (uint16_t i = 0; i < nb_to_clean; i++) {
> > + struct rte_mbuf *m = sw_ring[first_to_clean + i].mbuf;
> > + if (m != NULL)
> > + rte_pktmbuf_free_seg(m);
> > + }
> > + }
> > +
> > /* Update the txq to reflect the last descriptor that was cleaned */
> > - txq->last_desc_cleaned = desc_to_clean_to;
> > + txq->last_desc_cleaned = first_to_clean + txq->tx_rs_thresh - 1;
> > txq->nb_tx_free += txq->tx_rs_thresh;
> >
> > return 0;
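
One more thought on the FAST_FREE branch, for what it's worth: the same bulk
return could also be done via a small local array instead of packing the
pointers in place. Rough, untested sketch using the same names as above (the
array size here is for illustration only; real code would bound it by
tx_rs_thresh):

	struct rte_mbuf *to_free[256]; /* sketch only: assumes tx_rs_thresh <= 256 */
	unsigned int nb_free = 0;

	for (uint16_t i = 0; i < nb_to_clean; i++) {
		struct rte_mbuf *m = sw_ring[first_to_clean + i].mbuf;
		if (m != NULL)
			to_free[nb_free++] = m;
	}
	if (nb_free > 0)
		rte_mempool_put_bulk(mp, (void **)to_free, nb_free);

Not saying the patch has to change here, just noting the alternative.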