On Fri, Jan 23, 2026 at 01:05:10PM +0100, Morten Brørup wrote:
> I haven't looked into the details yet, but have a quick question inline below.
> 
> > @@ -345,12 +345,20 @@ ci_txq_release_all_mbufs(struct ci_tx_queue *txq, bool use_ctx)
> >             return;
> > 
> >     if (!txq->vector_tx) {
> > -           for (uint16_t i = 0; i < txq->nb_tx_desc; i++) {
> > -                   if (txq->sw_ring[i].mbuf != NULL) {
> 
> You changed this loop to only operate on not-yet-cleaned descriptors.
> 
> Here comes the first part of my question:
> You removed the NULL check for txq->sw_ring[i].mbuf, thereby assuming that it 
> is never NULL for not-yet-cleaned descriptors.
> 

Good point. I was so focused on making this block and the vector block
the same that I forgot we can have NULL pointers for context
descriptors. That was a silly mistake (and AI never caught it for me
either).
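
For v2, something along these lines (untested sketch, reusing the
variable names from the hunk quoted below) should restore the NULL
check while keeping the bounded loop:

	const uint16_t nb_desc = txq->nb_tx_desc;
	const uint16_t start = (txq->last_desc_cleaned + 1) % nb_desc;
	const uint16_t end = txq->tx_tail;
	uint16_t i = start;

	if (end < i) {
		/* range wraps: free (start .. nb_desc - 1) first */
		for (; i < nb_desc; i++)
			if (txq->sw_ring[i].mbuf != NULL)
				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
		i = 0;
	}
	for (; i < end; i++)
		if (txq->sw_ring[i].mbuf != NULL)
			rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
	memset(txq->sw_ring, 0, sizeof(txq->sw_ring[0]) * nb_desc);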

> > +           /* Free mbufs from (last_desc_cleaned + 1) to (tx_tail - 1). */
> > +           const uint16_t start = (txq->last_desc_cleaned + 1) % txq->nb_tx_desc;
> > +           const uint16_t nb_desc = txq->nb_tx_desc;
> > +           const uint16_t end = txq->tx_tail;
> > +
> > +           uint16_t i = start;
> > +           if (end < i) {
> > +                   for (; i < nb_desc; i++)
> >                             rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
> > -                           txq->sw_ring[i].mbuf = NULL;
> > -                   }
> > +                   i = 0;
> >             }
> > +           for (; i < end; i++)
> > +                   rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
> > +           memset(txq->sw_ring, 0, sizeof(txq->sw_ring[0]) * nb_desc);
> >             return;
> >     }
> > 
> > diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
> > index 82dc54438f..47ddcf411b 100644
> > --- a/drivers/net/intel/common/tx_scalar_fns.h
> > +++ b/drivers/net/intel/common/tx_scalar_fns.h
> > @@ -30,16 +30,60 @@ ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
> >     const uint16_t rs_idx = (last_desc_cleaned == nb_tx_desc - 1) ?
> >                     0 :
> >                     (last_desc_cleaned + 1) >> txq->log2_rs_thresh;
> > -   uint16_t desc_to_clean_to = (rs_idx << txq->log2_rs_thresh) + (txq->tx_rs_thresh - 1);
> > +   const uint16_t dd_idx = txq->rs_last_id[rs_idx];
> > +   const uint16_t first_to_clean = rs_idx << txq->log2_rs_thresh;
> > 
> >     /* Check if descriptor is done - all drivers use 0xF as done value in bits 3:0 */
> > -   if ((txd[txq->rs_last_id[rs_idx]].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
> > +   if ((txd[dd_idx].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
> >                     rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
> >             /* Descriptor not yet processed by hardware */
> >             return -1;
> > 
> > +   /* DD bit is set, descriptors are done. Now free the mbufs. */
> > +   /* Note: nb_tx_desc is guaranteed to be a multiple of tx_rs_thresh,
> > +    * validated during queue setup. This means cleanup never wraps around
> > +    * the ring within a single burst (e.g., ring=256, rs_thresh=32 gives
> > +    * bursts of 0-31, 32-63, ..., 224-255).
> > +    */
> > +   const uint16_t nb_to_clean = txq->tx_rs_thresh;
> > +   struct ci_tx_entry *sw_ring = txq->sw_ring;
> > +
> > +   if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > +           /* FAST_FREE path: mbufs are already reset, just return to pool */
> > +           uint16_t nb_free = 0;
> > +
> > +           /* Get cached mempool pointer, or cache it on first use */
> > +           struct rte_mempool *mp =
> > +                   likely(txq->fast_free_mp != (void *)UINTPTR_MAX) ?
> > +                   txq->fast_free_mp :
> > +                   (txq->fast_free_mp = sw_ring[dd_idx].mbuf->pool);
> > +
> > +           /* Pack non-NULL mbufs in-place at start of sw_ring range.
> 
> Here is the second part of my question:
> How can they (sw_ring[X].mbuf) be NULL here, when they cannot be NULL in 
> ci_txq_release_all_mbufs()?

Because the latter function is wrong! :-) A context descriptor occupies
a ring slot without an mbuf attached, so its sw_ring entry stays NULL;
the NULL checks here are correct, and it's my rework of
ci_txq_release_all_mbufs() above that needs fixing.
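
For reference, the NULL entries originate on the descriptor-write side;
the pattern in the Intel xmit paths is roughly as below (simplified
sketch, not the exact code; need_ctx_desc stands in for the real
offload checks):

	struct ci_tx_entry *txe = &txq->sw_ring[tx_id];

	if (need_ctx_desc) {
		/* the context descriptor uses a slot but owns no mbuf */
		if (txe->mbuf != NULL) {
			rte_pktmbuf_free_seg(txe->mbuf);
			txe->mbuf = NULL;  /* stays NULL until slot is reused */
		}
		/* ... fill the volatile context descriptor ... */
		txe = &txq->sw_ring[txe->next_id];
	}
	txe->mbuf = tx_pkt;  /* the data descriptor slot owns the mbuf */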
> 
> > +            * No modulo needed in loop since we're guaranteed not to wrap.
> > +            */
> > +           for (uint16_t i = 0; i < nb_to_clean; i++) {
> > +                   struct rte_mbuf *m = sw_ring[first_to_clean + i].mbuf;
> > +                   if (m != NULL) {
> > +                           /* Pack into sw_ring at packed position */
> > +                           sw_ring[first_to_clean + nb_free].mbuf = m;
> > +                           nb_free++;
> > +                   }
> > +           }
> > +
> > +           /* Bulk return to mempool using packed sw_ring entries directly */
> > +           if (nb_free > 0)
> > +                   rte_mempool_put_bulk(mp, (void **)&sw_ring[first_to_clean].mbuf, nb_free);
> > +   } else {
> > +           /* Non-FAST_FREE path: use prefree_seg for refcount checks */
> > +           for (uint16_t i = 0; i < nb_to_clean; i++) {
> > +                   struct rte_mbuf *m = sw_ring[first_to_clean + i].mbuf;
> > +                   if (m != NULL)
> > +                           rte_pktmbuf_free_seg(m);
> > +           }
> > +   }
> > +
> >     /* Update the txq to reflect the last descriptor that was cleaned */
> > -   txq->last_desc_cleaned = desc_to_clean_to;
> > +   txq->last_desc_cleaned = first_to_clean + txq->tx_rs_thresh - 1;
> >     txq->nb_tx_free += txq->tx_rs_thresh;
> > 
> >     return 0;
