The branch main has been updated by gallatin:

URL: https://cgit.FreeBSD.org/src/commit/?id=43d7ee540efe0df1def80ce24255b32f9f9396ba

commit 43d7ee540efe0df1def80ce24255b32f9f9396ba
Author:     Andrew Gallatin <[email protected]>
AuthorDate: 2025-12-19 20:48:36 +0000
Commit:     Andrew Gallatin <[email protected]>
CommitDate: 2025-12-21 14:45:12 +0000

    iflib: support for transmit side nic KTLS offload
    
    This change adds support to iflib for drivers that want to do
    transmit-side NIC KTLS offload. It does two things:
    
    1) Extends the pkt info to include an optional mbuf pointer.
    
    This gives drivers the ability to find the start of a TLS record if
    they need to re-DMA part of the record to reconstruct TLS state on
    the NIC. This mbuf pointer is only passed when CSUM_SND_TAG is
    set in the pkthdr's csum_flags. Note that the send tag itself is
    deliberately not inspected; the flag will only be present for
    TLS-offloaded or paced connections.
    
    2) Allows the driver to specify how much ring padding is needed
       before the ring is considered to be full using the new isc_tx_pad
       field in if_softc_ctx.
    
    This reuses a field that was marked spare in 2019 via d49e83eac3baf.
    Iflib initializes it to the previously hard-coded value of 2 slots and
    allows the driver to override it. The TXQ_AVAIL() macro has been
    adjusted to subtract this padding, and the callers of the macro no
    longer add 2 on the other side of the comparison. To avoid potential
    cache misses from looking at the ifc_softc_ctx in TXQ_AVAIL(), the
    value is mirrored in the txq as ift_pad (in an alignment hole).
    
    Reviewed by: kbowling, kgalazka, sumit.saxena_broadcom.com, shurd
    Sponsored by: Netflix
    MFC after: 1 month
    Differential Revision: https://reviews.freebsd.org/D54274
---
 sys/net/iflib.c | 31 +++++++++++++++++++------------
 sys/net/iflib.h |  5 +++--
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index 3181bdbcb849..bd0bfe4742df 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -375,6 +375,7 @@ struct iflib_txq {
        struct ifmp_ring        *ift_br;
        struct grouptask        ift_task;
        qidx_t          ift_size;
+       qidx_t          ift_pad;
        uint16_t        ift_id;
        struct callout  ift_timer;
 #ifdef DEV_NETMAP
@@ -445,7 +446,8 @@ get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen)
        return (used);
 }
 
-#define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, 
txq->ift_cidx, txq->ift_pidx, txq->ift_gen))
+#define TXQ_AVAIL(txq) ((txq->ift_size - txq->ift_pad) -\
+           get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, 
txq->ift_gen))
 
 #define IDXDIFF(head, tail, wrap) \
        ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))
@@ -485,11 +487,11 @@ typedef struct if_rxsd {
 
 /* multiple of word size */
 #ifdef __LP64__
-#define PKT_INFO_SIZE  6
+#define PKT_INFO_SIZE  7
 #define RXD_INFO_SIZE  5
 #define PKT_TYPE uint64_t
 #else
-#define PKT_INFO_SIZE  11
+#define PKT_INFO_SIZE  12
 #define RXD_INFO_SIZE  8
 #define PKT_TYPE uint32_t
 #endif
@@ -1933,6 +1935,7 @@ iflib_txq_setup(iflib_txq_t txq)
        txq->ift_cidx_processed = 0;
        txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
        txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset];
+       txq->ift_pad = scctx->isc_tx_pad;
 
        for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++)
                bzero((void *)di->idi_vaddr, di->idi_size);
@@ -3100,7 +3103,7 @@ iflib_txd_db_check(iflib_txq_t txq, int ring)
        max = TXQ_MAX_DB_DEFERRED(txq, txq->ift_in_use);
 
        /* force || threshold exceeded || at the edge of the ring */
-       if (ring || (txq->ift_db_pending >= max) || (TXQ_AVAIL(txq) <= 
MAX_TX_DESC(ctx) + 2)) {
+       if (ring || (txq->ift_db_pending >= max) || (TXQ_AVAIL(txq) <= 
MAX_TX_DESC(ctx))) {
 
                /*
                 * 'npending' is used if the card's doorbell is in terms of the 
number of descriptors
@@ -3604,14 +3607,18 @@ defrag:
                return (err);
        }
        ifsd_m[pidx] = m_head;
+       if (m_head->m_pkthdr.csum_flags & CSUM_SND_TAG)
+               pi.ipi_mbuf = m_head;
+       else
+               pi.ipi_mbuf = NULL;
        /*
         * XXX assumes a 1 to 1 relationship between segments and
         *        descriptors - this does not hold true on all drivers, e.g.
         *        cxgb
         */
-       if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) {
+       if (__predict_false(nsegs > TXQ_AVAIL(txq))) {
                (void)iflib_completed_tx_reclaim(txq);
-               if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) {
+               if (__predict_false(nsegs > TXQ_AVAIL(txq))) {
                        txq->ift_no_desc_avail++;
                        bus_dmamap_unload(buf_tag, map);
                        DBG_COUNTER_INC(encap_txq_avail_fail);
@@ -3635,7 +3642,7 @@ defrag:
         */
        txq->ift_rs_pending += nsegs + 1;
        if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) ||
-           iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 
2) {
+           iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx)) {
                pi.ipi_flags |= IPI_TX_INTR;
                txq->ift_rs_pending = 0;
        }
@@ -3658,10 +3665,9 @@ defrag:
                        txq->ift_gen = 1;
                }
                /*
-                * drivers can need as many as
-                * two sentinels
+                * drivers can need up to ift_pad sentinels
                 */
-               MPASS(ndesc <= pi.ipi_nsegs + 2);
+               MPASS(ndesc <= pi.ipi_nsegs + txq->ift_pad);
                MPASS(pi.ipi_new_pidx != pidx);
                MPASS(ndesc > 0);
                txq->ift_in_use += ndesc;
@@ -3816,7 +3822,7 @@ iflib_txq_can_drain(struct ifmp_ring *r)
        iflib_txq_t txq = r->cookie;
        if_ctx_t ctx = txq->ift_ctx;
 
-       if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2)
+       if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx))
                return (1);
        bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
            BUS_DMASYNC_POSTREAD);
@@ -3880,7 +3886,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, 
uint32_t pidx)
 #endif
        do_prefetch = (ctx->ifc_flags & IFC_PREFETCH);
        err = 0;
-       for (i = 0; i < count && TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx) + 2; i++) {
+       for (i = 0; i < count && TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx); i++) {
                int rem = do_prefetch ? count - i : 0;
 
                mp = _ring_peek_one(r, cidx, i, rem);
@@ -4740,6 +4746,7 @@ iflib_reset_qvalues(if_ctx_t ctx)
                        scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i];
                }
        }
+       scctx->isc_tx_pad = 2;
 }
 
 static void
diff --git a/sys/net/iflib.h b/sys/net/iflib.h
index e65c936fc4b4..fe70fc5775cd 100644
--- a/sys/net/iflib.h
+++ b/sys/net/iflib.h
@@ -127,7 +127,8 @@ typedef struct if_pkt_info {
        uint8_t                 ipi_ip_tos;     /* IP ToS field data */
        uint8_t                 ipi_mflags;     /* packet mbuf flags */
        uint8_t                 __spare0__;
-       uint8_t         __spare1__;
+       uint8_t                 __spare1__;
+       struct mbuf             *ipi_mbuf;              /* mbuf for ktls */
 } *if_pkt_info_t;
 
 typedef struct if_irq {
@@ -191,7 +192,7 @@ typedef struct if_softc_ctx {
        int isc_vectors;
        int isc_nrxqsets;
        int isc_ntxqsets;
-       uint16_t __spare0__;
+       uint16_t isc_tx_pad;
        uint32_t __spare1__;
        int isc_msix_bar;               /* can be model specific - initialize 
in attach_pre */
        int isc_tx_nsegments;           /* can be model specific - initialize 
in attach_pre */

Reply via email to