The branch main has been updated by np:

URL: https://cgit.FreeBSD.org/src/commit/?id=4f272a5ef3d8073940e7719401d1e8de2de6100a

commit 4f272a5ef3d8073940e7719401d1e8de2de6100a
Author:     John Baldwin <j...@freebsd.org>
AuthorDate: 2025-09-29 14:59:03 +0000
Commit:     Navdeep Parhar <n...@freebsd.org>
CommitDate: 2025-09-29 15:19:12 +0000

    cxgbe: Support for NIC KTLS transmit on T7 adapters.
    
    Unlike NIC KTLS support on T6, the T7 implementation is able to
    reuse the existing TSO functionality directly, including trimming
    the output of the crypto engine before it is passed on to TSO.
    This is much simpler and does not require the use of bypass
    pseudo-connections in the TOE engine.  Among other things, this
    permits arbitrary TCP options (including the full range of
    possible TCP timestamp values) while also avoiding various edge
    cases where parts of a requested TCP packet could not always be
    transmitted (e.g. partial trailers).  This implementation also
    permits NIC KTLS to be used in parallel with TOE.
    
    This version does not yet support connections over a VF (specifically
    the ktls_tunnel_packet function needs to handle the VF work request),
    nor does it support VxLAN offload.
    
    MFC after:      3 days
    Sponsored by:   Chelsio Communications
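
For context, here is a minimal userspace sketch of enabling transmit
KTLS on FreeBSD so that this driver path is exercised.  It uses the
standard ktls(4) setsockopt interface and is not part of this commit;
header locations and field names should be checked against the
installed headers:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>        /* TCP_TXTLS_ENABLE, struct tls_enable */
    #include <sys/ktls.h>           /* TLS_MAJOR_VER_ONE, TLS_MINOR_VER_TWO */
    #include <crypto/cryptodev.h>   /* CRYPTO_AES_NIST_GCM_16 */
    #include <stdint.h>
    #include <string.h>

    /* Hypothetical helper: TLS 1.2 AES-128-GCM transmit offload on 's'. */
    static int
    enable_tx_ktls(int s, const uint8_t key[16], const uint8_t salt[4])
    {
            struct tls_enable en;

            memset(&en, 0, sizeof(en));
            en.cipher_algorithm = CRYPTO_AES_NIST_GCM_16;
            en.cipher_key = key;
            en.cipher_key_len = 16;
            en.iv = salt;           /* 4-byte implicit IV (the GCM salt) */
            en.iv_len = 4;
            en.tls_vmajor = TLS_MAJOR_VER_ONE;
            en.tls_vminor = TLS_MINOR_VER_TWO;
            return (setsockopt(s, IPPROTO_TCP, TCP_TXTLS_ENABLE,
                &en, sizeof(en)));
    }

After this succeeds, subsequent writes on the socket carry TLS records
that the driver transmits via the send tag allocated by
t7_tls_tag_alloc() below.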
---
 sys/conf/files                      |    2 +
 sys/dev/cxgbe/adapter.h             |   20 +-
 sys/dev/cxgbe/crypto/t7_kern_tls.c  | 1435 +++++++++++++++++++++++++++++++++++
 sys/dev/cxgbe/offload.h             |    7 +-
 sys/dev/cxgbe/t4_main.c             |   16 +-
 sys/dev/cxgbe/t4_sge.c              |   36 +-
 sys/modules/cxgbe/if_cxgbe/Makefile |    1 +
 7 files changed, 1503 insertions(+), 14 deletions(-)

diff --git a/sys/conf/files b/sys/conf/files
index 6da1f7e97973..d9730e6bf55b 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1405,6 +1405,8 @@ dev/cxgbe/common/t4vf_hw.c        optional cxgbev pci \
        compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/crypto/t6_kern_tls.c optional cxgbe pci kern_tls \
        compile-with "${NORMAL_C} -I$S/dev/cxgbe"
+dev/cxgbe/crypto/t7_kern_tls.c optional cxgbe pci kern_tls \
+       compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/crypto/t4_keyctx.c   optional cxgbe pci \
        compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/cudbg/cudbg_common.c optional cxgbe \
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index ac8cdddd41e5..9064d148cba9 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -640,12 +640,22 @@ struct sge_txq {
        uint64_t kern_tls_full;
        uint64_t kern_tls_octets;
        uint64_t kern_tls_waste;
-       uint64_t kern_tls_options;
        uint64_t kern_tls_header;
-       uint64_t kern_tls_fin;
        uint64_t kern_tls_fin_short;
        uint64_t kern_tls_cbc;
        uint64_t kern_tls_gcm;
+       union {
+               struct {
+                       /* T6 only. */
+                       uint64_t kern_tls_options;
+                       uint64_t kern_tls_fin;
+               };
+               struct {
+                       /* T7 only. */
+                       uint64_t kern_tls_lso;
+                       uint64_t kern_tls_splitmode;
+               };
+       };
 
        /* stats for not-that-common events */
 
@@ -1425,6 +1435,12 @@ void t6_ktls_modunload(void);
 int t6_ktls_try(if_t, struct socket *, struct ktls_session *);
 int t6_ktls_parse_pkt(struct mbuf *);
 int t6_ktls_write_wr(struct sge_txq *, void *, struct mbuf *, u_int);
+
+/* t7_kern_tls.c */
+int t7_tls_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *,
+    struct m_snd_tag **);
+int t7_ktls_parse_pkt(struct mbuf *);
+int t7_ktls_write_wr(struct sge_txq *, void *, struct mbuf *, u_int);
 #endif
 
 /* t4_keyctx.c */
diff --git a/sys/dev/cxgbe/crypto/t7_kern_tls.c b/sys/dev/cxgbe/crypto/t7_kern_tls.c
new file mode 100644
index 000000000000..402b2cab20ba
--- /dev/null
+++ b/sys/dev/cxgbe/crypto/t7_kern_tls.c
@@ -0,0 +1,1435 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Chelsio Communications
+ * Written by: John Baldwin <j...@freebsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_kern_tls.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/ktr.h>
+#include <sys/ktls.h>
+#include <sys/sglist.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockbuf.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp_var.h>
+#include <opencrypto/cryptodev.h>
+#include <opencrypto/xform.h>
+
+#include "common/common.h"
+#include "common/t4_regs.h"
+#include "common/t4_regs_values.h"
+#include "common/t4_tcb.h"
+#include "t4_l2t.h"
+#include "t4_clip.h"
+#include "t4_mp_ring.h"
+#include "crypto/t4_crypto.h"
+
+#if defined(INET) || defined(INET6)
+
+#define TLS_HEADER_LENGTH              5
+
+struct tls_scmd {
+       __be32 seqno_numivs;
+       __be32 ivgen_hdrlen;
+};
+
+struct tlspcb {
+       struct m_snd_tag com;
+       struct vi_info *vi;     /* virtual interface */
+       struct adapter *sc;
+       struct sge_txq *txq;
+
+       int tx_key_addr;
+       bool inline_key;
+       unsigned char enc_mode;
+
+       struct tls_scmd scmd0;
+       struct tls_scmd scmd0_short;
+
+       unsigned int tx_key_info_size;
+
+       uint16_t prev_mss;
+
+       /* Only used outside of setup and teardown when using inline keys. */
+       struct tls_keyctx keyctx;
+};
+
+static void t7_tls_tag_free(struct m_snd_tag *mst);
+static int ktls_setup_keys(struct tlspcb *tlsp,
+    const struct ktls_session *tls, struct sge_txq *txq);
+
+static const struct if_snd_tag_sw t7_tls_tag_sw = {
+       .snd_tag_free = t7_tls_tag_free,
+       .type = IF_SND_TAG_TYPE_TLS
+};
+
+static inline struct tlspcb *
+mst_to_tls(struct m_snd_tag *t)
+{
+       return (__containerof(t, struct tlspcb, com));
+}
+
+static struct tlspcb *
+alloc_tlspcb(struct ifnet *ifp, struct vi_info *vi, int flags)
+{
+       struct port_info *pi = vi->pi;
+       struct adapter *sc = pi->adapter;
+       struct tlspcb *tlsp;
+
+       tlsp = malloc(sizeof(*tlsp), M_CXGBE, M_ZERO | flags);
+       if (tlsp == NULL)
+               return (NULL);
+
+       m_snd_tag_init(&tlsp->com, ifp, &t7_tls_tag_sw);
+       tlsp->vi = vi;
+       tlsp->sc = sc;
+       tlsp->tx_key_addr = -1;
+
+       return (tlsp);
+}
+
+int
+t7_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
+    struct m_snd_tag **pt)
+{
+       const struct ktls_session *tls;
+       struct tlspcb *tlsp;
+       struct adapter *sc;
+       struct vi_info *vi;
+       struct inpcb *inp;
+       struct sge_txq *txq;
+       int error, explicit_iv_size, keyid, mac_first;
+
+       tls = params->tls.tls;
+
+       /* Only TLS 1.1 and TLS 1.2 are currently supported. */
+       if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE ||
+           tls->params.tls_vminor < TLS_MINOR_VER_ONE ||
+           tls->params.tls_vminor > TLS_MINOR_VER_TWO)
+               return (EPROTONOSUPPORT);
+
+       /* Sanity check values in *tls. */
+       switch (tls->params.cipher_algorithm) {
+       case CRYPTO_AES_CBC:
+               /* XXX: Explicitly ignore any provided IV. */
+               switch (tls->params.cipher_key_len) {
+               case 128 / 8:
+               case 192 / 8:
+               case 256 / 8:
+                       break;
+               default:
+                       return (EINVAL);
+               }
+               switch (tls->params.auth_algorithm) {
+               case CRYPTO_SHA1_HMAC:
+               case CRYPTO_SHA2_256_HMAC:
+               case CRYPTO_SHA2_384_HMAC:
+                       break;
+               default:
+                       return (EPROTONOSUPPORT);
+               }
+               explicit_iv_size = AES_BLOCK_LEN;
+               mac_first = 1;
+               break;
+       case CRYPTO_AES_NIST_GCM_16:
+               if (tls->params.iv_len != SALT_SIZE)
+                       return (EINVAL);
+               switch (tls->params.cipher_key_len) {
+               case 128 / 8:
+               case 192 / 8:
+               case 256 / 8:
+                       break;
+               default:
+                       return (EINVAL);
+               }
+               explicit_iv_size = 8;
+               mac_first = 0;
+               break;
+       default:
+               return (EPROTONOSUPPORT);
+       }
+
+       vi = if_getsoftc(ifp);
+       sc = vi->adapter;
+
+       tlsp = alloc_tlspcb(ifp, vi, M_WAITOK);
+
+       if (sc->tlst.inline_keys)
+               keyid = -1;
+       else
+               keyid = t4_alloc_tls_keyid(sc);
+       if (keyid < 0) {
+               CTR(KTR_CXGBE, "%s: %p using immediate key ctx", __func__,
+                   tlsp);
+               tlsp->inline_key = true;
+       } else {
+               tlsp->tx_key_addr = keyid;
+               CTR(KTR_CXGBE, "%s: %p allocated TX key addr %#x", __func__,
+                   tlsp, tlsp->tx_key_addr);
+       }
+
+       inp = params->tls.inp;
+       INP_RLOCK(inp);
+       if (inp->inp_flags & INP_DROPPED) {
+               INP_RUNLOCK(inp);
+               error = ECONNRESET;
+               goto failed;
+       }
+
+       txq = &sc->sge.txq[vi->first_txq];
+       if (inp->inp_flowtype != M_HASHTYPE_NONE)
+               txq += ((inp->inp_flowid % (vi->ntxq - vi->rsrv_noflowq)) +
+                   vi->rsrv_noflowq);
+       tlsp->txq = txq;
+       INP_RUNLOCK(inp);
+
+       error = ktls_setup_keys(tlsp, tls, txq);
+       if (error)
+               goto failed;
+
+       tlsp->enc_mode = t4_tls_cipher_mode(tls);
+       tlsp->tx_key_info_size = t4_tls_key_info_size(tls);
+
+       /* The SCMD fields used when encrypting a full TLS record. */
+       tlsp->scmd0.seqno_numivs = htobe32(V_SCMD_SEQ_NO_CTRL(3) |
+           V_SCMD_PROTO_VERSION(t4_tls_proto_ver(tls)) |
+           V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
+           V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
+           V_SCMD_CIPH_MODE(tlsp->enc_mode) |
+           V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) |
+           V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) |
+           V_SCMD_IV_SIZE(explicit_iv_size / 2) | V_SCMD_NUM_IVS(1));
+
+       tlsp->scmd0.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
+           V_SCMD_TLS_FRAG_ENABLE(0);
+       if (tlsp->inline_key)
+               tlsp->scmd0.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1);
+
+       /*
+        * The SCMD fields used when encrypting a partial TLS record
+        * (no trailer and possibly a truncated payload).
+        */
+       tlsp->scmd0_short.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0) |
+           V_SCMD_PROTO_VERSION(SCMD_PROTO_VERSION_GENERIC) |
+           V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
+           V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
+           V_SCMD_AUTH_MODE(SCMD_AUTH_MODE_NOP) |
+           V_SCMD_HMAC_CTRL(SCMD_HMAC_CTRL_NOP) |
+           V_SCMD_IV_SIZE(AES_BLOCK_LEN / 2) | V_SCMD_NUM_IVS(0);
+       if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM)
+               tlsp->scmd0_short.seqno_numivs |=
+                   V_SCMD_CIPH_MODE(SCMD_CIPH_MODE_AES_CTR);
+       else
+               tlsp->scmd0_short.seqno_numivs |=
+                   V_SCMD_CIPH_MODE(tlsp->enc_mode);
+       tlsp->scmd0_short.seqno_numivs =
+           htobe32(tlsp->scmd0_short.seqno_numivs);
+
+       tlsp->scmd0_short.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
+           V_SCMD_TLS_FRAG_ENABLE(0) | V_SCMD_AADIVDROP(1);
+       if (tlsp->inline_key)
+               tlsp->scmd0_short.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1);
+
+       TXQ_LOCK(txq);
+       if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM)
+               txq->kern_tls_gcm++;
+       else
+               txq->kern_tls_cbc++;
+       TXQ_UNLOCK(txq);
+       *pt = &tlsp->com;
+       return (0);
+
+failed:
+       m_snd_tag_rele(&tlsp->com);
+       return (error);
+}
+
+static int
+ktls_setup_keys(struct tlspcb *tlsp, const struct ktls_session *tls,
+    struct sge_txq *txq)
+{
+       struct tls_key_req *kwr;
+       struct tls_keyctx *kctx;
+       void *items[1];
+       struct mbuf *m;
+       int error;
+
+       /*
+        * Store the salt and keys in the key context.  For
+        * connections with an inline key, this key context is passed
+        * as immediate data in each work request.  For connections
+        * storing the key in DDR, a work request is used to store a
+        * copy of the key context in DDR.
+        */
+       t4_tls_key_ctx(tls, KTLS_TX, &tlsp->keyctx);
+       if (tlsp->inline_key)
+               return (0);
+
+       /* Populate key work request. */
+       m = alloc_wr_mbuf(TLS_KEY_WR_SZ, M_NOWAIT);
+       if (m == NULL) {
+               CTR(KTR_CXGBE, "%s: %p failed to alloc WR mbuf", __func__,
+                   tlsp);
+               return (ENOMEM);
+       }
+       m->m_pkthdr.snd_tag = m_snd_tag_ref(&tlsp->com);
+       m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+       kwr = mtod(m, void *);
+       memset(kwr, 0, TLS_KEY_WR_SZ);
+
+       t4_write_tlskey_wr(tls, KTLS_TX, 0, 0, tlsp->tx_key_addr, kwr);
+       kctx = (struct tls_keyctx *)(kwr + 1);
+       memcpy(kctx, &tlsp->keyctx, sizeof(*kctx));
+
+       /*
+        * Place the key work request in the transmit queue.  It
+        * should be sent to the NIC before any TLS packets using this
+        * session.
+        */
+       items[0] = m;
+       error = mp_ring_enqueue(txq->r, items, 1, 1);
+       if (error)
+               m_free(m);
+       else
+               CTR(KTR_CXGBE, "%s: %p sent key WR", __func__, tlsp);
+       return (error);
+}
+
+static u_int
+ktls_base_wr_size(struct tlspcb *tlsp)
+{
+       u_int wr_len;
+
+       wr_len = sizeof(struct fw_ulptx_wr);    // 16
+       wr_len += sizeof(struct ulp_txpkt);     // 8
+       wr_len += sizeof(struct ulptx_idata);   // 8
+       wr_len += sizeof(struct cpl_tx_sec_pdu);// 32
+       if (tlsp->inline_key)
+               wr_len += tlsp->tx_key_info_size;
+       else {
+               wr_len += sizeof(struct ulptx_sc_memrd);// 8
+               wr_len += sizeof(struct ulptx_idata);   // 8
+       }
+       /* SplitMode CPL_RX_PHYS_DSGL here if needed. */
+       /* CPL_TX_*_LSO here if needed. */
+       wr_len += sizeof(struct cpl_tx_pkt_core);// 16
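+       /*
+        * E.g., with the key stored in DDR this sums to 16 + 8 + 8 +
+        * 32 + 8 + 8 + 16 = 96 bytes before any optional SplitMode or
+        * LSO CPLs.
+        */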
+       return (wr_len);
+}
+
+static u_int
+ktls_sgl_size(u_int nsegs)
+{
+       u_int wr_len;
+
+       /* First segment is part of ulptx_sgl. */
+       nsegs--;
+
+       wr_len = sizeof(struct ulptx_sgl);
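+       /*
+        * Each pair of remaining segments consumes 3 flits (two
+        * 32-bit lengths plus two 64-bit addresses); an odd trailing
+        * segment consumes 2 flits including padding.  E.g., 4
+        * segments total: 16 + 8 * ((3 * 3) / 2 + 1) = 56 bytes.
+        */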
+       wr_len += 8 * ((3 * nsegs) / 2 + (nsegs & 1));
+       return (wr_len);
+}
+
+/*
+ * A request that doesn't need to generate the TLS trailer is a short
+ * record.  For these requests, part of the TLS record payload is
+ * encrypted without invoking the MAC.
+ *
+ * Returns true if this record should be sent as a short record.  In
+ * either case, the remaining outputs describe how much of the
+ * TLS record to send as input to the crypto block and the amount of
+ * crypto output to trim via SplitMode:
+ *
+ * *header_len - Number of bytes of TLS header to pass as immediate
+ *               data
+ *
+ * *offset - Start offset of TLS record payload to pass as DSGL data
+ *
+ * *plen - Length of TLS record payload to pass as DSGL data
+ *
+ * *leading_waste - amount of non-packet-header bytes to drop at the
+ *                  start of the crypto output
+ *
+ * *trailing_waste - amount of crypto output to drop from the end
+ */
+static bool
+ktls_is_short_record(struct tlspcb *tlsp, struct mbuf *m_tls,
+    u_int tlen, u_int *header_len, u_int *offset, u_int *plen,
+    u_int *leading_waste, u_int *trailing_waste)
+{
+       const struct tls_record_layer *hdr;
+       u_int new_tlen, rlen;
+
+       MPASS(tlen > m_tls->m_epg_hdrlen);
+
+       hdr = (void *)m_tls->m_epg_hdr;
+       rlen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length);
+
+       /*
+        * Default to sending the full record as input to the crypto
+        * engine and relying on SplitMode to drop any waste.
+        */
+       *header_len = m_tls->m_epg_hdrlen;
+       *offset = 0;
+       *plen = rlen - (m_tls->m_epg_hdrlen + m_tls->m_epg_trllen);
+       *leading_waste = mtod(m_tls, vm_offset_t);
+       *trailing_waste = rlen - tlen;
+       if (!tlsp->sc->tlst.short_records)
+               return (false);
+
+       if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC) {
+               /*
+                * For AES-CBC we have to send input from the start of
+                * the TLS record payload that is a multiple of the
+                * block size.  new_tlen rounds up tlen to the end of
+                * the containing AES block.  If this last block
+                * overlaps with the trailer, send the full record to
+                * generate the MAC.
+                */
+               new_tlen = TLS_HEADER_LENGTH +
+                   roundup2(tlen - TLS_HEADER_LENGTH, AES_BLOCK_LEN);
+               if (rlen - new_tlen < m_tls->m_epg_trllen)
+                       return (false);
+
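+               /*
+                * The rounding adds at most AES_BLOCK_LEN - 1 bytes
+                * of trailing waste.
+                */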
+               *trailing_waste = new_tlen - tlen;
+               *plen = new_tlen - m_tls->m_epg_hdrlen;
+       } else {
+               /*
+                * For AES-GCM we have to send the full record if
+                * the end overlaps with the trailer.  Otherwise, we
+                * can use AES-CTR to encrypt a partial PDU.
+                */
+               if (rlen - tlen < m_tls->m_epg_trllen)
+                       return (false);
+
+               /*
+                * The last record can be partially encrypted via
+                * AES-CTR without any trailing waste.
+                */
+               *trailing_waste = 0;
+               *plen = tlen - m_tls->m_epg_hdrlen;
+
+               /*
+                * In addition, with AES-CTR, we can minimize leading
+                * waste by starting encryption at the start of the
+                * closest AES block.
+                */
+               if (mtod(m_tls, vm_offset_t) >= m_tls->m_epg_hdrlen) {
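+                       /*
+                        * E.g., with mtod(m_tls) = 100 and a 21-byte
+                        * TLS header, encryption starts at payload
+                        * offset rounddown2(79, 16) = 64, cutting the
+                        * leading waste from 100 to 15 bytes.
+                        */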
+                       *header_len = 0;
+                       *offset = rounddown2(mtod(m_tls, vm_offset_t) -
+                           m_tls->m_epg_hdrlen, AES_BLOCK_LEN);
+                       *plen -= *offset;
+                       *leading_waste -= (m_tls->m_epg_hdrlen + *offset);
+               }
+       }
+       return (true);
+}
+
+static int
+ktls_wr_len(struct tlspcb *tlsp, struct mbuf *m, struct mbuf *m_tls,
+    int *nsegsp)
+{
+       u_int header_len, imm_len, offset, plen, tlen, wr_len;
+       u_int leading_waste, trailing_waste;
+       bool short_record;
+
+       M_ASSERTEXTPG(m_tls);
+
+       /*
+        * The relative offset of the last byte to send from the TLS
+        * record.
+        */
+       tlen = mtod(m_tls, vm_offset_t) + m_tls->m_len;
+       if (tlen <= m_tls->m_epg_hdrlen) {
+               /*
+                * For requests that only want to send the TLS header,
+                * send a tunnelled packet as immediate data.
+                */
+               wr_len = sizeof(struct fw_eth_tx_pkt_wr) +
+                   sizeof(struct cpl_tx_pkt_core) +
+                   roundup2(m->m_len + m_tls->m_len, 16);
+               if (wr_len > SGE_MAX_WR_LEN) {
+                       CTR(KTR_CXGBE,
+                   "%s: %p TLS header-only packet too long (len %d)",
+                           __func__, tlsp, m->m_len + m_tls->m_len);
+               }
+
+               /* This should always be the last TLS record in a chain. */
+               MPASS(m_tls->m_next == NULL);
+               *nsegsp = 0;
+               return (wr_len);
+       }
+
+       short_record = ktls_is_short_record(tlsp, m_tls, tlen, &header_len,
+           &offset, &plen, &leading_waste, &trailing_waste);
+
+       /* Calculate the size of the work request. */
+       wr_len = ktls_base_wr_size(tlsp);
+
+       if (leading_waste != 0 || trailing_waste != 0) {
+               /*
+                * Partial records might require a SplitMode
+                * CPL_RX_PHYS_DSGL.
+                */
+               wr_len += sizeof(struct cpl_t7_rx_phys_dsgl);
+       }
+
+       /* Budget for an LSO header even if we don't use it. */
+       wr_len += sizeof(struct cpl_tx_pkt_lso_core);
+
+       /*
+        * Headers (including the TLS header) are always sent as
+        * immediate data.  Short records include a raw AES IV as
+        * immediate data.
+        */
+       imm_len = m->m_len + header_len;
+       if (short_record)
+               imm_len += AES_BLOCK_LEN;
+       wr_len += roundup2(imm_len, 16);
+
+       /* TLS record payload via DSGL. */
+       *nsegsp = sglist_count_mbuf_epg(m_tls, m_tls->m_epg_hdrlen + offset,
+           plen);
+       wr_len += ktls_sgl_size(*nsegsp);
+
+       wr_len = roundup2(wr_len, 16);
+       return (wr_len);
+}
+
+int
+t7_ktls_parse_pkt(struct mbuf *m)
+{
+       struct tlspcb *tlsp;
+       struct ether_header *eh;
+       struct ip *ip;
+       struct ip6_hdr *ip6;
+       struct tcphdr *tcp;
+       struct mbuf *m_tls;
+       void *items[1];
+       int nsegs;
+       u_int wr_len, tot_len;
+       uint16_t eh_type;
+
+       /*
+        * Locate headers in initial mbuf.
+        *
+        * XXX: This assumes all of the headers are in the initial mbuf.
+        * Could perhaps use m_advance() like parse_pkt() if that turns
+        * out to not be true.
+        */
+       M_ASSERTPKTHDR(m);
+       MPASS(m->m_pkthdr.snd_tag != NULL);
+       tlsp = mst_to_tls(m->m_pkthdr.snd_tag);
+
+       if (m->m_len <= sizeof(*eh) + sizeof(*ip)) {
+               CTR(KTR_CXGBE, "%s: %p header mbuf too short", __func__, tlsp);
+               return (EINVAL);
+       }
+       eh = mtod(m, struct ether_header *);
+       eh_type = ntohs(eh->ether_type);
+       if (eh_type == ETHERTYPE_VLAN) {
+               struct ether_vlan_header *evh = (void *)eh;
+
+               eh_type = ntohs(evh->evl_proto);
+               m->m_pkthdr.l2hlen = sizeof(*evh);
+       } else
+               m->m_pkthdr.l2hlen = sizeof(*eh);
+
+       switch (eh_type) {
+       case ETHERTYPE_IP:
+               ip = (struct ip *)(eh + 1);
+               if (ip->ip_p != IPPROTO_TCP) {
+                       CTR(KTR_CXGBE, "%s: %p mbuf not IPPROTO_TCP", __func__,
+                           tlsp);
+                       return (EINVAL);
+               }
+               m->m_pkthdr.l3hlen = ip->ip_hl * 4;
+               break;
+       case ETHERTYPE_IPV6:
+               ip6 = (struct ip6_hdr *)(eh + 1);
+               if (ip6->ip6_nxt != IPPROTO_TCP) {
+                       CTR(KTR_CXGBE, "%s: %p, mbuf not IPPROTO_TCP (%u)",
+                           __func__, tlsp, ip6->ip6_nxt);
+                       return (EINVAL);
+               }
+               m->m_pkthdr.l3hlen = sizeof(struct ip6_hdr);
+               break;
+       default:
+               CTR(KTR_CXGBE, "%s: %p mbuf not ETHERTYPE_IP{,V6}", __func__,
+                   tlsp);
+               return (EINVAL);
+       }
+       if (m->m_len < m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
+           sizeof(*tcp)) {
+               CTR(KTR_CXGBE, "%s: %p header mbuf too short (2)", __func__,
+                   tlsp);
+               return (EINVAL);
+       }
+       tcp = (struct tcphdr *)((char *)(eh + 1) + m->m_pkthdr.l3hlen);
+       m->m_pkthdr.l4hlen = tcp->th_off * 4;
+
+       /* Bail if there is TCP payload before the TLS record. */
+       if (m->m_len != m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
+           m->m_pkthdr.l4hlen) {
+               CTR(KTR_CXGBE,
+                   "%s: %p header mbuf bad length (%d + %d + %d != %d)",
+                   __func__, tlsp, m->m_pkthdr.l2hlen, m->m_pkthdr.l3hlen,
+                   m->m_pkthdr.l4hlen, m->m_len);
+               return (EINVAL);
+       }
+
+       /* Assume all headers are in 'm' for now. */
+       MPASS(m->m_next != NULL);
+       MPASS(m->m_next->m_flags & M_EXTPG);
+
+       tot_len = 0;
+
+       /*
+        * Each of the remaining mbufs in the chain should reference a
+        * TLS record.
+        */
+       for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) {
+               MPASS(m_tls->m_flags & M_EXTPG);
+
+               wr_len = ktls_wr_len(tlsp, m, m_tls, &nsegs);
+#ifdef VERBOSE_TRACES
+               CTR(KTR_CXGBE, "%s: %p wr_len %d nsegs %d", __func__, tlsp,
+                   wr_len, nsegs);
+#endif
+               if (wr_len > SGE_MAX_WR_LEN || nsegs > TX_SGL_SEGS)
+                       return (EFBIG);
+               tot_len += roundup2(wr_len, EQ_ESIZE);
+
+               /*
+                * Store 'nsegs' for the first TLS record in the
+                * header mbuf's metadata.
+                */
+               if (m_tls == m->m_next)
+                       set_mbuf_nsegs(m, nsegs);
+       }
+
+       MPASS(tot_len != 0);
+
+       set_mbuf_len16(m, tot_len / 16);
+#ifdef VERBOSE_TRACES
+       CTR(KTR_CXGBE, "%s: %p len16 %d nsegs %d", __func__, tlsp,
+           mbuf_len16(m), mbuf_nsegs(m));
+#endif
+       items[0] = m;
+       return (mp_ring_enqueue(tlsp->txq->r, items, 1, 256));
+}
+
+static inline bool
+needs_vlan_insertion(struct mbuf *m)
+{
+
+       M_ASSERTPKTHDR(m);
+
+       return (m->m_flags & M_VLANTAG);
+}
+
+static inline uint64_t
+pkt_ctrl1(struct sge_txq *txq, struct mbuf *m, uint16_t eh_type)
+{
+       uint64_t ctrl1;
+
+       /* Checksums are always offloaded */
+       if (eh_type == ETHERTYPE_IP) {
+               ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP) |
+                   V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
+                   V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
+       } else {
+               MPASS(m->m_pkthdr.l3hlen == sizeof(struct ip6_hdr));
+               ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP6) |
+                   V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
+                   V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
+       }
+       txq->txcsum++;
+
+       /* VLAN tag insertion */
+       if (needs_vlan_insertion(m)) {
+               ctrl1 |= F_TXPKT_VLAN_VLD |
+                   V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
+               txq->vlan_insertion++;
+       }
+
+       return (ctrl1);
+}
+
+static inline void *
+write_lso_cpl(void *cpl, struct mbuf *m0, uint16_t mss, uint16_t eh_type,
+    int total_len)
+{
+       struct cpl_tx_pkt_lso_core *lso;
+       uint32_t ctrl;
+
+       KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
+           m0->m_pkthdr.l4hlen > 0,
+           ("%s: mbuf %p needs TSO but missing header lengths",
+               __func__, m0));
+
+       ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) |
+           F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
+           V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
+           V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
+           V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
+       if (eh_type == ETHERTYPE_IPV6)
+               ctrl |= F_LSO_IPV6;
+
+       lso = cpl;
+       lso->lso_ctrl = htobe32(ctrl);
+       lso->ipid_ofst = htobe16(0);
+       lso->mss = htobe16(mss);
+       lso->seqno_offset = htobe32(0);
+       lso->len = htobe32(total_len);
+
+       return (lso + 1);
+}
+
+static inline void *
+write_split_mode_rx_phys(void *dst, struct mbuf *m, struct mbuf *m_tls,
+    u_int crypto_hdr_len, u_int leading_waste, u_int trailing_waste)
+{
+       struct cpl_t7_rx_phys_dsgl *cpl;
+       uint16_t *len;
+       uint8_t numsge;
+
+       /* Forward first (3) and third (1) segments. */
+       numsge = 0xa;
+
+       cpl = dst;
+       cpl->ot.opcode = CPL_RX_PHYS_DSGL;
+       cpl->PhysAddrFields_lo_to_NumSGE =
+           htobe32(F_CPL_T7_RX_PHYS_DSGL_SPLITMODE |
+           V_CPL_T7_RX_PHYS_DSGL_NUMSGE(numsge));
+
+       len = (uint16_t *)(cpl->RSSCopy);
+
+       /*
+        * First segment always contains packet headers as well as
+        * transmit-related CPLs.
+        */
+       len[0] = htobe16(crypto_hdr_len);
+
+       /*
+        * Second segment is "gap" of data to drop at the front of the
+        * TLS record.
+        */
+       len[1] = htobe16(leading_waste);
+
+       /* Third segment is how much of the TLS record to send. */
+       len[2] = htobe16(m_tls->m_len);
+
+       /* Fourth segment is how much data to drop at the end. */
+       len[3] = htobe16(trailing_waste);
+
+#ifdef VERBOSE_TRACES
+       CTR(KTR_CXGBE, "%s: forward %u skip %u forward %u skip %u",
+           __func__, be16toh(len[0]), be16toh(len[1]), be16toh(len[2]),
+           be16toh(len[3]));
+#endif
+       return (cpl + 1);
+}
+
+/*
+ * If the SGL ends on an address that is not 16 byte aligned, this
+ * function will add a 0 filled flit at the end.
+ */
+static void
+write_gl_to_buf(struct sglist *gl, caddr_t to)
+{
+       struct sglist_seg *seg;
+       __be64 *flitp;
+       struct ulptx_sgl *usgl;
+       int i, nflits, nsegs;
+
+       KASSERT(((uintptr_t)to & 0xf) == 0,
+           ("%s: SGL must start at a 16 byte boundary: %p", __func__, to));
+
+       nsegs = gl->sg_nseg;
+       MPASS(nsegs > 0);
+
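+       /*
+        * E.g., nsegs = 4: 2 flits for cmd_nsge/len0/addr0 plus
+        * (3 * 3) / 2 + 1 = 5 flits for the remaining 3 segments.
+        */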
+       nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2;
+       flitp = (__be64 *)to;
+       seg = &gl->sg_segs[0];
+       usgl = (void *)flitp;
+
+       usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
+           V_ULPTX_NSGE(nsegs));
+       usgl->len0 = htobe32(seg->ss_len);
+       usgl->addr0 = htobe64(seg->ss_paddr);
+       seg++;
+
+       for (i = 0; i < nsegs - 1; i++, seg++) {
+               usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len);
+               usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr);
+       }
+       if (i & 1)
+               usgl->sge[i / 2].len[1] = htobe32(0);
+       flitp += nflits;
+
+       if (nflits & 1) {
+               MPASS(((uintptr_t)flitp) & 0xf);
+               *flitp++ = 0;
+       }
+
+       MPASS((((uintptr_t)flitp) & 0xf) == 0);
+}
+
+static inline void
+copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
+{
+
+       MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
+       MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);
+
+       if (__predict_true((uintptr_t)(*to) + len <=
+           (uintptr_t)&eq->desc[eq->sidx])) {
+               bcopy(from, *to, len);
+               (*to) += len;
+               if ((uintptr_t)(*to) == (uintptr_t)&eq->desc[eq->sidx])
+                       (*to) = (caddr_t)eq->desc;
+       } else {
+               int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to);
+
+               bcopy(from, *to, portion);
+               from += portion;
+               portion = len - portion;        /* remaining */
+               bcopy(from, (void *)eq->desc, portion);
+               (*to) = (caddr_t)eq->desc + portion;
+       }
+}
+
+static int
+ktls_write_tunnel_packet(struct sge_txq *txq, void *dst, struct mbuf *m,
+    struct mbuf *m_tls, u_int available, tcp_seq tcp_seqno, u_int pidx,
+    uint16_t eh_type)
+{
+       struct tx_sdesc *txsd;
+       struct fw_eth_tx_pkt_wr *wr;
+       struct cpl_tx_pkt_core *cpl;
+       uint32_t ctrl;
+       int len16, ndesc, pktlen;
+       struct ether_header *eh;
+       struct ip *ip, newip;
+       struct ip6_hdr *ip6, newip6;
+       struct tcphdr *tcp, newtcp;
+       caddr_t out;
+
+       TXQ_LOCK_ASSERT_OWNED(txq);
+       M_ASSERTPKTHDR(m);
+
+       /* Locate the template TLS header. */
+       M_ASSERTEXTPG(m_tls);
+
+       /* This should always be the last TLS record in a chain. */
+       MPASS(m_tls->m_next == NULL);
+
+       wr = dst;
+       pktlen = m->m_len + m_tls->m_len;
+       ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen;
+       len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16);
+       ndesc = tx_len16_to_desc(len16);
+       MPASS(ndesc <= available);
+
+       /* Firmware work request header */
+       /* TODO: Handle VF work request. */
+       wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
+           V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
+
+       ctrl = V_FW_WR_LEN16(len16);
+       wr->equiq_to_len16 = htobe32(ctrl);
+       wr->r3 = 0;
+
+       cpl = (void *)(wr + 1);
+
+       /* CPL header */
+       cpl->ctrl0 = txq->cpl_ctrl0;
+       cpl->pack = 0;
+       cpl->len = htobe16(pktlen);
+
+       out = (void *)(cpl + 1);
+
+       /* Copy over Ethernet header. */
+       eh = mtod(m, struct ether_header *);
+       copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen);
*** 696 LINES SKIPPED ***
