The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=46bee8043ee2bd352d420cd573e0364ca45f813e

commit 46bee8043ee2bd352d420cd573e0364ca45f813e
Author:     John Baldwin <[email protected]>
AuthorDate: 2021-05-14 19:17:06 +0000
Commit:     John Baldwin <[email protected]>
CommitDate: 2021-05-14 19:17:06 +0000

    cxgbei: Support DDP for target I/O S/G lists with more than one entry.
    
    A CAM target layer I/O CCB can use a S/G list of virtual address ranges
    to describe its data buffer.  This change adds zero-copy receive support
    for such requests.
    
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D29908
---
 sys/dev/cxgbe/cxgbei/icl_cxgbei.c |  50 +++++++---
 sys/dev/cxgbe/tom/t4_ddp.c        | 191 ++++++++++++++++++++++++++++++++++++++
 sys/dev/cxgbe/tom/t4_tom.h        |   5 +
 3 files changed, 232 insertions(+), 14 deletions(-)

diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
index 655cc1de1478..5770599eeeef 100644
--- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
+++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
@@ -873,6 +873,28 @@ icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg)
        }
 }
 
+static inline bool
+ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen)
+{
+       int total_len = 0;
+
+       MPASS(entries > 0);
+       if (((vm_offset_t)sg[--entries].addr & 3U) != 0)
+               return (false);
+
+       total_len += sg[entries].len;
+
+       while (--entries >= 0) {
+               if (((vm_offset_t)sg[entries].addr & PAGE_MASK) != 0 ||
+                   (sg[entries].len % PAGE_SIZE) != 0)
+                       return (false);
+               total_len += sg[entries].len;
+       }
+
+       MPASS(total_len == xferlen);
+       return (true);
+}
+
 /* XXXNP: PDU should be passed in as parameter, like on the initiator. */
 #define io_to_request_pdu(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr)
 #define io_to_ppod_reservation(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)
@@ -888,6 +910,8 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
        struct cxgbei_data *ci = sc->iscsi_ulp_softc;
        struct ppod_region *pr = &ci->pr;
        struct ppod_reservation *prsv;
+       struct ctl_sg_entry *sgl, sg_entry;
+       int sg_entries = ctsio->kern_sg_entries;
        uint32_t ttt;
        int xferlen, rc = 0, alias;
 
@@ -898,7 +922,6 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
        if (ctsio->ext_data_filled == 0) {
                int first_burst;
                struct icl_pdu *ip = io_to_request_pdu(io);
-               vm_offset_t buf;
 #ifdef INVARIANTS
                struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
 
@@ -931,18 +954,16 @@ no_ddp:
                        return (0);
                }
 
-               if (ctsio->kern_sg_entries == 0)
-                       buf = (vm_offset_t)ctsio->kern_data_ptr;
-               else if (ctsio->kern_sg_entries == 1) {
-                       struct ctl_sg_entry *sgl = (void *)ctsio->kern_data_ptr;
+               if (sg_entries == 0) {
+                       sgl = &sg_entry;
+                       sgl->len = xferlen;
+                       sgl->addr = (void *)ctsio->kern_data_ptr;
+                       sg_entries = 1;
+               } else
+                       sgl = (void *)ctsio->kern_data_ptr;
 
-                       MPASS(sgl->len == xferlen);
-                       buf = (vm_offset_t)sgl->addr;
-               } else {
-                       rc = EAGAIN;    /* XXX implement */
+               if (!ddp_sgl_check(sgl, sg_entries, xferlen))
                        goto no_ddp;
-               }
-
 
                /*
                 * Reserve resources for DDP, update the ttt that should be used
@@ -956,14 +977,15 @@ no_ddp:
                        goto no_ddp;
                }
 
-               rc = t4_alloc_page_pods_for_buf(pr, buf, xferlen, prsv);
+               rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv);
                if (rc != 0) {
                        uma_zfree(prsv_zone, prsv);
                        goto no_ddp;
                }
 
-               rc = t4_write_page_pods_for_buf(sc, toep, prsv, buf, xferlen);
-               if (rc != 0) {
+               rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries,
+                   xferlen);
+               if (__predict_false(rc != 0)) {
                        t4_free_page_pods(prsv);
                        uma_zfree(prsv_zone, prsv);
                        goto no_ddp;
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
index e87d013a0453..c266a2e39fa9 100644
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -62,6 +62,9 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 
+#include <cam/scsi/scsi_all.h>
+#include <cam/ctl/ctl_io.h>
+
 #ifdef TCP_OFFLOAD
 #include "common/common.h"
 #include "common/t4_msg.h"
@@ -981,6 +984,76 @@ have_pgsz:
        return (0);
 }
 
+int
+t4_alloc_page_pods_for_sgl(struct ppod_region *pr, struct ctl_sg_entry *sgl,
+    int entries, struct ppod_reservation *prsv)
+{
+       int hcf, seglen, idx = 0, npages, nppods, i, len;
+       uintptr_t start_pva, end_pva, pva, p1 ;
+       vm_offset_t buf;
+       struct ctl_sg_entry *sge;
+
+       MPASS(entries > 0);
+       MPASS(sgl);
+
+       /*
+        * The DDP page size is unrelated to the VM page size.  We combine
+        * contiguous physical pages into larger segments to get the best DDP
+        * page size possible.  This is the largest of the four sizes in
+        * A_ULP_RX_ISCSI_PSZ that evenly divides the HCF of the segment sizes
+        * in the page list.
+        */
+       hcf = 0;
+       for (i = entries - 1; i >= 0; i--) {
+               sge = sgl + i;
+               buf = (vm_offset_t)sge->addr;
+               len = sge->len;
+               start_pva = trunc_page(buf);
+               end_pva = trunc_page(buf + len - 1);
+               pva = start_pva;
+               while (pva <= end_pva) {
+                       seglen = PAGE_SIZE;
+                       p1 = pmap_kextract(pva);
+                       pva += PAGE_SIZE;
+                       while (pva <= end_pva && p1 + seglen ==
+                           pmap_kextract(pva)) {
+                               seglen += PAGE_SIZE;
+                               pva += PAGE_SIZE;
+                       }
+
+                       hcf = calculate_hcf(hcf, seglen);
+                       if (hcf < (1 << pr->pr_page_shift[1])) {
+                               idx = 0;
+                               goto have_pgsz; /* give up, short circuit */
+                       }
+               }
+       }
+#define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1)
+       MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */
+       for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) {
+               if ((hcf & PR_PAGE_MASK(idx)) == 0)
+                       break;
+       }
+#undef PR_PAGE_MASK
+
+have_pgsz:
+       MPASS(idx <= M_PPOD_PGSZ);
+
+       npages = 0;
+       while (entries--) {
+               npages++;
+               start_pva = trunc_page(sgl->addr);
+               end_pva = trunc_page((vm_offset_t)sgl->addr + sgl->len - 1);
+               npages += (end_pva - start_pva) >> pr->pr_page_shift[idx];
+               sgl = sgl + 1;
+       }
+       nppods = howmany(npages, PPOD_PAGES);
+       if (alloc_page_pods(pr, nppods, idx, prsv) != 0)
+               return (ENOMEM);
+       MPASS(prsv->prsv_nppods > 0);
+       return (0);
+}
+
 void
 t4_free_page_pods(struct ppod_reservation *prsv)
 {
@@ -1197,6 +1270,124 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep,
        return (0);
 }
 
+int
+t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep,
+    struct ppod_reservation *prsv, struct ctl_sg_entry *sgl, int entries,
+    int xferlen)
+{
+       struct inpcb *inp = toep->inp;
+       struct ulp_mem_io *ulpmc;
+       struct ulptx_idata *ulpsc;
+       struct pagepod *ppod;
+       int i, j, k, n, chunk, len, ddp_pgsz;
+       u_int ppod_addr, offset, sg_offset = 0;
+       uint32_t cmd;
+       struct ppod_region *pr = prsv->prsv_pr;
+       uintptr_t pva, pa;
+       struct mbuf *m;
+       struct mbufq wrq;
+
+       MPASS(sgl != NULL);
+       MPASS(entries > 0);
+       cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
+       if (is_t4(sc))
+               cmd |= htobe32(F_ULP_MEMIO_ORDER);
+       else
+               cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
+       ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
+       offset = (vm_offset_t)sgl->addr & PAGE_MASK;
+       ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
+       pva = trunc_page(sgl->addr);
+       mbufq_init(&wrq, INT_MAX);
+       for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
+
+               /* How many page pods are we writing in this cycle */
+               n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
+               MPASS(n > 0);
+               chunk = PPOD_SZ(n);
+               len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
+
+               m = alloc_raw_wr_mbuf(len);
+               if (m == NULL) {
+                       mbufq_drain(&wrq);
+                       return (ENOMEM);
+               }
+               ulpmc = mtod(m, struct ulp_mem_io *);
+
+               INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
+               ulpmc->cmd = cmd;
+               ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
+               ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
+               ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
+
+               ulpsc = (struct ulptx_idata *)(ulpmc + 1);
+               ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
+               ulpsc->len = htobe32(chunk);
+
+               ppod = (struct pagepod *)(ulpsc + 1);
+               for (j = 0; j < n; i++, j++, ppod++) {
+                       ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
+                           V_PPOD_TID(toep->tid) |
+                           (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
+                       ppod->len_offset = htobe64(V_PPOD_LEN(xferlen) |
+                           V_PPOD_OFST(offset));
+                       ppod->rsvd = 0;
+
+                       for (k = 0; k < nitems(ppod->addr); k++) {
+                               if (entries != 0) {
+                                       pa = pmap_kextract(pva + sg_offset);
+                                       ppod->addr[k] = htobe64(pa);
+                               } else
+                                       ppod->addr[k] = 0;
+
+#if 0
+                               CTR5(KTR_CXGBE,
+                                   "%s: tid %d ppod[%d]->addr[%d] = %p",
+                                   __func__, toep->tid, i, k,
+                                   htobe64(ppod->addr[k]));
+#endif
+
+                               /*
+                                * If this is the last entry in a pod,
+                                * reuse the same entry for first address
+                                * in the next pod.
+                                */
+                               if (k + 1 == nitems(ppod->addr))
+                                       break;
+
+                               /*
+                                * Don't move to the next DDP page if the
+                                * sgl is already finished.
+                                */
+                               if (entries == 0)
+                                       continue;
+
+                               sg_offset += ddp_pgsz;
+                               if (sg_offset == sgl->len) {
+                                       /*
+                                        * This sgl entry is done.  Go
+                                        * to the next.
+                                        */
+                                       entries--;
+                                       sgl++;
+                                       sg_offset = 0;
+                                       if (entries != 0)
+                                               pva = trunc_page(
+                                                   (vm_offset_t)sgl->addr);
+                               }
+                       }
+               }
+
+               mbufq_enqueue(&wrq, m);
+       }
+
+       INP_WLOCK(inp);
+       mbufq_concat(&toep->ulp_pduq, &wrq);
+       INP_WUNLOCK(inp);
+
+       return (0);
+}
+
 /*
  * Prepare a pageset for DDP.  This sets up page pods.
  */
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
index f1129b47cbcf..c7984f838735 100644
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -88,6 +88,7 @@ enum {
        DDP_DEAD        = (1 << 6),     /* toepcb is shutting down */
 };
 
+struct ctl_sg_entry;
 struct sockopt;
 struct offload_settings;
 
@@ -437,10 +438,14 @@ void t4_free_ppod_region(struct ppod_region *);
 int t4_alloc_page_pods_for_ps(struct ppod_region *, struct pageset *);
 int t4_alloc_page_pods_for_buf(struct ppod_region *, vm_offset_t, int,
     struct ppod_reservation *);
+int t4_alloc_page_pods_for_sgl(struct ppod_region *, struct ctl_sg_entry *, int,
+    struct ppod_reservation *);
 int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int,
     struct pageset *);
 int t4_write_page_pods_for_buf(struct adapter *, struct toepcb *,
     struct ppod_reservation *, vm_offset_t, int);
+int t4_write_page_pods_for_sgl(struct adapter *, struct toepcb *,
+    struct ppod_reservation *, struct ctl_sg_entry *, int, int);
 void t4_free_page_pods(struct ppod_reservation *);
 int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *,
     struct mbuf **, struct mbuf **, int *);
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to