Author: shurd
Date: Thu Apr 25 21:24:56 2019
New Revision: 346708
URL: https://svnweb.freebsd.org/changeset/base/346708

Log:
  iflib: Better control over queue core assignment
  
  By default, cores are now assigned to queues in a sequential
  manner rather than all NICs starting at the first core. On a four-core
  system with two NICs each using two queue pairs, the nic:queue -> core
  mapping has changed from this:
  
  0:0 -> 0, 0:1 -> 1
  1:0 -> 0, 1:1 -> 1
  
  To this:
  
  0:0 -> 0, 0:1 -> 1
  1:0 -> 2, 1:1 -> 3
  
  Additionally, a device can now be configured to use separate cores for TX
  and RX queues.
  
  Two new tunables have been added, dev.X.Y.iflib.separate_txrx and
  dev.X.Y.iflib.core_offset. If core_offset is set, the NIC is not part
  of the auto-assigned sequence.
  
  Reviewed by:  marius
  MFC after:    2 weeks
  Sponsored by: Limelight Networks
  Differential Revision:        https://reviews.freebsd.org/D20029

Modified:
  head/share/man/man4/iflib.4
  head/sys/net/iflib.c

Modified: head/share/man/man4/iflib.4
==============================================================================
--- head/share/man/man4/iflib.4 Thu Apr 25 21:09:07 2019        (r346707)
+++ head/share/man/man4/iflib.4 Thu Apr 25 21:24:56 2019        (r346708)
@@ -55,6 +55,16 @@ If zero, the number of TX queues is derived from the n
 socket connected to the controller.
 .It Va disable_msix
 Disables MSI-X interrupts for the device.
+.It Va core_offset
+Specifies a starting core offset to assign queues to.
+If the value is unspecified or 65535, cores are assigned sequentially across
+controllers.
+.It Va separate_txrx
+Requests that RX and TX queues not be paired on the same core.
+If this is zero or not set, an RX and TX queue pair will be assigned to each
+core.
+When set to a non-zero value, TX queues are assigned to cores following the
+last RX queue.
 .El
 .Pp
 These

Modified: head/sys/net/iflib.c
==============================================================================
--- head/sys/net/iflib.c        Thu Apr 25 21:09:07 2019        (r346707)
+++ head/sys/net/iflib.c        Thu Apr 25 21:24:56 2019        (r346708)
@@ -189,6 +189,9 @@ struct iflib_ctx {
        uint16_t ifc_sysctl_qs_eq_override;
        uint16_t ifc_sysctl_rx_budget;
        uint16_t ifc_sysctl_tx_abdicate;
+       uint16_t ifc_sysctl_core_offset;
+#define        CORE_OFFSET_UNSPECIFIED 0xffff
+       uint8_t  ifc_sysctl_separate_txrx;
 
        qidx_t ifc_sysctl_ntxds[8];
        qidx_t ifc_sysctl_nrxds[8];
@@ -723,6 +726,18 @@ static void iflib_free_intr_mem(if_ctx_t ctx);
 static struct mbuf * iflib_fixup_rx(struct mbuf *m);
 #endif
 
+static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets =
+    SLIST_HEAD_INITIALIZER(cpu_offsets);
+struct cpu_offset {
+       SLIST_ENTRY(cpu_offset) entries;
+       cpuset_t        set;
+       unsigned int    refcount;
+       uint16_t        offset;
+};
+static struct mtx cpu_offset_mtx;
+MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock",
+    MTX_DEF);
+
 NETDUMP_DEFINE(iflib);
 
 #ifdef DEV_NETMAP
@@ -4461,6 +4476,71 @@ iflib_rem_pfil(if_ctx_t ctx)
        pfil_head_unregister(pfil);
 }
 
+static uint16_t
+get_ctx_core_offset(if_ctx_t ctx)
+{
+       if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
+       struct cpu_offset *op;
+       uint16_t qc;
+       uint16_t ret = ctx->ifc_sysctl_core_offset;
+
+       if (ret != CORE_OFFSET_UNSPECIFIED)
+               return (ret);
+
+       if (ctx->ifc_sysctl_separate_txrx)
+               qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets;
+       else
+               qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets);
+
+       mtx_lock(&cpu_offset_mtx);
+       SLIST_FOREACH(op, &cpu_offsets, entries) {
+               if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) {
+                       ret = op->offset;
+                       op->offset += qc;
+                       MPASS(op->refcount < UINT_MAX);
+                       op->refcount++;
+                       break;
+               }
+       }
+       if (ret == CORE_OFFSET_UNSPECIFIED) {
+               ret = 0;
+               op = malloc(sizeof(struct cpu_offset), M_IFLIB,
+                   M_NOWAIT | M_ZERO);
+               if (op == NULL) {
+                       device_printf(ctx->ifc_dev,
+                           "allocation for cpu offset failed.\n");
+               } else {
+                       op->offset = qc;
+                       op->refcount = 1;
+                       CPU_COPY(&ctx->ifc_cpus, &op->set);
+                       SLIST_INSERT_HEAD(&cpu_offsets, op, entries);
+               }
+       }
+       mtx_unlock(&cpu_offset_mtx);
+
+       return (ret);
+}
+
+static void
+unref_ctx_core_offset(if_ctx_t ctx)
+{
+       struct cpu_offset *op, *top;
+
+       mtx_lock(&cpu_offset_mtx);
+       SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) {
+               if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) {
+                       MPASS(op->refcount > 0);
+                       op->refcount--;
+                       if (op->refcount == 0) {
+                               SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries);
+                               free(op, M_IFLIB);
+                       }
+                       break;
+               }
+       }
+       mtx_unlock(&cpu_offset_mtx);
+}
+
 int
 iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp)
 {
@@ -4613,6 +4693,11 @@ iflib_device_register(device_t dev, void *sc, if_share
                goto fail_queues;
 
        /*
+        * Now that we know how many queues there are, get the core offset.
+        */
+       ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx);
+
+       /*
         * Group taskqueues aren't properly set up until SMP is started,
         * so we disable interrupts until we can handle them post
         * SI_SUB_SMP.
@@ -5037,6 +5122,7 @@ iflib_device_deregister(if_ctx_t ctx)
        iflib_rx_structures_free(ctx);
        if (ctx->ifc_flags & IFC_SC_ALLOCATED)
                free(ctx->ifc_softc, M_IFLIB);
+       unref_ctx_core_offset(ctx);
        STATE_LOCK_DESTROY(ctx);
        free(ctx, M_IFLIB);
        return (0);
@@ -5655,7 +5741,7 @@ find_child_with_core(int cpu, struct cpu_group *grp)
  * Find the nth "close" core to the specified core
  * "close" is defined as the deepest level that shares
  * at least an L2 cache.  With threads, this will be
- * threads on the same core.  If the sahred cache is L3
+ * threads on the same core.  If the shared cache is L3
  * or higher, simply returns the same core.
  */
 static int
@@ -5739,10 +5825,13 @@ iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, ifl
     const char *name)
 {
        device_t dev;
-       int err, cpuid, tid;
+       int co, cpuid, err, tid;
 
        dev = ctx->ifc_dev;
-       cpuid = find_nth(ctx, qid);
+       co = ctx->ifc_sysctl_core_offset;
+       if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX)
+               co += ctx->ifc_softc_ctx.isc_nrxqsets;
+       cpuid = find_nth(ctx, qid + co);
        tid = get_core_offset(ctx, type, qid);
        MPASS(tid >= 0);
        cpuid = find_close_core(cpuid, tid);
@@ -6344,6 +6433,13 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
        SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate",
                       CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0,
                       "cause tx to abdicate instead of running to completion");
+       ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED;
+       SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset",
+                      CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0,
+                      "offset to start using cores at");
+       SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx",
+                      CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0,
+                      "use separate cores for TX and RX");
 
        /* XXX change for per-queue sizes */
        SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to