Module Name:    src
Committed By:   knakahara
Date:           Fri Mar  2 10:19:20 UTC 2018

Modified Files:
        src/sys/dev/pci/ixgbe: ix_txrx.c ixgbe.c ixgbe.h

Log Message:
ixg(4) supports workqueue poll mode, but it is not enabled by default yet. (that is,
the default behavior is *not* changed)

At the time of high load near the wire rate, the turnaround time
of update/delete processing such as "ifconfig ixg0 inet XXX" or
"ifconfig ixg0 delete" is very long. The main reason is CPU
starvation caused by ixg(4)'s softint poll mode. ixg(4) uses
workqueue poll mode instead of softint poll mode, so that this
problem will be fixed.

This change may cause performance issues, so it is not enabled
by default yet. Although there are such issues, if you want to use
workqueue poll mode, do "sysctl -w hw.ixgXX.txrx_workqueue=1" while
there is no traffic on the ixgXX.

ok by msaitoh@n.o.


To generate a diff of this commit:
cvs rdiff -u -r1.33 -r1.34 src/sys/dev/pci/ixgbe/ix_txrx.c
cvs rdiff -u -r1.127 -r1.128 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.31 -r1.32 src/sys/dev/pci/ixgbe/ixgbe.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/pci/ixgbe/ix_txrx.c
diff -u src/sys/dev/pci/ixgbe/ix_txrx.c:1.33 src/sys/dev/pci/ixgbe/ix_txrx.c:1.34
--- src/sys/dev/pci/ixgbe/ix_txrx.c:1.33	Mon Feb 26 04:19:00 2018
+++ src/sys/dev/pci/ixgbe/ix_txrx.c	Fri Mar  2 10:19:20 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ix_txrx.c,v 1.33 2018/02/26 04:19:00 knakahara Exp $ */
+/* $NetBSD: ix_txrx.c,v 1.34 2018/03/02 10:19:20 knakahara Exp $ */
 
 /******************************************************************************
 
@@ -238,8 +238,26 @@ ixgbe_mq_start(struct ifnet *ifp, struct
 	if (IXGBE_TX_TRYLOCK(txr)) {
 		ixgbe_mq_start_locked(ifp, txr);
 		IXGBE_TX_UNLOCK(txr);
-	} else
-		softint_schedule(txr->txr_si);
+	} else {
+		if (adapter->txrx_use_workqueue) {
+			/*
+			 * This function itself is not called in interrupt
+			 * context, however it can be called in fast softint
+			 * context right after receiving forwarding packets.
+			 * So, it is required to protect workqueue from twice
+			 * enqueuing when the machine uses both spontaneous
+			 * packets and forwarding packets.
+			 */
+			u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+			if (*enqueued == 0) {
+				*enqueued = 1;
+				percpu_putref(adapter->txr_wq_enqueued);
+				workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
+			} else
+				percpu_putref(adapter->txr_wq_enqueued);
+		} else
+			softint_schedule(txr->txr_si);
+	}
 
 	return (0);
 } /* ixgbe_mq_start */
@@ -291,7 +309,8 @@ ixgbe_mq_start_locked(struct ifnet *ifp,
 /************************************************************************
  * ixgbe_deferred_mq_start
  *
- *   Called from a taskqueue to drain queued transmit packets.
+ *   Called from a softint and workqueue (indirectly) to drain queued
+ *   transmit packets.
  ************************************************************************/
 void
 ixgbe_deferred_mq_start(void *arg)
@@ -307,6 +326,24 @@ ixgbe_deferred_mq_start(void *arg)
 } /* ixgbe_deferred_mq_start */
 
 /************************************************************************
+ * ixgbe_deferred_mq_start_work
+ *
+ *   Called from a workqueue to drain queued transmit packets.
+ ************************************************************************/
+void
+ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
+{
+	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
+	struct adapter *adapter = txr->adapter;
+	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+	*enqueued = 0;
+	percpu_putref(adapter->txr_wq_enqueued);
+
+	ixgbe_deferred_mq_start(txr);
+} /* ixgbe_deferred_mq_start */
+
+
+/************************************************************************
  * ixgbe_xmit
  *
  *   Maps the mbufs to tx descriptors, allowing the

Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.127 src/sys/dev/pci/ixgbe/ixgbe.c:1.128
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.127	Mon Feb 26 04:19:00 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.c	Fri Mar  2 10:19:20 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.127 2018/02/26 04:19:00 knakahara Exp $ */
+/* $NetBSD: ixgbe.c,v 1.128 2018/03/02 10:19:20 knakahara Exp $ */
 
 /******************************************************************************
 
@@ -260,6 +260,9 @@ static void	ixgbe_handle_msf(void *);
 static void	ixgbe_handle_mod(void *);
 static void	ixgbe_handle_phy(void *);
 
+/* Workqueue handler for deferred work */
+static void	ixgbe_handle_que_work(struct work *, void *);
+
 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
 
 /************************************************************************
@@ -315,6 +318,9 @@ static int ixgbe_flow_control = ixgbe_fc
 SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN,
     &ixgbe_flow_control, 0, "Default flow control used for all adapters");
 
+/* Which pakcet processing uses workqueue or softint */
+static bool ixgbe_txrx_workqueue = false;
+
 /*
  * Smart speed setting, default to on
  * this only works as a compile option
@@ -395,10 +401,13 @@ static int (*ixgbe_ring_empty)(struct if
 #define IXGBE_MPSAFE		1
 #define IXGBE_CALLOUT_FLAGS	CALLOUT_MPSAFE
 #define IXGBE_SOFTINFT_FLAGS	SOFTINT_MPSAFE
+#define IXGBE_WORKQUEUE_FLAGS	WQ_PERCPU | WQ_MPSAFE
 #else
 #define IXGBE_CALLOUT_FLAGS	0
 #define IXGBE_SOFTINFT_FLAGS	0
+#define IXGBE_WORKQUEUE_FLAGS	WQ_PERCPU
 #endif
+#define IXGBE_WORKQUEUE_PRI PRI_SOFTNET
 
 /************************************************************************
  * ixgbe_initialize_rss_mapping
@@ -2525,9 +2534,30 @@ ixgbe_msix_que(void *arg)
 	rxr->packets = 0;
 
 no_calc:
-	if (more)
-		softint_schedule(que->que_si);
-	else
+	if (more) {
+		if (adapter->txrx_use_workqueue) {
+			/*
+			 * adapter->que_wq is bound to each CPU instead of
+			 * each NIC queue to reduce workqueue kthread. As we
+			 * should consider about interrupt affinity in this
+			 * function, the workqueue kthread must be WQ_PERCPU.
+			 * If create WQ_PERCPU workqueue kthread for each NIC
+			 * queue, that number of created workqueue kthread is
+			 * (number of used NIC queue) * (number of CPUs) =
+			 * (number of CPUs) ^ 2 most often.
+			 *
+			 * The same NIC queue's interrupts are avoided by
+			 * masking the queue's interrupt. And different
+			 * NIC queue's interrupts use different struct work
+			 * (que->wq_cookie). So, "enqueued flag" to avoid
+			 * twice workqueue_enqueue() is not required .
+			 */
+			workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+			    curcpu());
+		} else {
+			softint_schedule(que->que_si);
+		}
+	} else
 		ixgbe_enable_queue(adapter, que->msix);
 
 	return 1;
@@ -3100,6 +3130,12 @@ ixgbe_add_device_sysctls(struct adapter 
 	    CTL_EOL) != 0)
 		aprint_error_dev(dev, "could not create sysctl\n");
 
+	adapter->txrx_use_workqueue = ixgbe_txrx_workqueue;
+	if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
+	    CTLTYPE_BOOL, "txrx_workqueue", SYSCTL_DESCR("Use workqueue for packet processing"),
+	    NULL, 0, &adapter->txrx_use_workqueue, 0, CTL_CREATE, CTL_EOL) != 0)
+		aprint_error_dev(dev, "could not create sysctl\n");
+
 #ifdef IXGBE_DEBUG
 	/* testing sysctls (for all devices) */
 	if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
@@ -3232,6 +3268,12 @@ ixgbe_free_softint(struct adapter *adapt
 		if (que->que_si != NULL)
 			softint_disestablish(que->que_si);
 	}
+	if (adapter->txr_wq != NULL)
+		workqueue_destroy(adapter->txr_wq);
+	if (adapter->txr_wq_enqueued != NULL)
+		percpu_free(adapter->txr_wq_enqueued, sizeof(u_int));
+	if (adapter->que_wq != NULL)
+		workqueue_destroy(adapter->que_wq);
 
 	/* Drain the Link queue */
 	if (adapter->link_si != NULL) {
@@ -5800,9 +5842,18 @@ ixgbe_handle_que(void *context)
 		IXGBE_TX_UNLOCK(txr);
 	}
 
-	if (more)
-		softint_schedule(que->que_si);
-	else if (que->res != NULL) {
+	if (more) {
+		if (adapter->txrx_use_workqueue) {
+			/*
+			 * "enqueued flag" is not required here.
+			 * See ixgbe_msix_que().
+			 */
+			workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+			    curcpu());
+		} else {
+			softint_schedule(que->que_si);
+		}
+	} else if (que->res != NULL) {
 		/* Re-enable this interrupt */
 		ixgbe_enable_queue(adapter, que->msix);
 	} else
@@ -5812,6 +5863,21 @@ ixgbe_handle_que(void *context)
 } /* ixgbe_handle_que */
 
 /************************************************************************
+ * ixgbe_handle_que_work
+ ************************************************************************/
+static void
+ixgbe_handle_que_work(struct work *wk, void *context)
+{
+	struct ix_queue *que = container_of(wk, struct ix_queue, wq_cookie);
+
+	/*
+	 * "enqueued flag" is not required here.
+	 * See ixgbe_msix_que().
+	 */
+	ixgbe_handle_que(que);
+}
+
+/************************************************************************
  * ixgbe_allocate_legacy - Setup the Legacy or MSI Interrupt handler
  ************************************************************************/
 static int
@@ -5906,7 +5972,6 @@ alloc_retry:
 	return (0);
 } /* ixgbe_allocate_legacy */
 
-
 /************************************************************************
  * ixgbe_allocate_msix - Setup MSI-X Interrupt resources and handlers
  ************************************************************************/
@@ -5919,6 +5984,7 @@ ixgbe_allocate_msix(struct adapter *adap
 	pci_chipset_tag_t pc;
 	char		intrbuf[PCI_INTRSTR_LEN];
 	char		intr_xname[32];
+	char		wqname[MAXCOMLEN];
 	const char	*intrstr = NULL;
 	int 		error, vector = 0;
 	int		cpu_id = 0;
@@ -6044,6 +6110,24 @@ ixgbe_allocate_msix(struct adapter *adap
 			goto err_out;
 		}
 	}
+	snprintf(wqname, sizeof(wqname), "%sdeferTx", device_xname(dev));
+	error = workqueue_create(&adapter->txr_wq, wqname,
+	    ixgbe_deferred_mq_start_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+	    IXGBE_WORKQUEUE_FLAGS);
+	if (error) {
+		aprint_error_dev(dev, "couldn't create workqueue for deferred Tx\n");
+		goto err_out;
+	}
+	adapter->txr_wq_enqueued = percpu_alloc(sizeof(u_int));
+
+	snprintf(wqname, sizeof(wqname), "%sTxRx", device_xname(dev));
+	error = workqueue_create(&adapter->que_wq, wqname,
+	    ixgbe_handle_que_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+	    IXGBE_WORKQUEUE_FLAGS);
+	if (error) {
+		aprint_error_dev(dev, "couldn't create workqueue for Tx/Rx\n");
+		goto err_out;
+	}
 
 	/* and Link */
 	cpu_id++;

Index: src/sys/dev/pci/ixgbe/ixgbe.h
diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.31 src/sys/dev/pci/ixgbe/ixgbe.h:1.32
--- src/sys/dev/pci/ixgbe/ixgbe.h:1.31	Mon Feb 26 04:19:00 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.h	Fri Mar  2 10:19:20 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.31 2018/02/26 04:19:00 knakahara Exp $ */
+/* $NetBSD: ixgbe.h,v 1.32 2018/03/02 10:19:20 knakahara Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -80,6 +80,7 @@
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/sockio.h>
+#include <sys/percpu.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
@@ -338,6 +339,8 @@ struct ix_queue {
 
 	kmutex_t         im_mtx;	/* lock for im_nest and this queue's EIMS/EIMC bit */
 	int              im_nest;
+
+	struct work      wq_cookie;
 };
 
 /*
@@ -373,6 +376,8 @@ struct tx_ring {
 	struct evcnt		no_desc_avail;
 	struct evcnt		total_packets;
 	struct evcnt		pcq_drops;
+
+	struct work		wq_cookie;
 };
 
 
@@ -499,6 +504,18 @@ struct adapter {
 
 	void			*phy_si;   /* PHY intr tasklet */
 
+	bool			txrx_use_workqueue;
+	struct workqueue	*que_wq;    /* workqueue for ixgbe_handle_que_work() */
+					    /*
+					     * que_wq's "enqueued flag" is not required,
+					     * because twice workqueue_enqueue() for
+					     * ixgbe_handle_que_work() is avoided by masking
+					     * the queue's interrupt by EIMC.
+					     * See also ixgbe_msix_que().
+					     */
+	struct workqueue	*txr_wq;    /* workqueue for ixgbe_deferred_mq_start_work() */
+	percpu_t		*txr_wq_enqueued;
+
 	/*
 	 * Queues:
 	 *   This is the irq holder, it has
@@ -714,6 +731,7 @@ int  ixgbe_legacy_start_locked(struct if
 int  ixgbe_mq_start(struct ifnet *, struct mbuf *);
 int  ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
 void ixgbe_deferred_mq_start(void *);
+void ixgbe_deferred_mq_start_work(struct work *, void *);
 
 int  ixgbe_allocate_queues(struct adapter *);
 int  ixgbe_setup_transmit_structures(struct adapter *);

Reply via email to