Module Name:	src
Committed By:	knakahara
Date:		Fri Mar  2 10:19:20 UTC 2018
Modified Files:
	src/sys/dev/pci/ixgbe: ix_txrx.c ixgbe.c ixgbe.h

Log Message:
ixg(4) now supports a workqueue poll mode, but it is not enabled by
default yet (that is, the default behavior is *not* changed).

Under high load near the wire rate, the turnaround time of update/delete
processing such as "ifconfig ixg0 inet XXX" or "ifconfig ixg0 delete" is
very long. The main reason is CPU starvation caused by ixg(4)'s softint
poll mode. Having ixg(4) use workqueue poll mode instead of softint poll
mode fixes this problem.

This change may cause performance issues, so it is not enabled by
default yet. Despite those issues, if you want to use workqueue poll
mode, run "sysctl -w hw.ixgXX.txrx_workqueue=1" while there is no
traffic on the ixgXX.

ok by msaitoh@n.o.


To generate a diff of this commit:
cvs rdiff -u -r1.33 -r1.34 src/sys/dev/pci/ixgbe/ix_txrx.c
cvs rdiff -u -r1.127 -r1.128 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.31 -r1.32 src/sys/dev/pci/ixgbe/ixgbe.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
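Before the diff itself, a minimal sketch of the deferred-Tx pattern the
commit adds: a per-CPU "enqueued" flag guarding workqueue_enqueue(). The
names my_softc, my_defer and my_work are illustrative stand-ins, not part
of the commit; in the diff below the flag is adapter->txr_wq_enqueued,
the work is txr->wq_cookie, and the handler is
ixgbe_deferred_mq_start_work().

#include <sys/types.h>
#include <sys/cpu.h>
#include <sys/percpu.h>
#include <sys/workqueue.h>

struct my_softc {
	struct workqueue *sc_wq;	 /* WQ_PERCPU workqueue */
	percpu_t	*sc_wq_enqueued; /* per-CPU u_int: 1 while enqueued */
	struct work	 sc_wq_cookie;
};

/*
 * Defer work to the workqueue at most once per CPU.  The guard is
 * needed because the caller may run both in thread context and in
 * fast softint context on the same CPU, and workqueue_enqueue()
 * must not be called twice on the same struct work.
 */
static void
my_defer(struct my_softc *sc)
{
	u_int *enqueued = percpu_getref(sc->sc_wq_enqueued);

	if (*enqueued == 0) {
		*enqueued = 1;
		percpu_putref(sc->sc_wq_enqueued);
		workqueue_enqueue(sc->sc_wq, &sc->sc_wq_cookie, curcpu());
	} else
		percpu_putref(sc->sc_wq_enqueued);
}

/*
 * The work handler clears the flag first, so that an interrupt
 * arriving while it drains can legitimately enqueue again.
 */
static void
my_work(struct work *wk, void *arg)
{
	struct my_softc *sc = arg;
	u_int *enqueued = percpu_getref(sc->sc_wq_enqueued);

	*enqueued = 0;
	percpu_putref(sc->sc_wq_enqueued);
	/* ... drain queued transmit packets here ... */
}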
Modified files:

Index: src/sys/dev/pci/ixgbe/ix_txrx.c
diff -u src/sys/dev/pci/ixgbe/ix_txrx.c:1.33 src/sys/dev/pci/ixgbe/ix_txrx.c:1.34
--- src/sys/dev/pci/ixgbe/ix_txrx.c:1.33	Mon Feb 26 04:19:00 2018
+++ src/sys/dev/pci/ixgbe/ix_txrx.c	Fri Mar  2 10:19:20 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ix_txrx.c,v 1.33 2018/02/26 04:19:00 knakahara Exp $ */
+/* $NetBSD: ix_txrx.c,v 1.34 2018/03/02 10:19:20 knakahara Exp $ */
 
 /******************************************************************************
 
@@ -238,8 +238,26 @@ ixgbe_mq_start(struct ifnet *ifp, struct
 	if (IXGBE_TX_TRYLOCK(txr)) {
 		ixgbe_mq_start_locked(ifp, txr);
 		IXGBE_TX_UNLOCK(txr);
-	} else
-		softint_schedule(txr->txr_si);
+	} else {
+		if (adapter->txrx_use_workqueue) {
+			/*
+			 * This function itself is not called in interrupt
+			 * context, however it can be called in fast softint
+			 * context right after receiving forwarding packets.
+			 * So, the workqueue must be protected from being
+			 * enqueued twice when the machine handles both
+			 * spontaneous packets and forwarding packets.
+			 */
+			u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+			if (*enqueued == 0) {
+				*enqueued = 1;
+				percpu_putref(adapter->txr_wq_enqueued);
+				workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
+			} else
+				percpu_putref(adapter->txr_wq_enqueued);
+		} else
+			softint_schedule(txr->txr_si);
+	}
 
 	return (0);
 } /* ixgbe_mq_start */
@@ -291,7 +309,8 @@ ixgbe_mq_start_locked(struct ifnet *ifp,
 /************************************************************************
  * ixgbe_deferred_mq_start
  *
- *   Called from a taskqueue to drain queued transmit packets.
+ *   Called from a softint and workqueue (indirectly) to drain queued
+ *   transmit packets.
  ************************************************************************/
 void
 ixgbe_deferred_mq_start(void *arg)
@@ -307,6 +326,24 @@ ixgbe_deferred_mq_start(void *arg)
 } /* ixgbe_deferred_mq_start */
 
 /************************************************************************
+ * ixgbe_deferred_mq_start_work
+ *
+ *   Called from a workqueue to drain queued transmit packets.
+ ************************************************************************/
+void
+ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
+{
+	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
+	struct adapter *adapter = txr->adapter;
+	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+	*enqueued = 0;
+	percpu_putref(adapter->txr_wq_enqueued);
+
+	ixgbe_deferred_mq_start(txr);
+} /* ixgbe_deferred_mq_start_work */
+
+
+/************************************************************************
  * ixgbe_xmit
  *
  *   Maps the mbufs to tx descriptors, allowing the

Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.127 src/sys/dev/pci/ixgbe/ixgbe.c:1.128
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.127	Mon Feb 26 04:19:00 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.c	Fri Mar  2 10:19:20 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.127 2018/02/26 04:19:00 knakahara Exp $ */
+/* $NetBSD: ixgbe.c,v 1.128 2018/03/02 10:19:20 knakahara Exp $ */
 
 /******************************************************************************
 
@@ -260,6 +260,9 @@ static void	ixgbe_handle_msf(void *);
 static void	ixgbe_handle_mod(void *);
 static void	ixgbe_handle_phy(void *);
 
+/* Workqueue handler for deferred work */
+static void	ixgbe_handle_que_work(struct work *, void *);
+
 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
 
 /************************************************************************
@@ -315,6 +318,9 @@ static int ixgbe_flow_control = ixgbe_fc
 SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN,
     &ixgbe_flow_control, 0, "Default flow control used for all adapters");
 
+/* Whether packet processing uses a workqueue or a softint */
+static bool ixgbe_txrx_workqueue = false;
+
 /*
  * Smart speed setting, default to on
  * this only works as a compile option
@@ -395,10 +401,13 @@ static int (*ixgbe_ring_empty)(struct if
 #define IXGBE_MPSAFE		1
 #define IXGBE_CALLOUT_FLAGS	CALLOUT_MPSAFE
 #define IXGBE_SOFTINFT_FLAGS	SOFTINT_MPSAFE
+#define IXGBE_WORKQUEUE_FLAGS	WQ_PERCPU | WQ_MPSAFE
 #else
 #define IXGBE_CALLOUT_FLAGS	0
 #define IXGBE_SOFTINFT_FLAGS	0
+#define IXGBE_WORKQUEUE_FLAGS	WQ_PERCPU
 #endif
+#define IXGBE_WORKQUEUE_PRI	PRI_SOFTNET
 
 /************************************************************************
  * ixgbe_initialize_rss_mapping
@@ -2525,9 +2534,30 @@ ixgbe_msix_que(void *arg)
 		rxr->packets = 0;
 
 no_calc:
-	if (more)
-		softint_schedule(que->que_si);
-	else
+	if (more) {
+		if (adapter->txrx_use_workqueue) {
+			/*
+			 * adapter->que_wq is bound to each CPU instead of
+			 * each NIC queue to reduce workqueue kthreads. As we
+			 * should consider interrupt affinity in this
+			 * function, the workqueue kthread must be WQ_PERCPU.
+			 * If we created a WQ_PERCPU workqueue kthread for
+			 * each NIC queue, the number of created kthreads
+			 * would be (number of used NIC queues) * (number of
+			 * CPUs) = (number of CPUs) ^ 2 most often.
+			 *
+			 * Re-interrupts from the same NIC queue are avoided
+			 * by masking the queue's interrupt. And different
+			 * NIC queues' interrupts use different struct work
+			 * (que->wq_cookie). So, an "enqueued flag" to avoid
+			 * calling workqueue_enqueue() twice is not required.
+			 */
+			workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+			    curcpu());
+		} else {
+			softint_schedule(que->que_si);
+		}
+	} else
 		ixgbe_enable_queue(adapter, que->msix);
 
 	return 1;
@@ -3100,6 +3130,12 @@ ixgbe_add_device_sysctls(struct adapter
 	    CTL_EOL) != 0)
 		aprint_error_dev(dev, "could not create sysctl\n");
 
+	adapter->txrx_use_workqueue = ixgbe_txrx_workqueue;
+	if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
+	    CTLTYPE_BOOL, "txrx_workqueue", SYSCTL_DESCR("Use workqueue for packet processing"),
+	    NULL, 0, &adapter->txrx_use_workqueue, 0, CTL_CREATE, CTL_EOL) != 0)
+		aprint_error_dev(dev, "could not create sysctl\n");
+
 #ifdef IXGBE_DEBUG
 	/* testing sysctls (for all devices) */
 	if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
@@ -3232,6 +3268,12 @@ ixgbe_free_softint(struct adapter *adapt
 		if (que->que_si != NULL)
 			softint_disestablish(que->que_si);
 	}
+	if (adapter->txr_wq != NULL)
+		workqueue_destroy(adapter->txr_wq);
+	if (adapter->txr_wq_enqueued != NULL)
+		percpu_free(adapter->txr_wq_enqueued, sizeof(u_int));
+	if (adapter->que_wq != NULL)
+		workqueue_destroy(adapter->que_wq);
 
 	/* Drain the Link queue */
 	if (adapter->link_si != NULL) {
@@ -5800,9 +5842,18 @@ ixgbe_handle_que(void *context)
 		IXGBE_TX_UNLOCK(txr);
 	}
 
-	if (more)
-		softint_schedule(que->que_si);
-	else if (que->res != NULL) {
+	if (more) {
+		if (adapter->txrx_use_workqueue) {
+			/*
+			 * An "enqueued flag" is not required here.
+			 * See ixgbe_msix_que().
+			 */
+			workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+			    curcpu());
+		} else {
+			softint_schedule(que->que_si);
+		}
+	} else if (que->res != NULL) {
 		/* Re-enable this interrupt */
 		ixgbe_enable_queue(adapter, que->msix);
 	} else
@@ -5812,6 +5863,21 @@ ixgbe_handle_que(void *context)
 } /* ixgbe_handle_que */
 
 /************************************************************************
+ * ixgbe_handle_que_work
+ ************************************************************************/
+static void
+ixgbe_handle_que_work(struct work *wk, void *context)
+{
+	struct ix_queue *que = container_of(wk, struct ix_queue, wq_cookie);
+
+	/*
+	 * An "enqueued flag" is not required here.
+	 * See ixgbe_msix_que().
+	 */
+	ixgbe_handle_que(que);
+}
+
+/************************************************************************
  * ixgbe_allocate_legacy - Setup the Legacy or MSI Interrupt handler
  ************************************************************************/
 static int
@@ -5906,7 +5972,6 @@ alloc_retry:
 
 	return (0);
 } /* ixgbe_allocate_legacy */
 
-
 /************************************************************************
  * ixgbe_allocate_msix - Setup MSI-X Interrupt resources and handlers
  ************************************************************************/
@@ -5919,6 +5984,7 @@ ixgbe_allocate_msix(struct adapter *adap
 	pci_chipset_tag_t pc;
 	char		intrbuf[PCI_INTRSTR_LEN];
 	char		intr_xname[32];
+	char		wqname[MAXCOMLEN];
 	const char	*intrstr = NULL;
 	int		error, vector = 0;
 	int		cpu_id = 0;
@@ -6044,6 +6110,24 @@ ixgbe_allocate_msix(struct adapter *adap
 			goto err_out;
 		}
 	}
+	snprintf(wqname, sizeof(wqname), "%sdeferTx", device_xname(dev));
+	error = workqueue_create(&adapter->txr_wq, wqname,
+	    ixgbe_deferred_mq_start_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+	    IXGBE_WORKQUEUE_FLAGS);
+	if (error) {
+		aprint_error_dev(dev, "couldn't create workqueue for deferred Tx\n");
+		goto err_out;
+	}
+	adapter->txr_wq_enqueued = percpu_alloc(sizeof(u_int));
+
+	snprintf(wqname, sizeof(wqname), "%sTxRx", device_xname(dev));
+	error = workqueue_create(&adapter->que_wq, wqname,
+	    ixgbe_handle_que_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+	    IXGBE_WORKQUEUE_FLAGS);
+	if (error) {
+		aprint_error_dev(dev, "couldn't create workqueue for Tx/Rx\n");
+		goto err_out;
+	}
 
 	/* and Link */
 	cpu_id++;

Index: src/sys/dev/pci/ixgbe/ixgbe.h
diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.31 src/sys/dev/pci/ixgbe/ixgbe.h:1.32
--- src/sys/dev/pci/ixgbe/ixgbe.h:1.31	Mon Feb 26 04:19:00 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.h	Fri Mar  2 10:19:20 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.31 2018/02/26 04:19:00 knakahara Exp $ */
+/* $NetBSD: ixgbe.h,v 1.32 2018/03/02 10:19:20 knakahara Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
 
@@ -80,6 +80,7 @@
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/sockio.h>
+#include <sys/percpu.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
@@ -338,6 +339,8 @@ struct ix_queue {
 	kmutex_t	im_mtx;	/* lock for im_nest and this queue's EIMS/EIMC bit */
 	int		im_nest;
+
+	struct work	wq_cookie;
 };
 
 /*
@@ -373,6 +376,8 @@ struct tx_ring {
 	struct evcnt	no_desc_avail;
 	struct evcnt	total_packets;
 	struct evcnt	pcq_drops;
+
+	struct work	wq_cookie;
 };
 
 
@@ -499,6 +504,18 @@ struct adapter {
 
 	void		*phy_si;   /* PHY intr tasklet */
 
+	bool		txrx_use_workqueue;
+	struct workqueue *que_wq;	/* workqueue for ixgbe_handle_que_work() */
+	/*
+	 * que_wq's "enqueued flag" is not required,
+	 * because calling workqueue_enqueue() twice for
+	 * ixgbe_handle_que_work() is prevented by masking
+	 * the queue's interrupt by EIMC.
+	 * See also ixgbe_msix_que().
+	 */
+	struct workqueue *txr_wq;	/* workqueue for ixgbe_deferred_mq_start_work() */
+	percpu_t	*txr_wq_enqueued;
+
 	/*
 	 * Queues:
 	 *   This is the irq holder, it has
@@ -714,6 +731,7 @@ int  ixgbe_legacy_start_locked(struct if
 int  ixgbe_mq_start(struct ifnet *, struct mbuf *);
 int  ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
 void ixgbe_deferred_mq_start(void *);
+void ixgbe_deferred_mq_start_work(struct work *, void *);
 
 int  ixgbe_allocate_queues(struct adapter *);
 int  ixgbe_setup_transmit_structures(struct adapter *);
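As the comment in ixgbe_msix_que() above works out, creating one WQ_PERCPU
workqueue per NIC queue would spawn (number of used NIC queues) * (number
of CPUs) kthreads, most often (number of CPUs)^2: for example, 8 queues on
an 8-CPU machine would mean 64 kthreads, while a single per-device
WQ_PERCPU workqueue means just 8. A minimal attach/detach sketch of that
lifecycle, continuing the hypothetical my_softc/my_work from the sketch
near the top of this mail (names are illustrative, not part of the
commit):

#include <sys/param.h>		/* MAXCOMLEN, PRI_SOFTNET */
#include <sys/device.h>
#include <sys/intr.h>		/* IPL_NET */
#include <sys/percpu.h>
#include <sys/systm.h>		/* snprintf */
#include <sys/workqueue.h>

static int
my_attach_wq(struct my_softc *sc, device_t dev)
{
	char wqname[MAXCOMLEN];
	int error;

	/* One per-device, per-CPU workqueue, as in ixgbe_allocate_msix(). */
	snprintf(wqname, sizeof(wqname), "%sTxRx", device_xname(dev));
	error = workqueue_create(&sc->sc_wq, wqname, my_work, sc,
	    PRI_SOFTNET, IPL_NET, WQ_PERCPU | WQ_MPSAFE);
	if (error)
		return error;

	/* percpu_alloc() returns zeroed per-CPU storage for the flag. */
	sc->sc_wq_enqueued = percpu_alloc(sizeof(u_int));
	return 0;
}

static void
my_detach_wq(struct my_softc *sc)
{
	/* Mirrors the cleanup added to ixgbe_free_softint(). */
	if (sc->sc_wq != NULL)
		workqueue_destroy(sc->sc_wq);
	if (sc->sc_wq_enqueued != NULL)
		percpu_free(sc->sc_wq_enqueued, sizeof(u_int));
}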