Module Name:    src
Committed By:   martin
Date:           Tue Mar  6 11:12:41 UTC 2018

Modified Files:
        src/sys/dev/pci/ixgbe [netbsd-8]: ix_txrx.c ixgbe.c ixgbe.h ixv.c

Log Message:
Pull up following revision(s) (requested by knakahara in ticket #605):
        sys/dev/pci/ixgbe/ixgbe.h: revision 1.32
        sys/dev/pci/ixgbe/ixgbe.h: revision 1.33
        sys/dev/pci/ixgbe/ix_txrx.c: revision 1.34
        sys/dev/pci/ixgbe/ixgbe.c: revision 1.128
        sys/dev/pci/ixgbe/ixv.c: revision 1.83
        sys/dev/pci/ixgbe/ixv.c: revision 1.84

Add hw.ixvM.q[01].{interrupt_rate,[tr]xd_head,[tr]xd_tail} sysctls, as in ixg(4).

ixg(4) now supports workqueue poll mode, but it is not enabled by default yet
(that is, the default behavior is *not* changed).

Under high load near the wire rate, the turnaround time of address
update/delete operations such as "ifconfig ixg0 inet XXX" or
"ifconfig ixg0 delete" becomes very long. The main reason is CPU
starvation caused by ixg(4)'s softint poll mode. Having ixg(4) use
workqueue poll mode instead of softint poll mode fixes this problem.

This change may cause performance issues, so it is not enabled by
default yet. Despite those issues, if you want to use workqueue poll
mode, run "sysctl -w hw.ixgXX.txrx_workqueue=1" while there is no
traffic on the ixgXX interface.
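
For illustration only (not part of this commit): a minimal userland sketch
of flipping the new boolean sysctl with sysctlbyname(3) instead of
sysctl(8). The interface name "ixg0" and the program itself are assumed
here, and as noted above the knob should only be changed while the
interface carries no traffic.

	#include <sys/sysctl.h>

	#include <err.h>
	#include <stdbool.h>
	#include <stdio.h>

	int
	main(void)
	{
		bool on = true;

		/* Equivalent to: sysctl -w hw.ixg0.txrx_workqueue=1 */
		if (sysctlbyname("hw.ixg0.txrx_workqueue", NULL, NULL,
		    &on, sizeof(on)) == -1)
			err(1, "sysctlbyname(hw.ixg0.txrx_workqueue)");

		printf("ixg0: workqueue poll mode enabled\n");
		return 0;
	}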

ok by msaitoh@n.o.

ixv(4) also supports workqueue poll mode, but it is not enabled by default
yet, either.
ok by msaitoh@n.o.

Move the location of "struct work" in ixgbe.h to match where FreeBSD keeps
its "struct task". No functional change.
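
For reference, a condensed sketch of the new Tx deferral path taken from
the ix_txrx.c diff below (not the literal committed code; the helper name
ixgbe_defer_tx is made up): when the Tx lock is contended, the drain is
deferred either to the existing softint or, if txrx_use_workqueue is set,
to the per-CPU workqueue, with a per-CPU "enqueued" flag guarding against
enqueueing the same struct work twice.

	/* Hypothetical helper condensing the logic added to ixgbe_mq_start(). */
	static void
	ixgbe_defer_tx(struct adapter *adapter, struct tx_ring *txr)
	{
		if (!adapter->txrx_use_workqueue) {
			/* Old behavior: defer to the per-queue softint. */
			softint_schedule(txr->txr_si);
			return;
		}

		/*
		 * The per-CPU "enqueued" flag prevents enqueueing the same
		 * struct work twice when both spontaneous and forwarded
		 * packets reach this path on one CPU.
		 */
		u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
		if (*enqueued == 0) {
			*enqueued = 1;
			percpu_putref(adapter->txr_wq_enqueued);
			workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
		} else
			percpu_putref(adapter->txr_wq_enqueued);
	}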


To generate a diff of this commit:
cvs rdiff -u -r1.24.2.5 -r1.24.2.6 src/sys/dev/pci/ixgbe/ix_txrx.c
cvs rdiff -u -r1.88.2.11 -r1.88.2.12 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.24.6.4 -r1.24.6.5 src/sys/dev/pci/ixgbe/ixgbe.h
cvs rdiff -u -r1.56.2.8 -r1.56.2.9 src/sys/dev/pci/ixgbe/ixv.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/pci/ixgbe/ix_txrx.c
diff -u src/sys/dev/pci/ixgbe/ix_txrx.c:1.24.2.5 src/sys/dev/pci/ixgbe/ix_txrx.c:1.24.2.6
--- src/sys/dev/pci/ixgbe/ix_txrx.c:1.24.2.5	Thu Mar  1 19:02:15 2018
+++ src/sys/dev/pci/ixgbe/ix_txrx.c	Tue Mar  6 11:12:40 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ix_txrx.c,v 1.24.2.5 2018/03/01 19:02:15 martin Exp $ */
+/* $NetBSD: ix_txrx.c,v 1.24.2.6 2018/03/06 11:12:40 martin Exp $ */
 
 /******************************************************************************
 
@@ -238,8 +238,26 @@ ixgbe_mq_start(struct ifnet *ifp, struct
 	if (IXGBE_TX_TRYLOCK(txr)) {
 		ixgbe_mq_start_locked(ifp, txr);
 		IXGBE_TX_UNLOCK(txr);
-	} else
-		softint_schedule(txr->txr_si);
+	} else {
+		if (adapter->txrx_use_workqueue) {
+			/*
+			 * This function itself is not called in interrupt
+			 * context, however it can be called in fast softint
+			 * context right after receiving forwarding packets.
+			 * So, it is required to protect workqueue from twice
+			 * enqueuing when the machine uses both spontaneous
+			 * packets and forwarding packets.
+			 */
+			u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+			if (*enqueued == 0) {
+				*enqueued = 1;
+				percpu_putref(adapter->txr_wq_enqueued);
+				workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
+			} else
+				percpu_putref(adapter->txr_wq_enqueued);
+		} else
+			softint_schedule(txr->txr_si);
+	}
 
 	return (0);
 } /* ixgbe_mq_start */
@@ -291,7 +309,8 @@ ixgbe_mq_start_locked(struct ifnet *ifp,
 /************************************************************************
  * ixgbe_deferred_mq_start
  *
- *   Called from a taskqueue to drain queued transmit packets.
+ *   Called from a softint and workqueue (indirectly) to drain queued
+ *   transmit packets.
  ************************************************************************/
 void
 ixgbe_deferred_mq_start(void *arg)
@@ -307,6 +326,24 @@ ixgbe_deferred_mq_start(void *arg)
 } /* ixgbe_deferred_mq_start */
 
 /************************************************************************
+ * ixgbe_deferred_mq_start_work
+ *
+ *   Called from a workqueue to drain queued transmit packets.
+ ************************************************************************/
+void
+ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
+{
+	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
+	struct adapter *adapter = txr->adapter;
+	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+	*enqueued = 0;
+	percpu_putref(adapter->txr_wq_enqueued);
+
+	ixgbe_deferred_mq_start(txr);
+} /* ixgbe_deferred_mq_start_work */
+
+
+/************************************************************************
  * ixgbe_xmit
  *
  *   Maps the mbufs to tx descriptors, allowing the

Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.88.2.11 src/sys/dev/pci/ixgbe/ixgbe.c:1.88.2.12
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.88.2.11	Thu Mar  1 19:02:15 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.c	Tue Mar  6 11:12:41 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.88.2.11 2018/03/01 19:02:15 martin Exp $ */
+/* $NetBSD: ixgbe.c,v 1.88.2.12 2018/03/06 11:12:41 martin Exp $ */
 
 /******************************************************************************
 
@@ -260,6 +260,9 @@ static void	ixgbe_handle_msf(void *);
 static void	ixgbe_handle_mod(void *);
 static void	ixgbe_handle_phy(void *);
 
+/* Workqueue handler for deferred work */
+static void	ixgbe_handle_que_work(struct work *, void *);
+
 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
 
 /************************************************************************
@@ -315,6 +318,9 @@ static int ixgbe_flow_control = ixgbe_fc
 SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN,
     &ixgbe_flow_control, 0, "Default flow control used for all adapters");
 
+/* Whether packet processing uses workqueue or softint */
+static bool ixgbe_txrx_workqueue = false;
+
 /*
  * Smart speed setting, default to on
  * this only works as a compile option
@@ -395,10 +401,13 @@ static int (*ixgbe_ring_empty)(struct if
 #define IXGBE_MPSAFE		1
 #define IXGBE_CALLOUT_FLAGS	CALLOUT_MPSAFE
 #define IXGBE_SOFTINFT_FLAGS	SOFTINT_MPSAFE
+#define IXGBE_WORKQUEUE_FLAGS	WQ_PERCPU | WQ_MPSAFE
 #else
 #define IXGBE_CALLOUT_FLAGS	0
 #define IXGBE_SOFTINFT_FLAGS	0
+#define IXGBE_WORKQUEUE_FLAGS	WQ_PERCPU
 #endif
+#define IXGBE_WORKQUEUE_PRI PRI_SOFTNET
 
 /************************************************************************
  * ixgbe_initialize_rss_mapping
@@ -2525,9 +2534,30 @@ ixgbe_msix_que(void *arg)
 	rxr->packets = 0;
 
 no_calc:
-	if (more)
-		softint_schedule(que->que_si);
-	else
+	if (more) {
+		if (adapter->txrx_use_workqueue) {
+			/*
+			 * adapter->que_wq is bound to each CPU instead of
+			 * each NIC queue to reduce the number of workqueue
+			 * kthreads. Because interrupt affinity must be taken
+			 * into account in this function, the workqueue
+			 * kthread must be WQ_PERCPU. If a WQ_PERCPU
+			 * workqueue kthread were created for each NIC queue,
+			 * the total would be (number of used NIC queues) *
+			 * (number of CPUs) = (number of CPUs) ^ 2 most often.
+			 *
+			 * Re-entry from the same NIC queue's interrupt is
+			 * avoided by masking the queue's interrupt, and
+			 * different NIC queues' interrupts use different
+			 * struct work (que->wq_cookie). So an "enqueued flag"
+			 * to avoid a second workqueue_enqueue() is not required.
+			 */
+			workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+			    curcpu());
+		} else {
+			softint_schedule(que->que_si);
+		}
+	} else
 		ixgbe_enable_queue(adapter, que->msix);
 
 	return 1;
@@ -3100,6 +3130,12 @@ ixgbe_add_device_sysctls(struct adapter 
 	    CTL_EOL) != 0)
 		aprint_error_dev(dev, "could not create sysctl\n");
 
+	adapter->txrx_use_workqueue = ixgbe_txrx_workqueue;
+	if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
+	    CTLTYPE_BOOL, "txrx_workqueue", SYSCTL_DESCR("Use workqueue for packet processing"),
+	    NULL, 0, &adapter->txrx_use_workqueue, 0, CTL_CREATE, CTL_EOL) != 0)
+		aprint_error_dev(dev, "could not create sysctl\n");
+
 #ifdef IXGBE_DEBUG
 	/* testing sysctls (for all devices) */
 	if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
@@ -3232,6 +3268,12 @@ ixgbe_free_softint(struct adapter *adapt
 		if (que->que_si != NULL)
 			softint_disestablish(que->que_si);
 	}
+	if (adapter->txr_wq != NULL)
+		workqueue_destroy(adapter->txr_wq);
+	if (adapter->txr_wq_enqueued != NULL)
+		percpu_free(adapter->txr_wq_enqueued, sizeof(u_int));
+	if (adapter->que_wq != NULL)
+		workqueue_destroy(adapter->que_wq);
 
 	/* Drain the Link queue */
 	if (adapter->link_si != NULL) {
@@ -5800,9 +5842,18 @@ ixgbe_handle_que(void *context)
 		IXGBE_TX_UNLOCK(txr);
 	}
 
-	if (more)
-		softint_schedule(que->que_si);
-	else if (que->res != NULL) {
+	if (more) {
+		if (adapter->txrx_use_workqueue) {
+			/*
+			 * "enqueued flag" is not required here.
+			 * See ixgbe_msix_que().
+			 */
+			workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+			    curcpu());
+		} else {
+			softint_schedule(que->que_si);
+		}
+	} else if (que->res != NULL) {
 		/* Re-enable this interrupt */
 		ixgbe_enable_queue(adapter, que->msix);
 	} else
@@ -5812,6 +5863,21 @@ ixgbe_handle_que(void *context)
 } /* ixgbe_handle_que */
 
 /************************************************************************
+ * ixgbe_handle_que_work
+ ************************************************************************/
+static void
+ixgbe_handle_que_work(struct work *wk, void *context)
+{
+	struct ix_queue *que = container_of(wk, struct ix_queue, wq_cookie);
+
+	/*
+	 * "enqueued flag" is not required here.
+	 * See ixgbe_msix_que().
+	 */
+	ixgbe_handle_que(que);
+}
+
+/************************************************************************
  * ixgbe_allocate_legacy - Setup the Legacy or MSI Interrupt handler
  ************************************************************************/
 static int
@@ -5906,7 +5972,6 @@ alloc_retry:
 	return (0);
 } /* ixgbe_allocate_legacy */
 
-
 /************************************************************************
  * ixgbe_allocate_msix - Setup MSI-X Interrupt resources and handlers
  ************************************************************************/
@@ -5919,6 +5984,7 @@ ixgbe_allocate_msix(struct adapter *adap
 	pci_chipset_tag_t pc;
 	char		intrbuf[PCI_INTRSTR_LEN];
 	char		intr_xname[32];
+	char		wqname[MAXCOMLEN];
 	const char	*intrstr = NULL;
 	int 		error, vector = 0;
 	int		cpu_id = 0;
@@ -6044,6 +6110,24 @@ ixgbe_allocate_msix(struct adapter *adap
 			goto err_out;
 		}
 	}
+	snprintf(wqname, sizeof(wqname), "%sdeferTx", device_xname(dev));
+	error = workqueue_create(&adapter->txr_wq, wqname,
+	    ixgbe_deferred_mq_start_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+	    IXGBE_WORKQUEUE_FLAGS);
+	if (error) {
+		aprint_error_dev(dev, "couldn't create workqueue for deferred Tx\n");
+		goto err_out;
+	}
+	adapter->txr_wq_enqueued = percpu_alloc(sizeof(u_int));
+
+	snprintf(wqname, sizeof(wqname), "%sTxRx", device_xname(dev));
+	error = workqueue_create(&adapter->que_wq, wqname,
+	    ixgbe_handle_que_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+	    IXGBE_WORKQUEUE_FLAGS);
+	if (error) {
+		aprint_error_dev(dev, "couldn't create workqueue for Tx/Rx\n");
+		goto err_out;
+	}
 
 	/* and Link */
 	cpu_id++;

Index: src/sys/dev/pci/ixgbe/ixgbe.h
diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.24.6.4 src/sys/dev/pci/ixgbe/ixgbe.h:1.24.6.5
--- src/sys/dev/pci/ixgbe/ixgbe.h:1.24.6.4	Thu Mar  1 19:02:15 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.h	Tue Mar  6 11:12:40 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.24.6.4 2018/03/01 19:02:15 martin Exp $ */
+/* $NetBSD: ixgbe.h,v 1.24.6.5 2018/03/06 11:12:40 martin Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -80,6 +80,7 @@
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/sockio.h>
+#include <sys/percpu.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
@@ -331,6 +332,7 @@ struct ix_queue {
 	int              busy;
 	struct tx_ring   *txr;
 	struct rx_ring   *rxr;
+	struct work      wq_cookie;
 	void             *que_si;
 	struct evcnt     irqs;
 	char             namebuf[32];
@@ -360,6 +362,7 @@ struct tx_ring {
 	ixgbe_dma_tag_t		*txtag;
 	char			mtx_name[16];
 	pcq_t			*txr_interq;
+	struct work		wq_cookie;
 	void			*txr_si;
 
 	/* Flow Director */
@@ -499,6 +502,18 @@ struct adapter {
 
 	void			*phy_si;   /* PHY intr tasklet */
 
+	bool			txrx_use_workqueue;
+	struct workqueue	*que_wq;    /* workqueue for ixgbe_handle_que_work() */
+					    /*
+					     * que_wq's "enqueued flag" is not required,
+					     * because twice workqueue_enqueue() for
+					     * ixgbe_handle_que_work() is avoided by masking
+					     * the queue's interrupt by EIMC.
+					     * See also ixgbe_msix_que().
+					     */
+	struct workqueue	*txr_wq;    /* workqueue for ixgbe_deferred_mq_start_work() */
+	percpu_t		*txr_wq_enqueued;
+
 	/*
 	 * Queues:
 	 *   This is the irq holder, it has
@@ -714,6 +729,7 @@ int  ixgbe_legacy_start_locked(struct if
 int  ixgbe_mq_start(struct ifnet *, struct mbuf *);
 int  ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
 void ixgbe_deferred_mq_start(void *);
+void ixgbe_deferred_mq_start_work(struct work *, void *);
 
 int  ixgbe_allocate_queues(struct adapter *);
 int  ixgbe_setup_transmit_structures(struct adapter *);

Index: src/sys/dev/pci/ixgbe/ixv.c
diff -u src/sys/dev/pci/ixgbe/ixv.c:1.56.2.8 src/sys/dev/pci/ixgbe/ixv.c:1.56.2.9
--- src/sys/dev/pci/ixgbe/ixv.c:1.56.2.8	Thu Mar  1 19:02:15 2018
+++ src/sys/dev/pci/ixgbe/ixv.c	Tue Mar  6 11:12:41 2018
@@ -1,4 +1,4 @@
-/*$NetBSD: ixv.c,v 1.56.2.8 2018/03/01 19:02:15 martin Exp $*/
+/*$NetBSD: ixv.c,v 1.56.2.9 2018/03/06 11:12:41 martin Exp $*/
 
 /******************************************************************************
 
@@ -131,8 +131,16 @@ static void	ixv_save_stats(struct adapte
 static void	ixv_init_stats(struct adapter *);
 static void	ixv_update_stats(struct adapter *);
 static void	ixv_add_stats_sysctls(struct adapter *);
+
+
+/* Sysctl handlers */
 static void	ixv_set_sysctl_value(struct adapter *, const char *,
 		    const char *, int *, int);
+static int      ixv_sysctl_interrupt_rate_handler(SYSCTLFN_PROTO);
+static int      ixv_sysctl_rdh_handler(SYSCTLFN_PROTO);
+static int      ixv_sysctl_rdt_handler(SYSCTLFN_PROTO);
+static int      ixv_sysctl_tdt_handler(SYSCTLFN_PROTO);
+static int      ixv_sysctl_tdh_handler(SYSCTLFN_PROTO);
 
 /* The MSI-X Interrupt handlers */
 static int	ixv_msix_que(void *);
@@ -142,6 +150,9 @@ static int	ixv_msix_mbx(void *);
 static void	ixv_handle_que(void *);
 static void     ixv_handle_link(void *);
 
+/* Workqueue handler for deferred work */
+static void	ixv_handle_que_work(struct work *, void *);
+
 const struct sysctlnode *ixv_sysctl_instance(struct adapter *);
 static ixgbe_vendor_info_t *ixv_lookup(const struct pci_attach_args *);
 
@@ -181,6 +192,9 @@ TUNABLE_INT("hw.ixv.num_queues", &ixv_nu
 static bool ixv_enable_aim = false;
 TUNABLE_INT("hw.ixv.enable_aim", &ixv_enable_aim);
 
+static int ixv_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
+TUNABLE_INT("hw.ixv.max_interrupt_rate", &ixv_max_interrupt_rate);
+
 /* How many packets rxeof tries to clean at a time */
 static int ixv_rx_process_limit = 256;
 TUNABLE_INT("hw.ixv.rx_process_limit", &ixv_rx_process_limit);
@@ -189,6 +203,9 @@ TUNABLE_INT("hw.ixv.rx_process_limit", &
 static int ixv_tx_process_limit = 256;
 TUNABLE_INT("hw.ixv.tx_process_limit", &ixv_tx_process_limit);
 
+/* Whether packet processing uses workqueue or softint */
+static bool ixv_txrx_workqueue = false;
+
 /*
  * Number of TX descriptors per ring,
  * setting higher than RX as this seems
@@ -216,10 +233,13 @@ static u32 ixv_shadow_vfta[IXGBE_VFTA_SI
 #define IXGBE_MPSAFE		1
 #define IXGBE_CALLOUT_FLAGS	CALLOUT_MPSAFE
 #define IXGBE_SOFTINFT_FLAGS	SOFTINT_MPSAFE
+#define IXGBE_WORKQUEUE_FLAGS	WQ_PERCPU | WQ_MPSAFE
 #else
 #define IXGBE_CALLOUT_FLAGS	0
 #define IXGBE_SOFTINFT_FLAGS	0
+#define IXGBE_WORKQUEUE_FLAGS	WQ_PERCPU
 #endif
+#define IXGBE_WORKQUEUE_PRI PRI_SOFTNET
 
 #if 0
 static int (*ixv_start_locked)(struct ifnet *, struct tx_ring *);
@@ -504,6 +524,8 @@ ixv_attach(device_t parent, device_t dev
 	/* hw.ix defaults init */
 	adapter->enable_aim = ixv_enable_aim;
 
+	adapter->txrx_use_workqueue = ixv_txrx_workqueue;
+
 	error = ixv_allocate_msix(adapter, pa);
 	if (error) {
 		device_printf(dev, "ixv_allocate_msix() failed!\n");
@@ -593,6 +615,12 @@ ixv_detach(device_t dev, int flags)
 			softint_disestablish(txr->txr_si);
 		softint_disestablish(que->que_si);
 	}
+	if (adapter->txr_wq != NULL)
+		workqueue_destroy(adapter->txr_wq);
+	if (adapter->txr_wq_enqueued != NULL)
+		percpu_free(adapter->txr_wq_enqueued, sizeof(u_int));
+	if (adapter->que_wq != NULL)
+		workqueue_destroy(adapter->que_wq);
 
 	/* Drain the Mailbox(link) queue */
 	softint_disestablish(adapter->link_si);
@@ -1792,6 +1820,86 @@ ixv_initialize_receive_units(struct adap
 } /* ixv_initialize_receive_units */
 
 /************************************************************************
+ * ixv_sysctl_tdh_handler - Transmit Descriptor Head handler function
+ *
+ *   Retrieves the TDH value from the hardware
+ ************************************************************************/
+static int 
+ixv_sysctl_tdh_handler(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct tx_ring *txr = (struct tx_ring *)node.sysctl_data;
+	uint32_t val;
+
+	if (!txr)
+		return (0);
+
+	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_VFTDH(txr->me));
+	node.sysctl_data = &val;
+	return sysctl_lookup(SYSCTLFN_CALL(&node));
+} /* ixv_sysctl_tdh_handler */
+
+/************************************************************************
+ * ixv_sysctl_tdt_handler - Transmit Descriptor Tail handler function
+ *
+ *   Retrieves the TDT value from the hardware
+ ************************************************************************/
+static int 
+ixv_sysctl_tdt_handler(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct tx_ring *txr = (struct tx_ring *)node.sysctl_data;
+	uint32_t val;
+
+	if (!txr)
+		return (0);
+
+	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_VFTDT(txr->me));
+	node.sysctl_data = &val;
+	return sysctl_lookup(SYSCTLFN_CALL(&node));
+} /* ixv_sysctl_tdt_handler */
+
+/************************************************************************
+ * ixv_sysctl_rdh_handler - Receive Descriptor Head handler function
+ *
+ *   Retrieves the RDH value from the hardware
+ ************************************************************************/
+static int 
+ixv_sysctl_rdh_handler(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct rx_ring *rxr = (struct rx_ring *)node.sysctl_data;
+	uint32_t val;
+
+	if (!rxr)
+		return (0);
+
+	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_VFRDH(rxr->me));
+	node.sysctl_data = &val;
+	return sysctl_lookup(SYSCTLFN_CALL(&node));
+} /* ixv_sysctl_rdh_handler */
+
+/************************************************************************
+ * ixv_sysctl_rdt_handler - Receive Descriptor Tail handler function
+ *
+ *   Retrieves the RDT value from the hardware
+ ************************************************************************/
+static int 
+ixv_sysctl_rdt_handler(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct rx_ring *rxr = (struct rx_ring *)node.sysctl_data;
+	uint32_t val;
+
+	if (!rxr)
+		return (0);
+
+	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_VFRDT(rxr->me));
+	node.sysctl_data = &val;
+	return sysctl_lookup(SYSCTLFN_CALL(&node));
+} /* ixv_sysctl_rdt_handler */
+
+/************************************************************************
  * ixv_setup_vlan_support
  ************************************************************************/
 static void
@@ -2110,6 +2218,55 @@ ixv_update_stats(struct adapter *adapter
 	 */
 } /* ixv_update_stats */
 
+/************************************************************************
+ * ixv_sysctl_interrupt_rate_handler
+ ************************************************************************/
+static int
+ixv_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct ix_queue *que = (struct ix_queue *)node.sysctl_data;
+	struct adapter  *adapter = que->adapter;
+	uint32_t reg, usec, rate;
+	int error;
+
+	if (que == NULL)
+		return 0;
+	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_VTEITR(que->msix));
+	usec = ((reg & 0x0FF8) >> 3);
+	if (usec > 0)
+		rate = 500000 / usec;
+	else
+		rate = 0;
+	node.sysctl_data = &rate;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error || newp == NULL)
+		return error;
+	reg &= ~0xfff; /* default, no limitation */
+	if (rate > 0 && rate < 500000) {
+		if (rate < 1000)
+			rate = 1000;
+		reg |= ((4000000/rate) & 0xff8);
+		/*
+		 * When RSC is used, ITR interval must be larger than
+		 * RSC_DELAY. Currently, we use 2us for RSC_DELAY.
+		 * The minimum value is always greater than 2us on 100M
+		 * (and 10M?(not documented)), but it's not on 1G and higher.
+		 */
+		if ((adapter->link_speed != IXGBE_LINK_SPEED_100_FULL)
+		    && (adapter->link_speed != IXGBE_LINK_SPEED_10_FULL)) {
+			if ((adapter->num_queues > 1)
+			    && (reg < IXGBE_MIN_RSC_EITR_10G1G))
+				return EINVAL;
+		}
+		ixv_max_interrupt_rate = rate;
+	} else
+		ixv_max_interrupt_rate = 0;
+	ixv_eitr_write(que, reg);
+
+	return (0);
+} /* ixv_sysctl_interrupt_rate_handler */
+
 const struct sysctlnode *
 ixv_sysctl_instance(struct adapter *adapter)
 {
@@ -2159,6 +2316,12 @@ ixv_add_device_sysctls(struct adapter *a
 	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
 	    NULL, 0, &adapter->enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
 		aprint_error_dev(dev, "could not create sysctl\n");
+
+	if (sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_READWRITE, CTLTYPE_BOOL,
+	    "txrx_workqueue", SYSCTL_DESCR("Use workqueue for packet processing"),
+		NULL, 0, &adapter->txrx_use_workqueue, 0, CTL_CREATE, CTL_EOL) != 0)
+		aprint_error_dev(dev, "could not create sysctl\n");
 }
 
 /************************************************************************
@@ -2172,7 +2335,7 @@ ixv_add_stats_sysctls(struct adapter *ad
 	struct rx_ring          *rxr = adapter->rx_rings;
 	struct ixgbevf_hw_stats *stats = &adapter->stats.vf;
 	struct ixgbe_hw *hw = &adapter->hw;
-	const struct sysctlnode *rnode;
+	const struct sysctlnode *rnode, *cnode;
 	struct sysctllog **log = &adapter->sysctllog;
 	const char *xname = device_xname(dev);
 
@@ -2220,35 +2383,36 @@ ixv_add_stats_sysctls(struct adapter *ad
 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
 			break;
 
-#if 0 /* not yet */
 		if (sysctl_createv(log, 0, &rnode, &cnode,
 		    CTLFLAG_READWRITE, CTLTYPE_INT,
 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
-		    ixgbe_sysctl_interrupt_rate_handler, 0,
+		    ixv_sysctl_interrupt_rate_handler, 0,
 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
 			break;
 
+#if 0
 		if (sysctl_createv(log, 0, &rnode, &cnode,
 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
 			NULL, 0, &(adapter->queues[i].irqs),
 		    0, CTL_CREATE, CTL_EOL) != 0)
 			break;
+#endif
 
 		if (sysctl_createv(log, 0, &rnode, &cnode,
 		    CTLFLAG_READONLY, CTLTYPE_INT,
 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
-		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
+		    ixv_sysctl_tdh_handler, 0, (void *)txr,
 		    0, CTL_CREATE, CTL_EOL) != 0)
 			break;
 
 		if (sysctl_createv(log, 0, &rnode, &cnode,
 		    CTLFLAG_READONLY, CTLTYPE_INT,
 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
-		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
+		    ixv_sysctl_tdt_handler, 0, (void *)txr,
 		    0, CTL_CREATE, CTL_EOL) != 0)
 			break;
-#endif
+
 		evcnt_attach_dynamic(&adapter->queues[i].irqs, EVCNT_TYPE_INTR,
 		    NULL, adapter->queues[i].evnamebuf, "IRQs on queue");
 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
@@ -2269,12 +2433,11 @@ ixv_add_stats_sysctls(struct adapter *ad
 		struct lro_ctrl *lro = &rxr->lro;
 #endif /* LRO */
 
-#if 0 /* not yet */
 		if (sysctl_createv(log, 0, &rnode, &cnode,
 		    CTLFLAG_READONLY,
 		    CTLTYPE_INT,
 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
-		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
+		    ixv_sysctl_rdh_handler, 0, (void *)rxr, 0,
 		    CTL_CREATE, CTL_EOL) != 0)
 			break;
 
@@ -2282,10 +2445,9 @@ ixv_add_stats_sysctls(struct adapter *ad
 		    CTLFLAG_READONLY,
 		    CTLTYPE_INT,
 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
-		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
+		    ixv_sysctl_rdt_handler, 0, (void *)rxr, 0,
 		    CTL_CREATE, CTL_EOL) != 0)
 			break;
-#endif
 
 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
@@ -2626,7 +2788,6 @@ ixv_init(struct ifnet *ifp)
 	return 0;
 } /* ixv_init */
 
-
 /************************************************************************
  * ixv_handle_que
  ************************************************************************/
@@ -2656,7 +2817,15 @@ ixv_handle_que(void *context)
 		IXGBE_TX_UNLOCK(txr);
 		if (more) {
 			adapter->req.ev_count++;
-			softint_schedule(que->que_si);
+			if (adapter->txrx_use_workqueue) {
+				/*
+				 * "enqueued flag" is not required here
+				 * the same as ixg(4). See ixgbe_msix_que().
+				 */
+				workqueue_enqueue(adapter->que_wq,
+				    &que->wq_cookie, curcpu());
+			} else
+				  softint_schedule(que->que_si);
 			return;
 		}
 	}
@@ -2668,6 +2837,21 @@ ixv_handle_que(void *context)
 } /* ixv_handle_que */
 
 /************************************************************************
+ * ixv_handle_que_work
+ ************************************************************************/
+static void
+ixv_handle_que_work(struct work *wk, void *context)
+{
+	struct ix_queue *que = container_of(wk, struct ix_queue, wq_cookie);
+
+	/*
+	 * "enqueued flag" is not required here the same as ixg(4).
+	 * See ixgbe_msix_que().
+	 */
+	ixv_handle_que(que);
+}
+
+/************************************************************************
  * ixv_allocate_msix - Setup MSI-X Interrupt resources and handlers
  ************************************************************************/
 static int
@@ -2680,6 +2864,7 @@ ixv_allocate_msix(struct adapter *adapte
 	pci_chipset_tag_t pc;
 	pcitag_t	tag;
 	char		intrbuf[PCI_INTRSTR_LEN];
+	char		wqname[MAXCOMLEN];
 	char		intr_xname[32];
 	const char	*intrstr = NULL;
 	kcpuset_t	*affinity;
@@ -2748,6 +2933,23 @@ ixv_allocate_msix(struct adapter *adapte
 			    "could not establish software interrupt\n"); 
 		}
 	}
+	snprintf(wqname, sizeof(wqname), "%sdeferTx", device_xname(dev));
+	error = workqueue_create(&adapter->txr_wq, wqname,
+	    ixgbe_deferred_mq_start_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+	    IXGBE_WORKQUEUE_FLAGS);
+	if (error) {
+		aprint_error_dev(dev, "couldn't create workqueue for deferred Tx\n");
+	}
+	adapter->txr_wq_enqueued = percpu_alloc(sizeof(u_int));
+
+	snprintf(wqname, sizeof(wqname), "%sTxRx", device_xname(dev));
+	error = workqueue_create(&adapter->que_wq, wqname,
+	    ixv_handle_que_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+	    IXGBE_WORKQUEUE_FLAGS);
+	if (error) {
+		aprint_error_dev(dev,
+		    "couldn't create workqueue\n");
+	}
 
 	/* and Mailbox */
 	cpu_id++;
