Module Name:    src
Committed By:   jdolecek
Date:           Wed Apr  8 21:51:43 UTC 2020

Modified Files:
        src/sys/dev/pci: if_wm.c

Log Message:
Always writing the checksum offload context descriptor makes the hw
do extra processing, so avoid doing that whenever possible.
On my computer with an I219 this results in about a 2% speedup in Tx performance.

Change adopted from FreeBSD.

XXX should also be done for the multiqueue case, but I don't have hw to test it


To generate a diff of this commit:
cvs rdiff -u -r1.670 -r1.671 src/sys/dev/pci/if_wm.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/pci/if_wm.c
diff -u src/sys/dev/pci/if_wm.c:1.670 src/sys/dev/pci/if_wm.c:1.671
--- src/sys/dev/pci/if_wm.c:1.670	Sat Mar 21 16:47:05 2020
+++ src/sys/dev/pci/if_wm.c	Wed Apr  8 21:51:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_wm.c,v 1.670 2020/03/21 16:47:05 thorpej Exp $	*/
+/*	$NetBSD: if_wm.c,v 1.671 2020/04/08 21:51:42 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 2001, 2002, 2003, 2004 Wasabi Systems, Inc.
@@ -82,7 +82,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_wm.c,v 1.670 2020/03/21 16:47:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_wm.c,v 1.671 2020/04/08 21:51:42 jdolecek Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_net_mpsafe.h"
@@ -379,6 +379,12 @@ struct wm_txqueue {
 	bool txq_sending;
 	time_t txq_lastsent;
 
+	/* Checksum flags used for previous packet */
+	uint32_t 	txq_last_hw_cmd;
+	uint8_t 	txq_last_hw_fields;
+	uint16_t	txq_last_hw_ipcs;
+	uint16_t	txq_last_hw_tucs;
+
 	uint32_t txq_packets;		/* for AIM */
 	uint32_t txq_bytes;		/* for AIM */
 #ifdef WM_EVENT_COUNTERS
@@ -403,6 +409,7 @@ struct wm_txqueue {
 	WM_Q_EVCNT_DEFINE(txq, toomanyseg)  /* Pkt dropped(toomany DMA segs) */
 	WM_Q_EVCNT_DEFINE(txq, defrag)	    /* m_defrag() */
 	WM_Q_EVCNT_DEFINE(txq, underrun)    /* Tx underrun */
+	WM_Q_EVCNT_DEFINE(txq, skipcontext) /* Tx skip wring cksum context */
 
 	char txq_txseg_evcnt_names[WM_NTXSEGS][sizeof("txqXXtxsegXXX")];
 	struct evcnt txq_ev_txseg[WM_NTXSEGS]; /* Tx packets w/ N segments */
@@ -6947,6 +6954,7 @@ wm_alloc_txrx_queues(struct wm_softc *sc
 		WM_Q_MISC_EVCNT_ATTACH(txq, toomanyseg, txq, i, xname);
 		WM_Q_MISC_EVCNT_ATTACH(txq, defrag, txq, i, xname);
 		WM_Q_MISC_EVCNT_ATTACH(txq, underrun, txq, i, xname);
+		WM_Q_MISC_EVCNT_ATTACH(txq, skipcontext, txq, i, xname);
 #endif /* WM_EVENT_COUNTERS */
 
 		tx_done++;
@@ -7079,6 +7087,7 @@ wm_free_txrx_queues(struct wm_softc *sc)
 		WM_Q_EVCNT_DETACH(txq, toomanyseg, txq, i);
 		WM_Q_EVCNT_DETACH(txq, defrag, txq, i);
 		WM_Q_EVCNT_DETACH(txq, underrun, txq, i);
+		WM_Q_EVCNT_DETACH(txq, skipcontext, txq, i);
 #endif /* WM_EVENT_COUNTERS */
 
 		/* Drain txq_interq */
@@ -7394,6 +7403,9 @@ wm_tx_offload(struct wm_softc *sc, struc
 
 	default:
 		/* Don't support this protocol or encapsulation. */
+ 		txq->txq_last_hw_cmd = txq->last_hw_fields = 0;
+ 		txq->txq_last_hw_ipcs = 0;
+ 		txq->txq_last_hw_tucs = 0;
 		*fieldsp = 0;
 		*cmdp = 0;
 		return 0;
@@ -7533,13 +7545,47 @@ wm_tx_offload(struct wm_softc *sc, struc
 		    WTX_TCPIP_TUCSE(0) /* Rest of packet */;
 	}
 
+	*cmdp = cmd;
+	*fieldsp = fields;
+
 	/*
 	 * We don't have to write context descriptor for every packet
 	 * except for 82574. For 82574, we must write context descriptor
 	 * for every packet when we use two descriptor queues.
-	 * It would be overhead to write context descriptor for every packet,
-	 * however it does not cause problems.
-	 */
+	 *
+	 * The 82574L can only remember the *last* context used
+	 * regardless of the queue that it was used for.  We cannot reuse
+	 * contexts on this hardware platform and must generate a new
+	 * context every time.  82574L hardware spec, section 7.2.6,
+	 * second note.
+	 *
+	 * Setting up a new checksum offload context for every
+	 * frame takes a lot of processing time for the hardware.
+	 * This also reduces performance a lot for small sized
+	 * frames, so avoid it if the driver can use a previously
+	 * configured checksum offload context.
+	 * For TSO, in theory we can use the same TSO context if and only if
+	 * the frame is the same type (IP/TCP) and has the same MSS. However,
+	 * checking whether a frame has the same IP/TCP structure is
+	 * hard, so just ignore that and always reestablish a
+	 * new TSO context.
+  	 */
+	KASSERT(!wm_is_using_multiqueue(sc));
+	if ((m0->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) == 0) {
+		if (txq->txq_last_hw_cmd == cmd &&
+		    txq->txq_last_hw_fields == fields &&
+		    txq->txq_last_hw_ipcs == (ipcs & 0xffff) &&
+		    txq->txq_last_hw_tucs == (tucs & 0xffff)) {
+			WM_Q_EVCNT_INCR(txq, skipcontext);
+			return 0;
+		}
+	}
+
+ 	txq->txq_last_hw_cmd = cmd;
+ 	txq->txq_last_hw_fields = fields;
+ 	txq->txq_last_hw_ipcs = (ipcs & 0xffff);
+	txq->txq_last_hw_tucs = (tucs & 0xffff);
+
 	/* Fill in the context descriptor. */
 	t = (struct livengood_tcpip_ctxdesc *)
 	    &txq->txq_descs[txq->txq_next];
@@ -7552,9 +7598,6 @@ wm_tx_offload(struct wm_softc *sc, struc
 	txq->txq_next = WM_NEXTTX(txq, txq->txq_next);
 	txs->txs_ndesc++;
 
-	*cmdp = cmd;
-	*fieldsp = fields;
-
 	return 0;
 }
 
@@ -7839,6 +7882,8 @@ retry:
 				continue;
 			}
 		} else {
+ 			txq->txq_last_hw_cmd = txq->last_hw_fields = 0;
+ 			txq->txq_last_hw_ipcs = txq->last_hw_tucs = 0;
 			cksumcmd = 0;
 			cksumfields = 0;
 		}

Reply via email to