Module Name:    src
Committed By:   jmcneill
Date:           Wed Apr 19 00:20:02 UTC 2017

Modified Files:
        src/sys/dev/ic: rtl8169.c rtl81x9reg.h rtl81x9var.h
        src/sys/dev/pci: if_re_pci.c

Log Message:
Performance improvements for PCIe and 8168 based devices:
 - When using the countdown timer for interrupt moderation on PCIe devices,
   use a timer rate value based on a 125 MHz PCIe reference clock instead of
   33 MHz.
 - For 8168 based devices, ditch the countdown timer and instead use the
   (undocumented) hardware interrupt moderation feature.
 - Support TSOv4 on 8168D and later devices.


To generate a diff of this commit:
cvs rdiff -u -r1.149 -r1.150 src/sys/dev/ic/rtl8169.c
cvs rdiff -u -r1.47 -r1.48 src/sys/dev/ic/rtl81x9reg.h
cvs rdiff -u -r1.55 -r1.56 src/sys/dev/ic/rtl81x9var.h
cvs rdiff -u -r1.45 -r1.46 src/sys/dev/pci/if_re_pci.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ic/rtl8169.c
diff -u src/sys/dev/ic/rtl8169.c:1.149 src/sys/dev/ic/rtl8169.c:1.150
--- src/sys/dev/ic/rtl8169.c:1.149	Mon Feb 20 06:46:41 2017
+++ src/sys/dev/ic/rtl8169.c	Wed Apr 19 00:20:02 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: rtl8169.c,v 1.149 2017/02/20 06:46:41 ozaki-r Exp $	*/
+/*	$NetBSD: rtl8169.c,v 1.150 2017/04/19 00:20:02 jmcneill Exp $	*/
 
 /*
  * Copyright (c) 1997, 1998-2003
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rtl8169.c,v 1.149 2017/02/20 06:46:41 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rtl8169.c,v 1.150 2017/04/19 00:20:02 jmcneill Exp $");
 /* $FreeBSD: /repoman/r/ncvs/src/sys/dev/re/if_re.c,v 1.20 2004/04/11 20:34:08 ru Exp $ */
 
 /*
@@ -837,14 +837,6 @@ re_attach(struct rtk_softc *sc)
 	    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
 	    IFCAP_TSOv4;
 
-	/*
-	 * XXX
-	 * Still have no idea how to make TSO work on 8168C, 8168CP,
-	 * 8102E, 8111C and 8111CP.
-	 */
-	if ((sc->sc_quirk & RTKQ_DESCV2) != 0)
-		ifp->if_capabilities &= ~IFCAP_TSOv4;
-
 	ifp->if_watchdog = re_watchdog;
 	ifp->if_init = re_init;
 	ifp->if_snd.ifq_maxlen = RE_IFQ_MAXLEN;
@@ -1418,7 +1410,8 @@ re_txeof(struct rtk_softc *sc)
 	 * This is done in case the transmitter has gone idle.
 	 */
 	if (sc->re_ldata.re_txq_free < RE_TX_QLEN) {
-		CSR_WRITE_4(sc, RTK_TIMERCNT, 1);
+		if ((sc->sc_quirk & RTKQ_IM_HW) == 0)
+			CSR_WRITE_4(sc, RTK_TIMERCNT, 1);
 		if ((sc->sc_quirk & RTKQ_PCIE) != 0) {
 			/*
 			 * Some chips will ignore a second TX request
@@ -1466,6 +1459,9 @@ re_intr(void *arg)
 	if ((ifp->if_flags & IFF_UP) == 0)
 		return 0;
 
+	const uint16_t status_mask = (sc->sc_quirk & RTKQ_IM_HW) ?
+	    RTK_INTRS_IM_HW : RTK_INTRS_CPLUS;
+
 	for (;;) {
 
 		status = CSR_READ_2(sc, RTK_ISR);
@@ -1477,14 +1473,14 @@ re_intr(void *arg)
 			CSR_WRITE_2(sc, RTK_ISR, status);
 		}
 
-		if ((status & RTK_INTRS_CPLUS) == 0)
+		if ((status & status_mask) == 0)
 			break;
 
 		if (status & (RTK_ISR_RX_OK | RTK_ISR_RX_ERR))
 			re_rxeof(sc);
 
 		if (status & (RTK_ISR_TIMEOUT_EXPIRED | RTK_ISR_TX_ERR |
-		    RTK_ISR_TX_DESC_UNAVAIL))
+		    RTK_ISR_TX_DESC_UNAVAIL | RTK_ISR_TX_OK))
 			re_txeof(sc);
 
 		if (status & RTK_ISR_SYSTEM_ERR) {
@@ -1552,8 +1548,14 @@ re_start(struct ifnet *ifp)
 		if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) != 0) {
 			uint32_t segsz = m->m_pkthdr.segsz;
 
-			re_flags = RE_TDESC_CMD_LGSEND |
-			    (segsz << RE_TDESC_CMD_MSSVAL_SHIFT);
+			if ((sc->sc_quirk & RTKQ_DESCV2) == 0) {
+				re_flags = RE_TDESC_CMD_LGSEND |
+				    (segsz << RE_TDESC_CMD_MSSVAL_SHIFT);
+			} else {
+				re_flags = RE_TDESC_CMD_LGSEND_V4;
+				vlanctl |=
+				    (segsz << RE_TDESC_VLANCTL_MSSVAL_SHIFT);
+			}
 		} else {
 			/*
 			 * set RE_TDESC_CMD_IPCSUM if any checksum offloading
@@ -1746,15 +1748,17 @@ re_start(struct ifnet *ifp)
 		else
 			CSR_WRITE_1(sc, RTK_GTXSTART, RTK_TXSTART_START);
 
-		/*
-		 * Use the countdown timer for interrupt moderation.
-		 * 'TX done' interrupts are disabled. Instead, we reset the
-		 * countdown timer, which will begin counting until it hits
-		 * the value in the TIMERINT register, and then trigger an
-		 * interrupt. Each time we write to the TIMERCNT register,
-		 * the timer count is reset to 0.
-		 */
-		CSR_WRITE_4(sc, RTK_TIMERCNT, 1);
+		if ((sc->sc_quirk & RTKQ_IM_HW) == 0) {
+			/*
+			 * Use the countdown timer for interrupt moderation.
+			 * 'TX done' interrupts are disabled. Instead, we reset
+			 * the countdown timer, which will begin counting until
+			 * it hits the value in the TIMERINT register, and then
+			 * trigger an interrupt. Each time we write to the
+			 * TIMERCNT register, the timer count is reset to 0.
+			 */
+			CSR_WRITE_4(sc, RTK_TIMERCNT, 1);
+		}
 
 		/*
 		 * Set a timeout in case the chip goes out to lunch.
@@ -1813,8 +1817,13 @@ re_init(struct ifnet *ifp)
 	CSR_WRITE_2(sc, RTK_CPLUS_CMD, cfg);
 
 	/* XXX: from Realtek-supplied Linux driver. Wholly undocumented. */
-	if ((sc->sc_quirk & RTKQ_8139CPLUS) == 0)
-		CSR_WRITE_2(sc, RTK_IM, 0x0000);
+	if ((sc->sc_quirk & RTKQ_8139CPLUS) == 0) {
+		if ((sc->sc_quirk & RTKQ_IM_HW) == 0) {
+			CSR_WRITE_2(sc, RTK_IM, 0x0000);
+		} else {
+			CSR_WRITE_2(sc, RTK_IM, 0x5151);
+		}
+	}
 
 	DELAY(10000);
 
@@ -1907,6 +1916,8 @@ re_init(struct ifnet *ifp)
 	 */
 	if (sc->re_testmode)
 		CSR_WRITE_2(sc, RTK_IMR, 0);
+	else if ((sc->sc_quirk & RTKQ_IM_HW) != 0)
+		CSR_WRITE_2(sc, RTK_IMR, RTK_INTRS_IM_HW);
 	else
 		CSR_WRITE_2(sc, RTK_IMR, RTK_INTRS_CPLUS);
 
@@ -1928,7 +1939,15 @@ re_init(struct ifnet *ifp)
 	if ((sc->sc_quirk & RTKQ_8139CPLUS) != 0)
 		CSR_WRITE_4(sc, RTK_TIMERINT, 0x400);
 	else {
-		CSR_WRITE_4(sc, RTK_TIMERINT_8169, 0x800);
+		if ((sc->sc_quirk & RTKQ_IM_HW) == 0) {
+			if ((sc->sc_quirk & RTKQ_PCIE) != 0) {
+				CSR_WRITE_4(sc, RTK_TIMERINT_8169, 15000);
+			} else {
+				CSR_WRITE_4(sc, RTK_TIMERINT_8169, 0x800);
+			}
+		} else {
+			CSR_WRITE_4(sc, RTK_TIMERINT_8169, 0);
+		}
 
 		/*
 		 * For 8169 gigE NICs, set the max allowed RX packet

Index: src/sys/dev/ic/rtl81x9reg.h
diff -u src/sys/dev/ic/rtl81x9reg.h:1.47 src/sys/dev/ic/rtl81x9reg.h:1.48
--- src/sys/dev/ic/rtl81x9reg.h:1.47	Fri Aug 28 13:20:46 2015
+++ src/sys/dev/ic/rtl81x9reg.h	Wed Apr 19 00:20:02 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: rtl81x9reg.h,v 1.47 2015/08/28 13:20:46 nonaka Exp $	*/
+/*	$NetBSD: rtl81x9reg.h,v 1.48 2017/04/19 00:20:02 jmcneill Exp $	*/
 
 /*
  * Copyright (c) 1997, 1998
@@ -258,6 +258,8 @@
 	RTK_ISR_RX_OVERRUN|RTK_ISR_PKT_UNDERRUN|RTK_ISR_FIFO_OFLOW|	\
 	RTK_ISR_PCS_TIMEOUT|RTK_ISR_SYSTEM_ERR|RTK_ISR_TIMEOUT_EXPIRED)
 
+#define RTK_INTRS_IM_HW	\
+	(RTK_INTRS_CPLUS|RTK_ISR_TX_OK)
 
 /*
  * Media status register. (8139 only)
@@ -507,12 +509,16 @@ struct re_desc {
 #define RE_TDESC_CMD_SOF	0x20000000	/* start of frame marker */
 #define RE_TDESC_CMD_EOR	0x40000000	/* end of ring marker */
 #define RE_TDESC_CMD_OWN	0x80000000	/* chip owns descriptor */
+#define RE_TDESC_CMD_LGSEND_V4	0x04000000	/* DESCV2 TCPv4 large send en */
+#define RE_TDESC_CMD_LGSEND_V6	0x08000000	/* DESCV2 TCPv6 large send en */
 
 #define RE_TDESC_VLANCTL_TAG	0x00020000	/* Insert VLAN tag */
 #define RE_TDESC_VLANCTL_DATA	0x0000FFFF	/* TAG data */
 #define RE_TDESC_VLANCTL_UDPCSUM 0x80000000	/* DESCV2 UDP cksum enable */
 #define RE_TDESC_VLANCTL_TCPCSUM 0x40000000	/* DESCV2 TCP cksum enable */
 #define RE_TDESC_VLANCTL_IPCSUM	0x20000000	/* DESCV2 IP hdr cksum enable */
+#define RE_TDESC_VLANCTL_MSSVAL	0x0ffc0000	/* DESCV2 large send MSS val */
+#define RE_TDESC_VLANCTL_MSSVAL_SHIFT 18
 
 /*
  * Error bits are valid only on the last descriptor of a frame

Index: src/sys/dev/ic/rtl81x9var.h
diff -u src/sys/dev/ic/rtl81x9var.h:1.55 src/sys/dev/ic/rtl81x9var.h:1.56
--- src/sys/dev/ic/rtl81x9var.h:1.55	Mon Apr 13 16:33:24 2015
+++ src/sys/dev/ic/rtl81x9var.h	Wed Apr 19 00:20:02 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: rtl81x9var.h,v 1.55 2015/04/13 16:33:24 riastradh Exp $	*/
+/*	$NetBSD: rtl81x9var.h,v 1.56 2017/04/19 00:20:02 jmcneill Exp $	*/
 
 /*
  * Copyright (c) 1997, 1998
@@ -193,6 +193,7 @@ struct rtk_softc {
 #define RTKQ_CMDSTOP		0x00000200	/* set STOPREQ on stop */
 #define RTKQ_PHYWAKE_PM		0x00000400	/* wake PHY from power down */
 #define RTKQ_RXDV_GATED		0x00000800
+#define RTKQ_IM_HW		0x00001000	/* HW interrupt mitigation */
 
 	bus_dma_tag_t		sc_dmat;
 

Index: src/sys/dev/pci/if_re_pci.c
diff -u src/sys/dev/pci/if_re_pci.c:1.45 src/sys/dev/pci/if_re_pci.c:1.46
--- src/sys/dev/pci/if_re_pci.c:1.45	Mon Dec 14 20:01:17 2015
+++ src/sys/dev/pci/if_re_pci.c	Wed Apr 19 00:20:02 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_re_pci.c,v 1.45 2015/12/14 20:01:17 jakllsch Exp $	*/
+/*	$NetBSD: if_re_pci.c,v 1.46 2017/04/19 00:20:02 jmcneill Exp $	*/
 
 /*
  * Copyright (c) 1997, 1998-2003
@@ -46,7 +46,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_re_pci.c,v 1.45 2015/12/14 20:01:17 jakllsch Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_re_pci.c,v 1.46 2017/04/19 00:20:02 jmcneill Exp $");
 
 #include <sys/types.h>
 
@@ -240,6 +240,9 @@ re_pci_attach(device_t parent, device_t 
 	    t->rtk_basetype == RTK_8101E)
 		sc->sc_quirk |= RTKQ_PCIE;
 
+	if (t->rtk_basetype == RTK_8168)
+		sc->sc_quirk |= RTKQ_IM_HW;
+
 	if (pci_dma64_available(pa) && (sc->sc_quirk & RTKQ_PCIE))
 		sc->sc_dmat = pa->pa_dmat64;
 	else

Reply via email to