Module Name:    src
Committed By:   msaitoh
Date:           Tue Feb 20 07:24:37 UTC 2018

Modified Files:
        src/sys/dev/pci/ixgbe: ixgbe.c ixgbe_type.h

Log Message:
- Fix a bug where RX may stall under heavy load on ixg(4). It is derived from
 a bug in FreeBSD's AIM (Auto Interrupt Moderation).
 When I use a machine as an NFS client, sometimes one of the queue pairs
 doesn't get any interrupt other than every second tick via
 ixgbe_local_timer1(). When the problem occurs, the queue pair's
 hw.ixgM.qN.interrupt_rate is always 500000. While the problem is occurring,
 setting hw.ixgM.qN.interrupt_rate lower than 166667 recovers from the stall,
 i.e.:

  sysctl -w hw.ixgM.qN.interrupt_rate=166667 (doesn't recover)
  sysctl -w hw.ixgM.qN.interrupt_rate=166666 (recovers)

  The relation between interrupt_rate and EITR's ITR_INTERVAL field is as
 follows:

 int_rate | EITR[11:0]   | interval in us | recover |
          |(ITR_INTERVAL)| (10G and 1G)   |         |
 ---------+--------------+----------------+---------+
   500000 | 0x008(0)     |              2 |     not |
   166667 | 0x010(1)     |              4 |     not |
   166666 | 0x018(2)     |              6 | recover |

  The reason why int_rate becomes 500000 is that xgbe_tx_eof() doesn't
 increment rxr->packets(*1). Even after the rxr->packets bug is fixed,
 interrupt_rate might still become greater than 166666 and cause a stall.

  While reading datasheets, knakahara noticed a section titled "ITR
 Affect on RSC Functionality". It says "When RSC is enabled on specific RX
 queues, the associated ITR interval with these queues must be enabled and must
 be larger (in time units) than RSC delay". Currently, the RSC_DELAY field in
 the GPIE register is 0, which means 4us for 10G and 1G. The smallest
 ITR_INTERVAL value greater than 4us is 6us, i.e. an interrupt_rate of 166666.
 Yes, BINGO!

  This description is noted in the 82599 and newer datasheets and not in the
 82598 datasheet. I don't know if the 82598 has this limitation, but I apply
 this limitation to all chips.

 (*1) Note that this bug is going to be fixed in the next commit, to keep the
 two different bugs distinct.

- The bitfield layout of the EITR register differs between 82598 and the
 others. Only ixgbe_msix_que() took care of it. Add a new function,
 ixgbe_eitr_write(), and use it in all functions which modify ITR_INTERVAL.

XXX pullup-8


To generate a diff of this commit:
cvs rdiff -u -r1.123 -r1.124 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.30 -r1.31 src/sys/dev/pci/ixgbe/ixgbe_type.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.123 src/sys/dev/pci/ixgbe/ixgbe.c:1.124
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.123	Fri Feb 16 10:11:21 2018
+++ src/sys/dev/pci/ixgbe/ixgbe.c	Tue Feb 20 07:24:37 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.123 2018/02/16 10:11:21 msaitoh Exp $ */
+/* $NetBSD: ixgbe.c,v 1.124 2018/02/20 07:24:37 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -207,6 +207,7 @@ static void     ixgbe_update_link_status
 static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
 static void	ixgbe_configure_ivars(struct adapter *);
 static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
+static void	ixgbe_eitr_write(struct ix_queue *, uint32_t);
 
 static void	ixgbe_setup_vlan_hw_support(struct adapter *);
 #if 0
@@ -2465,8 +2466,7 @@ ixgbe_msix_que(void *arg)
 	 *    the last interval.
 	 */
 	if (que->eitr_setting)
-		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix),
-		    que->eitr_setting);
+		ixgbe_eitr_write(que, que->eitr_setting);
 
 	que->eitr_setting = 0;
 
@@ -2489,11 +2489,18 @@ ixgbe_msix_que(void *arg)
 	else
 		newitr = (newitr / 2);
 
-        if (adapter->hw.mac.type == ixgbe_mac_82598EB)
-                newitr |= newitr << 16;
-        else
-                newitr |= IXGBE_EITR_CNT_WDIS;
-                 
+	/*
+	 * When RSC is used, ITR interval must be larger than RSC_DELAY.
+	 * Currently, we use 2us for RSC_DELAY. The minimum value is always
+	 * greater than 2us on 100M (and 10M?(not documented)), but it's not
+	 * on 1G and higher.
+	 */
+	if ((adapter->link_speed != IXGBE_LINK_SPEED_100_FULL)
+	    && (adapter->link_speed != IXGBE_LINK_SPEED_10_FULL)) {
+		if (newitr < IXGBE_MIN_RSC_EITR_10G1G)
+			newitr = IXGBE_MIN_RSC_EITR_10G1G;
+	}
+
         /* save for next interrupt */
         que->eitr_setting = newitr;
 
@@ -2933,6 +2940,21 @@ ixgbe_msix_link(void *arg)
 	return 1;
 } /* ixgbe_msix_link */
 
+static void
+ixgbe_eitr_write(struct ix_queue *que, uint32_t itr)
+{
+	struct adapter *adapter = que->adapter;
+	
+        if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+                itr |= itr << 16;
+        else
+                itr |= IXGBE_EITR_CNT_WDIS;
+
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix),
+	    itr);
+}
+
+
 /************************************************************************
  * ixgbe_sysctl_interrupt_rate_handler
  ************************************************************************/
@@ -2941,6 +2963,7 @@ ixgbe_sysctl_interrupt_rate_handler(SYSC
 {
 	struct sysctlnode node = *rnode;
 	struct ix_queue *que = (struct ix_queue *)node.sysctl_data;
+	struct adapter  *adapter = que->adapter;
 	uint32_t reg, usec, rate;
 	int error;
 
@@ -2957,14 +2980,26 @@ ixgbe_sysctl_interrupt_rate_handler(SYSC
 	if (error || newp == NULL)
 		return error;
 	reg &= ~0xfff; /* default, no limitation */
-	ixgbe_max_interrupt_rate = 0;
 	if (rate > 0 && rate < 500000) {
 		if (rate < 1000)
 			rate = 1000;
-		ixgbe_max_interrupt_rate = rate;
 		reg |= ((4000000/rate) & 0xff8);
-	}
-	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
+		/*
+		 * When RSC is used, ITR interval must be larger than
+		 * RSC_DELAY. Currently, we use 2us for RSC_DELAY.
+		 * The minimum value is always greater than 2us on 100M
+		 * (and 10M?(not documented)), but it's not on 1G and higher.
+		 */
+		if ((adapter->link_speed != IXGBE_LINK_SPEED_100_FULL)
+		    && (adapter->link_speed != IXGBE_LINK_SPEED_10_FULL)) {
+			if ((adapter->num_queues > 1)
+			    && (reg < IXGBE_MIN_RSC_EITR_10G1G))
+				return EINVAL;
+		}
+		ixgbe_max_interrupt_rate = rate;
+	} else
+		ixgbe_max_interrupt_rate = 0;
+	ixgbe_eitr_write(que, reg);
 
 	return (0);
 } /* ixgbe_sysctl_interrupt_rate_handler */
@@ -3886,7 +3921,7 @@ ixgbe_configure_ivars(struct adapter *ad
 		/* ... and the TX */
 		ixgbe_set_ivar(adapter, txr->me, que->msix, 1);
 		/* Set an Initial EITR value */
-		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), newitr);
+		ixgbe_eitr_write(que, newitr);
 	}
 
 	/* For the Link interrupt */

Index: src/sys/dev/pci/ixgbe/ixgbe_type.h
diff -u src/sys/dev/pci/ixgbe/ixgbe_type.h:1.30 src/sys/dev/pci/ixgbe/ixgbe_type.h:1.31
--- src/sys/dev/pci/ixgbe/ixgbe_type.h:1.30	Wed Dec  6 04:08:50 2017
+++ src/sys/dev/pci/ixgbe/ixgbe_type.h	Tue Feb 20 07:24:37 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe_type.h,v 1.30 2017/12/06 04:08:50 msaitoh Exp $ */
+/* $NetBSD: ixgbe_type.h,v 1.31 2018/02/20 07:24:37 msaitoh Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -312,6 +312,11 @@
  */
 #define IXGBE_MAX_INT_RATE	488281
 #define IXGBE_MIN_INT_RATE	956
+/* On 82599 and newer, minimum RSC_DELAY is 4us. ITR interval must be larger
+ * than RSC_DELAY if RSC is used. ITR_INTERVAL is in 2(.048) us units on 10G
+ * and 1G. The minimun EITR is 6us.
+ */
+#define IXGBE_MIN_RSC_EITR_10G1G 0x00000018
 #define IXGBE_MAX_EITR		0x00000FF8
 #define IXGBE_MIN_EITR		8
 #define IXGBE_EITR(_i)		(((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \

Reply via email to