Module Name: src Committed By: msaitoh Date: Tue Feb 20 07:24:37 UTC 2018
Modified Files: src/sys/dev/pci/ixgbe: ixgbe.c ixgbe_type.h Log Message: - Fix a bug that RX may stall under heavy load on ixg(4), derived from FreeBSD's AIM (Auto Interrupt Moderation) bug. When I use a machine as an NFS client, sometimes one of the queue pairs doesn't get any interrupt other than the every-second tick via ixgbe_local_timer1(). When the problem occurs, the queue pair's hw.ixgM.qN.interrupt_rate is always 500000. While the problem is occurring, setting hw.ixgM.qN.interrupt_rate lower than 166667 recovers from the stall, i.e.:
    sysctl -w hw.ixgM.qN.interrupt_rate=166667 (doesn't recover)
    sysctl -w hw.ixgM.qN.interrupt_rate=166666 (recover)
The relation between the interrupt_rate and EITR's ITR_INTERVAL field is as follows:
    int_rate | EITR[11:0]   | interval in us | recover |
             |(ITR_INTERVAL)| (10G and 1G)   |         |
    ---------+--------------+----------------+---------+
      500000 |     0x008(0) |              2 |     not |
      166667 |     0x010(1) |              4 |     not |
      166666 |     0x018(2) |              6 | recover |
The reason why int_rate becomes 500000 is that xgbe_tx_eof() doesn't increment rxr->packets(*1). Even if we fix the rxr->packets bug, interrupt_rate might become greater than 166666 and it might cause a stall. While reading datasheets, knakahara noticed a section titled "ITR Affect on RSC Functionality". It says "When RSC is enabled on specific RX queues, the associated ITR interval with these queues must be enabled and must be larger (in time units) than RSC delay". Currently, the RSC_DELAY field in the GPIE register is 0, which means 4us for 10G and 1G. The next ITR_INTERVAL value greater than 4us is 6us == 166666. Yes, BINGO! This description is noted in the 82599 and newer datasheets and not in the 82598 datasheet. I don't know if the 82598 has this limitation, but I apply this limitation to all chips. (*1) Note that this bug is going to be fixed in the next commit, to distinguish between the two different bugs. - The bitfield of the EITR register is different between the 82598 and others. Only ixgbe_msix_que() took care of it. 
Add a new function ixgbe_eitr_write() and use it in all functions that modify ITR_INTERVAL. XXX pullup-8 To generate a diff of this commit: cvs rdiff -u -r1.123 -r1.124 src/sys/dev/pci/ixgbe/ixgbe.c cvs rdiff -u -r1.30 -r1.31 src/sys/dev/pci/ixgbe/ixgbe_type.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/pci/ixgbe/ixgbe.c diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.123 src/sys/dev/pci/ixgbe/ixgbe.c:1.124 --- src/sys/dev/pci/ixgbe/ixgbe.c:1.123 Fri Feb 16 10:11:21 2018 +++ src/sys/dev/pci/ixgbe/ixgbe.c Tue Feb 20 07:24:37 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: ixgbe.c,v 1.123 2018/02/16 10:11:21 msaitoh Exp $ */ +/* $NetBSD: ixgbe.c,v 1.124 2018/02/20 07:24:37 msaitoh Exp $ */ /****************************************************************************** @@ -207,6 +207,7 @@ static void ixgbe_update_link_status static void ixgbe_set_ivar(struct adapter *, u8, u8, s8); static void ixgbe_configure_ivars(struct adapter *); static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); +static void ixgbe_eitr_write(struct ix_queue *, uint32_t); static void ixgbe_setup_vlan_hw_support(struct adapter *); #if 0 @@ -2465,8 +2466,7 @@ ixgbe_msix_que(void *arg) * the last interval. */ if (que->eitr_setting) - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), - que->eitr_setting); + ixgbe_eitr_write(que, que->eitr_setting); que->eitr_setting = 0; @@ -2489,11 +2489,18 @@ ixgbe_msix_que(void *arg) else newitr = (newitr / 2); - if (adapter->hw.mac.type == ixgbe_mac_82598EB) - newitr |= newitr << 16; - else - newitr |= IXGBE_EITR_CNT_WDIS; - + /* + * When RSC is used, ITR interval must be larger than RSC_DELAY. + * Currently, we use 2us for RSC_DELAY. The minimum value is always + * greater than 2us on 100M (and 10M?(not documented)), but it's not + * on 1G and higher. 
+ */ + if ((adapter->link_speed != IXGBE_LINK_SPEED_100_FULL) + && (adapter->link_speed != IXGBE_LINK_SPEED_10_FULL)) { + if (newitr < IXGBE_MIN_RSC_EITR_10G1G) + newitr = IXGBE_MIN_RSC_EITR_10G1G; + } + /* save for next interrupt */ que->eitr_setting = newitr; @@ -2933,6 +2940,21 @@ ixgbe_msix_link(void *arg) return 1; } /* ixgbe_msix_link */ +static void +ixgbe_eitr_write(struct ix_queue *que, uint32_t itr) +{ + struct adapter *adapter = que->adapter; + + if (adapter->hw.mac.type == ixgbe_mac_82598EB) + itr |= itr << 16; + else + itr |= IXGBE_EITR_CNT_WDIS; + + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), + itr); +} + + /************************************************************************ * ixgbe_sysctl_interrupt_rate_handler ************************************************************************/ @@ -2941,6 +2963,7 @@ ixgbe_sysctl_interrupt_rate_handler(SYSC { struct sysctlnode node = *rnode; struct ix_queue *que = (struct ix_queue *)node.sysctl_data; + struct adapter *adapter = que->adapter; uint32_t reg, usec, rate; int error; @@ -2957,14 +2980,26 @@ ixgbe_sysctl_interrupt_rate_handler(SYSC if (error || newp == NULL) return error; reg &= ~0xfff; /* default, no limitation */ - ixgbe_max_interrupt_rate = 0; if (rate > 0 && rate < 500000) { if (rate < 1000) rate = 1000; - ixgbe_max_interrupt_rate = rate; reg |= ((4000000/rate) & 0xff8); - } - IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); + /* + * When RSC is used, ITR interval must be larger than + * RSC_DELAY. Currently, we use 2us for RSC_DELAY. + * The minimum value is always greater than 2us on 100M + * (and 10M?(not documented)), but it's not on 1G and higher. 
+ */ + if ((adapter->link_speed != IXGBE_LINK_SPEED_100_FULL) + && (adapter->link_speed != IXGBE_LINK_SPEED_10_FULL)) { + if ((adapter->num_queues > 1) + && (reg < IXGBE_MIN_RSC_EITR_10G1G)) + return EINVAL; + } + ixgbe_max_interrupt_rate = rate; + } else + ixgbe_max_interrupt_rate = 0; + ixgbe_eitr_write(que, reg); return (0); } /* ixgbe_sysctl_interrupt_rate_handler */ @@ -3886,7 +3921,7 @@ ixgbe_configure_ivars(struct adapter *ad /* ... and the TX */ ixgbe_set_ivar(adapter, txr->me, que->msix, 1); /* Set an Initial EITR value */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), newitr); + ixgbe_eitr_write(que, newitr); } /* For the Link interrupt */ Index: src/sys/dev/pci/ixgbe/ixgbe_type.h diff -u src/sys/dev/pci/ixgbe/ixgbe_type.h:1.30 src/sys/dev/pci/ixgbe/ixgbe_type.h:1.31 --- src/sys/dev/pci/ixgbe/ixgbe_type.h:1.30 Wed Dec 6 04:08:50 2017 +++ src/sys/dev/pci/ixgbe/ixgbe_type.h Tue Feb 20 07:24:37 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: ixgbe_type.h,v 1.30 2017/12/06 04:08:50 msaitoh Exp $ */ +/* $NetBSD: ixgbe_type.h,v 1.31 2018/02/20 07:24:37 msaitoh Exp $ */ /****************************************************************************** SPDX-License-Identifier: BSD-3-Clause @@ -312,6 +312,11 @@ */ #define IXGBE_MAX_INT_RATE 488281 #define IXGBE_MIN_INT_RATE 956 +/* On 82599 and newer, minimum RSC_DELAY is 4us. ITR interval must be larger + * than RSC_DELAY if RSC is used. ITR_INTERVAL is in 2(.048) us units on 10G + * and 1G. The minimun EITR is 6us. + */ +#define IXGBE_MIN_RSC_EITR_10G1G 0x00000018 #define IXGBE_MAX_EITR 0x00000FF8 #define IXGBE_MIN_EITR 8 #define IXGBE_EITR(_i) (((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \