Re: [RFC PATCH] e1000e: Fix link check race condition.
On 2018/02/26 08:14, Alexander Duyck wrote: [...] > > > > > switch (hw->mac.type) { > > case e1000_pch2lan: > > ret_val = e1000_k1_workaround_lv(hw); > > if (ret_val) > > - return ret_val; > > + goto out; > > /* fall-thru */ > > case e1000_pchlan: > > if (hw->phy.type == e1000_phy_82578) { > > ret_val = e1000_link_stall_workaround_hv(hw); > > if (ret_val) > > - return ret_val; > > + goto out; > > } > > > > /* Workaround for PCHx parts in half-duplex: > > @@ -1595,7 +1596,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > > e1000_hw *hw) > > if (hw->phy.type > e1000_phy_82579) { > > ret_val = e1000_set_eee_pchlan(hw); > > if (ret_val) > > - return ret_val; > > + goto out; > > } > > > > /* If we are forcing speed/duplex, then we simply return since > > @@ -1618,10 +1619,14 @@ static s32 > > e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) > > ret_val = e1000e_config_fc_after_link_up(hw); > > if (ret_val) { > > e_dbg("Error configuring flow control\n"); > > - return ret_val; > > + goto out; > > } > > Technically these changes would be a change in behavior. For these we > may just want to leave them as-is since I am not certain they would > have any actual impact on the link state other than delaying the > link-up. For example do we really care if we fail to negotiate flow > control? We may not so we might report link up and just a debug > message indicating we didn't negotiate that part of the link. You're right and actually that raises yet another problem with commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up"). Previously these errors which come after the "get_link_status = false" would be ignored and the link considered up. After that commit, any error implies that the link is down: - link_active = !hw->mac.get_link_status; + link_active = ret_val > 0; I'll send a patch to fix that problem first and then get back to this race.
Re: [RFC PATCH] e1000e: Fix link check race condition.
On 2018/02/26 08:14, Alexander Duyck wrote: [...] > > > > > switch (hw->mac.type) { > > case e1000_pch2lan: > > ret_val = e1000_k1_workaround_lv(hw); > > if (ret_val) > > - return ret_val; > > + goto out; > > /* fall-thru */ > > case e1000_pchlan: > > if (hw->phy.type == e1000_phy_82578) { > > ret_val = e1000_link_stall_workaround_hv(hw); > > if (ret_val) > > - return ret_val; > > + goto out; > > } > > > > /* Workaround for PCHx parts in half-duplex: > > @@ -1595,7 +1596,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > > e1000_hw *hw) > > if (hw->phy.type > e1000_phy_82579) { > > ret_val = e1000_set_eee_pchlan(hw); > > if (ret_val) > > - return ret_val; > > + goto out; > > } > > > > /* If we are forcing speed/duplex, then we simply return since > > @@ -1618,10 +1619,14 @@ static s32 > > e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) > > ret_val = e1000e_config_fc_after_link_up(hw); > > if (ret_val) { > > e_dbg("Error configuring flow control\n"); > > - return ret_val; > > + goto out; > > } > > Technically these changes would be a change in behavior. For these we > may just want to leave them as-is since I am not certain they would > have any actual impact on the link state other than delaying the > link-up. For example do we really care if we fail to negotiate flow > control? We may not so we might report link up and just a debug > message indicating we didn't negotiate that part of the link. You're right and actually that raises yet another problem with commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up"). Previously these errors which come after the "get_link_status = false" would be ignored and the link considered up. After that commit, any error implies that the link is down: - link_active = !hw->mac.get_link_status; + link_active = ret_val > 0; I'll send a patch to fix that problem first and then get back to this race.
Re: [RFC PATCH] e1000e: Fix link check race condition.
On Sun, Feb 25, 2018 at 6:31 PM, Benjamin Poirierwrote: > Alex reported the following race condition: > > /* link goes up... interrupt... schedule watchdog */ > \ e1000_watchdog_task > \ e1000e_has_link > \ hw->mac.ops.check_for_link() === > e1000e_check_for_copper_link > \ e1000e_phy_has_link_generic(..., ) > link = true > > /* link goes down... interrupt */ > \ e1000_msix_other > hw->mac.get_link_status = > true > > /* link is up */ > mac->get_link_status = false > > link_active = true > /* link_active is true, wrongly, and stays so because > * get_link_status is false */ > > Avoid this problem by making sure that we don't set get_link_status = false > after having checked the link. > > It seems this problem has been present since the introduction of e1000e. > > Link: https://lkml.org/lkml/2018/1/29/338 > Reported-by: Alexander Duyck > Signed-off-by: Benjamin Poirier > --- > drivers/net/ethernet/intel/e1000e/ich8lan.c | 41 > - > drivers/net/ethernet/intel/e1000e/mac.c | 14 +++--- > 2 files changed, 33 insertions(+), 22 deletions(-) > > diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c > b/drivers/net/ethernet/intel/e1000e/ich8lan.c > index ff308b05d68c..3c2c4f87e075 100644 > --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c > +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c > @@ -1386,6 +1386,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > */ > if (!mac->get_link_status) > return 1; > + mac->get_link_status = false; > > /* First we want to see if the MII Status Register reports > * link. If so, then we want to get the current speed/duplex > @@ -1393,12 +1394,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > */ > ret_val = e1000e_phy_has_link_generic(hw, 1, 0, ); > if (ret_val) > - return ret_val; > + goto out; > > if (hw->mac.type == e1000_pchlan) { > ret_val = e1000_k1_gig_workaround_hv(hw, link); > if (ret_val) > - return ret_val; > + goto out; > } > > /* When connected at 10Mbps half-duplex, some parts are excessively > @@ -1431,7 +1432,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > > ret_val = hw->phy.ops.acquire(hw); > if (ret_val) > - return ret_val; > + goto out; > > if (hw->mac.type == e1000_pch2lan) > emi_addr = I82579_RX_CONFIG; > @@ -1453,7 +1454,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > hw->phy.ops.release(hw); > > if (ret_val) > - return ret_val; > + goto out; > > if (hw->mac.type >= e1000_pch_spt) { > u16 data; > @@ -1462,14 +1463,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > if (speed == SPEED_1000) { > ret_val = hw->phy.ops.acquire(hw); > if (ret_val) > - return ret_val; > + goto out; > > ret_val = e1e_rphy_locked(hw, > PHY_REG(776, 20), > ); > if (ret_val) { > hw->phy.ops.release(hw); > - return ret_val; > + goto out; > } > > ptr_gap = (data & (0x3FF << 2)) >> 2; > @@ -1483,18 +1484,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > } > hw->phy.ops.release(hw); > if (ret_val) > - return ret_val; > + goto out; > } else { > ret_val = hw->phy.ops.acquire(hw); > if (ret_val) > - return ret_val; > + goto out; > > ret_val = e1e_wphy_locked(hw, >
Re: [RFC PATCH] e1000e: Fix link check race condition.
On Sun, Feb 25, 2018 at 6:31 PM, Benjamin Poirier wrote: > Alex reported the following race condition: > > /* link goes up... interrupt... schedule watchdog */ > \ e1000_watchdog_task > \ e1000e_has_link > \ hw->mac.ops.check_for_link() === > e1000e_check_for_copper_link > \ e1000e_phy_has_link_generic(..., ) > link = true > > /* link goes down... interrupt */ > \ e1000_msix_other > hw->mac.get_link_status = > true > > /* link is up */ > mac->get_link_status = false > > link_active = true > /* link_active is true, wrongly, and stays so because > * get_link_status is false */ > > Avoid this problem by making sure that we don't set get_link_status = false > after having checked the link. > > It seems this problem has been present since the introduction of e1000e. > > Link: https://lkml.org/lkml/2018/1/29/338 > Reported-by: Alexander Duyck > Signed-off-by: Benjamin Poirier > --- > drivers/net/ethernet/intel/e1000e/ich8lan.c | 41 > - > drivers/net/ethernet/intel/e1000e/mac.c | 14 +++--- > 2 files changed, 33 insertions(+), 22 deletions(-) > > diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c > b/drivers/net/ethernet/intel/e1000e/ich8lan.c > index ff308b05d68c..3c2c4f87e075 100644 > --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c > +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c > @@ -1386,6 +1386,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > */ > if (!mac->get_link_status) > return 1; > + mac->get_link_status = false; > > /* First we want to see if the MII Status Register reports > * link. If so, then we want to get the current speed/duplex > @@ -1393,12 +1394,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > */ > ret_val = e1000e_phy_has_link_generic(hw, 1, 0, ); > if (ret_val) > - return ret_val; > + goto out; > > if (hw->mac.type == e1000_pchlan) { > ret_val = e1000_k1_gig_workaround_hv(hw, link); > if (ret_val) > - return ret_val; > + goto out; > } > > /* When connected at 10Mbps half-duplex, some parts are excessively > @@ -1431,7 +1432,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > > ret_val = hw->phy.ops.acquire(hw); > if (ret_val) > - return ret_val; > + goto out; > > if (hw->mac.type == e1000_pch2lan) > emi_addr = I82579_RX_CONFIG; > @@ -1453,7 +1454,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > hw->phy.ops.release(hw); > > if (ret_val) > - return ret_val; > + goto out; > > if (hw->mac.type >= e1000_pch_spt) { > u16 data; > @@ -1462,14 +1463,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > if (speed == SPEED_1000) { > ret_val = hw->phy.ops.acquire(hw); > if (ret_val) > - return ret_val; > + goto out; > > ret_val = e1e_rphy_locked(hw, > PHY_REG(776, 20), > ); > if (ret_val) { > hw->phy.ops.release(hw); > - return ret_val; > + goto out; > } > > ptr_gap = (data & (0x3FF << 2)) >> 2; > @@ -1483,18 +1484,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct > e1000_hw *hw) > } > hw->phy.ops.release(hw); > if (ret_val) > - return ret_val; > + goto out; > } else { > ret_val = hw->phy.ops.acquire(hw); > if (ret_val) > - return ret_val; > + goto out; > > ret_val = e1e_wphy_locked(hw, > PHY_REG(776, 20), >
[RFC PATCH] e1000e: Fix link check race condition.
Alex reported the following race condition: /* link goes up... interrupt... schedule watchdog */ \ e1000_watchdog_task \ e1000e_has_link \ hw->mac.ops.check_for_link() === e1000e_check_for_copper_link \ e1000e_phy_has_link_generic(..., ) link = true /* link goes down... interrupt */ \ e1000_msix_other hw->mac.get_link_status = true /* link is up */ mac->get_link_status = false link_active = true /* link_active is true, wrongly, and stays so because * get_link_status is false */ Avoid this problem by making sure that we don't set get_link_status = false after having checked the link. It seems this problem has been present since the introduction of e1000e. Link: https://lkml.org/lkml/2018/1/29/338 Reported-by: Alexander DuyckSigned-off-by: Benjamin Poirier --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 41 - drivers/net/ethernet/intel/e1000e/mac.c | 14 +++--- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index ff308b05d68c..3c2c4f87e075 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1386,6 +1386,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ if (!mac->get_link_status) return 1; + mac->get_link_status = false; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1393,12 +1394,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, ); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pchlan) { ret_val = e1000_k1_gig_workaround_hv(hw, link); if (ret_val) - return ret_val; + goto out; } /* When connected at 10Mbps half-duplex, some parts are excessively @@ -1431,7 +1432,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pch2lan) emi_addr = I82579_RX_CONFIG; @@ -1453,7 +1454,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type >= e1000_pch_spt) { u16 data; @@ -1462,14 +1463,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (speed == SPEED_1000) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_rphy_locked(hw, PHY_REG(776, 20), ); if (ret_val) { hw->phy.ops.release(hw); - return ret_val; + goto out; } ptr_gap = (data & (0x3FF << 2)) >> 2; @@ -1483,18 +1484,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) } hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; } else { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_wphy_locked(hw, PHY_REG(776, 20), 0xC023); hw->phy.ops.release(hw); if (ret_val) - return ret_val; +
[RFC PATCH] e1000e: Fix link check race condition.
Alex reported the following race condition: /* link goes up... interrupt... schedule watchdog */ \ e1000_watchdog_task \ e1000e_has_link \ hw->mac.ops.check_for_link() === e1000e_check_for_copper_link \ e1000e_phy_has_link_generic(..., ) link = true /* link goes down... interrupt */ \ e1000_msix_other hw->mac.get_link_status = true /* link is up */ mac->get_link_status = false link_active = true /* link_active is true, wrongly, and stays so because * get_link_status is false */ Avoid this problem by making sure that we don't set get_link_status = false after having checked the link. It seems this problem has been present since the introduction of e1000e. Link: https://lkml.org/lkml/2018/1/29/338 Reported-by: Alexander Duyck Signed-off-by: Benjamin Poirier --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 41 - drivers/net/ethernet/intel/e1000e/mac.c | 14 +++--- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index ff308b05d68c..3c2c4f87e075 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1386,6 +1386,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ if (!mac->get_link_status) return 1; + mac->get_link_status = false; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1393,12 +1394,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, ); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pchlan) { ret_val = e1000_k1_gig_workaround_hv(hw, link); if (ret_val) - return ret_val; + goto out; } /* When connected at 10Mbps half-duplex, some parts are excessively @@ -1431,7 +1432,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pch2lan) emi_addr = I82579_RX_CONFIG; @@ -1453,7 +1454,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type >= e1000_pch_spt) { u16 data; @@ -1462,14 +1463,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (speed == SPEED_1000) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_rphy_locked(hw, PHY_REG(776, 20), ); if (ret_val) { hw->phy.ops.release(hw); - return ret_val; + goto out; } ptr_gap = (data & (0x3FF << 2)) >> 2; @@ -1483,18 +1484,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) } hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; } else { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_wphy_locked(hw, PHY_REG(776, 20), 0xC023); hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out;