Re: [RFC PATCH] e1000e: Fix link check race condition.

2018-02-27 Thread Benjamin Poirier
On 2018/02/26 08:14, Alexander Duyck wrote:
[...]
> 
> >
> > switch (hw->mac.type) {
> > case e1000_pch2lan:
> > ret_val = e1000_k1_workaround_lv(hw);
> > if (ret_val)
> > -   return ret_val;
> > +   goto out;
> > /* fall-thru */
> > case e1000_pchlan:
> > if (hw->phy.type == e1000_phy_82578) {
> > ret_val = e1000_link_stall_workaround_hv(hw);
> > if (ret_val)
> > -   return ret_val;
> > +   goto out;
> > }
> >
> > /* Workaround for PCHx parts in half-duplex:
> > @@ -1595,7 +1596,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> > e1000_hw *hw)
> > if (hw->phy.type > e1000_phy_82579) {
> > ret_val = e1000_set_eee_pchlan(hw);
> > if (ret_val)
> > -   return ret_val;
> > +   goto out;
> > }
> >
> > /* If we are forcing speed/duplex, then we simply return since
> > @@ -1618,10 +1619,14 @@ static s32 
> > e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
> > ret_val = e1000e_config_fc_after_link_up(hw);
> > if (ret_val) {
> > e_dbg("Error configuring flow control\n");
> > -   return ret_val;
> > +   goto out;
> > }
> 
> Technically these changes would be a change in behavior. For these we
> may just want to leave them as-is since I am not certain they would
> have any actual impact on the link state other than delaying the
> link-up. For example do we really care if we fail to negotiate flow
> control? We may not so we might report link up and just a debug
> message indicating we didn't negotiate that part of the link.

You're right and actually that raises yet another problem with commit
19110cfbb34d ("e1000e: Separate signaling for link check/link up").

Previously these errors which come after the "get_link_status = false"
would be ignored and the link considered up. After that commit, any
error implies that the link is down:

-   link_active = !hw->mac.get_link_status;
+   link_active = ret_val > 0;

I'll send a patch to fix that problem first and then get back to this
race.


Re: [RFC PATCH] e1000e: Fix link check race condition.

2018-02-27 Thread Benjamin Poirier
On 2018/02/26 08:14, Alexander Duyck wrote:
[...]
> 
> >
> > switch (hw->mac.type) {
> > case e1000_pch2lan:
> > ret_val = e1000_k1_workaround_lv(hw);
> > if (ret_val)
> > -   return ret_val;
> > +   goto out;
> > /* fall-thru */
> > case e1000_pchlan:
> > if (hw->phy.type == e1000_phy_82578) {
> > ret_val = e1000_link_stall_workaround_hv(hw);
> > if (ret_val)
> > -   return ret_val;
> > +   goto out;
> > }
> >
> > /* Workaround for PCHx parts in half-duplex:
> > @@ -1595,7 +1596,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> > e1000_hw *hw)
> > if (hw->phy.type > e1000_phy_82579) {
> > ret_val = e1000_set_eee_pchlan(hw);
> > if (ret_val)
> > -   return ret_val;
> > +   goto out;
> > }
> >
> > /* If we are forcing speed/duplex, then we simply return since
> > @@ -1618,10 +1619,14 @@ static s32 
> > e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
> > ret_val = e1000e_config_fc_after_link_up(hw);
> > if (ret_val) {
> > e_dbg("Error configuring flow control\n");
> > -   return ret_val;
> > +   goto out;
> > }
> 
> Technically these changes would be a change in behavior. For these we
> may just want to leave them as-is since I am not certain they would
> have any actual impact on the link state other than delaying the
> link-up. For example do we really care if we fail to negotiate flow
> control? We may not so we might report link up and just a debug
> message indicating we didn't negotiate that part of the link.

You're right and actually that raises yet another problem with commit
19110cfbb34d ("e1000e: Separate signaling for link check/link up").

Previously these errors which come after the "get_link_status = false"
would be ignored and the link considered up. After that commit, any
error implies that the link is down:

-   link_active = !hw->mac.get_link_status;
+   link_active = ret_val > 0;

I'll send a patch to fix that problem first and then get back to this
race.


Re: [RFC PATCH] e1000e: Fix link check race condition.

2018-02-26 Thread Alexander Duyck
On Sun, Feb 25, 2018 at 6:31 PM, Benjamin Poirier  wrote:
> Alex reported the following race condition:
>
> /* link goes up... interrupt... schedule watchdog */
> \ e1000_watchdog_task
> \ e1000e_has_link
> \ hw->mac.ops.check_for_link() === 
> e1000e_check_for_copper_link
> \ e1000e_phy_has_link_generic(..., )
> link = true
>
>  /* link goes down... interrupt */
>  \ e1000_msix_other
>  hw->mac.get_link_status = 
> true
>
> /* link is up */
> mac->get_link_status = false
>
> link_active = true
> /* link_active is true, wrongly, and stays so because
>  * get_link_status is false */
>
> Avoid this problem by making sure that we don't set get_link_status = false
> after having checked the link.
>
> It seems this problem has been present since the introduction of e1000e.
>
> Link: https://lkml.org/lkml/2018/1/29/338
> Reported-by: Alexander Duyck 
> Signed-off-by: Benjamin Poirier 
> ---
>  drivers/net/ethernet/intel/e1000e/ich8lan.c | 41 
> -
>  drivers/net/ethernet/intel/e1000e/mac.c | 14 +++---
>  2 files changed, 33 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c 
> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
> index ff308b05d68c..3c2c4f87e075 100644
> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
> @@ -1386,6 +1386,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
>  */
> if (!mac->get_link_status)
> return 1;
> +   mac->get_link_status = false;
>
> /* First we want to see if the MII Status Register reports
>  * link.  If so, then we want to get the current speed/duplex
> @@ -1393,12 +1394,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
>  */
> ret_val = e1000e_phy_has_link_generic(hw, 1, 0, );
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> if (hw->mac.type == e1000_pchlan) {
> ret_val = e1000_k1_gig_workaround_hv(hw, link);
> if (ret_val)
> -   return ret_val;
> +   goto out;
> }
>
> /* When connected at 10Mbps half-duplex, some parts are excessively
> @@ -1431,7 +1432,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
>
> ret_val = hw->phy.ops.acquire(hw);
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> if (hw->mac.type == e1000_pch2lan)
> emi_addr = I82579_RX_CONFIG;
> @@ -1453,7 +1454,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
> hw->phy.ops.release(hw);
>
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> if (hw->mac.type >= e1000_pch_spt) {
> u16 data;
> @@ -1462,14 +1463,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
> if (speed == SPEED_1000) {
> ret_val = hw->phy.ops.acquire(hw);
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> ret_val = e1e_rphy_locked(hw,
>   PHY_REG(776, 20),
>   );
> if (ret_val) {
> hw->phy.ops.release(hw);
> -   return ret_val;
> +   goto out;
> }
>
> ptr_gap = (data & (0x3FF << 2)) >> 2;
> @@ -1483,18 +1484,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
> }
> hw->phy.ops.release(hw);
> if (ret_val)
> -   return ret_val;
> +   goto out;
> } else {
> ret_val = hw->phy.ops.acquire(hw);
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> ret_val = e1e_wphy_locked(hw,
>  

Re: [RFC PATCH] e1000e: Fix link check race condition.

2018-02-26 Thread Alexander Duyck
On Sun, Feb 25, 2018 at 6:31 PM, Benjamin Poirier  wrote:
> Alex reported the following race condition:
>
> /* link goes up... interrupt... schedule watchdog */
> \ e1000_watchdog_task
> \ e1000e_has_link
> \ hw->mac.ops.check_for_link() === 
> e1000e_check_for_copper_link
> \ e1000e_phy_has_link_generic(..., )
> link = true
>
>  /* link goes down... interrupt */
>  \ e1000_msix_other
>  hw->mac.get_link_status = 
> true
>
> /* link is up */
> mac->get_link_status = false
>
> link_active = true
> /* link_active is true, wrongly, and stays so because
>  * get_link_status is false */
>
> Avoid this problem by making sure that we don't set get_link_status = false
> after having checked the link.
>
> It seems this problem has been present since the introduction of e1000e.
>
> Link: https://lkml.org/lkml/2018/1/29/338
> Reported-by: Alexander Duyck 
> Signed-off-by: Benjamin Poirier 
> ---
>  drivers/net/ethernet/intel/e1000e/ich8lan.c | 41 
> -
>  drivers/net/ethernet/intel/e1000e/mac.c | 14 +++---
>  2 files changed, 33 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c 
> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
> index ff308b05d68c..3c2c4f87e075 100644
> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
> @@ -1386,6 +1386,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
>  */
> if (!mac->get_link_status)
> return 1;
> +   mac->get_link_status = false;
>
> /* First we want to see if the MII Status Register reports
>  * link.  If so, then we want to get the current speed/duplex
> @@ -1393,12 +1394,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
>  */
> ret_val = e1000e_phy_has_link_generic(hw, 1, 0, );
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> if (hw->mac.type == e1000_pchlan) {
> ret_val = e1000_k1_gig_workaround_hv(hw, link);
> if (ret_val)
> -   return ret_val;
> +   goto out;
> }
>
> /* When connected at 10Mbps half-duplex, some parts are excessively
> @@ -1431,7 +1432,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
>
> ret_val = hw->phy.ops.acquire(hw);
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> if (hw->mac.type == e1000_pch2lan)
> emi_addr = I82579_RX_CONFIG;
> @@ -1453,7 +1454,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
> hw->phy.ops.release(hw);
>
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> if (hw->mac.type >= e1000_pch_spt) {
> u16 data;
> @@ -1462,14 +1463,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
> if (speed == SPEED_1000) {
> ret_val = hw->phy.ops.acquire(hw);
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> ret_val = e1e_rphy_locked(hw,
>   PHY_REG(776, 20),
>   );
> if (ret_val) {
> hw->phy.ops.release(hw);
> -   return ret_val;
> +   goto out;
> }
>
> ptr_gap = (data & (0x3FF << 2)) >> 2;
> @@ -1483,18 +1484,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
> e1000_hw *hw)
> }
> hw->phy.ops.release(hw);
> if (ret_val)
> -   return ret_val;
> +   goto out;
> } else {
> ret_val = hw->phy.ops.acquire(hw);
> if (ret_val)
> -   return ret_val;
> +   goto out;
>
> ret_val = e1e_wphy_locked(hw,
>   PHY_REG(776, 20),
> 

[RFC PATCH] e1000e: Fix link check race condition.

2018-02-25 Thread Benjamin Poirier
Alex reported the following race condition:

/* link goes up... interrupt... schedule watchdog */
\ e1000_watchdog_task
\ e1000e_has_link
\ hw->mac.ops.check_for_link() === e1000e_check_for_copper_link
\ e1000e_phy_has_link_generic(..., )
link = true

 /* link goes down... interrupt */
 \ e1000_msix_other
 hw->mac.get_link_status = true

/* link is up */
mac->get_link_status = false

link_active = true
/* link_active is true, wrongly, and stays so because
 * get_link_status is false */

Avoid this problem by making sure that we don't set get_link_status = false
after having checked the link.

It seems this problem has been present since the introduction of e1000e.

Link: https://lkml.org/lkml/2018/1/29/338
Reported-by: Alexander Duyck 
Signed-off-by: Benjamin Poirier 
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 41 -
 drivers/net/ethernet/intel/e1000e/mac.c | 14 +++---
 2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c 
b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index ff308b05d68c..3c2c4f87e075 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1386,6 +1386,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
 */
if (!mac->get_link_status)
return 1;
+   mac->get_link_status = false;
 
/* First we want to see if the MII Status Register reports
 * link.  If so, then we want to get the current speed/duplex
@@ -1393,12 +1394,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
 */
ret_val = e1000e_phy_has_link_generic(hw, 1, 0, );
if (ret_val)
-   return ret_val;
+   goto out;
 
if (hw->mac.type == e1000_pchlan) {
ret_val = e1000_k1_gig_workaround_hv(hw, link);
if (ret_val)
-   return ret_val;
+   goto out;
}
 
/* When connected at 10Mbps half-duplex, some parts are excessively
@@ -1431,7 +1432,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
 
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
-   return ret_val;
+   goto out;
 
if (hw->mac.type == e1000_pch2lan)
emi_addr = I82579_RX_CONFIG;
@@ -1453,7 +1454,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
hw->phy.ops.release(hw);
 
if (ret_val)
-   return ret_val;
+   goto out;
 
if (hw->mac.type >= e1000_pch_spt) {
u16 data;
@@ -1462,14 +1463,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
if (speed == SPEED_1000) {
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
-   return ret_val;
+   goto out;
 
ret_val = e1e_rphy_locked(hw,
  PHY_REG(776, 20),
  );
if (ret_val) {
hw->phy.ops.release(hw);
-   return ret_val;
+   goto out;
}
 
ptr_gap = (data & (0x3FF << 2)) >> 2;
@@ -1483,18 +1484,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
}
hw->phy.ops.release(hw);
if (ret_val)
-   return ret_val;
+   goto out;
} else {
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
-   return ret_val;
+   goto out;
 
ret_val = e1e_wphy_locked(hw,
  PHY_REG(776, 20),
  0xC023);
hw->phy.ops.release(hw);
if (ret_val)
-   return ret_val;
+ 

[RFC PATCH] e1000e: Fix link check race condition.

2018-02-25 Thread Benjamin Poirier
Alex reported the following race condition:

/* link goes up... interrupt... schedule watchdog */
\ e1000_watchdog_task
\ e1000e_has_link
\ hw->mac.ops.check_for_link() === e1000e_check_for_copper_link
\ e1000e_phy_has_link_generic(..., )
link = true

 /* link goes down... interrupt */
 \ e1000_msix_other
 hw->mac.get_link_status = true

/* link is up */
mac->get_link_status = false

link_active = true
/* link_active is true, wrongly, and stays so because
 * get_link_status is false */

Avoid this problem by making sure that we don't set get_link_status = false
after having checked the link.

It seems this problem has been present since the introduction of e1000e.

Link: https://lkml.org/lkml/2018/1/29/338
Reported-by: Alexander Duyck 
Signed-off-by: Benjamin Poirier 
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 41 -
 drivers/net/ethernet/intel/e1000e/mac.c | 14 +++---
 2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c 
b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index ff308b05d68c..3c2c4f87e075 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1386,6 +1386,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
 */
if (!mac->get_link_status)
return 1;
+   mac->get_link_status = false;
 
/* First we want to see if the MII Status Register reports
 * link.  If so, then we want to get the current speed/duplex
@@ -1393,12 +1394,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
 */
ret_val = e1000e_phy_has_link_generic(hw, 1, 0, );
if (ret_val)
-   return ret_val;
+   goto out;
 
if (hw->mac.type == e1000_pchlan) {
ret_val = e1000_k1_gig_workaround_hv(hw, link);
if (ret_val)
-   return ret_val;
+   goto out;
}
 
/* When connected at 10Mbps half-duplex, some parts are excessively
@@ -1431,7 +1432,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
 
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
-   return ret_val;
+   goto out;
 
if (hw->mac.type == e1000_pch2lan)
emi_addr = I82579_RX_CONFIG;
@@ -1453,7 +1454,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
hw->phy.ops.release(hw);
 
if (ret_val)
-   return ret_val;
+   goto out;
 
if (hw->mac.type >= e1000_pch_spt) {
u16 data;
@@ -1462,14 +1463,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
if (speed == SPEED_1000) {
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
-   return ret_val;
+   goto out;
 
ret_val = e1e_rphy_locked(hw,
  PHY_REG(776, 20),
  );
if (ret_val) {
hw->phy.ops.release(hw);
-   return ret_val;
+   goto out;
}
 
ptr_gap = (data & (0x3FF << 2)) >> 2;
@@ -1483,18 +1484,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct 
e1000_hw *hw)
}
hw->phy.ops.release(hw);
if (ret_val)
-   return ret_val;
+   goto out;
} else {
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
-   return ret_val;
+   goto out;
 
ret_val = e1e_wphy_locked(hw,
  PHY_REG(776, 20),
  0xC023);
hw->phy.ops.release(hw);
if (ret_val)
-   return ret_val;
+   goto out;