[PATCH net] ibmvnic: Skip fatal error reset after passive init

2020-04-30 Thread Juliet Kim
During an MTU change, the following sequence of events may occur.
Client-driven CRQ initialization fails because the partner's CRQ is
closed, so the client enqueues a reset task for FATAL_ERROR. Passive
(server-driven) CRQ initialization then succeeds, so the client
releases the CRQ and enqueues a reset task for failover. If the
passive CRQ initialization occurs before the FATAL reset task is
processed, the FATAL error reset task tries to access a CRQ message
queue that has already been freed, causing an oops. The problem is
most likely to occur during a DLPAR add of a vNIC with a non-default
MTU, because the DLPAR process automatically issues a change-MTU
request.

Fix this by not processing fatal error reset if CRQ is passively
initialized after client-driven CRQ initialization fails.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 4bd33245bad6..3de549c6c693 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2189,7 +2189,8 @@ static void __ibmvnic_reset(struct work_struct *work)
rc = do_hard_reset(adapter, rwi, reset_state);
rtnl_unlock();
}
-   } else {
+   } else if (!(rwi->reset_reason == VNIC_RESET_FATAL &&
+   adapter->from_passive_init)) {
rc = do_reset(adapter, rwi, reset_state);
}
kfree(rwi);
-- 
2.16.4



Re: [PATCH net] ibmvnic: Fall back to 16 H_SEND_SUB_CRQ_INDIRECT entries with old FW

2020-04-28 Thread Juliet Kim


On 4/28/20 10:35 AM, Thomas Falcon wrote:
> On 4/27/20 12:33 PM, Juliet Kim wrote:
>> The maximum entries for H_SEND_SUB_CRQ_INDIRECT has increased on
>> some platforms from 16 to 128. If Live Partition Mobility is used
>> to migrate a running OS image from a newer source platform to an
>> older target platform, then H_SEND_SUB_CRQ_INDIRECT will fail with
>> H_PARAMETER if 128 entries are queued.
>>
>> Fix this by falling back to 16 entries if H_PARAMETER is returned
>> from the hcall().
>
> Thanks for the submission, but I am having a hard time believing that this is 
> what is happening since the driver does not support sending multiple frames 
> per hypervisor call at this time. Even if it were the case, this approach 
> would omit frame data needed by the VF, so the second attempt may still fail. 
> Are there system logs available that show the driver is attempting to send 
> transmissions with greater than 16 descriptors?
>
> Thanks,
>
> Tom
>
>
I am trying to confirm.

Juliet

>>
>> Signed-off-by: Juliet Kim 
>> ---
>>   drivers/net/ethernet/ibm/ibmvnic.c | 11 +++
>>   1 file changed, 11 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
>> b/drivers/net/ethernet/ibm/ibmvnic.c
>> index 4bd33245bad6..b66c2f26a427 100644
>> --- a/drivers/net/ethernet/ibm/ibmvnic.c
>> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
>> @@ -1656,6 +1656,17 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
>> struct net_device *netdev)
>>   lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
>>  (u64)tx_buff->indir_dma,
>>  (u64)num_entries);
>> +
>> +    /* Old firmware accepts max 16 num_entries */
>> +    if (lpar_rc == H_PARAMETER && num_entries > 16) {
>> +    tx_crq.v1.n_crq_elem = 16;
>> +    tx_buff->num_entries = 16;
>> +    lpar_rc = send_subcrq_indirect(adapter,
>> +   handle_array[queue_num],
>> +   (u64)tx_buff->indir_dma,
>> +   16);
>> +    }
>> +
>>   dma_unmap_single(dev, tx_buff->indir_dma,
>>    sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
>>   } else {


[PATCH net] ibmvnic: Fall back to 16 H_SEND_SUB_CRQ_INDIRECT entries with old FW

2020-04-27 Thread Juliet Kim
The maximum entries for H_SEND_SUB_CRQ_INDIRECT has increased on
some platforms from 16 to 128. If Live Partition Mobility is used
to migrate a running OS image from a newer source platform to an
older target platform, then H_SEND_SUB_CRQ_INDIRECT will fail with
H_PARAMETER if 128 entries are queued.

Fix this by falling back to 16 entries if H_PARAMETER is returned
from the hcall().

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 4bd33245bad6..b66c2f26a427 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1656,6 +1656,17 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
   (u64)tx_buff->indir_dma,
   (u64)num_entries);
+
+   /* Old firmware accepts max 16 num_entries */
+   if (lpar_rc == H_PARAMETER && num_entries > 16) {
+   tx_crq.v1.n_crq_elem = 16;
+   tx_buff->num_entries = 16;
+   lpar_rc = send_subcrq_indirect(adapter,
+  handle_array[queue_num],
+  (u64)tx_buff->indir_dma,
+  16);
+   }
+
dma_unmap_single(dev, tx_buff->indir_dma,
 sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
} else {
-- 
2.18.1



[PATCH net v2] ibmvnic: Do not process device remove during device reset

2020-03-10 Thread Juliet Kim
The ibmvnic driver does not check the device state when the device
is removed. If the device is removed while a device reset is being
processed, the remove may free structures needed by the reset,
causing an oops.

Fix this by checking the device state before processing device remove.
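
A condensed sketch of the two sides of the serialization added by the
patch below (names are the ones introduced by the patch; the reset
worker's loop and error paths are omitted):

	/* __ibmvnic_reset(): claim the adapter before running a reset */
	spin_lock_irqsave(&adapter->state_lock, flags);
	if (adapter->state == VNIC_REMOVING ||
	    adapter->state == VNIC_REMOVED) {
		/* device is going away; drop this work item */
		spin_unlock_irqrestore(&adapter->state_lock, flags);
		kfree(rwi);
		return;
	}
	if (!saved_state) {
		reset_state = adapter->state;
		adapter->state = VNIC_RESETTING;
		saved_state = true;
	}
	spin_unlock_irqrestore(&adapter->state_lock, flags);

	/* ibmvnic_remove(): refuse teardown while a reset is in flight */
	spin_lock_irqsave(&adapter->state_lock, flags);
	if (adapter->state == VNIC_RESETTING) {
		spin_unlock_irqrestore(&adapter->state_lock, flags);
		return -EBUSY;
	}
	adapter->state = VNIC_REMOVING;
	spin_unlock_irqrestore(&adapter->state_lock, flags);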

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 24 ++--
 drivers/net/ethernet/ibm/ibmvnic.h |  6 +-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index c75239d8820f..4bd33245bad6 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2142,6 +2142,8 @@ static void __ibmvnic_reset(struct work_struct *work)
 {
struct ibmvnic_rwi *rwi;
struct ibmvnic_adapter *adapter;
+   bool saved_state = false;
+   unsigned long flags;
u32 reset_state;
int rc = 0;
 
@@ -2153,17 +2155,25 @@ static void __ibmvnic_reset(struct work_struct *work)
return;
}
 
-   reset_state = adapter->state;
-
rwi = get_next_rwi(adapter);
while (rwi) {
+   spin_lock_irqsave(&adapter->state_lock, flags);
+
if (adapter->state == VNIC_REMOVING ||
adapter->state == VNIC_REMOVED) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
kfree(rwi);
rc = EBUSY;
break;
}
 
+   if (!saved_state) {
+   reset_state = adapter->state;
+   adapter->state = VNIC_RESETTING;
+   saved_state = true;
+   }
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
/* CHANGE_PARAM requestor holds rtnl_lock */
rc = do_change_param_reset(adapter, rwi, reset_state);
@@ -5091,6 +5101,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
  __ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
+   spin_lock_init(&adapter->state_lock);
mutex_init(&adapter->fw_lock);
init_completion(&adapter->init_done);
init_completion(&adapter->fw_done);
@@ -5163,8 +5174,17 @@ static int ibmvnic_remove(struct vio_dev *dev)
 {
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+   unsigned long flags;
+
+   spin_lock_irqsave(&adapter->state_lock, flags);
+   if (adapter->state == VNIC_RESETTING) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+   return -EBUSY;
+   }
 
adapter->state = VNIC_REMOVING;
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
rtnl_lock();
unregister_netdevice(netdev);
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 60eccaf91b12..f8416e1d4cf0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -941,7 +941,8 @@ enum vnic_state {VNIC_PROBING = 1,
 VNIC_CLOSING,
 VNIC_CLOSED,
 VNIC_REMOVING,
-VNIC_REMOVED};
+VNIC_REMOVED,
+VNIC_RESETTING};
 
 enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
   VNIC_RESET_MOBILITY,
@@ -1090,4 +1091,7 @@ struct ibmvnic_adapter {
 
struct ibmvnic_tunables desired;
struct ibmvnic_tunables fallback;
+
+   /* Used for serialization of state field */
+   spinlock_t state_lock;
 };
-- 
2.25.0



[PATCH net] ibmvnic: Do not process device remove during device reset

2020-03-09 Thread Juliet Kim
The ibmvnic driver does not check the device state when the device
is removed. If the device is removed while a device reset is being 
processed, the remove may free structures needed by the reset, 
causing an oops.

Fix this by checking the device state before processing device remove.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 24 ++--
 drivers/net/ethernet/ibm/ibmvnic.h |  6 +-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index c75239d8820f..7ef1ae0d49bc 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2144,6 +2144,8 @@ static void __ibmvnic_reset(struct work_struct *work)
struct ibmvnic_adapter *adapter;
u32 reset_state;
int rc = 0;
+   unsigned long flags;
+   bool saved_state = false;
 
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
@@ -2153,17 +2155,25 @@ static void __ibmvnic_reset(struct work_struct *work)
return;
}
 
-   reset_state = adapter->state;
-
rwi = get_next_rwi(adapter);
while (rwi) {
+   spin_lock_irqsave(&adapter->state_lock, flags);
+
if (adapter->state == VNIC_REMOVING ||
adapter->state == VNIC_REMOVED) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
kfree(rwi);
rc = EBUSY;
break;
}
 
+   if (!saved_state) {
+   reset_state = adapter->state;
+   adapter->state = VNIC_RESETTING;
+   saved_state = true;
+   }
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
/* CHANGE_PARAM requestor holds rtnl_lock */
rc = do_change_param_reset(adapter, rwi, reset_state);
@@ -5091,6 +5101,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
  __ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
+   spin_lock_init(&adapter->state_lock);
mutex_init(&adapter->fw_lock);
init_completion(&adapter->init_done);
init_completion(&adapter->fw_done);
@@ -5161,10 +5172,19 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
 
 static int ibmvnic_remove(struct vio_dev *dev)
 {
+   unsigned long flags;
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
+   spin_lock_irqsave(&adapter->state_lock, flags);
+   if (adapter->state == VNIC_RESETTING) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+   return -EBUSY;
+   }
+
adapter->state = VNIC_REMOVING;
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
rtnl_lock();
unregister_netdevice(netdev);
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 60eccaf91b12..971505e4cc52 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -941,7 +941,8 @@ enum vnic_state {VNIC_PROBING = 1,
 VNIC_CLOSING,
 VNIC_CLOSED,
 VNIC_REMOVING,
-VNIC_REMOVED};
+VNIC_REMOVED,
+VNIC_RESETTING};
 
 enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
   VNIC_RESET_MOBILITY,
@@ -1090,4 +1091,7 @@ struct ibmvnic_adapter {
 
struct ibmvnic_tunables desired;
struct ibmvnic_tunables fallback;
+
+   /* lock to protect state field */
+   spinlock_t state_lock;
 };
-- 
2.25.0



[PATCH] ibmvnic: Do not process device remove during device reset

2020-03-09 Thread Juliet Kim
The ibmvnic driver does not check the device state when the device
is removed. If the device is removed while a device reset is being 
processed, the remove may free structures needed by the reset, 
causing an oops.

Fix this by checking the device state before processing device remove.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 24 ++--
 drivers/net/ethernet/ibm/ibmvnic.h |  6 +-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index c75239d8820f..7ef1ae0d49bc 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2144,6 +2144,8 @@ static void __ibmvnic_reset(struct work_struct *work)
struct ibmvnic_adapter *adapter;
u32 reset_state;
int rc = 0;
+   unsigned long flags;
+   bool saved_state = false;
 
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
@@ -2153,17 +2155,25 @@ static void __ibmvnic_reset(struct work_struct *work)
return;
}
 
-   reset_state = adapter->state;
-
rwi = get_next_rwi(adapter);
while (rwi) {
+   spin_lock_irqsave(&adapter->state_lock, flags);
+
if (adapter->state == VNIC_REMOVING ||
adapter->state == VNIC_REMOVED) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
kfree(rwi);
rc = EBUSY;
break;
}
 
+   if (!saved_state) {
+   reset_state = adapter->state;
+   adapter->state = VNIC_RESETTING;
+   saved_state = true;
+   }
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
/* CHANGE_PARAM requestor holds rtnl_lock */
rc = do_change_param_reset(adapter, rwi, reset_state);
@@ -5091,6 +5101,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
  __ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
+   spin_lock_init(&adapter->state_lock);
mutex_init(&adapter->fw_lock);
init_completion(&adapter->init_done);
init_completion(&adapter->fw_done);
@@ -5161,10 +5172,19 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
 
 static int ibmvnic_remove(struct vio_dev *dev)
 {
+   unsigned long flags;
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
+   spin_lock_irqsave(&adapter->state_lock, flags);
+   if (adapter->state == VNIC_RESETTING) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+   return -EBUSY;
+   }
+
adapter->state = VNIC_REMOVING;
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
rtnl_lock();
unregister_netdevice(netdev);
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 60eccaf91b12..971505e4cc52 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -941,7 +941,8 @@ enum vnic_state {VNIC_PROBING = 1,
 VNIC_CLOSING,
 VNIC_CLOSED,
 VNIC_REMOVING,
-VNIC_REMOVED};
+VNIC_REMOVED,
+VNIC_RESETTING};
 
 enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
   VNIC_RESET_MOBILITY,
@@ -1090,4 +1091,7 @@ struct ibmvnic_adapter {
 
struct ibmvnic_tunables desired;
struct ibmvnic_tunables fallback;
+
+   /* lock to protect state field */
+   spinlock_t state_lock;
 };
-- 
2.25.0



[PATCH 1/2] net/ibmvnic: Revert "net/ibmvnic: Fix EOI when running in XIVE mode"

2019-11-20 Thread Juliet Kim
This reverts commit 11d49ce9f7946dfed4dcf5dbde865c78058b50ab
("net/ibmvnic: Fix EOI when running in XIVE mode.") since it has
the unintended effect of changing the interrupt priority and emits
a warning when running in legacy XICS mode.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index f59d9a8..2b073a3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2878,10 +2878,12 @@ static int enable_scrq_irq(struct ibmvnic_adapter 
*adapter,
 
if (test_bit(0, &adapter->resetting) &&
adapter->reset_reason == VNIC_RESET_MOBILITY) {
-   struct irq_desc *desc = irq_to_desc(scrq->irq);
-   struct irq_chip *chip = irq_desc_get_chip(desc);
+   u64 val = (0xff000000) | scrq->hw_irq;

-   chip->irq_eoi(&desc->irq_data);
+   rc = plpar_hcall_norets(H_EOI, val);
+   if (rc)
+   dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
+   val, rc);
}
 
rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
-- 
1.8.3.1



[PATCH 2/2] net/ibmvnic: Ignore H_FUNCTION return from H_EOI to tolerate XIVE mode

2019-11-20 Thread Juliet Kim
Reversion of commit 11d49ce9f7946dfed4dcf5dbde865c78058b50ab
("net/ibmvnic: Fix EOI when running in XIVE mode.") leaves us
calling H_EOI even in XIVE mode. That will fail with H_FUNCTION
because H_EOI is not supported in that mode. That failure is
harmless. Ignore it so we can use common code for both XICS and
XIVE.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 2b073a3..0686ded 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2881,7 +2881,10 @@ static int enable_scrq_irq(struct ibmvnic_adapter 
*adapter,
u64 val = (0xff000000) | scrq->hw_irq;
 
rc = plpar_hcall_norets(H_EOI, val);
-   if (rc)
+   /* H_EOI would fail with rc = H_FUNCTION when running
+* in XIVE mode which is expected, but not an error.
+*/
+   if (rc && (rc != H_FUNCTION))
dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
val, rc);
}
-- 
1.8.3.1



[PATCH 0/2] net/ibmvnic: Support both XIVE and XICS modes in ibmvnic

2019-11-20 Thread Juliet Kim
This series aims to support both XICS and XIVE while avoiding
a regression in behavior when a system runs in XICS mode.

Patch 1 reverts commit 11d49ce9f7946dfed4dcf5dbde865c78058b50ab
("net/ibmvnic: Fix EOI when running in XIVE mode.")

Patch 2 ignores the H_FUNCTION return from H_EOI to tolerate XIVE mode

Juliet Kim (2):
  net/ibmvnic: Revert "net/ibmvnic: Fix EOI when running in XIVE mode"
  net/ibmvnic: Ignore H_FUNCTION return from H_EOI to tolerate XIVE mode

 drivers/net/ethernet/ibm/ibmvnic.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

-- 
1.8.3.1



[PATCH net/ibmvnic 1/2] Revert "net/ibmvnic: Fix EOI when running in XIVE mode"

2019-11-20 Thread Juliet Kim
This reverts commit 11d49ce9f7946dfed4dcf5dbde865c78058b50ab
("net/ibmvnic: Fix EOI when running in XIVE mode.") since it has
the unintended effect of changing the interrupt priority and emits
a warning when running in legacy XICS mode.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index f59d9a8..2b073a3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2878,10 +2878,12 @@ static int enable_scrq_irq(struct ibmvnic_adapter 
*adapter,
 
if (test_bit(0, &adapter->resetting) &&
adapter->reset_reason == VNIC_RESET_MOBILITY) {
-   struct irq_desc *desc = irq_to_desc(scrq->irq);
-   struct irq_chip *chip = irq_desc_get_chip(desc);
+   u64 val = (0xff000000) | scrq->hw_irq;

-   chip->irq_eoi(&desc->irq_data);
+   rc = plpar_hcall_norets(H_EOI, val);
+   if (rc)
+   dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
+   val, rc);
}
 
rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
-- 
1.8.3.1



[PATCH net/ibmvnic 2/2] net/ibmvnic: Ignore H_FUNCTION return from H_EOI to tolerate XIVE mode

2019-11-20 Thread Juliet Kim
Reversion of commit 11d49ce9f7946dfed4dcf5dbde865c78058b50ab
("net/ibmvnic: Fix EOI when running in XIVE mode.") leaves us
calling H_EOI even in XIVE mode. That will fail with H_FUNCTION
because H_EOI is not supported in that mode. That failure is
harmless. Ignore it so we can use common code for both XICS and
XIVE.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 2b073a3..0686ded 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2881,7 +2881,10 @@ static int enable_scrq_irq(struct ibmvnic_adapter 
*adapter,
u64 val = (0xff000000) | scrq->hw_irq;
 
rc = plpar_hcall_norets(H_EOI, val);
-   if (rc)
+   /* H_EOI would fail with rc = H_FUNCTION when running
+* in XIVE mode which is expected, but not an error.
+*/
+   if (rc && (rc != H_FUNCTION))
dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
val, rc);
}
-- 
1.8.3.1



[PATCH net/ibmvnic 0/2] Support both XIVE and XICS modes in ibmvnic

2019-11-20 Thread Juliet Kim
This series aims to support both XICS and XIVE while avoiding
a regression in behavior when a system runs in XICS mode.

Patch 1 reverts commit 11d49ce9f7946dfed4dcf5dbde865c78058b50ab
("net/ibmvnic: Fix EOI when running in XIVE mode.")

Patch 2 ignores the H_FUNCTION return from H_EOI to tolerate XIVE mode

Juliet Kim (2):
  Revert "net/ibmvnic: Fix EOI when running in XIVE mode"
  net/ibmvnic: Ignore H_FUNCTION return from H_EOI to tolerate XIVE mode

 drivers/net/ethernet/ibm/ibmvnic.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

-- 
1.8.3.1



[PATCH v4 2/2] net/ibmvnic: prevent more than one thread from running in reset

2019-09-20 Thread Juliet Kim
The current code allows more than one thread to run in reset, which
can corrupt the adapter data. Check adapter->resetting before
performing a reset; if another reset is already running, delay
(100 msec) before trying again.
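
The single-runner guarantee comes from atomically testing and setting
bit 0 of adapter->resetting, so the check and the claim cannot be
interleaved with another CPU's. A condensed sketch of the pattern
(names and constants are those used in the patch below; the body of
the reset loop is elided):

	static void __ibmvnic_reset(struct work_struct *work)
	{
		struct ibmvnic_adapter *adapter;

		adapter = container_of(work, struct ibmvnic_adapter,
				       ibmvnic_reset);

		/* atomically claim the "reset in progress" bit */
		if (test_and_set_bit_lock(0, &adapter->resetting)) {
			/* another reset is running; retry later */
			schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
					      IBMVNIC_RESET_DELAY);
			return;
		}

		/* ... process queued reset work items ... */

		clear_bit_unlock(0, &adapter->resetting);
	}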

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 40 --
 drivers/net/ethernet/ibm/ibmvnic.h |  5 -
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index ba340aaff1b3..6aef574acdf2 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1207,7 +1207,7 @@ static void ibmvnic_cleanup(struct net_device *netdev)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
/* ensure that transmissions are stopped if called by do_reset */
-   if (adapter->resetting)
+   if (test_bit(0, &adapter->resetting))
netif_tx_disable(netdev);
else
netif_tx_stop_all_queues(netdev);
@@ -1428,7 +1428,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
u8 proto = 0;
netdev_tx_t ret = NETDEV_TX_OK;
 
-   if (adapter->resetting) {
+   if (test_bit(0, &adapter->resetting)) {
if (!netif_subqueue_stopped(netdev, skb))
netif_stop_subqueue(netdev, queue_num);
dev_kfree_skb_any(skb);
@@ -2054,6 +2054,12 @@ static void __ibmvnic_reset(struct work_struct *work)
 
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
+   if (test_and_set_bit_lock(0, &adapter->resetting)) {
+   schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
+ IBMVNIC_RESET_DELAY);
+   return;
+   }
+
reset_state = adapter->state;
 
rwi = get_next_rwi(adapter);
@@ -2095,6 +2101,10 @@ static void __ibmvnic_reset(struct work_struct *work)
break;
 
rwi = get_next_rwi(adapter);
+
+   if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
+   rwi->reset_reason == VNIC_RESET_MOBILITY))
+   adapter->force_reset_recovery = true;
}
 
if (adapter->wait_for_reset) {
@@ -2107,7 +2117,16 @@ static void __ibmvnic_reset(struct work_struct *work)
free_all_rwi(adapter);
}
 
-   adapter->resetting = false;
+   clear_bit_unlock(0, &adapter->resetting);
+}
+
+static void __ibmvnic_delayed_reset(struct work_struct *work)
+{
+   struct ibmvnic_adapter *adapter;
+
+   adapter = container_of(work, struct ibmvnic_adapter,
+  ibmvnic_delayed_reset.work);
+   __ibmvnic_reset(&adapter->ibmvnic_reset);
 }
 
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
@@ -2162,7 +2181,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
rwi->reset_reason = reason;
list_add_tail(&rwi->list, &adapter->rwi_list);
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
-   adapter->resetting = true;
netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
schedule_work(&adapter->ibmvnic_reset);
 
@@ -2207,7 +2225,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
u16 offset;
u8 flags = 0;
 
-   if (unlikely(adapter->resetting &&
+   if (unlikely(test_bit(0, &adapter->resetting) &&
 adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
napi_complete_done(napi, frames_processed);
@@ -2858,7 +2876,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter 
*adapter,
return 1;
}
 
-   if (adapter->resetting &&
+   if (test_bit(0, &adapter->resetting) &&
adapter->reset_reason == VNIC_RESET_MOBILITY) {
u64 val = (0xff000000) | scrq->hw_irq;
 
@@ -3408,7 +3426,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter 
*adapter,
if (rc) {
if (rc == H_CLOSED) {
dev_warn(dev, "CRQ Queue closed\n");
-   if (adapter->resetting)
+   if (test_bit(0, &adapter->resetting))
ibmvnic_reset(adapter, VNIC_RESET_FATAL);
}
 
@@ -4483,7 +4501,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
case IBMVNIC_CRQ_XPORT_EVENT:
netif_carrier_off(netdev);
adapter->crq.active = false;
-   if (adapter->resetting)
+   if (test_bit(0, &adapter->resetting))
adapter->force_reset_recovery = true;
if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
  

[PATCH v4 1/2] net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can run

2019-09-20 Thread Juliet Kim
Commit a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset") 
made the change to hold the RTNL lock during a reset to avoid deadlock 
but linkwatch_event is fired during the reset and needs the RTNL lock. 
That keeps linkwatch_event process from proceeding until the reset 
is complete. The reset process cannot tolerate the linkwatch_event 
processing after reset completes, so release the RTNL lock during the 
process to allow a chance for linkwatch_event to run during reset. 
This does not guarantee that the linkwatch_event will be processed as 
soon as link state changes, but is an improvement over the current code 
where linkwatch_event processing is always delayed, which prevents 
transmissions on the device from being deactivated, leading the
transmit watchdog timer to time out.

Release the RTNL lock before link state change and re-acquire after 
the link state change to allow linkwatch_event to grab the RTNL lock 
and run during the reset.
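
In condensed form, the pattern this applies around the blocking
link-state change is (a sketch; the full context is in the diff below):

	rtnl_lock();
	...
	adapter->state = VNIC_CLOSING;

	/* let linkwatch_event, which needs RTNL, run while we wait */
	rtnl_unlock();
	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
	rtnl_lock();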

Fixes: a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset")
Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 224 ++---
 drivers/net/ethernet/ibm/ibmvnic.h |   1 +
 2 files changed, 157 insertions(+), 68 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 5cb55ea671e3..ba340aaff1b3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1723,6 +1723,86 @@ static int ibmvnic_set_mac(struct net_device *netdev, 
void *p)
return rc;
 }
 
+/**
+ * do_change_param_reset returns zero if we are able to keep processing reset
+ * events, or non-zero if we hit a fatal error and must halt.
+ */
+static int do_change_param_reset(struct ibmvnic_adapter *adapter,
+struct ibmvnic_rwi *rwi,
+u32 reset_state)
+{
+   struct net_device *netdev = adapter->netdev;
+   int i, rc;
+
+   netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
+  rwi->reset_reason);
+
+   netif_carrier_off(netdev);
+   adapter->reset_reason = rwi->reset_reason;
+
+   ibmvnic_cleanup(netdev);
+
+   if (reset_state == VNIC_OPEN) {
+   rc = __ibmvnic_close(netdev);
+   if (rc)
+   return rc;
+   }
+
+   release_resources(adapter);
+   release_sub_crqs(adapter, 1);
+   release_crq_queue(adapter);
+
+   adapter->state = VNIC_PROBED;
+
+   rc = init_crq_queue(adapter);
+
+   if (rc) {
+   netdev_err(adapter->netdev,
+  "Couldn't initialize crq. rc=%d\n", rc);
+   return rc;
+   }
+
+   rc = ibmvnic_reset_init(adapter);
+   if (rc)
+   return IBMVNIC_INIT_FAILED;
+
+   /* If the adapter was in PROBE state prior to the reset,
+* exit here.
+*/
+   if (reset_state == VNIC_PROBED)
+   return 0;
+
+   rc = ibmvnic_login(netdev);
+   if (rc) {
+   adapter->state = reset_state;
+   return rc;
+   }
+
+   rc = init_resources(adapter);
+   if (rc)
+   return rc;
+
+   ibmvnic_disable_irqs(adapter);
+
+   adapter->state = VNIC_CLOSED;
+
+   if (reset_state == VNIC_CLOSED)
+   return 0;
+
+   rc = __ibmvnic_open(netdev);
+   if (rc)
+   return IBMVNIC_OPEN_FAILED;
+
+   /* refresh device's multicast list */
+   ibmvnic_set_multi(netdev);
+
+   /* kick napi */
+   for (i = 0; i < adapter->req_rx_queues; i++)
+   napi_schedule(&adapter->napi[i]);
+
+   return 0;
+}
+
 /**
  * do_reset returns zero if we are able to keep processing reset events, or
  * non-zero if we hit a fatal error and must halt.
@@ -1738,6 +1818,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n",
   rwi->reset_reason);
 
+   rtnl_lock();
+
netif_carrier_off(netdev);
adapter->reset_reason = rwi->reset_reason;
 
@@ -1751,16 +1833,25 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (reset_state == VNIC_OPEN &&
adapter->reset_reason != VNIC_RESET_MOBILITY &&
adapter->reset_reason != VNIC_RESET_FAILOVER) {
-   rc = __ibmvnic_close(netdev);
+   adapter->state = VNIC_CLOSING;
+
+   /* Release the RTNL lock before link state change and
+* re-acquire after the link state change to allow
+* linkwatch_event to grab the RTNL lock and run during
+* a reset.
+*/
+   rtnl_unlock();
+   rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+   rtnl_lock();
if (rc)
- 

[PATCH v4 0/2] net/ibmvnic: serialization fixes

2019-09-20 Thread Juliet Kim
This series includes two fixes. The first improves reset code to allow 
linkwatch_event to proceed during reset. The second ensures that no more
than one thread runs in reset at a time. 

v2:
- Separate change param reset from do_reset()
- Return IBMVNIC_OPEN_FAILED if __ibmvnic_open fails
- Remove setting wait_for_reset to false from __ibmvnic_reset(), this
  is done in wait_for_reset()
- Move the check for force_reset_recovery from patch 1 to patch 2

v3:
- Restore reset’s successful return in open failure case

v4:
- Change resetting flag access to atomic

Juliet Kim (2):
  net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can run
  net/ibmvnic: prevent more than one thread from running in reset

 drivers/net/ethernet/ibm/ibmvnic.c | 262 ++---
 drivers/net/ethernet/ibm/ibmvnic.h |   6 +-
 2 files changed, 190 insertions(+), 78 deletions(-)

-- 
2.16.4



Re: [PATCH v3 2/2] net/ibmvnic: prevent more than one thread from running in reset

2019-09-18 Thread Juliet Kim


On 9/18/19 1:12 AM, Michael Ellerman wrote:
> Hi Juliet,
>
> Juliet Kim  writes:
>> Signed-off-by: Juliet Kim 
>> ---
>>  drivers/net/ethernet/ibm/ibmvnic.c | 23 ++-
>>  drivers/net/ethernet/ibm/ibmvnic.h |  3 +++
>>  2 files changed, 25 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
>> b/drivers/net/ethernet/ibm/ibmvnic.c
>> index ba340aaff1b3..f344ccd68ad9 100644
>> --- a/drivers/net/ethernet/ibm/ibmvnic.c
>> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
>> @@ -2054,6 +2054,13 @@ static void __ibmvnic_reset(struct work_struct *work)
>>  
>>  adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
>>  
>> +if (adapter->resetting) {
>> +schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
>> +  IBMVNIC_RESET_DELAY);
>> +return;
>> +}
>> +
>> +adapter->resetting = true;
>>  reset_state = adapter->state;
> Is there some locking/serialisation around this?
>
> Otherwise that looks very racy. ie. two CPUs could both see
> adapter->resetting == false, then both set it to true, and then continue
> executing and stomp on each other.
>
> cheers

I agree there may be a race here. Thank you for reviewing.

I will address it in the next version.
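
As an illustration, one possible interleaving with the plain boolean
flag used at this point in the series (a hypothetical schedule):

	CPU0: if (adapter->resetting)	/* reads false, falls through */
	CPU1: if (adapter->resetting)	/* also reads false */
	CPU0: adapter->resetting = true;
	CPU1: adapter->resetting = true;
	/* both CPUs now run the reset path concurrently */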



Re: [PATCH v2 0/2] net/ibmvnic: serialization fixes

2019-09-17 Thread Juliet Kim
I sent v3.  Please disregard v2.

On 9/17/19 9:52 AM, Juliet Kim wrote:
> This series includes two fixes. The first improves reset code to allow 
> linkwatch_event to proceed during reset. The second ensures that no more
> than one thread runs in reset at a time. 
>
> v2: 
> - Separate change param reset from do_reset()
> - Return IBMVNIC_OPEN_FAILED if __ibmvnic_open fails 
> - Remove setting wait_for_reset to false from __ibmvnic_reset(), this 
>   is done in wait_for_reset()
> - Move the check for force_reset_recovery from patch 1 to patch 2
>
> Juliet Kim (2):
>   net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can run
>   net/ibmvnic: prevent more than one thread from running in reset
>
>  drivers/net/ethernet/ibm/ibmvnic.c | 244 
> ++---
>  drivers/net/ethernet/ibm/ibmvnic.h |   4 +
>  2 files changed, 180 insertions(+), 68 deletions(-)
>


[PATCH v3 0/2] net/ibmvnic: serialization fixes

2019-09-17 Thread Juliet Kim
This series includes two fixes. The first improves reset code to allow
linkwatch_event to proceed during reset. The second ensures that no more
than one thread runs in reset at a time.

v2:
- Separate change param reset from do_reset()
- Return IBMVNIC_OPEN_FAILED in open failure case
- Remove setting wait_for_reset to false from __ibmvnic_reset(), this
  is done in wait_for_reset()
- Move the check for force_reset_recovery from patch 1 to patch 2

v3:
- Restore reset’s successful return in open failure case

Juliet Kim (2):
  net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can run
  net/ibmvnic: prevent more than one thread from running in reset

 drivers/net/ethernet/ibm/ibmvnic.c | 245 +++--
 drivers/net/ethernet/ibm/ibmvnic.h |   4 +
 2 files changed, 181 insertions(+), 68 deletions(-)

-- 
2.16.4



[PATCH v3 1/2] net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can run

2019-09-17 Thread Juliet Kim
Commit a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset")
made the change to hold the RTNL lock during a reset to avoid deadlock
but linkwatch_event is fired during the reset and needs the RTNL lock.
That keeps linkwatch_event process from proceeding until the reset
is complete. The reset process cannot tolerate the linkwatch_event
processing after reset completes, so release the RTNL lock during the
process to allow a chance for linkwatch_event to run during reset.
This does not guarantee that the linkwatch_event will be processed as
soon as link state changes, but is an improvement over the current code
where linkwatch_event processing is always delayed, which prevents
transmissions on the device from being deactivated, leading the
transmit watchdog timer to time out.

Release the RTNL lock before link state change and re-acquire after
the link state change to allow linkwatch_event to grab the RTNL lock
and run during the reset.

Fixes: a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset")
Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 224 ++---
 drivers/net/ethernet/ibm/ibmvnic.h |   1 +
 2 files changed, 157 insertions(+), 68 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 5cb55ea671e3..ba340aaff1b3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1723,6 +1723,86 @@ static int ibmvnic_set_mac(struct net_device *netdev, 
void *p)
return rc;
 }
 
+/**
+ * do_change_param_reset returns zero if we are able to keep processing reset
+ * events, or non-zero if we hit a fatal error and must halt.
+ */
+static int do_change_param_reset(struct ibmvnic_adapter *adapter,
+struct ibmvnic_rwi *rwi,
+u32 reset_state)
+{
+   struct net_device *netdev = adapter->netdev;
+   int i, rc;
+
+   netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
+  rwi->reset_reason);
+
+   netif_carrier_off(netdev);
+   adapter->reset_reason = rwi->reset_reason;
+
+   ibmvnic_cleanup(netdev);
+
+   if (reset_state == VNIC_OPEN) {
+   rc = __ibmvnic_close(netdev);
+   if (rc)
+   return rc;
+   }
+
+   release_resources(adapter);
+   release_sub_crqs(adapter, 1);
+   release_crq_queue(adapter);
+
+   adapter->state = VNIC_PROBED;
+
+   rc = init_crq_queue(adapter);
+
+   if (rc) {
+   netdev_err(adapter->netdev,
+  "Couldn't initialize crq. rc=%d\n", rc);
+   return rc;
+   }
+
+   rc = ibmvnic_reset_init(adapter);
+   if (rc)
+   return IBMVNIC_INIT_FAILED;
+
+   /* If the adapter was in PROBE state prior to the reset,
+* exit here.
+*/
+   if (reset_state == VNIC_PROBED)
+   return 0;
+
+   rc = ibmvnic_login(netdev);
+   if (rc) {
+   adapter->state = reset_state;
+   return rc;
+   }
+
+   rc = init_resources(adapter);
+   if (rc)
+   return rc;
+
+   ibmvnic_disable_irqs(adapter);
+
+   adapter->state = VNIC_CLOSED;
+
+   if (reset_state == VNIC_CLOSED)
+   return 0;
+
+   rc = __ibmvnic_open(netdev);
+   if (rc)
+   return IBMVNIC_OPEN_FAILED;
+
+   /* refresh device's multicast list */
+   ibmvnic_set_multi(netdev);
+
+   /* kick napi */
+   for (i = 0; i < adapter->req_rx_queues; i++)
+   napi_schedule(&adapter->napi[i]);
+
+   return 0;
+}
+
 /**
  * do_reset returns zero if we are able to keep processing reset events, or
  * non-zero if we hit a fatal error and must halt.
@@ -1738,6 +1818,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n",
   rwi->reset_reason);
 
+   rtnl_lock();
+
netif_carrier_off(netdev);
adapter->reset_reason = rwi->reset_reason;
 
@@ -1751,16 +1833,25 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (reset_state == VNIC_OPEN &&
adapter->reset_reason != VNIC_RESET_MOBILITY &&
adapter->reset_reason != VNIC_RESET_FAILOVER) {
-   rc = __ibmvnic_close(netdev);
+   adapter->state = VNIC_CLOSING;
+
+   /* Release the RTNL lock before link state change and
+* re-acquire after the link state change to allow
+* linkwatch_event to grab the RTNL lock and run during
+* a reset.
+*/
+   rtnl_unlock();
+   rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+   rtnl_lock();
if (rc)
- 

[PATCH v3 2/2] net/ibmvnic: prevent more than one thread from running in reset

2019-09-17 Thread Juliet Kim
Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 23 ++-
 drivers/net/ethernet/ibm/ibmvnic.h |  3 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index ba340aaff1b3..f344ccd68ad9 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2054,6 +2054,13 @@ static void __ibmvnic_reset(struct work_struct *work)
 
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
+   if (adapter->resetting) {
+   schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
+ IBMVNIC_RESET_DELAY);
+   return;
+   }
+
+   adapter->resetting = true;
reset_state = adapter->state;
 
rwi = get_next_rwi(adapter);
@@ -2095,6 +2102,10 @@ static void __ibmvnic_reset(struct work_struct *work)
break;
 
rwi = get_next_rwi(adapter);
+
+   if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
+   rwi->reset_reason == VNIC_RESET_MOBILITY))
+   adapter->force_reset_recovery = true;
}
 
if (adapter->wait_for_reset) {
@@ -2110,6 +2121,15 @@ static void __ibmvnic_reset(struct work_struct *work)
adapter->resetting = false;
 }
 
+static void __ibmvnic_delayed_reset(struct work_struct *work)
+{
+   struct ibmvnic_adapter *adapter;
+
+   adapter = container_of(work, struct ibmvnic_adapter,
+  ibmvnic_delayed_reset.work);
+   __ibmvnic_reset(&adapter->ibmvnic_reset);
+}
+
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 enum ibmvnic_reset_reason reason)
 {
@@ -2162,7 +2182,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
rwi->reset_reason = reason;
list_add_tail(&rwi->list, &adapter->rwi_list);
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
-   adapter->resetting = true;
netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
schedule_work(&adapter->ibmvnic_reset);
 
@@ -4933,6 +4952,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
spin_lock_init(&adapter->stats_lock);

INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+   INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
+ __ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
init_completion(&adapter->init_done);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 9d3d35cc91d6..4f4651d92cc1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -39,6 +39,8 @@
 #define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
 #define IBMVNIC_BUFFER_HLEN 500
 
+#define IBMVNIC_RESET_DELAY 100
+
 static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
 #define IBMVNIC_USE_SERVER_MAXES 0x1
"use-server-maxes"
@@ -1077,6 +1079,7 @@ struct ibmvnic_adapter {
spinlock_t rwi_lock;
struct list_head rwi_list;
struct work_struct ibmvnic_reset;
+   struct delayed_work ibmvnic_delayed_reset;
bool resetting;
bool napi_enabled, from_passive_init;
 
-- 
2.16.4



[PATCH v2 2/2] net/ibmvnic: prevent more than one thread from running in reset

2019-09-17 Thread Juliet Kim
The current code allows more than one thread to run in reset, which
can corrupt the adapter data. Check adapter->resetting before
performing a reset; if another reset is already running, delay
(100 msec) before trying again.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 23 ++-
 drivers/net/ethernet/ibm/ibmvnic.h |  3 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index fc760c1eb0b0..d3ebf4caa09c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2054,6 +2054,13 @@ static void __ibmvnic_reset(struct work_struct *work)
 
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
+   if (adapter->resetting) {
+   schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
+ IBMVNIC_RESET_DELAY);
+   return;
+   }
+
+   adapter->resetting = true;
reset_state = adapter->state;
 
rwi = get_next_rwi(adapter);
@@ -2094,6 +2101,10 @@ static void __ibmvnic_reset(struct work_struct *work)
break;
 
rwi = get_next_rwi(adapter);
+
+   if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
+   rwi->reset_reason == VNIC_RESET_MOBILITY))
+   adapter->force_reset_recovery = true;
}
 
if (adapter->wait_for_reset) {
@@ -2109,6 +2120,15 @@ static void __ibmvnic_reset(struct work_struct *work)
adapter->resetting = false;
 }
 
+static void __ibmvnic_delayed_reset(struct work_struct *work)
+{
+   struct ibmvnic_adapter *adapter;
+
+   adapter = container_of(work, struct ibmvnic_adapter,
+  ibmvnic_delayed_reset.work);
+   __ibmvnic_reset(&adapter->ibmvnic_reset);
+}
+
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 enum ibmvnic_reset_reason reason)
 {
@@ -2161,7 +2181,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
rwi->reset_reason = reason;
list_add_tail(&rwi->list, &adapter->rwi_list);
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
-   adapter->resetting = true;
netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
schedule_work(&adapter->ibmvnic_reset);
 
@@ -4932,6 +4951,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
spin_lock_init(&adapter->stats_lock);

INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+   INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
+ __ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
init_completion(&adapter->init_done);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 9d3d35cc91d6..4f4651d92cc1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -39,6 +39,8 @@
 #define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
 #define IBMVNIC_BUFFER_HLEN 500
 
+#define IBMVNIC_RESET_DELAY 100
+
 static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
 #define IBMVNIC_USE_SERVER_MAXES 0x1
"use-server-maxes"
@@ -1077,6 +1079,7 @@ struct ibmvnic_adapter {
spinlock_t rwi_lock;
struct list_head rwi_list;
struct work_struct ibmvnic_reset;
+   struct delayed_work ibmvnic_delayed_reset;
bool resetting;
bool napi_enabled, from_passive_init;
 
-- 
2.16.4



[PATCH v2 1/2] net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can run

2019-09-17 Thread Juliet Kim
Commit a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset") 
made the change to hold the RTNL lock during a reset to avoid deadlock 
but linkwatch_event is fired during the reset and needs the RTNL lock. 
That keeps linkwatch_event process from proceeding until the reset 
is complete. The reset process cannot tolerate the linkwatch_event 
processing after reset completes, so release the RTNL lock during the 
process to allow a chance for linkwatch_event to run during reset. 
This does not guarantee that the linkwatch_event will be processed as 
soon as link state changes, but is an improvement over the current code 
where linkwatch_event processing is always delayed, which prevents 
transmissions on the device from being deactivated, leading the
transmit watchdog timer to time out.

Release the RTNL lock before link state change and re-acquire after 
the link state change to allow linkwatch_event to grab the RTNL lock 
and run during the reset.

Fixes: a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset")
Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 223 ++---
 drivers/net/ethernet/ibm/ibmvnic.h |   1 +
 2 files changed, 156 insertions(+), 68 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 5cb55ea671e3..fc760c1eb0b0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1723,6 +1723,86 @@ static int ibmvnic_set_mac(struct net_device *netdev, 
void *p)
return rc;
 }
 
+/**
+ * do_change_param_reset returns zero if we are able to keep processing reset
+ * events, or non-zero if we hit a fatal error and must halt.
+ */
+static int do_change_param_reset(struct ibmvnic_adapter *adapter,
+struct ibmvnic_rwi *rwi,
+u32 reset_state)
+{
+   struct net_device *netdev = adapter->netdev;
+   int i, rc;
+
+   netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
+  rwi->reset_reason);
+
+   netif_carrier_off(netdev);
+   adapter->reset_reason = rwi->reset_reason;
+
+   ibmvnic_cleanup(netdev);
+
+   if (reset_state == VNIC_OPEN) {
+   rc = __ibmvnic_close(netdev);
+   if (rc)
+   return rc;
+   }
+
+   release_resources(adapter);
+   release_sub_crqs(adapter, 1);
+   release_crq_queue(adapter);
+
+   adapter->state = VNIC_PROBED;
+
+   rc = init_crq_queue(adapter);
+
+   if (rc) {
+   netdev_err(adapter->netdev,
+  "Couldn't initialize crq. rc=%d\n", rc);
+   return rc;
+   }
+
+   rc = ibmvnic_reset_init(adapter);
+   if (rc)
+   return IBMVNIC_INIT_FAILED;
+
+   /* If the adapter was in PROBE state prior to the reset,
+* exit here.
+*/
+   if (reset_state == VNIC_PROBED)
+   return 0;
+
+   rc = ibmvnic_login(netdev);
+   if (rc) {
+   adapter->state = reset_state;
+   return rc;
+   }
+
+   rc = init_resources(adapter);
+   if (rc)
+   return rc;
+
+   ibmvnic_disable_irqs(adapter);
+
+   adapter->state = VNIC_CLOSED;
+
+   if (reset_state == VNIC_CLOSED)
+   return 0;
+
+   rc = __ibmvnic_open(netdev);
+   if (rc)
+   return IBMVNIC_OPEN_FAILED;
+
+   /* refresh device's multicast list */
+   ibmvnic_set_multi(netdev);
+
+   /* kick napi */
+   for (i = 0; i < adapter->req_rx_queues; i++)
+   napi_schedule(&adapter->napi[i]);
+
+   return 0;
+}
+
 /**
  * do_reset returns zero if we are able to keep processing reset events, or
  * non-zero if we hit a fatal error and must halt.
@@ -1738,6 +1818,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n",
   rwi->reset_reason);
 
+   rtnl_lock();
+
netif_carrier_off(netdev);
adapter->reset_reason = rwi->reset_reason;
 
@@ -1751,16 +1833,25 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (reset_state == VNIC_OPEN &&
adapter->reset_reason != VNIC_RESET_MOBILITY &&
adapter->reset_reason != VNIC_RESET_FAILOVER) {
-   rc = __ibmvnic_close(netdev);
+   adapter->state = VNIC_CLOSING;
+
+   /* Release the RTNL lock before link state change and
+* re-acquire after the link state change to allow
+* linkwatch_event to grab the RTNL lock and run during
+* a reset.
+*/
+   rtnl_unlock();
+   rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+   rtnl_lock();
if (rc)
- 

[PATCH v2 0/2] net/ibmvnic: serialization fixes

2019-09-17 Thread Juliet Kim
This series includes two fixes. The first improves reset code to allow 
linkwatch_event to proceed during reset. The second ensures that no more
than one thread runs in reset at a time. 

v2: 
- Separate change param reset from do_reset()
- Return IBMVNIC_OPEN_FAILED if __ibmvnic_open fails 
- Remove setting wait_for_reset to false from __ibmvnic_reset(), this 
  is done in wait_for_reset()
- Move the check for force_reset_recovery from patch 1 to patch 2

Juliet Kim (2):
  net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can run
  net/ibmvnic: prevent more than one thread from running in reset

 drivers/net/ethernet/ibm/ibmvnic.c | 244 ++---
 drivers/net/ethernet/ibm/ibmvnic.h |   4 +
 2 files changed, 180 insertions(+), 68 deletions(-)

-- 
2.16.4



Re: [PATCH] net/ibmvnic: Fix missing { in __ibmvnic_reset

2019-09-09 Thread Juliet Kim
On 9/9/19 3:44 PM, Michal Suchanek wrote:
> Commit 1c2977c09499 ("net/ibmvnic: free reset work of removed device from 
> queue")
> adds a } without corresponding { causing build break.
>
> Fixes: 1c2977c09499 ("net/ibmvnic: free reset work of removed device from 
> queue")
> Signed-off-by: Michal Suchanek 

Reviewed-by: Juliet Kim 

> ---
>  drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
> b/drivers/net/ethernet/ibm/ibmvnic.c
> index 6644cabc8e75..5cb55ea671e3 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -1984,7 +1984,7 @@ static void __ibmvnic_reset(struct work_struct *work)
>   rwi = get_next_rwi(adapter);
>   while (rwi) {
>   if (adapter->state == VNIC_REMOVING ||
> - adapter->state == VNIC_REMOVED)
> + adapter->state == VNIC_REMOVED) {
>   kfree(rwi);
>   rc = EBUSY;
>   break;


[PATCH] net/ibmvnic: free reset work of removed device from queue

2019-09-05 Thread Juliet Kim
Commit 36f1031c51a2 ("ibmvnic: Do not process reset during or after
 device removal") made the change to exit reset if the driver has been
removed, but it does not free the adapter's queued reset work items.

Ensure all reset work items are freed when breaking out of the loop early.

Fixes: 36f1031c51a2 ("ibmvnic: Do not process reset during or after
device removal")

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index fa4bb940665c..6644cabc8e75 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1985,7 +1985,10 @@ static void __ibmvnic_reset(struct work_struct *work)
while (rwi) {
if (adapter->state == VNIC_REMOVING ||
adapter->state == VNIC_REMOVED)
-   goto out;
+   kfree(rwi);
+   rc = EBUSY;
+   break;
+   }
 
if (adapter->force_reset_recovery) {
adapter->force_reset_recovery = false;
@@ -2011,7 +2014,7 @@ static void __ibmvnic_reset(struct work_struct *work)
netdev_dbg(adapter->netdev, "Reset failed\n");
free_all_rwi(adapter);
}
-out:
+
adapter->resetting = false;
if (we_lock_rtnl)
rtnl_unlock();
-- 
2.16.4



[PATCH] net/ibmvnic: free reset work of removed device from queue

2019-08-29 Thread Juliet Kim
Commit 36f1031c51a2 ("ibmvnic: Do not process reset during or after
 device removal") made the change to exit reset if the driver has been
removed, but it does not free the adapter's queued reset work items.

Ensure all reset work items are freed when breaking out of the loop early.

Fixes: 36f1031c51a2 ("ibmvnic: Do not process reset during or after
device removal")

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index fa4bb940665c..51bc943e66aa 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1984,8 +1984,11 @@ static void __ibmvnic_reset(struct work_struct *work)
rwi = get_next_rwi(adapter);
while (rwi) {
if (adapter->state == VNIC_REMOVING ||
-   adapter->state == VNIC_REMOVED)
-   goto out;
+   adapter->state == VNIC_REMOVED )
+   kfree(rwi);
+   rc = EBUSY;
+   break;
+   }
 
if (adapter->force_reset_recovery) {
adapter->force_reset_recovery = false;
@@ -2011,7 +2014,7 @@ static void __ibmvnic_reset(struct work_struct *work)
netdev_dbg(adapter->netdev, "Reset failed\n");
free_all_rwi(adapter);
}
-out:
+
adapter->resetting = false;
if (we_lock_rtnl)
rtnl_unlock();
-- 
2.16.4



[PATCH 1/2] net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can run

2019-08-20 Thread Juliet Kim
Commit a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset") 
made the change to hold the RTNL lock during a reset to avoid deadlock 
but linkwatch_event is fired during the reset and needs the RTNL lock.  
That keeps linkwatch_event process from proceeding until the reset 
is complete. The reset process cannot tolerate the linkwatch_event 
processing after reset completes, so release the RTNL lock during the 
process to allow a chance for linkwatch_event to run during reset. 
This does not guarantee that the linkwatch_event will be processed as 
soon as link state changes, but is an improvement over the current code
where linkwatch_event processing is always delayed, which prevents 
transmissions on the device from being deactivated, leading the
transmit watchdog timer to time out.

Release the RTNL lock before link state change and re-acquire after 
the link state change to allow linkwatch_event to grab the RTNL lock 
and run during the reset.

Fixes: a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset")

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 144 +++--
 1 file changed, 108 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index cebd20f3128d..a3e2fbb9c5db 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1733,11 +1733,21 @@ static int do_reset(struct ibmvnic_adapter *adapter,
u64 old_num_rx_queues, old_num_tx_queues;
u64 old_num_rx_slots, old_num_tx_slots;
struct net_device *netdev = adapter->netdev;
+   bool we_lock_rtnl = false;
int i, rc;
 
netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n",
   rwi->reset_reason);
 
+   /* netif_set_real_num_xx_queues needs to take rtnl lock here
+* unless wait_for_reset is set, in which case the rtnl lock
+* has already been taken before initializing the reset
+*/
+   if (!adapter->wait_for_reset && !we_lock_rtnl) {
+   rtnl_lock();
+   we_lock_rtnl = true;
+   }
+
netif_carrier_off(netdev);
adapter->reset_reason = rwi->reset_reason;
 
@@ -1751,9 +1761,27 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (reset_state == VNIC_OPEN &&
adapter->reset_reason != VNIC_RESET_MOBILITY &&
adapter->reset_reason != VNIC_RESET_FAILOVER) {
-   rc = __ibmvnic_close(netdev);
+   adapter->state = VNIC_CLOSING;
+   if (!adapter->wait_for_reset && we_lock_rtnl) {
+   we_lock_rtnl = false;
+   rtnl_unlock();
+   }
+
+   rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
if (rc)
-   return rc;
+   goto out;
+
+   if (!adapter->wait_for_reset && !we_lock_rtnl) {
+   rtnl_lock();
+   we_lock_rtnl = true;
+   }
+
+   if (adapter->state != VNIC_CLOSING) {
+   rc = -1;
+   goto out;
+   }
+
+   adapter->state = VNIC_CLOSED;
}
 
if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
@@ -1783,30 +1811,34 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (rc) {
netdev_err(adapter->netdev,
   "Couldn't initialize crq. rc=%d\n", rc);
-   return rc;
+   goto out;
}
 
rc = ibmvnic_reset_init(adapter);
-   if (rc)
-   return IBMVNIC_INIT_FAILED;
+   if (rc) {
+   rc = IBMVNIC_INIT_FAILED;
+   goto out;
+   }
 
/* If the adapter was in PROBE state prior to the reset,
 * exit here.
 */
-   if (reset_state == VNIC_PROBED)
-   return 0;
+   if (reset_state == VNIC_PROBED) {
+   rc = 0;
+   goto out;
+   }
 
rc = ibmvnic_login(netdev);
if (rc) {
adapter->state = reset_state;
-   return rc;
+   goto out;
}
 
if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
adapter->wait_for_reset) {
rc = init_resources(adapter);
if (rc)
-   return rc;
+   goto out;
} else if (adapter->req_rx_queues != old_num_rx_queues ||
   adapter->r

[PATCH 2/2] net/ibmvnic: prevent more than one thread from running in reset

2019-08-20 Thread Juliet Kim
The current code allows more than one thread to run in reset, which
can corrupt the adapter data. Check adapter->resetting before
performing a reset; if another reset is already running, delay
(100 msec) before trying again.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 19 ++-
 drivers/net/ethernet/ibm/ibmvnic.h |  3 +++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index a3e2fbb9c5db..7db7a44eeeb9 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2049,6 +2049,13 @@ static void __ibmvnic_reset(struct work_struct *work)
 
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
+   if (adapter->resetting) {
+   schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
+ IBMVNIC_RESET_DELAY);
+   return;
+   }
+
+   adapter->resetting = true;
reset_state = adapter->state;
 
rwi = get_next_rwi(adapter);
@@ -2085,6 +2092,15 @@ static void __ibmvnic_reset(struct work_struct *work)
adapter->resetting = false;
 }
 
+static void __ibmvnic_delayed_reset(struct work_struct *work)
+{
+   struct ibmvnic_adapter *adapter;
+
+   adapter = container_of(work, struct ibmvnic_adapter,
+  ibmvnic_delayed_reset.work);
+   __ibmvnic_reset(&adapter->ibmvnic_reset);
+}
+
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 enum ibmvnic_reset_reason reason)
 {
@@ -2137,7 +2153,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
rwi->reset_reason = reason;
list_add_tail(&rwi->list, &adapter->rwi_list);
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
-   adapter->resetting = true;
netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
schedule_work(&adapter->ibmvnic_reset);
 
@@ -4910,6 +4925,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
spin_lock_init(&adapter->stats_lock);
 
INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+   INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
+ __ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
init_completion(&adapter->init_done);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 70bd286f8932..0e1fadbcffc0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -38,6 +38,8 @@
 #define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
 #define IBMVNIC_BUFFER_HLEN 500
 
+#define IBMVNIC_RESET_DELAY 100
+
 static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
 #define IBMVNIC_USE_SERVER_MAXES 0x1
"use-server-maxes"
@@ -1076,6 +1078,7 @@ struct ibmvnic_adapter {
spinlock_t rwi_lock;
struct list_head rwi_list;
struct work_struct ibmvnic_reset;
+   struct delayed_work ibmvnic_delayed_reset;
bool resetting;
bool napi_enabled, from_passive_init;
 
-- 
2.16.4
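
Condensed, the serialization described in this patch has the following shape
(an illustrative sketch built from the names in the diff, not the patch
itself): if the reset worker finds another reset already in flight, it
re-queues itself on a delayed work item instead of running concurrently.

        static void __ibmvnic_reset(struct work_struct *work)
        {
                struct ibmvnic_adapter *adapter;

                adapter = container_of(work, struct ibmvnic_adapter,
                                       ibmvnic_reset);

                if (adapter->resetting) {
                        /* another reset is running; defer and retry later */
                        schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
                                              IBMVNIC_RESET_DELAY);
                        return;
                }

                adapter->resetting = true;
                /* ... process the queued reset work items, then clear
                 * adapter->resetting when done ...
                 */
        }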



Re: [PATCH] powerpc/rtas: Fix hang in race against concurrent cpu offline

2019-06-28 Thread Juliet Kim


On 6/26/19 6:51 PM, Nathan Lynch wrote:
> Hi Juliet,
>
> Juliet Kim  writes:
>> On 6/25/19 12:29 PM, Nathan Lynch wrote:
>>> Juliet Kim  writes:
>>>> However, that fix failed to notify Hypervisor that the LPM attempted
>>>> had been abandoned which results in a system hang.
>>> It is surprising to me that leaving a migration unterminated would cause
>>> Linux to hang. Can you explain more about how that happens?
>>>
>> PHYP will block further requests(next partition migration, dlpar etc) while
>> it's in suspending state. That would have a follow-on effect on the HMC and
>> potentially this and other partitions.
> I can believe that operations on _this LPAR_ would be blocked by the
> platform and/or management console while the migration remains
> unterminated, but the OS should not be able to perpetrate a denial of
> service on other partitions or the management console simply by botching
> the LPM protocol. If it can, that's not Linux's bug to fix.
>
>
>>>> Fix this by sending a signal PHYP to cancel the migration, so that PHYP
>>>> can stop waiting, and clean up the migration.
>>> This is well-spotted and rtas_ibm_suspend_me() needs to signal
>>> cancellation in several error paths. But I don't agree that this is one
>>> of them: this race is going to be a temporary condition in any
>>> production setting, and retrying would allow the migration to
>>> succeed.
>> If LPM and CPU offine requests conflict with one another, it might be better
>> to let them fail and let the customer decide which he prefers.
> Hmm I don't think so. When (if ever) this happens in production it would
> be the result of an unlucky race with a power management daemon or
> similar, not a conscious decision of the administrator in the moment.
>
Guessing that a production race would only be against power management may be
reasonable. But we have an actual failure case where the race was against
an explicit offline request, and that's a legitimate/supported thing for
a customer to do.

>> IBM i cancels migration if the other OS components/operations veto
>> migration. It’s consistent with other OS behavior for LPM.
> But this situation isn't really like that. If we were to have a real
> veto mechanism, it would only make sense to have it run as early as
> possible, before the platform has done a bunch of work. This benign,
> recoverable race is occurring right before we complete the migration,
> which at this point has been copying state to the destination for
> minutes or hours. It doesn't make sense to error out like this.

Let me clarify that the cancellation occurs in the phase preparing
for migration. It would be even better if it ran before LPM is allowed to
start, but that's what a long-term solution might look like.

> As I mentioned earlier though, it does make sense to signal a
> cancellation for these less-recoverable error conditions in
> rtas_ibm_suspend_me():
>
> - rtas_online_cpus_mask() failure
> - alloc_cpumask_var() failure
> - the atomic_read() != 0 case after returning from the IPI


Re: [PATCH] powerpc/rtas: retry when cpu offline races with suspend/migration

2019-06-27 Thread Juliet Kim


On 6/27/19 12:01 AM, Michael Ellerman wrote:
> Juliet Kim  writes:
>> On 6/25/19 1:51 PM, Nathan Lynch wrote:
>>> Juliet Kim  writes:
>>>
>>>> There's some concern this could retry forever, resulting in live lock.
>>> First of all the system will make progress in other areas even if there
>>> are repeated retries; we're not indefinitely holding locks or anything
>>> like that.
>> For instance, system admin runs a script that picks and offlines CPUs in a
>> loop to keep a certain rate of onlined CPUs for energy saving. If LPM keeps
>> putting CPUs back online, that would never finish, and would keepgenerating
>> new offline requests
>>
>>> Second, Linux checks the H_VASI_STATE result on every retry. If the
>>> platform wants to terminate the migration (say, if it imposes a
>>> timeout), Linux will abandon it when H_VASI_STATE fails to return
>>> H_VASI_SUSPENDING. And it seems incorrect to bail out before that
>>> happens, absent hard errors on the Linux side such as allocation
>>> failures.
>> I confirmed with the PHYP and HMC folks that they wouldn't time out the LPM
>> request including H_VASI_STATE, so if the LPM retries were unlucky enough to
>> encounter repeated CPU offline attempts (maybe some customer code retrying
>> that), then the retries could continue indefinitely, or until some manual
>> intervention.  And in the mean time, the LPM delay here would cause PHYP to
>> block other operations.
> That sounds like a PHYP bug to me.
>
> cheers


PHYP doesn't time out because they have no idea how long it will take for the
OS to get to the point that it suspends. Other OSes allow applications to
prepare for LPM. They cannot predict the length of time that is appropriate in
all cases and, in any case, it's unlikely they'd make a change that would come
in time to help with the current issue.



Re: [PATCH] powerpc/rtas: Fix hang in race against concurrent cpu offline

2019-06-26 Thread Juliet Kim
On 6/25/19 12:29 PM, Nathan Lynch wrote:
> Juliet Kim  writes:
>> The commit
>> (“powerpc/rtas: Fix a potential race between CPU-Offline & Migration)
>> attempted to fix a hang in Live Partition Mobility(LPM) by abandoning
>> the LPM attempt if a race between LPM and concurrent CPU offline was
>> detected.
>>
>> However, that fix failed to notify Hypervisor that the LPM attempted
>> had been abandoned which results in a system hang.
> It is surprising to me that leaving a migration unterminated would cause
> Linux to hang. Can you explain more about how that happens?
>
PHYP will block further requests(next partition migration, dlpar etc) while
it's in suspending state. That would have a follow-on effect on the HMC and
potentially this and other partitions.
>> Fix this by sending a signal PHYP to cancel the migration, so that PHYP
>> can stop waiting, and clean up the migration.
> This is well-spotted and rtas_ibm_suspend_me() needs to signal
> cancellation in several error paths. But I don't agree that this is one
> of them: this race is going to be a temporary condition in any
> production setting, and retrying would allow the migration to succeed.
If LPM and CPU offline requests conflict with one another, it might be better
to let them fail and let the customer decide which he prefers. IBM i cancels
migration if the other OS components/operations veto migration. It’s consistent
with other OS behavior for LPM. I think all the IBM products should have a
consistent customer experience. Even if the race can be temporary, it still
could happen and can cause livelock.


Re: [PATCH] powerpc/rtas: retry when cpu offline races with suspend/migration

2019-06-26 Thread Juliet Kim


On 6/25/19 1:51 PM, Nathan Lynch wrote:
> Juliet Kim  writes:
>
>> There's some concern this could retry forever, resulting in live lock.
> First of all the system will make progress in other areas even if there
> are repeated retries; we're not indefinitely holding locks or anything
> like that.

For instance, a system admin runs a script that picks and offlines CPUs in a
loop to keep a certain rate of onlined CPUs for energy saving. If LPM keeps
putting CPUs back online, that would never finish, and would keep generating
new offline requests.

> Second, Linux checks the H_VASI_STATE result on every retry. If the
> platform wants to terminate the migration (say, if it imposes a
> timeout), Linux will abandon it when H_VASI_STATE fails to return
> H_VASI_SUSPENDING. And it seems incorrect to bail out before that
> happens, absent hard errors on the Linux side such as allocation
> failures.
I confirmed with the PHYP and HMC folks that they wouldn't time out the LPM
request including H_VASI_STATE, so if the LPM retries were unlucky enough to
encounter repeated CPU offline attempts (maybe some customer code retrying
that), then the retries could continue indefinitely, or until some manual
intervention. And in the meantime, the LPM delay here would cause PHYP to
block other operations.



[PATCH REPOST] powerpc/rtas: Fix hang in race against concurrent cpu offline

2019-06-25 Thread Juliet Kim
The commit
("powerpc/rtas: Fix a potential race between CPU-Offline & Migration")
attempted to fix a hang in Live Partition Mobility (LPM) by abandoning
the LPM attempt if a race between LPM and a concurrent CPU offline was
detected.

However, that fix failed to notify the hypervisor that the LPM attempt
had been abandoned, which results in a system hang.

Fix this by sending a signal to PHYP to cancel the migration, so that PHYP
can stop waiting and clean up the migration.

Fixes: dfd718a2ed1f ("powerpc/rtas: Fix a potential race between CPU-Offline & 
Migration")
Signed-off-by: Juliet Kim 
---
 arch/powerpc/include/asm/hvcall.h | 7 +++
 arch/powerpc/kernel/rtas.c| 8 
 2 files changed, 15 insertions(+)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 463c63a..29ca285 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -261,6 +261,7 @@
 #define H_ADD_CONN 0x284
 #define H_DEL_CONN 0x288
 #define H_JOIN 0x298
+#define H_VASI_SIGNAL   0x2A0
 #define H_VASI_STATE   0x2A4
 #define H_VIOCTL   0x2A8
 #define H_ENABLE_CRQ   0x2B0
@@ -348,6 +349,12 @@
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS  -2
 /* >= 0 values are CPU number */
 
+/* Values for argument to H_VASI_SIGNAL */
+#define H_SIGNAL_CANCEL_MIG 0x01
+
+/* Values for 2nd argument to H_VASI_SIGNAL */
+#define H_CPU_OFFLINE_DETECTED  0x0604
+
 /* H_GET_CPU_CHARACTERISTICS return values */
 #define H_CPU_CHAR_SPEC_BAR_ORI31  (1ull << 63) // IBM bit 0
 #define H_CPU_CHAR_BCCTRL_SERIALISED   (1ull << 62) // IBM bit 1
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b824f4c..f9002b7 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -981,6 +981,14 @@ int rtas_ibm_suspend_me(u64 handle)
 
/* Check if we raced with a CPU-Offline Operation */
if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
+
+   /* We use CANCEL, not ABORT, to gracefully cancel the migration */
+   rc = plpar_hcall_norets(H_VASI_SIGNAL, handle,
+   H_SIGNAL_CANCEL_MIG, H_CPU_OFFLINE_DETECTED);
+
+   if (rc != H_SUCCESS)
+   pr_err("%s: vasi_signal failed %ld\n", __func__, rc);
+
pr_err("%s: Raced against a concurrent CPU-Offline\n",
   __func__);
atomic_set(&data.error, -EBUSY);
-- 
1.8.3.1



Re: [PATCH] powerpc/rtas: retry when cpu offline races with suspend/migration

2019-06-24 Thread Juliet Kim
There's some concern this could retry forever, resulting in live lock.  
Another approach would be to abandon the attempt and fail the LPM 
request. 
https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-June/192531.html


On 6/24/19 12:23 PM, Nathan Lynch wrote:

Hi Mingming,

mmc  writes:

On 2019-06-21 00:05, Nathan Lynch wrote:

So return -EAGAIN instead of -EBUSY when this race is
encountered. Additionally: logging this event is still appropriate but
use pr_info instead of pr_err; and remove use of unlikely() while here
as this is not a hot path at all.

Looks good, since it's not a hot path anyway, so unlikely() should
benefit from optimize compiler path, and should stay. No?

The latency of this path in rtas_ibm_suspend_me() in the best case is
around 2-3 seconds.

So I think not -- this is such a heavyweight and relatively
seldom-executed path that the unlikely() cannot yield any discernible
performance benefit, and its presence imposes a readability cost.
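
For reference, unlikely() only hints the expected branch direction to the
compiler; it does not change behavior. Roughly (the kernel's definition lives
in include/linux/compiler.h):

        #define unlikely(x)     __builtin_expect(!!(x), 0)

        /* usage shape in the path being discussed */
        if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask)))
                pr_info("raced against a concurrent CPU offline\n");

On a path whose best-case latency is measured in seconds, the hint cannot
produce a measurable win, which is the point being made above.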


[PATCH] powerpc/rtas: Fix hang in race against concurrent cpu offline

2019-06-24 Thread Juliet Kim
From 1bd2bf7146876e099eafb292668f9a12dc22f526 Mon Sep 17 00:00:00 2001
From: Juliet Kim 
Date: Mon, 24 Jun 2019 17:17:46 -0400
Subject: [PATCH 1/1] powerpc/rtas: Fix hang in race against concurrent cpu
 offline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The commit
("powerpc/rtas: Fix a potential race between CPU-Offline & Migration")
attempted to fix a hang in Live Partition Mobility (LPM) by abandoning
the LPM attempt if a race between LPM and a concurrent CPU offline was
detected.

However, that fix failed to notify the hypervisor that the LPM attempt
had been abandoned, which results in a system hang.

Fix this by sending a signal to PHYP to cancel the migration, so that PHYP
can stop waiting and clean up the migration.

Fixes: dfd718a2ed1f ("powerpc/rtas: Fix a potential race between CPU-Offline & 
Migration")
Signed-off-by: Juliet Kim 
---
This is an alternate solution to the one Nathan proposed in:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-June/192414.html
---
 arch/powerpc/include/asm/hvcall.h | 7 +++
 arch/powerpc/kernel/rtas.c| 8 
 2 files changed, 15 insertions(+)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 463c63a..29ca285 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -261,6 +261,7 @@
 #define H_ADD_CONN 0x284
 #define H_DEL_CONN 0x288
 #define H_JOIN 0x298
+#define H_VASI_SIGNAL   0x2A0
 #define H_VASI_STATE   0x2A4
 #define H_VIOCTL   0x2A8
 #define H_ENABLE_CRQ   0x2B0
@@ -348,6 +349,12 @@
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS  -2
 /* >= 0 values are CPU number */
 
+/* Values for argument to H_VASI_SIGNAL */
+#define H_SIGNAL_CANCEL_MIG 0x01
+
+/* Values for 2nd argument to H_VASI_SIGNAL */
+#define H_CPU_OFFLINE_DETECTED  0x0604
+
 /* H_GET_CPU_CHARACTERISTICS return values */
 #define H_CPU_CHAR_SPEC_BAR_ORI31  (1ull << 63) // IBM bit 0
 #define H_CPU_CHAR_BCCTRL_SERIALISED   (1ull << 62) // IBM bit 1
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b824f4c..f9002b7 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -981,6 +981,14 @@ int rtas_ibm_suspend_me(u64 handle)
 
/* Check if we raced with a CPU-Offline Operation */
if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
+
+   /* We use CANCEL, not ABORT, to gracefully cancel the migration */
+   rc = plpar_hcall_norets(H_VASI_SIGNAL, handle,
+   H_SIGNAL_CANCEL_MIG, H_CPU_OFFLINE_DETECTED);
+
+   if (rc != H_SUCCESS)
+   pr_err("%s: vasi_signal failed %ld\n", __func__, rc);
+
pr_err("%s: Raced against a concurrent CPU-Offline\n",
   __func__);
atomic_set(&data.error, -EBUSY);
-- 
1.8.3.1



Re: [PATCH]powerpc/mobility: Serialize PRRN and LPM in device tree update

2019-05-07 Thread Juliet Kim

Hi Nathan,

On 5/6/19 12:14 PM, Nathan Lynch wrote:

Hi Juliet,

Juliet Kim writes:

Fix extending start/stop topology update scope during LPM
Commit 65b9fdadfc4d ("powerpc/pseries/mobility: Extend start/stop
topology update scope") made the change to the duration that
topology updates are suppressed during LPM to allow the complete
device tree update which leaves the property update notifier
unregistered until device tree update completes. This prevents
topology update during LPM.

Instead, use mutex_lock, which serializes LPM and PRRN operation
in pseries_devicetree_update.

I think this is conflating two issues:

1. Insufficient serialization/ordering of handling PRRNs and
LPM. E.g. we could migrate while processing a PRRN from the source
system and end up with incorrect contents in the device tree on the
destination if the LPM changes the same nodes. The OS is supposed to
drain any outstanding PRRNs before proceeding with migration, which
is a stronger requirement than simple serialization of device tree
updates. If we don't impose this ordering already we should fix that.


A PRRN request can be received at any time, including before, after, and
during LPM. Currently, we do not have a protocol with the hypervisor
prohibiting PRRN after LPM begins. This patch is to fix the regression
(inconsistent state of the device tree and skipping the CPU affinity update)
injected by commit 65b9fdadfc4d ("powerpc/pseries/mobility: Extend start/stop
topology update scope").

This patch uses mutex_lock to update the device tree, allowing the device tree
to be in a consistent state in both cases: LPM begins while a PRRN event is
running, and vice versa. If we migrate while PRRN is running at the source,
the PRRN operation holding the lock completes at the target. Once PRRN
releases the lock, LPM takes the lock and updates the device tree, so PRRN
completes its device tree update before LPM begins. To keep PRRN and LPM from
running at the same time at all, serialization is needed at a higher layer,
which requires a design change and may be future work.



2. The NUMA topology update processing. Generally speaking,
start/stop_topology_update() enable/disable dt_update_callback(),
which we use to update CPU-node assignments. Since we now know that
doing that is Bad, it's sort of a happy accident that
migration_store() was changed to re-register the notifier after
updating the device tree, which is too late. So I don't think we
should try to "fix" this. Instead we should remove the broken code
(dt_update_callback -> dlpar_cpu_readdd and so on).
When the regression (the CPU affinity update being accidentally disabled at
LPM) and CPU readd caused some issues, I suggested that we revert the CPU
readd patch already upstream and leave the regression unfixed. But then we
decided to disable the CPU affinity update globally for LPM, PRRN, and VPHN,
and to fix the regression once the patch disabling the CPU affinity update is
accepted upstream, since the regression needs to be corrected in case the CPU
affinity update is re-enabled; we would clean up the code once the disablement
is stabilized.

Do you agree?

Thanks,
Nathan




[PATCH]powerpc/mobility: Serialize PRRN and LPM in device tree update

2019-04-26 Thread Juliet Kim
Fix extending start/stop topology update scope during LPM.
Commit 65b9fdadfc4d ("powerpc/pseries/mobility: Extend start/stop
topology update scope") changed how long topology updates are 
suppressed during LPM to allow the complete device tree update, 
which leaves the property update notifier unregistered until the 
device tree update completes. This prevents topology updates during LPM.

Instead, use mutex_lock, which serializes the LPM and PRRN operations 
in pseries_devicetree_update. (A condensed sketch of the serialization 
follows the patch.)

Signed-off-by: Juliet Kim 

 arch/powerpc/platforms/pseries/mobility.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 88925f8ca8a0..3a79ded056fd 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -42,6 +42,8 @@ struct update_props_workarea {
 #define MIGRATION_SCOPE    (1)
 #define PRRN_SCOPE -2
 
+static DEFINE_MUTEX(dt_affinity_mutex);
+
 static int mobility_rtas_call(int token, char *buf, s32 scope)
 {
int rc;
@@ -270,13 +272,19 @@ int pseries_devicetree_update(s32 scope)
int update_nodes_token;
int rc;
 
+   mutex_lock(&dt_affinity_mutex);
+
update_nodes_token = rtas_token("ibm,update-nodes");
-   if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
+   if (update_nodes_token == RTAS_UNKNOWN_SERVICE) {
+   mutex_unlock(&dt_affinity_mutex);
return -EINVAL;
+   }
 
rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
-   if (!rtas_buf)
+   if (!rtas_buf) {
+   mutex_unlock(&dt_affinity_mutex);
return -ENOMEM;
+   }
 
do {
rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope);
@@ -316,6 +324,7 @@ int pseries_devicetree_update(s32 scope)
} while (rc == 1);
 
kfree(rtas_buf);
+   mutex_unlock(&dt_affinity_mutex);
return rc;
 }
 
@@ -371,10 +380,10 @@ static ssize_t migration_store(struct class *class,
if (rc)
return rc;
 
-   post_mobility_fixup();
-
start_topology_update();
 
+   post_mobility_fixup();
+
return count;
 }
 
-- 
2.12.3
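
Condensed, the serialization this patch adds has the following shape (an
illustrative sketch; update_device_tree_from_rtas() is a hypothetical helper
standing in for the RTAS ibm,update-nodes update loop in the real function):

        static DEFINE_MUTEX(dt_affinity_mutex);

        int pseries_devicetree_update(s32 scope)
        {
                int rc;

                /* PRRN- and LPM-initiated updates now run back to back
                 * instead of interleaving
                 */
                mutex_lock(&dt_affinity_mutex);
                rc = update_device_tree_from_rtas(scope); /* hypothetical */
                mutex_unlock(&dt_affinity_mutex);

                return rc;
        }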




[PATCH net] net/ibmnvic: Fix deadlock problem in reset

2018-11-19 Thread Juliet Kim
This patch changes to use rtnl_lock only during a reset to avoid a
deadlock that could occur when a thread handling close holds
rtnl_lock and waits for reset_lock, which is held by another thread
that is waiting for rtnl_lock in order to set the number of tx/rx
queues during a reset. (A condensed sketch of the lock ordering
follows the diff.)

Also, we now set the number of tx/rx queues during a soft reset
for failover or LPM events.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c |   59 +---
 drivers/net/ethernet/ibm/ibmvnic.h |2 +
 2 files changed, 22 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 7893bef..4a5de59 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1103,20 +1103,15 @@ static int ibmvnic_open(struct net_device *netdev)
return 0;
}
 
-   mutex_lock(&adapter->reset_lock);
-
if (adapter->state != VNIC_CLOSED) {
rc = ibmvnic_login(netdev);
-   if (rc) {
-   mutex_unlock(&adapter->reset_lock);
+   if (rc)
return rc;
-   }
 
rc = init_resources(adapter);
if (rc) {
netdev_err(netdev, "failed to initialize resources\n");
release_resources(adapter);
-   mutex_unlock(&adapter->reset_lock);
return rc;
}
}
@@ -1124,8 +1119,6 @@ static int ibmvnic_open(struct net_device *netdev)
rc = __ibmvnic_open(netdev);
netif_carrier_on(netdev);
 
-   mutex_unlock(&adapter->reset_lock);
-
return rc;
 }
 
@@ -1269,10 +1262,8 @@ static int ibmvnic_close(struct net_device *netdev)
return 0;
}
 
-   mutex_lock(&adapter->reset_lock);
rc = __ibmvnic_close(netdev);
ibmvnic_cleanup(netdev);
-   mutex_unlock(&adapter->reset_lock);
 
return rc;
 }
@@ -1820,20 +1811,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
return rc;
} else if (adapter->req_rx_queues != old_num_rx_queues ||
   adapter->req_tx_queues != old_num_tx_queues) {
-   adapter->map_id = 1;
release_rx_pools(adapter);
release_tx_pools(adapter);
-   rc = init_rx_pools(netdev);
-   if (rc)
-   return rc;
-   rc = init_tx_pools(netdev);
-   if (rc)
-   return rc;
-
release_napi(adapter);
-   rc = init_napi(adapter);
+   release_vpd_data(adapter);
+
+   rc = init_resources(adapter);
if (rc)
return rc;
+
} else {
rc = reset_tx_pools(adapter);
if (rc)
@@ -1917,17 +1903,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
adapter->state = VNIC_PROBED;
return 0;
}
-   /* netif_set_real_num_xx_queues needs to take rtnl lock here
-* unless wait_for_reset is set, in which case the rtnl lock
-* has already been taken before initializing the reset
-*/
-   if (!adapter->wait_for_reset) {
-   rtnl_lock();
-   rc = init_resources(adapter);
-   rtnl_unlock();
-   } else {
-   rc = init_resources(adapter);
-   }
+
+   rc = init_resources(adapter);
if (rc)
return rc;
 
@@ -1986,13 +1963,21 @@ static void __ibmvnic_reset(struct work_struct *work)
struct ibmvnic_rwi *rwi;
struct ibmvnic_adapter *adapter;
struct net_device *netdev;
+   bool we_lock_rtnl = false;
u32 reset_state;
int rc = 0;
 
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
netdev = adapter->netdev;
 
-   mutex_lock(&adapter->reset_lock);
+   /* netif_set_real_num_xx_queues needs to take rtnl lock here
+* unless wait_for_reset is set, in which case the rtnl lock
+* has already been taken before initializing the reset
+*/
+   if (!adapter->wait_for_reset) {
+   rtnl_lock();
+   we_lock_rtnl = true;
+   }
reset_state = adapter->state;
 
rwi = get_next_rwi(adapter);
@@ -2020,12 +2005,11 @@ static void __ibmvnic_reset(struct work_struct *work)
if (rc) {
netdev_dbg(adapter->netdev, "Reset failed\n");
free_all_rwi(adapter);
-   mutex_unlock(&adapter->reset_lock);
-   return;
}
 
adapter->resetting = false;
-   mutex_unlock(&adapter->reset_lock);
+   if (we
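
The lock ordering being eliminated, in condensed form (illustrative only, not
driver code), is the classic AB-BA deadlock:

        /*
         * close path (ibmvnic_close, called with RTNL held by the core):
         *     rtnl_lock();                        already held
         *     mutex_lock(&adapter->reset_lock);   blocks here ...
         *
         * reset worker (__ibmvnic_reset):
         *     mutex_lock(&adapter->reset_lock);   taken first
         *     rtnl_lock();                        ... while waiting here
         *
         * Dropping reset_lock and using only the RTNL lock in the reset
         * path removes the second lock from the cycle.
         */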

[PATCH] net/ibmnvic: Fix deadlock problem in reset

2018-11-15 Thread Juliet Kim

Subject: [PATCH] net/ibmnvic: Fix deadlock problem in reset

From: Juliet Kim 

This patch changes to use rtnl_lock only during a reset to avoid a
deadlock that could occur when a thread handling close holds
rtnl_lock and waits for reset_lock, which is held by another thread
that is waiting for rtnl_lock in order to set the number of tx/rx
queues during a reset.

Also, we now set the number of tx/rx queues during a soft reset
for failover or LPM events.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c |   59 
+---

 drivers/net/ethernet/ibm/ibmvnic.h |    2 +
 2 files changed, 22 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c

index 7893bef..4a5de59 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1103,20 +1103,15 @@ static int ibmvnic_open(struct net_device *netdev)
    return 0;
    }

-   mutex_lock(&adapter->reset_lock);
-
    if (adapter->state != VNIC_CLOSED) {
    rc = ibmvnic_login(netdev);
-   if (rc) {
-   mutex_unlock(&adapter->reset_lock);
+   if (rc)
    return rc;
-   }

    rc = init_resources(adapter);
    if (rc) {
    netdev_err(netdev, "failed to initialize resources\n");
    release_resources(adapter);
-   mutex_unlock(&adapter->reset_lock);
    return rc;
    }
    }
@@ -1124,8 +1119,6 @@ static int ibmvnic_open(struct net_device *netdev)
    rc = __ibmvnic_open(netdev);
    netif_carrier_on(netdev);

-   mutex_unlock(&adapter->reset_lock);
-
    return rc;
 }

@@ -1269,10 +1262,8 @@ static int ibmvnic_close(struct net_device *netdev)
    return 0;
    }

-   mutex_lock(&adapter->reset_lock);
    rc = __ibmvnic_close(netdev);
    ibmvnic_cleanup(netdev);
-   mutex_unlock(&adapter->reset_lock);

    return rc;
 }
@@ -1820,20 +1811,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
    return rc;
    } else if (adapter->req_rx_queues != old_num_rx_queues ||
   adapter->req_tx_queues != old_num_tx_queues) {
-   adapter->map_id = 1;
    release_rx_pools(adapter);
    release_tx_pools(adapter);
-   rc = init_rx_pools(netdev);
-   if (rc)
-   return rc;
-   rc = init_tx_pools(netdev);
-   if (rc)
-   return rc;
-
    release_napi(adapter);
-   rc = init_napi(adapter);
+   release_vpd_data(adapter);
+
+   rc = init_resources(adapter);
    if (rc)
    return rc;
+
    } else {
    rc = reset_tx_pools(adapter);
    if (rc)
@@ -1917,17 +1903,8 @@ static int do_hard_reset(struct ibmvnic_adapter 
*adapter,

    adapter->state = VNIC_PROBED;
    return 0;
    }
-   /* netif_set_real_num_xx_queues needs to take rtnl lock here
-    * unless wait_for_reset is set, in which case the rtnl lock
-    * has already been taken before initializing the reset
-    */
-   if (!adapter->wait_for_reset) {
-   rtnl_lock();
-   rc = init_resources(adapter);
-   rtnl_unlock();
-   } else {
-   rc = init_resources(adapter);
-   }
+
+   rc = init_resources(adapter);
    if (rc)
    return rc;

@@ -1986,13 +1963,21 @@ static void __ibmvnic_reset(struct work_struct 
*work)

    struct ibmvnic_rwi *rwi;
    struct ibmvnic_adapter *adapter;
    struct net_device *netdev;
+   bool we_lock_rtnl = false;
    u32 reset_state;
    int rc = 0;

    adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
    netdev = adapter->netdev;

-   mutex_lock(&adapter->reset_lock);
+   /* netif_set_real_num_xx_queues needs to take rtnl lock here
+    * unless wait_for_reset is set, in which case the rtnl lock
+    * has already been taken before initializing the reset
+    */
+   if (!adapter->wait_for_reset) {
+   rtnl_lock();
+   we_lock_rtnl = true;
+   }

    reset_state = adapter->state;


    rwi = get_next_rwi(adapter);
@@ -2020,12 +2005,11 @@ static void __ibmvnic_reset(struct work_struct 
*work)

    if (rc) {
    netdev_dbg(adapter->netdev, "Reset failed\n");
    free_all_rwi(adapter);
-   mutex_unlock(&adapter->reset_lock);
-   return;
    }

    adapter->resetting = false;
-   mutex_unlock(&adapter->reset_lock);
+   if (we_lock_rtnl)
+   rtnl_unlock();
 }

 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
@@ -4768,7 +4752,6 @@ static int ibmvnic_probe(struct vio_dev *dev, 
const struct vio_device_id *id)


    INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
    INIT_LIST_HEAD(&adapter->rwi_list);
-   mutex_init(&adapter->reset_lock);
    mutex_init(&adapter->rwi_lock);
    adapter->resetting = false;

@@ -4840,8 +4823,8 @@ static int ibmvnic_remove(struct vio_dev *dev)
    struct ibmvnic_adapter *adapter = netdev_priv(netdev);

    adapter->state = VNIC_REMOVING;
-   unregister_netdev(netdev);
-   mutex_lo