from:"Thomas Falcon"

[PATCH net v3 1/2] ibmvnic: Ensure that SCRQ entry reads are correctly ordered

2020-12-01 Thread Thomas Falcon

Ensure that received Subordinate Command-Response Queue (SCRQ)
entries are properly read in order by the driver. These queues
are used in the ibmvnic device to process RX buffer and TX completion
descriptors. dma_rmb barriers have been added after checking for a
pending descriptor to ensure the correct descriptor entry is checked
and after reading the SCRQ descriptor to ensure the entire
descriptor is read before processing.

Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 2aa40b2..5ea9f5c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2403,6 +2403,12 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
 
if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
break;
+   /* The queue entry at the current index is peeked at above
+* to determine that there is a valid descriptor awaiting
+* processing. We want to be sure that the current slot
+* holds a valid descriptor before reading its contents.
+*/
+   dma_rmb();
next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
rx_buff =
(struct ibmvnic_rx_buff *)be64_to_cpu(next->
@@ -3098,6 +3104,13 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
unsigned int pool = scrq->pool_index;
int num_entries = 0;
 
+   /* The queue entry at the current index is peeked at above
+* to determine that there is a valid descriptor awaiting
+* processing. We want to be sure that the current slot
+* holds a valid descriptor before reading its contents.
+*/
+   dma_rmb();
+
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
if (next->tx_comp.rcs[i]) {
@@ -3498,6 +3511,11 @@ static union sub_crq *ibmvnic_next_scrq(struct 
ibmvnic_adapter *adapter,
}
spin_unlock_irqrestore(>lock, flags);
 
+   /* Ensure that the entire buffer descriptor has been
+* loaded before reading its contents
+*/
+   dma_rmb();
+
return entry;
 }
 
-- 
1.8.3.1

[PATCH net v3 0/2] ibmvnic: Bug fixes for queue descriptor processing

2020-12-01 Thread Thomas Falcon

This series resolves a few issues in the ibmvnic driver's
RX buffer and TX completion processing. The first patch
includes memory barriers to synchronize queue descriptor
reads. The second patch fixes a memory leak that could
occur if the device returns a TX completion with an error
code in the descriptor, in which case the respective socket
buffer and other relevant data structures may not be freed
or updated properly.

v3: Correct length of Fixes tags, requested by Jakub Kicinski

v2: Provide more detailed comments explaining specifically what
reads are being ordered, suggested by Michael Ellerman

Thomas Falcon (2):
  ibmvnic: Ensure that SCRQ entry reads are correctly ordered
  ibmvnic: Fix TX completion error handling

 drivers/net/ethernet/ibm/ibmvnic.c | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)

-- 
1.8.3.1

[PATCH net v3 2/2] ibmvnic: Fix TX completion error handling

2020-12-01 Thread Thomas Falcon

TX completions received with an error return code are not
being processed properly. When an error code is seen, do not
proceed to the next completion before cleaning up the existing
entry's data structures.

Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 5ea9f5c..10878f8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3113,11 +3113,9 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
 
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
-   if (next->tx_comp.rcs[i]) {
+   if (next->tx_comp.rcs[i])
dev_err(dev, "tx error %x\n",
next->tx_comp.rcs[i]);
-   continue;
-   }
index = be32_to_cpu(next->tx_comp.correlators[i]);
if (index & IBMVNIC_TSO_POOL_MASK) {
tx_pool = >tso_pool[pool];
-- 
1.8.3.1

[PATCH net v2 2/2] ibmvnic: Fix TX completion error handling

2020-11-30 Thread Thomas Falcon

TX completions received with an error return code are not
being processed properly. When an error code is seen, do not
proceed to the next completion before cleaning up the existing
entry's data structures.

Fixes: 032c5e828 ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 5ea9f5c..10878f8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3113,11 +3113,9 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
 
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
-   if (next->tx_comp.rcs[i]) {
+   if (next->tx_comp.rcs[i])
dev_err(dev, "tx error %x\n",
next->tx_comp.rcs[i]);
-   continue;
-   }
index = be32_to_cpu(next->tx_comp.correlators[i]);
if (index & IBMVNIC_TSO_POOL_MASK) {
tx_pool = >tso_pool[pool];
-- 
1.8.3.1

[PATCH net v2 0/2] ibmvnic: Bug fixes for queue descriptor processing

2020-11-30 Thread Thomas Falcon

This series resolves a few issues in the ibmvnic driver's
RX buffer and TX completion processing. The first patch
includes memory barriers to synchronize queue descriptor
reads. The second patch fixes a memory leak that could
occur if the device returns a TX completion with an error
code in the descriptor, in which case the respective socket
buffer and other relevant data structures may not be freed
or updated properly.

v2: Provide more detailed comments explaining specifically what
reads are being ordered, suggested by Michael Ellerman

Thomas Falcon (2):
  ibmvnic: Ensure that SCRQ entry reads are correctly ordered
  ibmvnic: Fix TX completion error handling

 drivers/net/ethernet/ibm/ibmvnic.c | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)

-- 
1.8.3.1

[PATCH net v2 1/2] ibmvnic: Ensure that SCRQ entry reads are correctly ordered

2020-11-30 Thread Thomas Falcon

Ensure that received Subordinate Command-Response Queue (SCRQ)
entries are properly read in order by the driver. These queues
are used in the ibmvnic device to process RX buffer and TX completion
descriptors. dma_rmb barriers have been added after checking for a
pending descriptor to ensure the correct descriptor entry is checked
and after reading the SCRQ descriptor to ensure the entire
descriptor is read before processing.

Fixes: 032c5e828 ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 2aa40b2..5ea9f5c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2403,6 +2403,12 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
 
if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
break;
+   /* The queue entry at the current index is peeked at above
+* to determine that there is a valid descriptor awaiting
+* processing. We want to be sure that the current slot
+* holds a valid descriptor before reading its contents.
+*/
+   dma_rmb();
next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
rx_buff =
(struct ibmvnic_rx_buff *)be64_to_cpu(next->
@@ -3098,6 +3104,13 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
unsigned int pool = scrq->pool_index;
int num_entries = 0;
 
+   /* The queue entry at the current index is peeked at above
+* to determine that there is a valid descriptor awaiting
+* processing. We want to be sure that the current slot
+* holds a valid descriptor before reading its contents.
+*/
+   dma_rmb();
+
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
if (next->tx_comp.rcs[i]) {
@@ -3498,6 +3511,11 @@ static union sub_crq *ibmvnic_next_scrq(struct 
ibmvnic_adapter *adapter,
}
spin_unlock_irqrestore(>lock, flags);
 
+   /* Ensure that the entire buffer descriptor has been
+* loaded before reading its contents
+*/
+   dma_rmb();
+
return entry;
 }
 
-- 
1.8.3.1

Re: [PATCH net 1/2] ibmvnic: Ensure that SCRQ entry reads are correctly ordered

2020-11-25 Thread Thomas Falcon


On 11/24/20 11:43 PM, Michael Ellerman wrote:

Thomas Falcon  writes:

Ensure that received Subordinate Command-Response Queue (SCRQ)
entries are properly read in order by the driver. These queues
are used in the ibmvnic device to process RX buffer and TX completion
descriptors. dma_rmb barriers have been added after checking for a
pending descriptor to ensure the correct descriptor entry is checked
and after reading the SCRQ descriptor to ensure the entire
descriptor is read before processing.

Fixes: 032c5e828 ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon 
---
  drivers/net/ethernet/ibm/ibmvnic.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 2aa40b2..489ed5e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2403,6 +2403,8 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
  
  		if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))

break;
+   /* ensure that we do not prematurely exit the polling loop */
+   dma_rmb();

I'd be happier if these comments were more specific about which read(s)
they are ordering vs which other read(s).

I'm sure it's obvious to you, but it may not be to a future author,
and/or after the code has been refactored over time.


Thank you for reviewing! I will submit a v2 soon with clearer comments 
on the reads being ordered here.


Thanks,

Tom





next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
rx_buff =
(struct ibmvnic_rx_buff *)be64_to_cpu(next->
@@ -3098,6 +3100,9 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
unsigned int pool = scrq->pool_index;
int num_entries = 0;
  
+		/* ensure that the correct descriptor entry is read */

+   dma_rmb();
+
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
if (next->tx_comp.rcs[i]) {
@@ -3498,6 +3503,9 @@ static union sub_crq *ibmvnic_next_scrq(struct 
ibmvnic_adapter *adapter,
}
spin_unlock_irqrestore(>lock, flags);
  
+	/* ensure that the entire SCRQ descriptor is read */

+   dma_rmb();
+
return entry;
  }

cheers

[PATCH net 2/2] ibmvnic: Fix TX completion error handling

2020-11-24 Thread Thomas Falcon

TX completions received with an error return code are not
being processed properly. When an error code is seen, do not
proceed to the next completion before cleaning up the existing
entry's data structures.

Fixes: 032c5e828 ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 489ed5e..7097bcb 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3105,11 +3105,9 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
 
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
-   if (next->tx_comp.rcs[i]) {
+   if (next->tx_comp.rcs[i])
dev_err(dev, "tx error %x\n",
next->tx_comp.rcs[i]);
-   continue;
-   }
index = be32_to_cpu(next->tx_comp.correlators[i]);
if (index & IBMVNIC_TSO_POOL_MASK) {
tx_pool = >tso_pool[pool];
-- 
1.8.3.1

[PATCH net 1/2] ibmvnic: Ensure that SCRQ entry reads are correctly ordered

2020-11-24 Thread Thomas Falcon

Ensure that received Subordinate Command-Response Queue (SCRQ)
entries are properly read in order by the driver. These queues
are used in the ibmvnic device to process RX buffer and TX completion
descriptors. dma_rmb barriers have been added after checking for a
pending descriptor to ensure the correct descriptor entry is checked
and after reading the SCRQ descriptor to ensure the entire
descriptor is read before processing.

Fixes: 032c5e828 ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 2aa40b2..489ed5e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2403,6 +2403,8 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
 
if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
break;
+   /* ensure that we do not prematurely exit the polling loop */
+   dma_rmb();
next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
rx_buff =
(struct ibmvnic_rx_buff *)be64_to_cpu(next->
@@ -3098,6 +3100,9 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
unsigned int pool = scrq->pool_index;
int num_entries = 0;
 
+   /* ensure that the correct descriptor entry is read */
+   dma_rmb();
+
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
if (next->tx_comp.rcs[i]) {
@@ -3498,6 +3503,9 @@ static union sub_crq *ibmvnic_next_scrq(struct 
ibmvnic_adapter *adapter,
}
spin_unlock_irqrestore(>lock, flags);
 
+   /* ensure that the entire SCRQ descriptor is read */
+   dma_rmb();
+
return entry;
 }
 
-- 
1.8.3.1

[PATCH net 0/2] ibmvnic: Bug fixes for queue descriptor processing

2020-11-24 Thread Thomas Falcon

This series resolves a few issues in the ibmvnic driver's
RX buffer and TX completion processing. The first patch
includes memory barriers to synchronize queue descriptor
reads. The second patch fixes a memory leak that could
occur if the device returns a TX completion with an error
code in the descriptor, in which case the respective socket
buffer and other relevant data structures may not be freed
or updated properly.

Thomas Falcon (2):
  ibmvnic: Ensure that SCRQ entry reads are correctly ordered
  ibmvnic: Fix TX completion error handling

 drivers/net/ethernet/ibm/ibmvnic.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

-- 
1.8.3.1

Re: [PATCH net-next v2 9/9] ibmvnic: Do not replenish RX buffers after every polling loop

2020-11-19 Thread Thomas Falcon


On 11/19/20 2:38 PM, ljp wrote:

On 2020-11-19 14:26, Thomas Falcon wrote:

On 11/19/20 3:43 AM, ljp wrote:


On 2020-11-18 19:12, Thomas Falcon wrote:


From: "Dwip N. Banerjee" 

Reduce the amount of time spent replenishing RX buffers by
only doing so once available buffers has fallen under a certain
threshold, in this case half of the total number of buffers, or
if the polling loop exits before the packets processed is less
than its budget.

Signed-off-by: Dwip N. Banerjee 
---
drivers/net/ethernet/ibm/ibmvnic.c | 5 -
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
b/drivers/net/ethernet/ibm/ibmvnic.c
index 96df6d8fa277..9fe43ab0496d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2537,7 +2537,10 @@ static int ibmvnic_poll(struct napi_struct

*napi, int budget)
frames_processed++;
}

- if (adapter->state != VNIC_CLOSING)
+ if (adapter->state != VNIC_CLOSING &&
+ ((atomic_read(>rx_pool[scrq_num].available) <
+ adapter->req_rx_add_entries_per_subcrq / 2) ||
+ frames_processed < budget))


1/2 seems a simple and good algorithm.
Explaining why "frames_process < budget" is necessary in the commit
message
or source code also helps.


Hello, Lijun. The patch author, Dwip Banerjee, suggested the modified
commit message below:

Reduce the amount of time spent replenishing RX buffers by
 only doing so once available buffers has fallen under a certain
 threshold, in this case half of the total number of buffers, or
 if the polling loop exits before the packets processed is less
 than its budget. Non-exhaustion of NAPI budget implies lower
 incoming packet pressure, allowing the leeway to refill the buffers
 in preparation for any impending burst.


It looks good to me.



Would such an update require a v3?


I assume you ask Jakub, right?


Yes. There was an issue with my mail client in my earlier response, so I 
am posting Dwip's modified commit message again below.


Reduce the amount of time spent replenishing RX buffers by only doing so 
once available buffers has fallen under a certain threshold, in this 
case half of the total number of buffers, or if the polling loop exits 
before the packets processed is less than its budget. Non-exhaustion of 
NAPI budget implies lower incoming packet pressure, allowing the leeway 
to refill the buffers in preparation for any impending burst.



replenish_rx_pool(adapter, >rx_pool[scrq_num]);
if (frames_processed < budget) {
if (napi_complete_done(napi, frames_processed)) {

Re: [PATCH net-next v2 9/9] ibmvnic: Do not replenish RX buffers after every polling loop

2020-11-19 Thread Thomas Falcon


On 11/19/20 3:43 AM, ljp wrote:

On 2020-11-18 19:12, Thomas Falcon wrote:

From: "Dwip N. Banerjee" 

Reduce the amount of time spent replenishing RX buffers by
only doing so once available buffers has fallen under a certain
threshold, in this case half of the total number of buffers, or
if the polling loop exits before the packets processed is less
than its budget.

Signed-off-by: Dwip N. Banerjee 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
b/drivers/net/ethernet/ibm/ibmvnic.c
index 96df6d8fa277..9fe43ab0496d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2537,7 +2537,10 @@ static int ibmvnic_poll(struct napi_struct
*napi, int budget)
 frames_processed++;
 }

-    if (adapter->state != VNIC_CLOSING)
+    if (adapter->state != VNIC_CLOSING &&
+ ((atomic_read(>rx_pool[scrq_num].available) <
+  adapter->req_rx_add_entries_per_subcrq / 2) ||
+  frames_processed < budget))


1/2 seems a simple and good algorithm.
Explaining why "frames_process < budget" is necessary in the commit 
message

or source code also helps.

Hello, Lijun. The patch author, Dwip Banerjee, suggested the modified 
commit message below:


Reduce the amount of time spent replenishing RX buffers by
only doing so once available buffers has fallen under a certain
threshold, in this case half of the total number of buffers, or
if the polling loop exits before the packets processed is less
than its budget. Non-exhaustion of NAPI budget implies lower
incoming packet pressure, allowing the leeway to refill the buffers
in preparation for any impending burst.

Would such an update require a v3?




 replenish_rx_pool(adapter, >rx_pool[scrq_num]);
 if (frames_processed < budget) {
 if (napi_complete_done(napi, frames_processed)) {

[PATCH net-next v2 9/9] ibmvnic: Do not replenish RX buffers after every polling loop

2020-11-18 Thread Thomas Falcon

From: "Dwip N. Banerjee" 

Reduce the amount of time spent replenishing RX buffers by
only doing so once available buffers has fallen under a certain
threshold, in this case half of the total number of buffers, or
if the polling loop exits before the packets processed is less
than its budget.

Signed-off-by: Dwip N. Banerjee 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 96df6d8fa277..9fe43ab0496d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2537,7 +2537,10 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
frames_processed++;
}
 
-   if (adapter->state != VNIC_CLOSING)
+   if (adapter->state != VNIC_CLOSING &&
+   ((atomic_read(>rx_pool[scrq_num].available) <
+ adapter->req_rx_add_entries_per_subcrq / 2) ||
+ frames_processed < budget))
replenish_rx_pool(adapter, >rx_pool[scrq_num]);
if (frames_processed < budget) {
if (napi_complete_done(napi, frames_processed)) {
-- 
2.26.2

[PATCH net-next v2 7/9] ibmvnic: Correctly re-enable interrupts in NAPI polling routine

2020-11-18 Thread Thomas Falcon

From: "Dwip N. Banerjee" 

If the current NAPI polling loop exits without completing it's
budget, only re-enable interrupts if there are no entries remaining
in the queue and napi_complete_done is successful. If there are entries
remaining on the queue that were missed, restart the polling loop.

Signed-off-by: Dwip N. Banerjee 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 37 +++---
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 85df91c9861b..596546f0614d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2450,10 +2450,17 @@ static void remove_buff_from_pool(struct 
ibmvnic_adapter *adapter,
 
 static int ibmvnic_poll(struct napi_struct *napi, int budget)
 {
-   struct net_device *netdev = napi->dev;
-   struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-   int scrq_num = (int)(napi - adapter->napi);
-   int frames_processed = 0;
+   struct ibmvnic_sub_crq_queue *rx_scrq;
+   struct ibmvnic_adapter *adapter;
+   struct net_device *netdev;
+   int frames_processed;
+   int scrq_num;
+
+   netdev = napi->dev;
+   adapter = netdev_priv(netdev);
+   scrq_num = (int)(napi - adapter->napi);
+   frames_processed = 0;
+   rx_scrq = adapter->rx_scrq[scrq_num];
 
 restart_poll:
while (frames_processed < budget) {
@@ -2466,14 +2473,14 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
 
if (unlikely(test_bit(0, >resetting) &&
 adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
-   enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
+   enable_scrq_irq(adapter, rx_scrq);
napi_complete_done(napi, frames_processed);
return frames_processed;
}
 
-   if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
+   if (!pending_scrq(adapter, rx_scrq))
break;
-   next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
+   next = ibmvnic_next_scrq(adapter, rx_scrq);
rx_buff =
(struct ibmvnic_rx_buff *)be64_to_cpu(next->
  rx_comp.correlator);
@@ -2532,14 +2539,16 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
 
if (adapter->state != VNIC_CLOSING)
replenish_rx_pool(adapter, >rx_pool[scrq_num]);
-
if (frames_processed < budget) {
-   enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
-   napi_complete_done(napi, frames_processed);
-   if (pending_scrq(adapter, adapter->rx_scrq[scrq_num]) &&
-   napi_reschedule(napi)) {
-   disable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
-   goto restart_poll;
+   if (napi_complete_done(napi, frames_processed)) {
+   enable_scrq_irq(adapter, rx_scrq);
+   if (pending_scrq(adapter, rx_scrq)) {
+   rmb();
+   if (napi_reschedule(napi)) {
+   disable_scrq_irq(adapter, rx_scrq);
+   goto restart_poll;
+   }
+   }
}
}
return frames_processed;
-- 
2.26.2

[PATCH net-next v2 8/9] ibmvnic: Use netdev_alloc_skb instead of alloc_skb to replenish RX buffers

2020-11-18 Thread Thomas Falcon

From: "Dwip N. Banerjee" 

Take advantage of the additional optimizations in netdev_alloc_skb when
allocating socket buffers to be used for packet reception.

Signed-off-by: Dwip N. Banerjee 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 596546f0614d..96df6d8fa277 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -323,7 +323,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
rx_scrq = adapter->rx_scrq[pool->index];
ind_bufp = _scrq->ind_buf;
for (i = 0; i < count; ++i) {
-   skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
+   skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
if (!skb) {
dev_err(dev, "Couldn't replenish rx buff\n");
adapter->replenish_no_mem++;
-- 
2.26.2

[PATCH net-next v2 5/9] ibmvnic: Remove send_subcrq function

2020-11-18 Thread Thomas Falcon

It is not longer used, so remove it.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 34 --
 1 file changed, 34 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 2aace693559f..e9b0cb6dfd9d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -84,8 +84,6 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *);
-static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle,
-  union sub_crq *sub_crq);
 static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64);
 static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance);
 static int enable_scrq_irq(struct ibmvnic_adapter *,
@@ -3629,38 +3627,6 @@ static void print_subcrq_error(struct device *dev, int 
rc, const char *func)
}
 }
 
-static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle,
-  union sub_crq *sub_crq)
-{
-   unsigned int ua = adapter->vdev->unit_address;
-   struct device *dev = >vdev->dev;
-   u64 *u64_crq = (u64 *)sub_crq;
-   int rc;
-
-   netdev_dbg(adapter->netdev,
-  "Sending sCRQ %016lx: %016lx %016lx %016lx %016lx\n",
-  (unsigned long int)cpu_to_be64(remote_handle),
-  (unsigned long int)cpu_to_be64(u64_crq[0]),
-  (unsigned long int)cpu_to_be64(u64_crq[1]),
-  (unsigned long int)cpu_to_be64(u64_crq[2]),
-  (unsigned long int)cpu_to_be64(u64_crq[3]));
-
-   /* Make sure the hypervisor sees the complete request */
-   mb();
-
-   rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua,
-   cpu_to_be64(remote_handle),
-   cpu_to_be64(u64_crq[0]),
-   cpu_to_be64(u64_crq[1]),
-   cpu_to_be64(u64_crq[2]),
-   cpu_to_be64(u64_crq[3]));
-
-   if (rc)
-   print_subcrq_error(dev, rc, __func__);
-
-   return rc;
-}
-
 static int send_subcrq_indirect(struct ibmvnic_adapter *adapter,
u64 remote_handle, u64 ioba, u64 num_entries)
 {
-- 
2.26.2

[PATCH net-next v2 6/9] ibmvnic: Ensure that device queue memory is cache-line aligned

2020-11-18 Thread Thomas Falcon

From: "Dwip N. Banerjee" 

PCI bus slowdowns were observed on IBM VNIC devices as a result
of partial cache line writes and non-cache aligned full cache line writes.
Ensure that packet data buffers are cache-line aligned to avoid these
slowdowns.

Signed-off-by: Dwip N. Banerjee 
---
 drivers/net/ethernet/ibm/ibmvnic.c |  9 ++---
 drivers/net/ethernet/ibm/ibmvnic.h | 10 +-
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index e9b0cb6dfd9d..85df91c9861b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -498,7 +498,7 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
 
if (rx_pool->buff_size != buff_size) {
free_long_term_buff(adapter, _pool->long_term_buff);
-   rx_pool->buff_size = buff_size;
+   rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
rc = alloc_long_term_buff(adapter,
  _pool->long_term_buff,
  rx_pool->size *
@@ -592,7 +592,7 @@ static int init_rx_pools(struct net_device *netdev)
 
rx_pool->size = adapter->req_rx_add_entries_per_subcrq;
rx_pool->index = i;
-   rx_pool->buff_size = buff_size;
+   rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
rx_pool->active = 1;
 
rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
@@ -745,6 +745,7 @@ static int init_tx_pools(struct net_device *netdev)
 {
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
int tx_subcrqs;
+   u64 buff_size;
int i, rc;
 
tx_subcrqs = adapter->num_active_tx_scrqs;
@@ -761,9 +762,11 @@ static int init_tx_pools(struct net_device *netdev)
adapter->num_active_tx_pools = tx_subcrqs;
 
for (i = 0; i < tx_subcrqs; i++) {
+   buff_size = adapter->req_mtu + VLAN_HLEN;
+   buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
rc = init_one_tx_pool(netdev, >tx_pool[i],
  adapter->req_tx_entries_per_subcrq,
- adapter->req_mtu + VLAN_HLEN);
+ buff_size);
if (rc) {
release_tx_pools(adapter);
return rc;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 16d892c3db0f..9911d926dd7f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -883,7 +883,7 @@ struct ibmvnic_sub_crq_queue {
atomic_t used;
char name[32];
u64 handle;
-};
+} cacheline_aligned;
 
 struct ibmvnic_long_term_buff {
unsigned char *buff;
@@ -907,7 +907,7 @@ struct ibmvnic_tx_pool {
struct ibmvnic_long_term_buff long_term_buff;
int num_buffers;
int buf_size;
-};
+} cacheline_aligned;
 
 struct ibmvnic_rx_buff {
struct sk_buff *skb;
@@ -928,7 +928,7 @@ struct ibmvnic_rx_pool {
int next_alloc;
int active;
struct ibmvnic_long_term_buff long_term_buff;
-};
+} cacheline_aligned;
 
 struct ibmvnic_vpd {
unsigned char *buff;
@@ -1015,8 +1015,8 @@ struct ibmvnic_adapter {
atomic_t running_cap_crqs;
bool wait_capability;
 
-   struct ibmvnic_sub_crq_queue **tx_scrq;
-   struct ibmvnic_sub_crq_queue **rx_scrq;
+   struct ibmvnic_sub_crq_queue **tx_scrq cacheline_aligned;
+   struct ibmvnic_sub_crq_queue **rx_scrq cacheline_aligned;
 
/* rx structs */
struct napi_struct *napi;
-- 
2.26.2

[PATCH net-next v2 4/9] ibmvnic: Clean up TX code and TX buffer data structure

2020-11-18 Thread Thomas Falcon

Remove unused and superfluous code and members in
existing TX implementation and data structures.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 31 +++---
 drivers/net/ethernet/ibm/ibmvnic.h |  8 
 2 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 650aaf100d65..2aace693559f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1496,17 +1496,18 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, 
int len, int *hdr_len,
  * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect.
  */
 
-static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff,
+static void build_hdr_descs_arr(struct sk_buff *skb,
+   union sub_crq *indir_arr,
int *num_entries, u8 hdr_field)
 {
int hdr_len[3] = {0, 0, 0};
+   u8 hdr_data[140] = {0};
int tot_len;
-   u8 *hdr_data = txbuff->hdr_data;
 
-   tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len,
-txbuff->hdr_data);
+   tot_len = build_hdr_data(hdr_field, skb, hdr_len,
+hdr_data);
*num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len,
-txbuff->indir_arr + 1);
+indir_arr + 1);
 }
 
 static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
@@ -1612,6 +1613,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
unsigned int tx_send_failed = 0;
netdev_tx_t ret = NETDEV_TX_OK;
unsigned int tx_map_failed = 0;
+   union sub_crq indir_arr[16];
unsigned int tx_dropped = 0;
unsigned int tx_packets = 0;
unsigned int tx_bytes = 0;
@@ -1696,11 +1698,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
 
tx_buff = _pool->tx_buff[index];
tx_buff->skb = skb;
-   tx_buff->data_dma[0] = data_dma_addr;
-   tx_buff->data_len[0] = skb->len;
tx_buff->index = index;
tx_buff->pool_index = queue_num;
-   tx_buff->last_frag = true;
 
memset(_crq, 0, sizeof(tx_crq));
tx_crq.v1.first = IBMVNIC_CRQ_CMD;
@@ -1747,7 +1746,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
}
 
if ((*hdrs >> 7) & 1)
-   build_hdr_descs_arr(tx_buff, _entries, *hdrs);
+   build_hdr_descs_arr(skb, indir_arr, _entries, *hdrs);
 
tx_crq.v1.n_crq_elem = num_entries;
tx_buff->num_entries = num_entries;
@@ -1758,8 +1757,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
goto tx_flush_err;
}
 
-   tx_buff->indir_arr[0] = tx_crq;
-   memcpy(_bufp->indir_arr[ind_bufp->index], tx_buff->indir_arr,
+   indir_arr[0] = tx_crq;
+   memcpy(_bufp->indir_arr[ind_bufp->index], _arr[0],
   num_entries * sizeof(struct ibmvnic_generic_scrq));
ind_bufp->index += num_entries;
if (__netdev_tx_sent_queue(txq, skb->len,
@@ -3185,7 +3184,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
struct netdev_queue *txq;
union sub_crq *next;
int index;
-   int i, j;
+   int i;
 
 restart_loop:
while (pending_scrq(adapter, scrq)) {
@@ -3210,14 +3209,6 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
}
 
txbuff = _pool->tx_buff[index];
-
-   for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) {
-   if (!txbuff->data_dma[j])
-   continue;
-
-   txbuff->data_dma[j] = 0;
-   }
-
num_packets++;
num_entries += txbuff->num_entries;
if (txbuff->skb) {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 4a63e9886719..16d892c3db0f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -226,8 +226,6 @@ struct ibmvnic_tx_comp_desc {
 #define IBMVNIC_TCP_CHKSUM 0x20
 #define IBMVNIC_UDP_CHKSUM 0x08
 
-#define IBMVNIC_MAX_FRAGS_PER_CRQ 3
-
 struct ibmvnic_tx_desc {
u8 first;
u8 type;
@@ -896,14 +894,8 @@ struct ibmvnic_long_term_buff {
 
 struct ibmvnic_tx_buff {
struct sk_buff *skb;
-   dma_addr_t data_dma[IBMVNIC_MAX_FRAGS_PER_CRQ];
-   unsigned int data_len[IBMVNIC_MAX_FRAGS_PER_CRQ];
int index;
int pool_index;
-   bool last_frag;
-   union sub_crq indir_arr[6];
-   u8 hdr_data[140];
-   dma_addr_t indir_dma;
int num_entries;
 };
 
-- 
2.26.2

[PATCH net-next v2 3/9] ibmvnic: Introduce xmit_more support using batched subCRQ hcalls

2020-11-18 Thread Thomas Falcon

Include support for the xmit_more feature utilizing the
H_SEND_SUB_CRQ_INDIRECT hypervisor call which allows the sending
of multiple subordinate Command Response Queue descriptors in one
hypervisor call via a DMA-mapped buffer. This update reduces hypervisor
calls and thus hypervisor call overhead per TX descriptor.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 204 -
 1 file changed, 139 insertions(+), 65 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 17ba6db6f5f9..650aaf100d65 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1165,6 +1165,7 @@ static int __ibmvnic_open(struct net_device *netdev)
if (prev_state == VNIC_CLOSED)
enable_irq(adapter->tx_scrq[i]->irq);
enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+   netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
}
 
rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
@@ -1523,16 +1524,93 @@ static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
return 0;
 }
 
+static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
+struct ibmvnic_sub_crq_queue *tx_scrq)
+{
+   struct ibmvnic_ind_xmit_queue *ind_bufp;
+   struct ibmvnic_tx_buff *tx_buff;
+   struct ibmvnic_tx_pool *tx_pool;
+   union sub_crq tx_scrq_entry;
+   int queue_num;
+   int entries;
+   int index;
+   int i;
+
+   ind_bufp = _scrq->ind_buf;
+   entries = (u64)ind_bufp->index;
+   queue_num = tx_scrq->pool_index;
+
+   for (i = entries - 1; i >= 0; --i) {
+   tx_scrq_entry = ind_bufp->indir_arr[i];
+   if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC)
+   continue;
+   index = be32_to_cpu(tx_scrq_entry.v1.correlator);
+   if (index & IBMVNIC_TSO_POOL_MASK) {
+   tx_pool = >tso_pool[queue_num];
+   index &= ~IBMVNIC_TSO_POOL_MASK;
+   } else {
+   tx_pool = >tx_pool[queue_num];
+   }
+   tx_pool->free_map[tx_pool->consumer_index] = index;
+   tx_pool->consumer_index = tx_pool->consumer_index == 0 ?
+ tx_pool->num_buffers - 1 :
+ tx_pool->consumer_index - 1;
+   tx_buff = _pool->tx_buff[index];
+   adapter->netdev->stats.tx_packets--;
+   adapter->netdev->stats.tx_bytes -= tx_buff->skb->len;
+   adapter->tx_stats_buffers[queue_num].packets--;
+   adapter->tx_stats_buffers[queue_num].bytes -=
+   tx_buff->skb->len;
+   dev_kfree_skb_any(tx_buff->skb);
+   tx_buff->skb = NULL;
+   adapter->netdev->stats.tx_dropped++;
+   }
+   ind_bufp->index = 0;
+   if (atomic_sub_return(entries, _scrq->used) <=
+   (adapter->req_tx_entries_per_subcrq / 2) &&
+   __netif_subqueue_stopped(adapter->netdev, queue_num)) {
+   netif_wake_subqueue(adapter->netdev, queue_num);
+   netdev_dbg(adapter->netdev, "Started queue %d\n",
+  queue_num);
+   }
+}
+
+static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
+struct ibmvnic_sub_crq_queue *tx_scrq)
+{
+   struct ibmvnic_ind_xmit_queue *ind_bufp;
+   u64 dma_addr;
+   u64 entries;
+   u64 handle;
+   int rc;
+
+   ind_bufp = _scrq->ind_buf;
+   dma_addr = (u64)ind_bufp->indir_dma;
+   entries = (u64)ind_bufp->index;
+   handle = tx_scrq->handle;
+
+   if (!entries)
+   return 0;
+   rc = send_subcrq_indirect(adapter, handle, dma_addr, entries);
+   if (rc)
+   ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
+   else
+   ind_bufp->index = 0;
+   return 0;
+}
+
 static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
int queue_num = skb_get_queue_mapping(skb);
u8 *hdrs = (u8 *)>tx_rx_desc_req;
struct device *dev = >vdev->dev;
+   struct ibmvnic_ind_xmit_queue *ind_bufp;
struct ibmvnic_tx_buff *tx_buff = NULL;
struct ibmvnic_sub_crq_queue *tx_scrq;
struct ibmvnic_tx_pool *tx_pool;
unsigned int tx_send_failed = 0;
+   netdev_tx_t ret = NETDEV_TX_OK;
unsigned int tx_map_failed = 0;
unsigned int tx_dropped = 0;
unsigned int tx_packets = 0;
@@ -1546,8 +1624,10 @@ static n

[PATCH net-next v2 2/9] ibmvnic: Introduce batched RX buffer descriptor transmission

2020-11-18 Thread Thomas Falcon

Utilize the H_SEND_SUB_CRQ_INDIRECT hypervisor call to send
multiple RX buffer descriptors to the device in one hypervisor
call operation. This change will reduce the number of hypervisor
calls and thus hypervisor call overhead needed to transmit
RX buffer descriptors to the device.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 57 +++---
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 3884f8a683a7..17ba6db6f5f9 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -306,9 +306,11 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
int count = pool->size - atomic_read(>available);
u64 handle = adapter->rx_scrq[pool->index]->handle;
struct device *dev = >vdev->dev;
+   struct ibmvnic_ind_xmit_queue *ind_bufp;
+   struct ibmvnic_sub_crq_queue *rx_scrq;
+   union sub_crq *sub_crq;
int buffers_added = 0;
unsigned long lpar_rc;
-   union sub_crq sub_crq;
struct sk_buff *skb;
unsigned int offset;
dma_addr_t dma_addr;
@@ -320,6 +322,8 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
if (!pool->active)
return;
 
+   rx_scrq = adapter->rx_scrq[pool->index];
+   ind_bufp = _scrq->ind_buf;
for (i = 0; i < count; ++i) {
skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
if (!skb) {
@@ -346,12 +350,13 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
pool->rx_buff[index].pool_index = pool->index;
pool->rx_buff[index].size = pool->buff_size;
 
-   memset(_crq, 0, sizeof(sub_crq));
-   sub_crq.rx_add.first = IBMVNIC_CRQ_CMD;
-   sub_crq.rx_add.correlator =
+   sub_crq = _bufp->indir_arr[ind_bufp->index++];
+   memset(sub_crq, 0, sizeof(*sub_crq));
+   sub_crq->rx_add.first = IBMVNIC_CRQ_CMD;
+   sub_crq->rx_add.correlator =
cpu_to_be64((u64)>rx_buff[index]);
-   sub_crq.rx_add.ioba = cpu_to_be32(dma_addr);
-   sub_crq.rx_add.map_id = pool->long_term_buff.map_id;
+   sub_crq->rx_add.ioba = cpu_to_be32(dma_addr);
+   sub_crq->rx_add.map_id = pool->long_term_buff.map_id;
 
/* The length field of the sCRQ is defined to be 24 bits so the
 * buffer size needs to be left shifted by a byte before it is
@@ -361,15 +366,20 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
 #ifdef __LITTLE_ENDIAN__
shift = 8;
 #endif
-   sub_crq.rx_add.len = cpu_to_be32(pool->buff_size << shift);
-
-   lpar_rc = send_subcrq(adapter, handle, _crq);
-   if (lpar_rc != H_SUCCESS)
-   goto failure;
-
-   buffers_added++;
-   adapter->replenish_add_buff_success++;
+   sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
pool->next_free = (pool->next_free + 1) % pool->size;
+   if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
+   i == count - 1) {
+   lpar_rc =
+   send_subcrq_indirect(adapter, handle,
+(u64)ind_bufp->indir_dma,
+(u64)ind_bufp->index);
+   if (lpar_rc != H_SUCCESS)
+   goto failure;
+   buffers_added += ind_bufp->index;
+   adapter->replenish_add_buff_success += ind_bufp->index;
+   ind_bufp->index = 0;
+   }
}
atomic_add(buffers_added, >available);
return;
@@ -377,13 +387,20 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
 failure:
if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
dev_err_ratelimited(dev, "rx: replenish packet buffer 
failed\n");
-   pool->free_map[pool->next_free] = index;
-   pool->rx_buff[index].skb = NULL;
-
-   dev_kfree_skb_any(skb);
-   adapter->replenish_add_buff_failure++;
-   atomic_add(buffers_added, >available);
+   for (i = ind_bufp->index - 1; i >= 0; --i) {
+   struct ibmvnic_rx_buff *rx_buff;
 
+   pool->next_free = pool->next_free == 0 ?
+ pool->size - 1 : pool->next_free - 1;
+   sub_crq = _bufp->indir_arr[i];
+   rx_buff = (struct ibmvnic_rx_buff *)
+   be64_to_cpu(sub_crq->

[PATCH net-next v2 1/9] ibmvnic: Introduce indirect subordinate Command Response Queue buffer

2020-11-18 Thread Thomas Falcon

This patch introduces the infrastructure to send batched subordinate
Command Response Queue descriptors, which are used by the ibmvnic
driver to send TX frame and RX buffer descriptors.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 23 +++
 drivers/net/ethernet/ibm/ibmvnic.h |  9 +
 2 files changed, 32 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index da15913879f8..3884f8a683a7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2858,6 +2858,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter 
*adapter,
memset(scrq->msgs, 0, 4 * PAGE_SIZE);
atomic_set(>used, 0);
scrq->cur = 0;
+   scrq->ind_buf.index = 0;
 
rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
   4 * PAGE_SIZE, >crq_num, >hw_irq);
@@ -2909,6 +2910,11 @@ static void release_sub_crq_queue(struct ibmvnic_adapter 
*adapter,
}
}
 
+   dma_free_coherent(dev,
+ IBMVNIC_IND_ARR_SZ,
+ scrq->ind_buf.indir_arr,
+ scrq->ind_buf.indir_dma);
+
dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 DMA_BIDIRECTIONAL);
free_pages((unsigned long)scrq->msgs, 2);
@@ -2955,6 +2961,17 @@ static struct ibmvnic_sub_crq_queue 
*init_sub_crq_queue(struct ibmvnic_adapter
 
scrq->adapter = adapter;
scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
+   scrq->ind_buf.index = 0;
+
+   scrq->ind_buf.indir_arr =
+   dma_alloc_coherent(dev,
+  IBMVNIC_IND_ARR_SZ,
+  >ind_buf.indir_dma,
+  GFP_KERNEL);
+
+   if (!scrq->ind_buf.indir_arr)
+   goto indir_failed;
+
spin_lock_init(>lock);
 
netdev_dbg(adapter->netdev,
@@ -2963,6 +2980,12 @@ static struct ibmvnic_sub_crq_queue 
*init_sub_crq_queue(struct ibmvnic_adapter
 
return scrq;
 
+indir_failed:
+   do {
+   rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
+   adapter->vdev->unit_address,
+   scrq->crq_num);
+   } while (rc == H_BUSY || rc == H_IS_LONG_BUSY(rc));
 reg_failed:
dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 DMA_BIDIRECTIONAL);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 217dcc7ded70..4a63e9886719 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -31,6 +31,8 @@
 #define IBMVNIC_BUFFS_PER_POOL 100
 #define IBMVNIC_MAX_QUEUES 16
 #define IBMVNIC_MAX_QUEUE_SZ   4096
+#define IBMVNIC_MAX_IND_DESCS  128
+#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32)
 
 #define IBMVNIC_TSO_BUF_SZ 65536
 #define IBMVNIC_TSO_BUFS   64
@@ -861,6 +863,12 @@ union sub_crq {
struct ibmvnic_rx_buff_add_desc rx_add;
 };
 
+struct ibmvnic_ind_xmit_queue {
+   union sub_crq *indir_arr;
+   dma_addr_t indir_dma;
+   int index;
+};
+
 struct ibmvnic_sub_crq_queue {
union sub_crq *msgs;
int size, cur;
@@ -873,6 +881,7 @@ struct ibmvnic_sub_crq_queue {
spinlock_t lock;
struct sk_buff *rx_skb_top;
struct ibmvnic_adapter *adapter;
+   struct ibmvnic_ind_xmit_queue ind_buf;
atomic_t used;
char name[32];
u64 handle;
-- 
2.26.2

[PATCH net-next v2 0/9] ibmvnic: Performance improvements and other updates

2020-11-18 Thread Thomas Falcon

The first three patches utilize a hypervisor call allowing multiple 
TX and RX buffer replenishment descriptors to be sent in one operation,
which significantly reduces hypervisor call overhead. The xmit_more
and Byte Queue Limit API's are leveraged to provide this support
for TX descriptors.

The subsequent two patches remove superfluous code and members in
TX completion handling function and TX buffer structure, respectively,
and remove unused routines.

Finally, four patches which ensure that device queue memory is
cache-line aligned, resolving slowdowns observed in PCI traces,
as well as optimize the driver's NAPI polling function and 
to RX buffer replenishment are provided by Dwip Banerjee.

This series provides significant performance improvements, allowing
the driver to fully utilize 100Gb NIC's.

v2 updates:

1) Removed three patches from the original series which
   were bug fixes and thus better suited for the net tree,
   suggested by Jakub Kicinski.
2) Fixed error handling when initializing device queues,
   suggested by Jakub Kicinski.
3) Fixed bug where queued entries were not flushed after a
   dropped frame, also suggested by Jakub. Two functions,
   ibmvnic_tx_scrq_flush and its helper ibmvnic_tx_scrq_clean_buffer,
   were introduced to ensure that queued frames are either submitted
   to firmware or, if that is not successful, freed as dropped and
   associated data structures are updated with the new device queue state.

Dwip N. Banerjee (4):
  ibmvnic: Ensure that device queue memory is cache-line aligned
  ibmvnic: Correctly re-enable interrupts in NAPI polling routine
  ibmvnic: Use netdev_alloc_skb instead of alloc_skb to replenish RX
buffers
  ibmvnic: Do not replenish RX buffers after every polling loop

Thomas Falcon (5):
  ibmvnic: Introduce indirect subordinate Command Response Queue buffer
  ibmvnic: Introduce batched RX buffer descriptor transmission
  ibmvnic: Introduce xmit_more support using batched subCRQ hcalls
  ibmvnic: Clean up TX code and TX buffer data structure
  ibmvnic: Remove send_subcrq function

 drivers/net/ethernet/ibm/ibmvnic.c | 398 ++---
 drivers/net/ethernet/ibm/ibmvnic.h |  27 +-
 2 files changed, 256 insertions(+), 169 deletions(-)

-- 
2.26.2

Re: [PATCH net-next 04/12] ibmvnic: Introduce xmit_more support using batched subCRQ hcalls

2020-11-16 Thread Thomas Falcon




On 11/14/20 5:46 PM, Jakub Kicinski wrote:

On Thu, 12 Nov 2020 13:09:59 -0600 Thomas Falcon wrote:

Include support for the xmit_more feature utilizing the
H_SEND_SUB_CRQ_INDIRECT hypervisor call which allows the sending
of multiple subordinate Command Response Queue descriptors in one
hypervisor call via a DMA-mapped buffer. This update reduces hypervisor
calls and thus hypervisor call overhead per TX descriptor.

Signed-off-by: Thomas Falcon 

The common bug with xmit_more is not flushing the already queued
notifications when there is a drop. Any time you drop a skb you need
to check it's not an skb that was the end of an xmit_more train and
if so flush notifications (or just always flush on error).

Looking at the driver e.g. this starting goto:

 if (ibmvnic_xmit_workarounds(skb, netdev)) {
 tx_dropped++;
 tx_send_failed++;
 ret = NETDEV_TX_OK;
 goto out;
 }

Does not seem to hit any flush on its way out AFAICS.


Hi, I included those updates in a later patch to ease review but see now 
that that was a mistake. I will merge those bits back into this patch 
and resubmit.


Thanks!

Re: [PATCH net-next 01/12] ibmvnic: Ensure that subCRQ entry reads are ordered

2020-11-16 Thread Thomas Falcon


On 11/14/20 5:35 PM, Jakub Kicinski wrote:

On Thu, 12 Nov 2020 13:09:56 -0600 Thomas Falcon wrote:

Ensure that received Subordinate Command-Response Queue
entries are properly read in order by the driver.

Signed-off-by: Thomas Falcon 

Are you sure this is not a bug fix?
Yes, I guess it does look like a bug fix. I can omit this in v2 and 
submit this as a stand-alone patch to net?

Re: [PATCH net-next 02/12] ibmvnic: Introduce indirect subordinate Command Response Queue buffer

2020-11-16 Thread Thomas Falcon




On 11/14/20 5:35 PM, Jakub Kicinski wrote:

On Thu, 12 Nov 2020 13:09:57 -0600 Thomas Falcon wrote:

This patch introduces the infrastructure to send batched subordinate
Command Response Queue descriptors, which are used by the ibmvnic
driver to send TX frame and RX buffer descriptors.

Signed-off-by: Thomas Falcon 
@@ -2957,6 +2963,19 @@ static struct ibmvnic_sub_crq_queue 
*init_sub_crq_queue(struct ibmvnic_adapter
  
  	scrq->adapter = adapter;

scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
+   scrq->ind_buf.index = 0;
+
+   scrq->ind_buf.indir_arr =
+   dma_alloc_coherent(dev,
+  IBMVNIC_IND_ARR_SZ,
+  >ind_buf.indir_dma,
+  GFP_KERNEL);
+
+   if (!scrq->ind_buf.indir_arr) {
+   dev_err(dev, "Couldn't allocate indirect scrq buffer\n");

This warning/error is not necessary, memory allocation will trigger an
OOM message already.

Thanks, I can fix that in a v2.



+   goto reg_failed;

Don't you have to do something like

 rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
 adapter->vdev->unit_address,
 scrq->crq_num);

?


Yes, you're right, I will include that in a v2 also.


+   }
+
spin_lock_init(>lock);

[PATCH net-next 12/12] ibmvnic: Do not replenish RX buffers after every polling loop

2020-11-12 Thread Thomas Falcon

From: "Dwip N. Banerjee" 

Reduce the amount of time spent replenishing RX buffers by
only doing so once available buffers has fallen under a certain
threshold, in this case half of the total number of buffers, or
if the polling loop exits before the packets processed is less
than its budget.

Signed-off-by: Dwip N. Banerjee 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 0791dbf1cba8..66f8068bee5a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2476,7 +2476,10 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
frames_processed++;
}
 
-   if (adapter->state != VNIC_CLOSING)
+   if (adapter->state != VNIC_CLOSING &&
+   ((atomic_read(>rx_pool[scrq_num].available) <
+ adapter->req_rx_add_entries_per_subcrq / 2) ||
+ frames_processed < budget))
replenish_rx_pool(adapter, >rx_pool[scrq_num]);
if (frames_processed < budget) {
if (napi_complete_done(napi, frames_processed)) {
-- 
2.26.2

[PATCH net-next 11/12] ibmvnic: Use netdev_alloc_skb instead of alloc_skb to replenish RX buffers

2020-11-12 Thread Thomas Falcon

From: "Dwip N. Banerjee" 

Take advantage of the additional optimizations in netdev_alloc_skb when
allocating socket buffers to be used for packet reception.

Signed-off-by: Dwip N. Banerjee 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index e48a44d8884c..0791dbf1cba8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -323,7 +323,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
rx_scrq = adapter->rx_scrq[pool->index];
ind_bufp = _scrq->ind_buf;
for (i = 0; i < count; ++i) {
-   skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
+   skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
if (!skb) {
dev_err(dev, "Couldn't replenish rx buff\n");
adapter->replenish_no_mem++;
-- 
2.26.2

[PATCH net-next 10/12] ibmvnic: Correctly re-enable interrupts in NAPI polling routine

2020-11-12 Thread Thomas Falcon

From: "Dwip N. Banerjee" 

If the current NAPI polling loop exits without completing it's
budget, only re-enable interrupts if there are no entries remaining
in the queue and napi_complete_done is successful. If there are entries
remaining on the queue that were missed, restart the polling loop.

Signed-off-by: Dwip N. Banerjee 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 37 +++---
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index dc42bdc6d3e1..e48a44d8884c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2387,10 +2387,17 @@ static void remove_buff_from_pool(struct 
ibmvnic_adapter *adapter,
 
 static int ibmvnic_poll(struct napi_struct *napi, int budget)
 {
-   struct net_device *netdev = napi->dev;
-   struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-   int scrq_num = (int)(napi - adapter->napi);
-   int frames_processed = 0;
+   struct ibmvnic_sub_crq_queue *rx_scrq;
+   struct ibmvnic_adapter *adapter;
+   struct net_device *netdev;
+   int frames_processed;
+   int scrq_num;
+
+   netdev = napi->dev;
+   adapter = netdev_priv(netdev);
+   scrq_num = (int)(napi - adapter->napi);
+   frames_processed = 0;
+   rx_scrq = adapter->rx_scrq[scrq_num];
 
 restart_poll:
while (frames_processed < budget) {
@@ -2403,16 +2410,16 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
 
if (unlikely(test_bit(0, >resetting) &&
 adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
-   enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
+   enable_scrq_irq(adapter, rx_scrq);
napi_complete_done(napi, frames_processed);
return frames_processed;
}
 
-   if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
+   if (!pending_scrq(adapter, rx_scrq))
break;
/* ensure that we do not prematurely exit the polling loop */
dma_rmb();
-   next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
+   next = ibmvnic_next_scrq(adapter, rx_scrq);
rx_buff =
(struct ibmvnic_rx_buff *)be64_to_cpu(next->
  rx_comp.correlator);
@@ -2471,14 +2478,16 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
 
if (adapter->state != VNIC_CLOSING)
replenish_rx_pool(adapter, >rx_pool[scrq_num]);
-
if (frames_processed < budget) {
-   enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
-   napi_complete_done(napi, frames_processed);
-   if (pending_scrq(adapter, adapter->rx_scrq[scrq_num]) &&
-   napi_reschedule(napi)) {
-   disable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
-   goto restart_poll;
+   if (napi_complete_done(napi, frames_processed)) {
+   enable_scrq_irq(adapter, rx_scrq);
+   if (pending_scrq(adapter, rx_scrq)) {
+   rmb();
+   if (napi_reschedule(napi)) {
+   disable_scrq_irq(adapter, rx_scrq);
+   goto restart_poll;
+   }
+   }
}
}
return frames_processed;
-- 
2.26.2

[PATCH net-next 06/12] ibmvnic: Clean up TX code and TX buffer data structure

2020-11-12 Thread Thomas Falcon

Remove unused and superfluous code and members in
existing TX implementation and data structures.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 31 +++---
 drivers/net/ethernet/ibm/ibmvnic.h |  8 
 2 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index c9437b2d1aa8..b523da20bffc 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1496,17 +1496,18 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, 
int len, int *hdr_len,
  * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect.
  */
 
-static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff,
+static void build_hdr_descs_arr(struct sk_buff *skb,
+   union sub_crq *indir_arr,
int *num_entries, u8 hdr_field)
 {
int hdr_len[3] = {0, 0, 0};
+   u8 hdr_data[140] = {0};
int tot_len;
-   u8 *hdr_data = txbuff->hdr_data;
 
-   tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len,
-txbuff->hdr_data);
+   tot_len = build_hdr_data(hdr_field, skb, hdr_len,
+hdr_data);
*num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len,
-txbuff->indir_arr + 1);
+indir_arr + 1);
 }
 
 static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
@@ -1537,6 +1538,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
unsigned int tx_send_failed = 0;
netdev_tx_t ret = NETDEV_TX_OK;
unsigned int tx_map_failed = 0;
+   union sub_crq indir_arr[16];
unsigned int tx_dropped = 0;
unsigned int tx_packets = 0;
unsigned int tx_bytes = 0;
@@ -1620,11 +1622,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
 
tx_buff = _pool->tx_buff[index];
tx_buff->skb = skb;
-   tx_buff->data_dma[0] = data_dma_addr;
-   tx_buff->data_len[0] = skb->len;
tx_buff->index = index;
tx_buff->pool_index = queue_num;
-   tx_buff->last_frag = true;
 
memset(_crq, 0, sizeof(tx_crq));
tx_crq.v1.first = IBMVNIC_CRQ_CMD;
@@ -1671,7 +1670,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
}
 
if ((*hdrs >> 7) & 1)
-   build_hdr_descs_arr(tx_buff, _entries, *hdrs);
+   build_hdr_descs_arr(skb, indir_arr, _entries, *hdrs);
 
netdev_tx_sent_queue(txq, skb->len);
 
@@ -1688,8 +1687,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
ind_bufp->index = 0;
}
 
-   tx_buff->indir_arr[0] = tx_crq;
-   memcpy(_bufp->indir_arr[ind_bufp->index], tx_buff->indir_arr,
+   indir_arr[0] = tx_crq;
+   memcpy(_bufp->indir_arr[ind_bufp->index], _arr[0],
   num_entries * sizeof(struct ibmvnic_generic_scrq));
ind_bufp->index += num_entries;
if (!netdev_xmit_more() || netif_xmit_stopped(txq) ||
@@ -3140,7 +3139,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
struct netdev_queue *txq;
union sub_crq *next;
int index;
-   int i, j;
+   int i;
 
 restart_loop:
while (pending_scrq(adapter, scrq)) {
@@ -3169,14 +3168,6 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
}
 
txbuff = _pool->tx_buff[index];
-
-   for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) {
-   if (!txbuff->data_dma[j])
-   continue;
-
-   txbuff->data_dma[j] = 0;
-   }
-
num_packets++;
num_entries += txbuff->num_entries;
if (txbuff->skb) {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 05bf212d387d..11af1f29210b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -225,8 +225,6 @@ struct ibmvnic_tx_comp_desc {
 #define IBMVNIC_TCP_CHKSUM 0x20
 #define IBMVNIC_UDP_CHKSUM 0x08
 
-#define IBMVNIC_MAX_FRAGS_PER_CRQ 3
-
 struct ibmvnic_tx_desc {
u8 first;
u8 type;
@@ -897,14 +895,8 @@ struct ibmvnic_long_term_buff {
 
 struct ibmvnic_tx_buff {
struct sk_buff *skb;
-   dma_addr_t data_dma[IBMVNIC_MAX_FRAGS_PER_CRQ];
-   unsigned int data_len[IBMVNIC_MAX_FRAGS_PER_CRQ];
int index;
int pool_index;
-   bool last_frag;
-   union sub_crq indir_arr[6];
-   u8 hdr_data[140];
-   dma_addr_t indir_dma;
int num_entries;
 };
 
-- 
2.26.2

[PATCH net-next 09/12] ibmvnic: Ensure that device queue memory is cache-line aligned

2020-11-12 Thread Thomas Falcon

From: "Dwip N. Banerjee" 

PCI bus slowdowns were observed on IBM VNIC devices as a result
of partial cache line writes and non-cache aligned full cache line writes.
Ensure that packet data buffers are cache-line aligned to avoid these
slowdowns.

Signed-off-by: Dwip N. Banerjee 
---
 drivers/net/ethernet/ibm/ibmvnic.c |  9 ++---
 drivers/net/ethernet/ibm/ibmvnic.h | 10 +-
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index b2ca34e94078..dc42bdc6d3e1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -498,7 +498,7 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
 
if (rx_pool->buff_size != buff_size) {
free_long_term_buff(adapter, _pool->long_term_buff);
-   rx_pool->buff_size = buff_size;
+   rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
rc = alloc_long_term_buff(adapter,
  _pool->long_term_buff,
  rx_pool->size *
@@ -592,7 +592,7 @@ static int init_rx_pools(struct net_device *netdev)
 
rx_pool->size = adapter->req_rx_add_entries_per_subcrq;
rx_pool->index = i;
-   rx_pool->buff_size = buff_size;
+   rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
rx_pool->active = 1;
 
rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
@@ -745,6 +745,7 @@ static int init_tx_pools(struct net_device *netdev)
 {
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
int tx_subcrqs;
+   u64 buff_size;
int i, rc;
 
tx_subcrqs = adapter->num_active_tx_scrqs;
@@ -761,9 +762,11 @@ static int init_tx_pools(struct net_device *netdev)
adapter->num_active_tx_pools = tx_subcrqs;
 
for (i = 0; i < tx_subcrqs; i++) {
+   buff_size = adapter->req_mtu + VLAN_HLEN;
+   buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
rc = init_one_tx_pool(netdev, >tx_pool[i],
  adapter->req_tx_entries_per_subcrq,
- adapter->req_mtu + VLAN_HLEN);
+ buff_size);
if (rc) {
release_tx_pools(adapter);
return rc;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index c6f1842d2023..1e4c7702402b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -884,7 +884,7 @@ struct ibmvnic_sub_crq_queue {
atomic_t used;
char name[32];
u64 handle;
-};
+} cacheline_aligned;
 
 struct ibmvnic_long_term_buff {
unsigned char *buff;
@@ -908,7 +908,7 @@ struct ibmvnic_tx_pool {
struct ibmvnic_long_term_buff long_term_buff;
int num_buffers;
int buf_size;
-};
+} cacheline_aligned;
 
 struct ibmvnic_rx_buff {
struct sk_buff *skb;
@@ -929,7 +929,7 @@ struct ibmvnic_rx_pool {
int next_alloc;
int active;
struct ibmvnic_long_term_buff long_term_buff;
-};
+} cacheline_aligned;
 
 struct ibmvnic_vpd {
unsigned char *buff;
@@ -1014,8 +1014,8 @@ struct ibmvnic_adapter {
atomic_t running_cap_crqs;
bool wait_capability;
 
-   struct ibmvnic_sub_crq_queue **tx_scrq;
-   struct ibmvnic_sub_crq_queue **rx_scrq;
+   struct ibmvnic_sub_crq_queue **tx_scrq cacheline_aligned;
+   struct ibmvnic_sub_crq_queue **rx_scrq cacheline_aligned;
 
/* rx structs */
struct napi_struct *napi;
-- 
2.26.2

[PATCH net-next 04/12] ibmvnic: Introduce xmit_more support using batched subCRQ hcalls

2020-11-12 Thread Thomas Falcon

Include support for the xmit_more feature utilizing the
H_SEND_SUB_CRQ_INDIRECT hypervisor call which allows the sending
of multiple subordinate Command Response Queue descriptors in one
hypervisor call via a DMA-mapped buffer. This update reduces hypervisor
calls and thus hypervisor call overhead per TX descriptor.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 151 +
 1 file changed, 91 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 524020691ef8..0f6aba760d65 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1165,6 +1165,7 @@ static int __ibmvnic_open(struct net_device *netdev)
if (prev_state == VNIC_CLOSED)
enable_irq(adapter->tx_scrq[i]->irq);
enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+   netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
}
 
rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
@@ -1529,10 +1530,12 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
int queue_num = skb_get_queue_mapping(skb);
u8 *hdrs = (u8 *)>tx_rx_desc_req;
struct device *dev = >vdev->dev;
+   struct ibmvnic_ind_xmit_queue *ind_bufp;
struct ibmvnic_tx_buff *tx_buff = NULL;
struct ibmvnic_sub_crq_queue *tx_scrq;
struct ibmvnic_tx_pool *tx_pool;
unsigned int tx_send_failed = 0;
+   netdev_tx_t ret = NETDEV_TX_OK;
unsigned int tx_map_failed = 0;
unsigned int tx_dropped = 0;
unsigned int tx_packets = 0;
@@ -1547,7 +1550,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
int index = 0;
u8 proto = 0;
u64 handle;
-   netdev_tx_t ret = NETDEV_TX_OK;
+   int i;
 
if (test_bit(0, >resetting)) {
if (!netif_subqueue_stopped(netdev, skb))
@@ -1666,55 +1669,37 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
hdrs += 2;
}
-   /* determine if l2/3/4 headers are sent to firmware */
-   if ((*hdrs >> 7) & 1) {
+
+   if ((*hdrs >> 7) & 1)
build_hdr_descs_arr(tx_buff, _entries, *hdrs);
-   tx_crq.v1.n_crq_elem = num_entries;
-   tx_buff->num_entries = num_entries;
-   tx_buff->indir_arr[0] = tx_crq;
-   tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
-   sizeof(tx_buff->indir_arr),
-   DMA_TO_DEVICE);
-   if (dma_mapping_error(dev, tx_buff->indir_dma)) {
-   dev_kfree_skb_any(skb);
-   tx_buff->skb = NULL;
-   if (!firmware_has_feature(FW_FEATURE_CMO))
-   dev_err(dev, "tx: unable to map descriptor 
array\n");
-   tx_map_failed++;
-   tx_dropped++;
-   ret = NETDEV_TX_OK;
-   goto tx_err_out;
-   }
-   lpar_rc = send_subcrq_indirect(adapter, handle,
-  (u64)tx_buff->indir_dma,
-  (u64)num_entries);
-   dma_unmap_single(dev, tx_buff->indir_dma,
-sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
-   } else {
-   tx_buff->num_entries = num_entries;
-   lpar_rc = send_subcrq(adapter, handle,
- _crq);
-   }
-   if (lpar_rc != H_SUCCESS) {
-   if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER)
-   dev_err_ratelimited(dev, "tx: send failed\n");
-   dev_kfree_skb_any(skb);
-   tx_buff->skb = NULL;
 
-   if (lpar_rc == H_CLOSED || adapter->failover_pending) {
-   /* Disable TX and report carrier off if queue is closed
-* or pending failover.
-* Firmware guarantees that a signal will be sent to the
-* driver, triggering a reset or some other action.
-*/
-   netif_tx_stop_all_queues(netdev);
-   netif_carrier_off(netdev);
-   }
+   netdev_tx_sent_queue(txq, skb->len);
 
-   tx_send_failed++;
-   tx_dropped++;
-   ret = NETDEV_TX_OK;
-   goto tx_err_out;
+   tx_crq.v1.n_crq_elem = num_entries;
+   tx_buff->num_entries = num_entries;
+   ind_buf

[PATCH net-next 02/12] ibmvnic: Introduce indirect subordinate Command Response Queue buffer

2020-11-12 Thread Thomas Falcon

This patch introduces the infrastructure to send batched subordinate
Command Response Queue descriptors, which are used by the ibmvnic
driver to send TX frame and RX buffer descriptors.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 19 +++
 drivers/net/ethernet/ibm/ibmvnic.h | 10 ++
 2 files changed, 29 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 5647f54bf387..dd9ca06f355b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2860,6 +2860,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter 
*adapter,
memset(scrq->msgs, 0, 4 * PAGE_SIZE);
atomic_set(>used, 0);
scrq->cur = 0;
+   scrq->ind_buf.index = 0;
 
rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
   4 * PAGE_SIZE, >crq_num, >hw_irq);
@@ -2911,6 +2912,11 @@ static void release_sub_crq_queue(struct ibmvnic_adapter 
*adapter,
}
}
 
+   dma_free_coherent(dev,
+ IBMVNIC_IND_ARR_SZ,
+ scrq->ind_buf.indir_arr,
+ scrq->ind_buf.indir_dma);
+
dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 DMA_BIDIRECTIONAL);
free_pages((unsigned long)scrq->msgs, 2);
@@ -2957,6 +2963,19 @@ static struct ibmvnic_sub_crq_queue 
*init_sub_crq_queue(struct ibmvnic_adapter
 
scrq->adapter = adapter;
scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
+   scrq->ind_buf.index = 0;
+
+   scrq->ind_buf.indir_arr =
+   dma_alloc_coherent(dev,
+  IBMVNIC_IND_ARR_SZ,
+  >ind_buf.indir_dma,
+  GFP_KERNEL);
+
+   if (!scrq->ind_buf.indir_arr) {
+   dev_err(dev, "Couldn't allocate indirect scrq buffer\n");
+   goto reg_failed;
+   }
+
spin_lock_init(>lock);
 
netdev_dbg(adapter->netdev,
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 217dcc7ded70..05bf212d387d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -31,6 +31,7 @@
 #define IBMVNIC_BUFFS_PER_POOL 100
 #define IBMVNIC_MAX_QUEUES 16
 #define IBMVNIC_MAX_QUEUE_SZ   4096
+#define IBMVNIC_MAX_IND_DESCS  128
 
 #define IBMVNIC_TSO_BUF_SZ 65536
 #define IBMVNIC_TSO_BUFS   64
@@ -861,6 +862,14 @@ union sub_crq {
struct ibmvnic_rx_buff_add_desc rx_add;
 };
 
+#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * sizeof(union sub_crq))
+
+struct ibmvnic_ind_xmit_queue {
+   union sub_crq *indir_arr;
+   dma_addr_t indir_dma;
+   int index;
+};
+
 struct ibmvnic_sub_crq_queue {
union sub_crq *msgs;
int size, cur;
@@ -873,6 +882,7 @@ struct ibmvnic_sub_crq_queue {
spinlock_t lock;
struct sk_buff *rx_skb_top;
struct ibmvnic_adapter *adapter;
+   struct ibmvnic_ind_xmit_queue ind_buf;
atomic_t used;
char name[32];
u64 handle;
-- 
2.26.2

[PATCH net-next 01/12] ibmvnic: Ensure that subCRQ entry reads are ordered

2020-11-12 Thread Thomas Falcon

Ensure that received Subordinate Command-Response Queue
entries are properly read in order by the driver.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index da15913879f8..5647f54bf387 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2391,6 +2391,8 @@ static int ibmvnic_poll(struct napi_struct *napi, int 
budget)
 
if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
break;
+   /* ensure that we do not prematurely exit the polling loop */
+   dma_rmb();
next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
rx_buff =
(struct ibmvnic_rx_buff *)be64_to_cpu(next->
@@ -3087,6 +3089,8 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
int num_entries = 0;
 
next = ibmvnic_next_scrq(adapter, scrq);
+   /* ensure that we are reading the correct queue entry */
+   dma_rmb();
for (i = 0; i < next->tx_comp.num_comps; i++) {
if (next->tx_comp.rcs[i]) {
dev_err(dev, "tx error %x\n",
-- 
2.26.2

[PATCH net-next 03/12] ibmvnic: Introduce batched RX buffer descriptor transmission

2020-11-12 Thread Thomas Falcon

Utilize the H_SEND_SUB_CRQ_INDIRECT hypervisor call to send
multiple RX buffer descriptors to the device in one hypervisor
call operation. This change will reduce the number of hypervisor
calls and thus hypervisor call overhead needed to transmit
RX buffer descriptors to the device.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 57 +++---
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index dd9ca06f355b..524020691ef8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -306,9 +306,11 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
int count = pool->size - atomic_read(>available);
u64 handle = adapter->rx_scrq[pool->index]->handle;
struct device *dev = >vdev->dev;
+   struct ibmvnic_ind_xmit_queue *ind_bufp;
+   struct ibmvnic_sub_crq_queue *rx_scrq;
+   union sub_crq *sub_crq;
int buffers_added = 0;
unsigned long lpar_rc;
-   union sub_crq sub_crq;
struct sk_buff *skb;
unsigned int offset;
dma_addr_t dma_addr;
@@ -320,6 +322,8 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
if (!pool->active)
return;
 
+   rx_scrq = adapter->rx_scrq[pool->index];
+   ind_bufp = _scrq->ind_buf;
for (i = 0; i < count; ++i) {
skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
if (!skb) {
@@ -346,12 +350,13 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
pool->rx_buff[index].pool_index = pool->index;
pool->rx_buff[index].size = pool->buff_size;
 
-   memset(_crq, 0, sizeof(sub_crq));
-   sub_crq.rx_add.first = IBMVNIC_CRQ_CMD;
-   sub_crq.rx_add.correlator =
+   sub_crq = _bufp->indir_arr[ind_bufp->index++];
+   memset(sub_crq, 0, sizeof(*sub_crq));
+   sub_crq->rx_add.first = IBMVNIC_CRQ_CMD;
+   sub_crq->rx_add.correlator =
cpu_to_be64((u64)>rx_buff[index]);
-   sub_crq.rx_add.ioba = cpu_to_be32(dma_addr);
-   sub_crq.rx_add.map_id = pool->long_term_buff.map_id;
+   sub_crq->rx_add.ioba = cpu_to_be32(dma_addr);
+   sub_crq->rx_add.map_id = pool->long_term_buff.map_id;
 
/* The length field of the sCRQ is defined to be 24 bits so the
 * buffer size needs to be left shifted by a byte before it is
@@ -361,15 +366,20 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
 #ifdef __LITTLE_ENDIAN__
shift = 8;
 #endif
-   sub_crq.rx_add.len = cpu_to_be32(pool->buff_size << shift);
-
-   lpar_rc = send_subcrq(adapter, handle, _crq);
-   if (lpar_rc != H_SUCCESS)
-   goto failure;
-
-   buffers_added++;
-   adapter->replenish_add_buff_success++;
+   sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
pool->next_free = (pool->next_free + 1) % pool->size;
+   if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
+   i == count - 1) {
+   lpar_rc =
+   send_subcrq_indirect(adapter, handle,
+(u64)ind_bufp->indir_dma,
+(u64)ind_bufp->index);
+   if (lpar_rc != H_SUCCESS)
+   goto failure;
+   buffers_added += ind_bufp->index;
+   adapter->replenish_add_buff_success += ind_bufp->index;
+   ind_bufp->index = 0;
+   }
}
atomic_add(buffers_added, >available);
return;
@@ -377,13 +387,20 @@ static void replenish_rx_pool(struct ibmvnic_adapter 
*adapter,
 failure:
if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
dev_err_ratelimited(dev, "rx: replenish packet buffer 
failed\n");
-   pool->free_map[pool->next_free] = index;
-   pool->rx_buff[index].skb = NULL;
-
-   dev_kfree_skb_any(skb);
-   adapter->replenish_add_buff_failure++;
-   atomic_add(buffers_added, >available);
+   for (i = ind_bufp->index - 1; i >= 0; --i) {
+   struct ibmvnic_rx_buff *rx_buff;
 
+   pool->next_free = pool->next_free == 0 ?
+ pool->size - 1 : pool->next_free - 1;
+   sub_crq = _bufp->indir_arr[i];
+   rx_buff = (struct ibmvnic_rx_buff *)
+   be64_to_cpu(sub_crq->

[PATCH net-next 07/12] ibmvnic: Clean up TX error handling and statistics tracking

2020-11-12 Thread Thomas Falcon

Update error handling code in ibmvnic_xmit to be more readable
and remove unused statistics counters. Also record statistics
when TX completions are received to improve accuracy.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 38 ++
 drivers/net/ethernet/ibm/ibmvnic.h |  2 --
 2 files changed, 13 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index b523da20bffc..2c24d4774457 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1535,13 +1535,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
struct ibmvnic_tx_buff *tx_buff = NULL;
struct ibmvnic_sub_crq_queue *tx_scrq;
struct ibmvnic_tx_pool *tx_pool;
-   unsigned int tx_send_failed = 0;
netdev_tx_t ret = NETDEV_TX_OK;
-   unsigned int tx_map_failed = 0;
union sub_crq indir_arr[16];
unsigned int tx_dropped = 0;
-   unsigned int tx_packets = 0;
-   unsigned int tx_bytes = 0;
dma_addr_t data_dma_addr;
struct netdev_queue *txq;
unsigned long lpar_rc;
@@ -1558,18 +1554,13 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
if (!netif_subqueue_stopped(netdev, skb))
netif_stop_subqueue(netdev, queue_num);
dev_kfree_skb_any(skb);
-
-   tx_send_failed++;
tx_dropped++;
-   ret = NETDEV_TX_OK;
-   goto out;
+   goto err_out;
}
 
if (ibmvnic_xmit_workarounds(skb, netdev)) {
tx_dropped++;
-   tx_send_failed++;
-   ret = NETDEV_TX_OK;
-   goto out;
+   goto err_out;
}
if (skb_is_gso(skb))
tx_pool = >tso_pool[queue_num];
@@ -1584,10 +1575,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
 
if (index == IBMVNIC_INVALID_MAP) {
dev_kfree_skb_any(skb);
-   tx_send_failed++;
tx_dropped++;
-   ret = NETDEV_TX_OK;
-   goto out;
+   goto err_out;
}
 
tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
@@ -1707,12 +1696,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
netif_stop_subqueue(netdev, queue_num);
}
 
-   tx_packets++;
-   tx_bytes += skb->len;
txq->trans_start = jiffies;
-   ret = NETDEV_TX_OK;
-   goto out;
 
+   return ret;
 tx_flush_err:
dev_kfree_skb_any(skb);
tx_buff->skb = NULL;
@@ -1758,14 +1744,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
netif_tx_stop_all_queues(netdev);
netif_carrier_off(netdev);
}
-out:
+err_out:
netdev->stats.tx_dropped += tx_dropped;
-   netdev->stats.tx_bytes += tx_bytes;
-   netdev->stats.tx_packets += tx_packets;
-   adapter->tx_send_failed += tx_send_failed;
-   adapter->tx_map_failed += tx_map_failed;
-   adapter->tx_stats_buffers[queue_num].packets += tx_packets;
-   adapter->tx_stats_buffers[queue_num].bytes += tx_bytes;
adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
 
return ret;
@@ -3147,6 +3127,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
int num_entries = 0;
int total_bytes = 0;
int num_packets = 0;
+   int tx_dropped = 0;
 
next = ibmvnic_next_scrq(adapter, scrq);
/* ensure that we are reading the correct queue entry */
@@ -3157,6 +3138,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
if (next->tx_comp.rcs[i]) {
dev_err(dev, "tx error %x\n",
next->tx_comp.rcs[i]);
+   tx_dropped++;
error = true;
}
index = be32_to_cpu(next->tx_comp.correlators[i]);
@@ -3200,6 +3182,12 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
netdev_dbg(adapter->netdev, "Started queue %d\n",
   scrq->pool_index);
}
+   adapter->netdev->stats.tx_packets += num_packets;
+   adapter->netdev->stats.tx_bytes += total_bytes;
+   adapter->netdev->stats.tx_dropped += tx_dropped;
+   adapter->tx_stats_buffers[scrq->pool_index].packets += 
num_packets;
+   adapter->tx_stats_buffers[scrq->pool_index].bytes

[PATCH net-next 05/12] ibmvnic: Fix TX completion error handling

2020-11-12 Thread Thomas Falcon

When firmware reports that a transmission was not successful,
the driver is not correctly processing the completion.
It should be freeing the socket buffer and updating the
device queue's inflight frame count and BQL structures.
Do that now.

Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 0f6aba760d65..c9437b2d1aa8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3153,10 +3153,12 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
/* ensure that we are reading the correct queue entry */
dma_rmb();
for (i = 0; i < next->tx_comp.num_comps; i++) {
+   bool error = false;
+
if (next->tx_comp.rcs[i]) {
dev_err(dev, "tx error %x\n",
next->tx_comp.rcs[i]);
-   continue;
+   error = true;
}
index = be32_to_cpu(next->tx_comp.correlators[i]);
if (index & IBMVNIC_TSO_POOL_MASK) {
@@ -3179,7 +3181,10 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter 
*adapter,
num_entries += txbuff->num_entries;
if (txbuff->skb) {
total_bytes += txbuff->skb->len;
-   dev_consume_skb_irq(txbuff->skb);
+   if (error)
+   dev_kfree_skb_irq(txbuff->skb);
+   else
+   dev_consume_skb_irq(txbuff->skb);
txbuff->skb = NULL;
} else {
netdev_warn(adapter->netdev,
-- 
2.26.2

[PATCH net-next 00/12] ibmvnic: Performance improvements and other updates

2020-11-12 Thread Thomas Falcon

First, memory barrier protection of device queue reads to ensure RX
and TX buffer completions are not missed. The subsequent three
patches utilize a hypervisor call allowing multiple TX and RX buffer
replenishment descriptors to be sent in one operation, which
significantly reduces hypervisor call overhead. The xmit_more and
Byte Queue Limits API's are leveraged to provide this support
for TX descriptors.

The next four patches fix TX completion error handling, remove
superfluous code and members in TX completion handling function
and TX buffer structure respectively, update ndo_start_xmit error
handling and improve accuracy of statistics tracking, and remove
unused routines.

Finally, patches to ensure that device queue memory
is cache-line aligned, resolving slowdowns observed in PCI traces,
as well as optimizatons to the driver's NAPI polling function and 
to RX buffer replenishment are provided by Dwip Banerjee.

This series provides significant performance improvements, allowing
the driver to fully utilize 100Gb NIC's.

Dwip N. Banerjee (4):
  ibmvnic: Ensure that device queue memory is cache-line aligned
  ibmvnic: Correctly re-enable interrupts in NAPI polling routine
  ibmvnic: Use netdev_alloc_skb instead of alloc_skb to replenish RX
buffers
  ibmvnic: Do not replenish RX buffers after every polling loop

Thomas Falcon (8):
  ibmvnic: Ensure that subCRQ entry reads are ordered
  ibmvnic: Introduce indirect subordinate Command Response Queue buffer
  ibmvnic: Introduce batched RX buffer descriptor transmission
  ibmvnic: Introduce xmit_more support using batched subCRQ hcalls
  ibmvnic: Fix TX completion error handling
  ibmvnic: Clean up TX code and TX buffer data structure
  ibmvnic: Clean up TX error handling and statistics tracking
  ibmvnic: Remove send_subcrq function

 drivers/net/ethernet/ibm/ibmvnic.c | 390 -
 drivers/net/ethernet/ibm/ibmvnic.h |  30 +--
 2 files changed, 228 insertions(+), 192 deletions(-)

-- 
2.26.2

[PATCH net-next 08/12] ibmvnic: Remove send_subcrq function

2020-11-12 Thread Thomas Falcon

It is not longer used, so remove it.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 34 --
 1 file changed, 34 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 2c24d4774457..b2ca34e94078 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -84,8 +84,6 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *);
-static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle,
-  union sub_crq *sub_crq);
 static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64);
 static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance);
 static int enable_scrq_irq(struct ibmvnic_adapter *,
@@ -3579,38 +3577,6 @@ static void print_subcrq_error(struct device *dev, int 
rc, const char *func)
}
 }
 
-static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle,
-  union sub_crq *sub_crq)
-{
-   unsigned int ua = adapter->vdev->unit_address;
-   struct device *dev = >vdev->dev;
-   u64 *u64_crq = (u64 *)sub_crq;
-   int rc;
-
-   netdev_dbg(adapter->netdev,
-  "Sending sCRQ %016lx: %016lx %016lx %016lx %016lx\n",
-  (unsigned long int)cpu_to_be64(remote_handle),
-  (unsigned long int)cpu_to_be64(u64_crq[0]),
-  (unsigned long int)cpu_to_be64(u64_crq[1]),
-  (unsigned long int)cpu_to_be64(u64_crq[2]),
-  (unsigned long int)cpu_to_be64(u64_crq[3]));
-
-   /* Make sure the hypervisor sees the complete request */
-   mb();
-
-   rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua,
-   cpu_to_be64(remote_handle),
-   cpu_to_be64(u64_crq[0]),
-   cpu_to_be64(u64_crq[1]),
-   cpu_to_be64(u64_crq[2]),
-   cpu_to_be64(u64_crq[3]));
-
-   if (rc)
-   print_subcrq_error(dev, rc, __func__);
-
-   return rc;
-}
-
 static int send_subcrq_indirect(struct ibmvnic_adapter *adapter,
u64 remote_handle, u64 ioba, u64 num_entries)
 {
-- 
2.26.2

Re: [PATCH net] ibmvnic: Fix IRQ mapping disposal in error path

2020-07-29 Thread Thomas Falcon




On 7/29/20 5:28 PM, Jakub Kicinski wrote:

On Wed, 29 Jul 2020 16:36:32 -0500 Thomas Falcon wrote:

RX queue IRQ mappings are disposed in both the TX IRQ and RX IRQ
error paths. Fix this and dispose of TX IRQ mappings correctly in
case of an error.

Signed-off-by: Thomas Falcon 

Thomas, please remember about Fixes tags (for networking patches,
at least):

Fixes: ea22d51a7831 ("ibmvnic: simplify and improve driver probe function")


Sorry, Jakub, I will try not to forget next time. Thanks.

[PATCH net] ibmvnic: Fix IRQ mapping disposal in error path

2020-07-29 Thread Thomas Falcon

RX queue IRQ mappings are disposed in both the TX IRQ and RX IRQ
error paths. Fix this and dispose of TX IRQ mappings correctly in
case of an error.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 0fd7eae..5afb3c9 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3206,7 +3206,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter 
*adapter)
 req_tx_irq_failed:
for (j = 0; j < i; j++) {
free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
-   irq_dispose_mapping(adapter->rx_scrq[j]->irq);
+   irq_dispose_mapping(adapter->tx_scrq[j]->irq);
}
release_sub_crqs(adapter, 1);
return rc;
-- 
1.8.3.1

Re: [PATCH net-next] ibmvnic: Increase driver logging

2020-07-16 Thread Thomas Falcon

On 7/15/20 8:29 PM, David Miller wrote:

From: Jakub Kicinski 
Date: Wed, 15 Jul 2020 17:06:32 -0700

On Wed, 15 Jul 2020 18:51:55 -0500 Thomas Falcon wrote:

free_netdev(netdev);
dev_set_drvdata(>dev, NULL);
+   netdev_info(netdev, "VNIC client device has been successfully 
removed.\n");

A step too far, perhaps.

In general this patch looks a little questionable IMHO, this amount of
logging output is not commonly seen in drivers. All the the info
messages are just static text, not even carrying any extra information.
In an era of ftrace, and bpftrace, do we really need this?

Agreed, this is too much.  This is debugging, and thus suitable for tracing
facilities, at best.

Thanks for your feedback. I see now that I was overly aggressive with 
this patch to be sure, but it would help with narrowing down problems at 
a first glance, should they arise. The driver in its current state logs 
very little of what is it doing without the use of additional debugging 
or tracing facilities. Would it be worth it to pursue a less aggressive 
version or would that be dead on arrival? What are acceptable driver 
operations to log at this level?

Thanks,

Tom

[PATCH net-next] ibmvnic: Increase driver logging

2020-07-15 Thread Thomas Falcon

Improve the ibmvnic driver's logging capabilities by providing
more informational messages to track driver operations, facilitating
first-pass debug.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 76 --
 1 file changed, 62 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 0fd7eae25fe9..7382f11872fc 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -561,6 +561,7 @@ static int init_rx_pools(struct net_device *netdev)
u64 *size_array;
int i, j;
 
+   netdev_info(netdev, "Initializing RX queue buffer pools.\n");
rxadd_subcrqs =
be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
@@ -618,6 +619,7 @@ static int init_rx_pools(struct net_device *netdev)
rx_pool->next_alloc = 0;
rx_pool->next_free = 0;
}
+   netdev_info(netdev, "RX queue buffer pools allocated successfully.\n");
 
return 0;
 }
@@ -738,6 +740,7 @@ static int init_tx_pools(struct net_device *netdev)
int tx_subcrqs;
int i, rc;
 
+   netdev_info(netdev, "Initializing TX queue buffer pools.\n");
tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
adapter->tx_pool = kcalloc(tx_subcrqs,
   sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
@@ -768,7 +771,7 @@ static int init_tx_pools(struct net_device *netdev)
return rc;
}
}
-
+   netdev_info(netdev, "TX queue buffer pools allocated successfully.\n");
return 0;
 }
 
@@ -792,6 +795,7 @@ static void ibmvnic_napi_disable(struct ibmvnic_adapter 
*adapter)
if (!adapter->napi_enabled)
return;
 
+   netdev_info(adapter->netdev, "Disable all NAPI threads.\n");
for (i = 0; i < adapter->req_rx_queues; i++) {
netdev_dbg(adapter->netdev, "Disabling napi[%d]\n", i);
napi_disable(>napi[i]);
@@ -910,12 +914,14 @@ static int ibmvnic_login(struct net_device *netdev)
return -1;
}
} else if (adapter->init_done_rc) {
-   netdev_warn(netdev, "Adapter login failed\n");
+   netdev_warn(netdev, "Adapter login failed, rc = %d\n",
+   adapter->init_done_rc);
return -1;
}
} while (retry);
 
__ibmvnic_set_mac(netdev, adapter->mac_addr);
+   netdev_info(netdev, "Adapter login success!\n");
 
return 0;
 }
@@ -934,6 +940,7 @@ static void release_login_rsp_buffer(struct ibmvnic_adapter 
*adapter)
 
 static void release_resources(struct ibmvnic_adapter *adapter)
 {
+   netdev_info(adapter->netdev, "Releasing VNIC client device memory 
structures.\n");
release_vpd_data(adapter);
 
release_tx_pools(adapter);
@@ -941,6 +948,7 @@ static void release_resources(struct ibmvnic_adapter 
*adapter)
 
release_napi(adapter);
release_login_rsp_buffer(adapter);
+   netdev_info(adapter->netdev, "VNIC client device memory released.\n");
 }
 
 static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
@@ -964,7 +972,7 @@ static int set_link_state(struct ibmvnic_adapter *adapter, 
u8 link_state)
reinit_completion(>init_done);
rc = ibmvnic_send_crq(adapter, );
if (rc) {
-   netdev_err(netdev, "Failed to set link state\n");
+   netdev_err(netdev, "Failed to set link state, rc = 
%d\n", rc);
return rc;
}
 
@@ -1098,6 +1106,8 @@ static int init_resources(struct ibmvnic_adapter *adapter)
struct net_device *netdev = adapter->netdev;
int rc;
 
+   netdev_info(netdev, "Allocate device resources.\n");
+
rc = set_real_num_queues(netdev);
if (rc)
return rc;
@@ -1126,7 +1136,11 @@ static int init_resources(struct ibmvnic_adapter 
*adapter)
return rc;
 
rc = init_tx_pools(netdev);
-   return rc;
+   if (rc)
+   return rc;
+
+   netdev_info(netdev, "Device resources allocated.\n");
+   return 0;
 }
 
 static int __ibmvnic_open(struct net_device *netdev)
@@ -1136,9 +1150,10 @@ static int __ibmvnic_open(struct net_device *netdev)
int i, rc;
 
adapter->state = VNIC_OPENING;
+   netdev_info(netdev, "Allocating RX buffer pools and enable NAPI 
structures.\n");
replenish_pools(adapter);

[PATCH net v2] ibmveth: Fix max MTU limit

2020-06-18 Thread Thomas Falcon

The max MTU limit defined for ibmveth is not accounting for
virtual ethernet buffer overhead, which is twenty-two additional
bytes set aside for the ethernet header and eight additional bytes
of an opaque handle reserved for use by the hypervisor. Update the
max MTU to reflect this overhead.

Signed-off-by: Thomas Falcon 
Fixes: d894be57ca92 ("ethernet: use net core MTU range checking in more 
drivers")
Fixes: 110447f8269a ("ethernet: fix min/max MTU typos")
---
v2: Include Fixes tags suggested by Jakub Kicisnki
---
 drivers/net/ethernet/ibm/ibmveth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c 
b/drivers/net/ethernet/ibm/ibmveth.c
index 96d36ae5049e..c5c732601e35 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1715,7 +1715,7 @@ static int ibmveth_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
}
 
netdev->min_mtu = IBMVETH_MIN_MTU;
-   netdev->max_mtu = ETH_MAX_MTU;
+   netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
 
memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
 
-- 
2.26.2

Re: [PATCH net] ibmveth: Fix max MTU limit

2020-06-18 Thread Thomas Falcon




On 6/18/20 10:57 AM, Jakub Kicinski wrote:

On Thu, 18 Jun 2020 10:43:46 -0500 Thomas Falcon wrote:

The max MTU limit defined for ibmveth is not accounting for
virtual ethernet buffer overhead, which is twenty-two additional
bytes set aside for the ethernet header and eight additional bytes
of an opaque handle reserved for use by the hypervisor. Update the
max MTU to reflect this overhead.

Signed-off-by: Thomas Falcon 

How about

Fixes: d894be57ca92 ("ethernet: use net core MTU range checking in more 
drivers")
Fixes: 110447f8269a ("ethernet: fix min/max MTU typos")

?


Thanks, do you need me to send a v2 with those tags?

Tom

[PATCH net] ibmveth: Fix max MTU limit

2020-06-18 Thread Thomas Falcon

The max MTU limit defined for ibmveth is not accounting for
virtual ethernet buffer overhead, which is twenty-two additional
bytes set aside for the ethernet header and eight additional bytes
of an opaque handle reserved for use by the hypervisor. Update the
max MTU to reflect this overhead.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmveth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c 
b/drivers/net/ethernet/ibm/ibmveth.c
index 96d36ae5049e..c5c732601e35 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1715,7 +1715,7 @@ static int ibmveth_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
}
 
netdev->min_mtu = IBMVETH_MIN_MTU;
-   netdev->max_mtu = ETH_MAX_MTU;
+   netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
 
memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
 
-- 
2.26.2

[PATCH net v2] ibmvnic: Harden device login requests

2020-06-15 Thread Thomas Falcon

The VNIC driver's "login" command sequence is the final step
in the driver's initialization process with device firmware,
confirming the available device queue resources to be utilized
by the driver. Under high system load, firmware may not respond
to the request in a timely manner or may abort the request. In
such cases, the driver should reattempt the login command
sequence. In case of a device error, the number of retries
is bounded.

Signed-off-by: Thomas Falcon 
---
v2: declare variables in Reverse Christmas tree format
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 1b4d04e..2baf7b3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -842,12 +842,13 @@ static int ibmvnic_login(struct net_device *netdev)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
unsigned long timeout = msecs_to_jiffies(3);
int retry_count = 0;
+   int retries = 10;
bool retry;
int rc;
 
do {
retry = false;
-   if (retry_count > IBMVNIC_MAX_QUEUES) {
+   if (retry_count > retries) {
netdev_warn(netdev, "Login attempts exceeded\n");
return -1;
}
@@ -862,11 +863,23 @@ static int ibmvnic_login(struct net_device *netdev)
 
if (!wait_for_completion_timeout(>init_done,
 timeout)) {
-   netdev_warn(netdev, "Login timed out\n");
-   return -1;
+   netdev_warn(netdev, "Login timed out, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   continue;
}
 
-   if (adapter->init_done_rc == PARTIALSUCCESS) {
+   if (adapter->init_done_rc == ABORTED) {
+   netdev_warn(netdev, "Login aborted, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   /* FW or device may be busy, so
+* wait a bit before retrying login
+*/
+   msleep(500);
+   } else if (adapter->init_done_rc == PARTIALSUCCESS) {
retry_count++;
release_sub_crqs(adapter, 1);
 
-- 
1.8.3.1

Re: [PATCH net] ibmvnic: Harden device login requests

2020-06-15 Thread Thomas Falcon


On 6/12/20 4:10 PM, David Miller wrote:

From: Thomas Falcon 
Date: Fri, 12 Jun 2020 13:31:39 -0500


@@ -841,13 +841,14 @@ static int ibmvnic_login(struct net_device *netdev)
  {
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
unsigned long timeout = msecs_to_jiffies(3);
+   int retries = 10;
int retry_count = 0;
bool retry;
int rc;

Reverse christmas tree, please.


Oops, sending a v2 soon.

Thanks,

Tom

[PATCH net] ibmvnic: Flush existing work items before device removal

2020-06-12 Thread Thomas Falcon

Ensure that all scheduled work items have completed before continuing
with device removal and after further event scheduling has been
halted. This patch fixes a bug where a scheduled driver reset event
is processed following device removal.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 1cb2b7f3b2cb..a66fa75976d3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -5197,6 +5197,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
adapter->state = VNIC_REMOVING;
spin_unlock_irqrestore(>state_lock, flags);
 
+   flush_work(>ibmvnic_reset);
+   flush_delayed_work(>ibmvnic_delayed_reset);
+
rtnl_lock();
unregister_netdevice(netdev);
 
-- 
2.18.1

[PATCH net] ibmvnic: Harden device login requests

2020-06-12 Thread Thomas Falcon

The VNIC driver's "login" command sequence is the final step
in the driver's initialization process with device firmware,
confirming the available device queue resources to be utilized
by the driver. Under high system load, firmware may not respond
to the request in a timely manner or may abort the request. In
such cases, the driver should reattempt the login command
sequence. In case of a device error, the number of retries
is bounded.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 197dc5b2c090..1cb2b7f3b2cb 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -841,13 +841,14 @@ static int ibmvnic_login(struct net_device *netdev)
 {
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
unsigned long timeout = msecs_to_jiffies(3);
+   int retries = 10;
int retry_count = 0;
bool retry;
int rc;
 
do {
retry = false;
-   if (retry_count > IBMVNIC_MAX_QUEUES) {
+   if (retry_count > retries) {
netdev_warn(netdev, "Login attempts exceeded\n");
return -1;
}
@@ -862,11 +863,23 @@ static int ibmvnic_login(struct net_device *netdev)
 
if (!wait_for_completion_timeout(>init_done,
 timeout)) {
-   netdev_warn(netdev, "Login timed out\n");
-   return -1;
+   netdev_warn(netdev, "Login timed out, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   continue;
}
 
-   if (adapter->init_done_rc == PARTIALSUCCESS) {
+   if (adapter->init_done_rc == ABORTED) {
+   netdev_warn(netdev, "Login aborted, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   /* FW or device may be busy, so
+* wait a bit before retrying login
+*/
+   msleep(500);
+   } else if (adapter->init_done_rc == PARTIALSUCCESS) {
retry_count++;
release_sub_crqs(adapter, 1);
 
-- 
2.18.1

[PATCH net] drivers/net/ibmvnic: Update VNIC protocol version reporting

2020-05-28 Thread Thomas Falcon

VNIC protocol version is reported in big-endian format, but it
is not byteswapped before logging. Fix that, and remove version
comparison as only one protocol version exists at this time.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 621a6e0..f2c7160 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4689,12 +4689,10 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
dev_err(dev, "Error %ld in VERSION_EXCHG_RSP\n", rc);
break;
}
-   dev_info(dev, "Partner protocol version is %d\n",
-crq->version_exchange_rsp.version);
-   if (be16_to_cpu(crq->version_exchange_rsp.version) <
-   ibmvnic_version)
-   ibmvnic_version =
+   ibmvnic_version =
be16_to_cpu(crq->version_exchange_rsp.version);
+   dev_info(dev, "Partner protocol version is %d\n",
+ibmvnic_version);
send_cap_queries(adapter);
break;
case QUERY_CAPABILITY_RSP:
-- 
1.8.3.1

Re: [PATCH] net/bonding: Do not transition down slave after speed/duplex check

2020-04-30 Thread Thomas Falcon




On 4/29/20 1:38 PM, Jay Vosburgh wrote:

Thomas Falcon  wrote:


The following behavior has been observed when testing logical partition
migration of LACP-bonded VNIC devices in a PowerVM pseries environment.

1. When performing the migration, the bond master detects that a slave has
   lost its link, deactivates the LACP port, and sets the port's
   is_enabled flag to false.
2. The slave device then updates it's carrier state to off while it resets
   itself. This update triggers a NETDEV_CHANGE notification, which performs
   a speed and duplex update. The device does not return a valid speed
   and duplex, so the master sets the slave link state to BOND_LINK_FAIL.
3. When the slave VNIC device(s) are active again, some operations, such
   as setting the port's is_enabled flag, are not performed when transitioning
   the link state back to BOND_LINK_UP from BOND_LINK_FAIL, though the state
   prior to the speed check was BOND_LINK_DOWN.

Just to make sure I'm understanding correctly, in regards to
"the state prior to the speed check was BOND_LINK_DOWN," do you mean
that during step 1, the slave link is set to BOND_LINK_DOWN, and then in
step 2 changed from _DOWN to _FAIL?


Affected devices are therefore not utilized in the aggregation though they
are operational. The simplest way to fix this seems to be to restrict the
link state change to devices that are currently up and running.

This sounds similar to an issue from last fall; can you confirm
that you're running with a kernel that includes:

1899bb325149 bonding: fix state transition issue in link monitoring


It did not have that fix.  I will patch the kernel and rerun the test.

Thanks,

Tom



-J



CC: Jay Vosburgh 
CC: Veaceslav Falico 
CC: Andy Gospodarek 
Signed-off-by: Thomas Falcon 
---
drivers/net/bonding/bond_main.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2e70e43c5df5..d840da7cd379 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3175,7 +3175,8 @@ static int bond_slave_netdev_event(unsigned long event,
 * speeds/duplex are available.
 */
if (bond_update_speed_duplex(slave) &&
-   BOND_MODE(bond) == BOND_MODE_8023AD) {
+   BOND_MODE(bond) == BOND_MODE_8023AD &&
+   slave->link == BOND_LINK_UP) {
if (slave->last_link_up)
slave->link = BOND_LINK_FAIL;
else
--
2.18.2


---
-Jay Vosburgh, jay.vosbu...@canonical.com

Re: [PATCH] net/bonding: Do not transition down slave after speed/duplex check

2020-04-29 Thread Thomas Falcon




On 4/29/20 1:38 PM, Jay Vosburgh wrote:

Thomas Falcon  wrote:


The following behavior has been observed when testing logical partition
migration of LACP-bonded VNIC devices in a PowerVM pseries environment.

1. When performing the migration, the bond master detects that a slave has
   lost its link, deactivates the LACP port, and sets the port's
   is_enabled flag to false.
2. The slave device then updates it's carrier state to off while it resets
   itself. This update triggers a NETDEV_CHANGE notification, which performs
   a speed and duplex update. The device does not return a valid speed
   and duplex, so the master sets the slave link state to BOND_LINK_FAIL.
3. When the slave VNIC device(s) are active again, some operations, such
   as setting the port's is_enabled flag, are not performed when transitioning
   the link state back to BOND_LINK_UP from BOND_LINK_FAIL, though the state
   prior to the speed check was BOND_LINK_DOWN.

Just to make sure I'm understanding correctly, in regards to
"the state prior to the speed check was BOND_LINK_DOWN," do you mean
that during step 1, the slave link is set to BOND_LINK_DOWN, and then in
step 2 changed from _DOWN to _FAIL?


Yes, that's what I meant, thanks.




Affected devices are therefore not utilized in the aggregation though they
are operational. The simplest way to fix this seems to be to restrict the
link state change to devices that are currently up and running.

This sounds similar to an issue from last fall; can you confirm
that you're running with a kernel that includes:

1899bb325149 bonding: fix state transition issue in link monitoring

-J



I think so, but I will confirm ASAP.

Tom



CC: Jay Vosburgh 
CC: Veaceslav Falico 
CC: Andy Gospodarek 
Signed-off-by: Thomas Falcon 
---
drivers/net/bonding/bond_main.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2e70e43c5df5..d840da7cd379 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3175,7 +3175,8 @@ static int bond_slave_netdev_event(unsigned long event,
 * speeds/duplex are available.
 */
if (bond_update_speed_duplex(slave) &&
-   BOND_MODE(bond) == BOND_MODE_8023AD) {
+   BOND_MODE(bond) == BOND_MODE_8023AD &&
+   slave->link == BOND_LINK_UP) {
if (slave->last_link_up)
slave->link = BOND_LINK_FAIL;
else
--
2.18.2


---
-Jay Vosburgh, jay.vosbu...@canonical.com

[PATCH] net/bonding: Do not transition down slave after speed/duplex check

2020-04-29 Thread Thomas Falcon

The following behavior has been observed when testing logical partition
migration of LACP-bonded VNIC devices in a PowerVM pseries environment.

1. When performing the migration, the bond master detects that a slave has
   lost its link, deactivates the LACP port, and sets the port's
   is_enabled flag to false.
2. The slave device then updates it's carrier state to off while it resets
   itself. This update triggers a NETDEV_CHANGE notification, which performs
   a speed and duplex update. The device does not return a valid speed
   and duplex, so the master sets the slave link state to BOND_LINK_FAIL.
3. When the slave VNIC device(s) are active again, some operations, such
   as setting the port's is_enabled flag, are not performed when transitioning
   the link state back to BOND_LINK_UP from BOND_LINK_FAIL, though the state
   prior to the speed check was BOND_LINK_DOWN.

Affected devices are therefore not utilized in the aggregation though they
are operational. The simplest way to fix this seems to be to restrict the
link state change to devices that are currently up and running.

CC: Jay Vosburgh 
CC: Veaceslav Falico 
CC: Andy Gospodarek 
Signed-off-by: Thomas Falcon 
---
 drivers/net/bonding/bond_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2e70e43c5df5..d840da7cd379 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3175,7 +3175,8 @@ static int bond_slave_netdev_event(unsigned long event,
 * speeds/duplex are available.
 */
if (bond_update_speed_duplex(slave) &&
-   BOND_MODE(bond) == BOND_MODE_8023AD) {
+   BOND_MODE(bond) == BOND_MODE_8023AD &&
+   slave->link == BOND_LINK_UP) {
if (slave->last_link_up)
slave->link = BOND_LINK_FAIL;
else
-- 
2.18.2

Re: [PATCH net] ibmvnic: Fall back to 16 H_SEND_SUB_CRQ_INDIRECT entries with old FW

2020-04-28 Thread Thomas Falcon


On 4/27/20 12:33 PM, Juliet Kim wrote:

The maximum entries for H_SEND_SUB_CRQ_INDIRECT has increased on
some platforms from 16 to 128. If Live Partition Mobility is used
to migrate a running OS image from a newer source platform to an
older target platform, then H_SEND_SUB_CRQ_INDIRECT will fail with
H_PARAMETER if 128 entries are queued.

Fix this by falling back to 16 entries if H_PARAMETER is returned
from the hcall().


Thanks for the submission, but I am having a hard time believing that 
this is what is happening since the driver does not support sending 
multiple frames per hypervisor call at this time. Even if it were the 
case, this approach would omit frame data needed by the VF, so the 
second attempt may still fail. Are there system logs available that show 
the driver is attempting to send transmissions with greater than 16 
descriptors?


Thanks,

Tom




Signed-off-by: Juliet Kim 
---
  drivers/net/ethernet/ibm/ibmvnic.c | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 4bd33245bad6..b66c2f26a427 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1656,6 +1656,17 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
   (u64)tx_buff->indir_dma,
   (u64)num_entries);
+
+   /* Old firmware accepts max 16 num_entries */
+   if (lpar_rc == H_PARAMETER && num_entries > 16) {
+   tx_crq.v1.n_crq_elem = 16;
+   tx_buff->num_entries = 16;
+   lpar_rc = send_subcrq_indirect(adapter,
+  handle_array[queue_num],
+  (u64)tx_buff->indir_dma,
+  16);
+   }
+
dma_unmap_single(dev, tx_buff->indir_dma,
 sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
} else {

Re: [PATCH net v2] net/ibmvnic: Fix typo in retry check

2019-12-16 Thread Thomas Falcon




On 12/13/19 7:27 PM, Jakub Kicinski wrote:

On Wed, 11 Dec 2019 09:38:39 -0600, Thomas Falcon wrote:

This conditional is missing a bang, with the intent
being to break when the retry count reaches zero.

Fixes: 476d96ca9c ("ibmvnic: Bound waits for device queries")
Suggested-by: Juliet Kim 
Signed-off-by: Thomas Falcon 

Ah damn, looks like this originates from my pseudo code.

I had to fix the fixes tag:

Commit: 847496ccfa22 ("net/ibmvnic: Fix typo in retry check")
Fixes tag: Fixes: 476d96ca9c ("ibmvnic: Bound waits for device queries")
Has these problem(s):
- SHA1 should be at least 12 digits long
  Can be fixed by setting core.abbrev to 12 (or more) or (for 
git v2.11
  or later) just making sure it is not set (or set to "auto").


Thanks, I'll keep that in mind next time.  IIRC I was making some last 
minute cosmetic changes before sending, and it might have slipped in 
that way.  In any case, I should have been more thorough in testing it.


Thanks,

Tom





Applied to net, thanks!


diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index efb0f10..2d84523 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -184,7 +184,7 @@ static int ibmvnic_wait_for_completion(struct 
ibmvnic_adapter *adapter,
netdev_err(netdev, "Device down!\n");
return -ENODEV;
}
-   if (retry--)
+   if (!retry--)
break;
if (wait_for_completion_timeout(comp_done, div_timeout))
return 0;

[PATCH net v2] net/ibmvnic: Fix typo in retry check

2019-12-11 Thread Thomas Falcon

This conditional is missing a bang, with the intent
being to break when the retry count reaches zero.

Fixes: 476d96ca9c ("ibmvnic: Bound waits for device queries")
Suggested-by: Juliet Kim 
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index efb0f10..2d84523 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -184,7 +184,7 @@ static int ibmvnic_wait_for_completion(struct 
ibmvnic_adapter *adapter,
netdev_err(netdev, "Device down!\n");
return -ENODEV;
}
-   if (retry--)
+   if (!retry--)
break;
if (wait_for_completion_timeout(comp_done, div_timeout))
return 0;
-- 
1.8.3.1

Re: [PATCH] net/ibmvnic: Fix typo in retry check

2019-12-11 Thread Thomas Falcon




On 12/11/19 9:32 AM, Thomas Falcon wrote:

This conditional is missing a bang, with the intent
being to break when the retry count reaches zero.

Fixes: 476d96ca9c ("ibmvnic: Bound waits for device queries")
Suggested-by: Juliet Kim 
Signed-off-by: Thomas Falcon 
---


Excuse me, disregard this patch. I used the wrong email address for 
Juliet. And forgot the intended branch.  I will resend a v2 soon.


Tom



  drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index efb0f10..2d84523 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -184,7 +184,7 @@ static int ibmvnic_wait_for_completion(struct 
ibmvnic_adapter *adapter,
netdev_err(netdev, "Device down!\n");
return -ENODEV;
}
-   if (retry--)
+   if (!retry--)
break;
if (wait_for_completion_timeout(comp_done, div_timeout))
return 0;

[PATCH] net/ibmvnic: Fix typo in retry check

2019-12-11 Thread Thomas Falcon

This conditional is missing a bang, with the intent
being to break when the retry count reaches zero.

Fixes: 476d96ca9c ("ibmvnic: Bound waits for device queries")
Suggested-by: Juliet Kim 
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index efb0f10..2d84523 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -184,7 +184,7 @@ static int ibmvnic_wait_for_completion(struct 
ibmvnic_adapter *adapter,
netdev_err(netdev, "Device down!\n");
return -ENODEV;
}
-   if (retry--)
+   if (!retry--)
break;
if (wait_for_completion_timeout(comp_done, div_timeout))
return 0;
-- 
1.8.3.1

[PATCH net v2 3/4] ibmvnic: Bound waits for device queries

2019-11-25 Thread Thomas Falcon

Create a wrapper for wait_for_completion calls with additional
driver checks to ensure that the driver does not wait on a
disabled device. In those cases or if the device does not respond
in an extended amount of time, this will allow the driver an
opportunity to recover.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 112 -
 1 file changed, 97 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 78a3ef70f1ef..4504f96ee07d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -159,6 +159,40 @@ static long h_reg_sub_crq(unsigned long unit_address, 
unsigned long token,
return rc;
 }
 
+/**
+ * ibmvnic_wait_for_completion - Check device state and wait for completion
+ * @adapter: private device data
+ * @comp_done: completion structure to wait for
+ * @timeout: time to wait in milliseconds
+ *
+ * Wait for a completion signal or until the timeout limit is reached
+ * while checking that the device is still active.
+ */
+static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter,
+  struct completion *comp_done,
+  unsigned long timeout)
+{
+   struct net_device *netdev;
+   unsigned long div_timeout;
+   u8 retry;
+
+   netdev = adapter->netdev;
+   retry = 5;
+   div_timeout = msecs_to_jiffies(timeout / retry);
+   while (true) {
+   if (!adapter->crq.active) {
+   netdev_err(netdev, "Device down!\n");
+   return -ENODEV;
+   }
+   if (retry--)
+   break;
+   if (wait_for_completion_timeout(comp_done, div_timeout))
+   return 0;
+   }
+   netdev_err(netdev, "Operation timed out.\n");
+   return -ETIMEDOUT;
+}
+
 static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb, int size)
 {
@@ -183,7 +217,15 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
return rc;
}
-   wait_for_completion(>fw_done);
+
+   rc = ibmvnic_wait_for_completion(adapter, >fw_done, 1);
+   if (rc) {
+   dev_err(dev,
+   "Long term map request aborted or timed out,rc = %d\n",
+   rc);
+   dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+   return rc;
+   }
 
if (adapter->fw_done_rc) {
dev_err(dev, "Couldn't map long term buffer,rc = %d\n",
@@ -211,6 +253,7 @@ static void free_long_term_buff(struct ibmvnic_adapter 
*adapter,
 static int reset_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb)
 {
+   struct device *dev = >vdev->dev;
int rc;
 
memset(ltb->buff, 0, ltb->size);
@@ -219,10 +262,16 @@ static int reset_long_term_buff(struct ibmvnic_adapter 
*adapter,
rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
if (rc)
return rc;
-   wait_for_completion(>fw_done);
+
+   rc = ibmvnic_wait_for_completion(adapter, >fw_done, 1);
+   if (rc) {
+   dev_info(dev,
+"Reset failed, long term map request timed out or 
aborted\n");
+   return rc;
+   }
 
if (adapter->fw_done_rc) {
-   dev_info(>vdev->dev,
+   dev_info(dev,
 "Reset failed, attempting to free and reallocate 
buffer\n");
free_long_term_buff(adapter, ltb);
return alloc_long_term_buff(adapter, ltb, ltb->size);
@@ -949,7 +998,12 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
rc = ibmvnic_send_crq(adapter, );
if (rc)
return rc;
-   wait_for_completion(>fw_done);
+
+   rc = ibmvnic_wait_for_completion(adapter, >fw_done, 1);
+   if (rc) {
+   dev_err(dev, "Could not retrieve VPD size, rc = %d\n", rc);
+   return rc;
+   }
 
if (!adapter->vpd->len)
return -ENODATA;
@@ -987,7 +1041,14 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
adapter->vpd->buff = NULL;
return rc;
}
-   wait_for_completion(>fw_done);
+
+   rc = ibmvnic_wait_for_completion(adapter, >fw_done, 1);
+   if (rc) {
+   dev_err(dev, "Unable to retrieve VPD, rc = %d\n", rc);
+   kfree(adapter->vpd->buff);
+

[PATCH net v2 4/4] ibmvnic: Serialize device queries

2019-11-25 Thread Thomas Falcon

Provide some serialization for device CRQ commands
and queries to ensure that the shared variable used for
storing return codes is properly synchronized.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 54 ++
 drivers/net/ethernet/ibm/ibmvnic.h |  2 ++
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 4504f96ee07d..42e15b31a5ff 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -210,11 +210,14 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
ltb->map_id = adapter->map_id;
adapter->map_id++;
 
+   mutex_lock(>fw_lock);
+   adapter->fw_done_rc = 0;
reinit_completion(>fw_done);
rc = send_request_map(adapter, ltb->addr,
  ltb->size, ltb->map_id);
if (rc) {
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+   mutex_unlock(>fw_lock);
return rc;
}
 
@@ -224,6 +227,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
"Long term map request aborted or timed out,rc = %d\n",
rc);
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+   mutex_unlock(>fw_lock);
return rc;
}
 
@@ -231,8 +235,10 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
dev_err(dev, "Couldn't map long term buffer,rc = %d\n",
adapter->fw_done_rc);
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+   mutex_unlock(>fw_lock);
return -1;
}
+   mutex_unlock(>fw_lock);
return 0;
 }
 
@@ -258,15 +264,21 @@ static int reset_long_term_buff(struct ibmvnic_adapter 
*adapter,
 
memset(ltb->buff, 0, ltb->size);
 
+   mutex_lock(>fw_lock);
+   adapter->fw_done_rc = 0;
+
reinit_completion(>fw_done);
rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
-   if (rc)
+   if (rc) {
+   mutex_unlock(>fw_lock);
return rc;
+   }
 
rc = ibmvnic_wait_for_completion(adapter, >fw_done, 1);
if (rc) {
dev_info(dev,
 "Reset failed, long term map request timed out or 
aborted\n");
+   mutex_unlock(>fw_lock);
return rc;
}
 
@@ -274,8 +286,10 @@ static int reset_long_term_buff(struct ibmvnic_adapter 
*adapter,
dev_info(dev,
 "Reset failed, attempting to free and reallocate 
buffer\n");
free_long_term_buff(adapter, ltb);
+   mutex_unlock(>fw_lock);
return alloc_long_term_buff(adapter, ltb, ltb->size);
}
+   mutex_unlock(>fw_lock);
return 0;
 }
 
@@ -992,18 +1006,25 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
if (adapter->vpd->buff)
len = adapter->vpd->len;
 
+   mutex_lock(>fw_lock);
+   adapter->fw_done_rc = 0;
reinit_completion(>fw_done);
+
crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
crq.get_vpd_size.cmd = GET_VPD_SIZE;
rc = ibmvnic_send_crq(adapter, );
-   if (rc)
+   if (rc) {
+   mutex_unlock(>fw_lock);
return rc;
+   }
 
rc = ibmvnic_wait_for_completion(adapter, >fw_done, 1);
if (rc) {
dev_err(dev, "Could not retrieve VPD size, rc = %d\n", rc);
+   mutex_unlock(>fw_lock);
return rc;
}
+   mutex_unlock(>fw_lock);
 
if (!adapter->vpd->len)
return -ENODATA;
@@ -1030,7 +1051,10 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
return -ENOMEM;
}
 
+   mutex_lock(>fw_lock);
+   adapter->fw_done_rc = 0;
reinit_completion(>fw_done);
+
crq.get_vpd.first = IBMVNIC_CRQ_CMD;
crq.get_vpd.cmd = GET_VPD;
crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
@@ -1039,6 +1063,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
if (rc) {
kfree(adapter->vpd->buff);
adapter->vpd->buff = NULL;
+   mutex_unlock(>fw_lock);
return rc;
}
 
@@ -1047,9 +1072,11 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
dev_err(dev, "Unable to retrieve VPD, rc = %d\n", rc);
kfree(adapter->vpd->buff);
adapter->vpd->buff = NULL;
+

[PATCH net v2 1/4] ibmvnic: Fix completion structure initialization

2019-11-25 Thread Thomas Falcon

Fix multiple calls to init_completion for device completion
structures. Instead, initialize them during device probe and
reinitialize them later as needed.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index f59d9a8e35e2..48225297a5e2 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -176,7 +176,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
ltb->map_id = adapter->map_id;
adapter->map_id++;
 
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
rc = send_request_map(adapter, ltb->addr,
  ltb->size, ltb->map_id);
if (rc) {
@@ -215,7 +215,7 @@ static int reset_long_term_buff(struct ibmvnic_adapter 
*adapter,
 
memset(ltb->buff, 0, ltb->size);
 
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
if (rc)
return rc;
@@ -943,7 +943,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
if (adapter->vpd->buff)
len = adapter->vpd->len;
 
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
crq.get_vpd_size.cmd = GET_VPD_SIZE;
rc = ibmvnic_send_crq(adapter, );
@@ -1689,7 +1689,7 @@ static int __ibmvnic_set_mac(struct net_device *netdev, 
u8 *dev_addr)
crq.change_mac_addr.cmd = CHANGE_MAC_ADDR;
ether_addr_copy(_mac_addr.mac_addr[0], dev_addr);
 
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
rc = ibmvnic_send_crq(adapter, );
if (rc) {
rc = -EIO;
@@ -2316,7 +2316,7 @@ static int wait_for_reset(struct ibmvnic_adapter *adapter)
adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq;
adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq;
 
-   init_completion(>reset_done);
+   reinit_completion(>reset_done);
adapter->wait_for_reset = true;
rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
if (rc)
@@ -2332,7 +2332,7 @@ static int wait_for_reset(struct ibmvnic_adapter *adapter)
adapter->desired.rx_entries = adapter->fallback.rx_entries;
adapter->desired.tx_entries = adapter->fallback.tx_entries;
 
-   init_completion(>reset_done);
+   reinit_completion(>reset_done);
adapter->wait_for_reset = true;
rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
if (rc)
@@ -2603,7 +2603,7 @@ static void ibmvnic_get_ethtool_stats(struct net_device 
*dev,
cpu_to_be32(sizeof(struct ibmvnic_statistics));
 
/* Wait for data to be written */
-   init_completion(>stats_done);
+   reinit_completion(>stats_done);
rc = ibmvnic_send_crq(adapter, );
if (rc)
return;
@@ -4403,7 +4403,7 @@ static int send_query_phys_parms(struct ibmvnic_adapter 
*adapter)
memset(, 0, sizeof(crq));
crq.query_phys_parms.first = IBMVNIC_CRQ_CMD;
crq.query_phys_parms.cmd = QUERY_PHYS_PARMS;
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
rc = ibmvnic_send_crq(adapter, );
if (rc)
return rc;
@@ -4955,6 +4955,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
INIT_LIST_HEAD(>rwi_list);
spin_lock_init(>rwi_lock);
init_completion(>init_done);
+   init_completion(>fw_done);
+   init_completion(>reset_done);
+   init_completion(>stats_done);
clear_bit(0, >resetting);
 
do {
-- 
2.12.3

[PATCH net v2 2/4] ibmvnic: Terminate waiting device threads after loss of service

2019-11-25 Thread Thomas Falcon

If we receive a notification that the device has been deactivated
or removed, force a completion of all waiting threads.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 48225297a5e2..78a3ef70f1ef 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4500,6 +4500,15 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
case IBMVNIC_CRQ_XPORT_EVENT:
netif_carrier_off(netdev);
adapter->crq.active = false;
+   /* terminate any thread waiting for a response
+* from the device
+*/
+   if (!completion_done(>fw_done)) {
+   adapter->fw_done_rc = -EIO;
+   complete(>fw_done);
+   }
+   if (!completion_done(>stats_done))
+   complete(>stats_done);
if (test_bit(0, >resetting))
adapter->force_reset_recovery = true;
if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
-- 
2.12.3

[PATCH net v2 0/4] ibmvnic: Harden device commands and queries

2019-11-25 Thread Thomas Falcon

This patch series fixes some shortcomings with the current
VNIC device command implementation. The first patch fixes
the initialization of driver completion structures used
for device commands. Additionally, all waits for device
commands are bounded with a timeout in the event that the
device does not respond or becomes inoperable. Finally,
serialize queries to retain the integrity of device return
codes.

Changes in v2:

 - included header comment for ibmvnic_wait_for_completion
 - removed open-coded loop in patch 3/4, suggested by Jakub
 - ibmvnic_wait_for_completion accepts timeout value in milliseconds
   instead of jiffies
 - timeout calculations cleaned up and completed before wait loop
 - included missing mutex_destroy calls, suggested by Jakub
 - included comment before mutex declaration

Thomas Falcon (4):
  ibmvnic: Fix completion structure initialization
  ibmvnic: Terminate waiting device threads after loss of service
  ibmvnic: Bound waits for device queries
  ibmvnic: Serialize device queries

 drivers/net/ethernet/ibm/ibmvnic.c | 192 +++--
 drivers/net/ethernet/ibm/ibmvnic.h |   2 +
 2 files changed, 167 insertions(+), 27 deletions(-)

-- 
2.12.3

Re: [PATCH net 0/4] ibmvnic: Harden device commands and queries

2019-11-25 Thread Thomas Falcon




On 11/23/19 7:49 PM, Jakub Kicinski wrote:

On Fri, 22 Nov 2019 13:41:42 -0600, Thomas Falcon wrote:

This patch series fixes some shortcomings with the current
VNIC device command implementation. The first patch fixes
the initialization of driver completion structures used
for device commands. Additionally, all waits for device
commands are bounded with a timeout in the event that the
device does not respond or becomes inoperable. Finally,
serialize queries to retain the integrity of device return
codes.

I have minor comments on two patches, but also I think it's
a little late in the release cycle for putting this in net.

Could you target net-next and repost ASAP so it still makes
it into 5.5?

Thanks.


Thank you, sorry for the late response.  I will make the requested 
changes ASAP, but I've missed the net-next window.  What should I target 
for v2?


Thanks again,

Tom

[PATCH net 4/4] ibmvnic: Serialize device queries

2019-11-22 Thread Thomas Falcon

Provide some serialization for device CRQ commands
and queries to ensure that the shared variable used for
storing return codes is properly synchronized.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 51 ++
 drivers/net/ethernet/ibm/ibmvnic.h |  1 +
 2 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 9806eccc5670..5e9b7711cd2e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -207,11 +207,14 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
ltb->map_id = adapter->map_id;
adapter->map_id++;
 
+   mutex_lock(>fw_lock);
+   adapter->fw_done_rc = 0;
reinit_completion(>fw_done);
rc = send_request_map(adapter, ltb->addr,
  ltb->size, ltb->map_id);
if (rc) {
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+   mutex_unlock(>fw_lock);
return rc;
}
 
@@ -222,6 +225,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
"Long term map request aborted or timed out,rc = %d\n",
rc);
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+   mutex_unlock(>fw_lock);
return rc;
}
 
@@ -229,8 +233,10 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
dev_err(dev, "Couldn't map long term buffer,rc = %d\n",
adapter->fw_done_rc);
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+   mutex_unlock(>fw_lock);
return -1;
}
+   mutex_unlock(>fw_lock);
return 0;
 }
 
@@ -256,16 +262,21 @@ static int reset_long_term_buff(struct ibmvnic_adapter 
*adapter,
 
memset(ltb->buff, 0, ltb->size);
 
+   mutex_lock(>fw_lock);
+   adapter->fw_done_rc = 0;
reinit_completion(>fw_done);
rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
-   if (rc)
+   if (rc) {
+   mutex_unlock(>fw_lock);
return rc;
+   }
 
rc = ibmvnic_wait_for_completion(adapter, >fw_done,
 msecs_to_jiffies(1));
if (rc) {
dev_info(dev,
 "Reset failed, long term map request timed out or 
aborted\n");
+   mutex_unlock(>fw_lock);
return rc;
}
 
@@ -273,8 +284,10 @@ static int reset_long_term_buff(struct ibmvnic_adapter 
*adapter,
dev_info(dev,
 "Reset failed, attempting to free and reallocate 
buffer\n");
free_long_term_buff(adapter, ltb);
+   mutex_unlock(>fw_lock);
return alloc_long_term_buff(adapter, ltb, ltb->size);
}
+   mutex_unlock(>fw_lock);
return 0;
 }
 
@@ -991,19 +1004,26 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
if (adapter->vpd->buff)
len = adapter->vpd->len;
 
+   mutex_lock(>fw_lock);
+   adapter->fw_done_rc = 0;
reinit_completion(>fw_done);
+
crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
crq.get_vpd_size.cmd = GET_VPD_SIZE;
rc = ibmvnic_send_crq(adapter, );
-   if (rc)
+   if (rc) {
+   mutex_unlock(>fw_lock);
return rc;
+   }
 
rc = ibmvnic_wait_for_completion(adapter, >fw_done,
 msecs_to_jiffies(1));
if (rc) {
dev_err(dev, "Could not retrieve VPD size, rc = %d\n", rc);
+   mutex_unlock(>fw_lock);
return rc;
}
+   mutex_unlock(>fw_lock);
 
if (!adapter->vpd->len)
return -ENODATA;
@@ -1030,7 +1050,10 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
return -ENOMEM;
}
 
+   mutex_lock(>fw_lock);
+   adapter->fw_done_rc = 0;
reinit_completion(>fw_done);
+
crq.get_vpd.first = IBMVNIC_CRQ_CMD;
crq.get_vpd.cmd = GET_VPD;
crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
@@ -1039,6 +1062,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
if (rc) {
kfree(adapter->vpd->buff);
adapter->vpd->buff = NULL;
+   mutex_unlock(>fw_lock);
return rc;
}
 
@@ -1048,9 +1072,11 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
dev_err(dev, "Unable to retrieve VPD, rc = %d\n", rc);

[PATCH net 3/4] ibmvnic: Bound waits for device queries

2019-11-22 Thread Thomas Falcon

Create a wrapper for wait_for_completion calls with additional
driver checks to ensure that the driver does not wait on a
disabled device. In those cases or if the device does not respond
in an extended amount of time, this will allow the driver an
opportunity to recover.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 117 -
 1 file changed, 102 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 78a3ef70f1ef..9806eccc5670 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -159,6 +159,37 @@ static long h_reg_sub_crq(unsigned long unit_address, 
unsigned long token,
return rc;
 }
 
+static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter,
+  struct completion *comp_done,
+  unsigned long timeout)
+{
+   struct net_device *netdev = adapter->netdev;
+   u8 retry = 5;
+
+restart_timer:
+   if (!adapter->crq.active) {
+   netdev_err(netdev, "Device down!\n");
+   return -ENODEV;
+   }
+   /* periodically check that the device is up while waiting for
+* a response
+*/
+   if (!wait_for_completion_timeout(comp_done, timeout / retry)) {
+   if (!adapter->crq.active) {
+   netdev_err(netdev, "Device down!\n");
+   return -ENODEV;
+   } else {
+   retry--;
+   if (retry)
+   goto restart_timer;
+   netdev_err(netdev, "Operation timing out...\n");
+   return -ETIMEDOUT;
+   }
+   }
+
+   return 0;
+}
+
 static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb, int size)
 {
@@ -183,7 +214,16 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
return rc;
}
-   wait_for_completion(>fw_done);
+
+   rc = ibmvnic_wait_for_completion(adapter, >fw_done,
+msecs_to_jiffies(1));
+   if (rc) {
+   dev_err(dev,
+   "Long term map request aborted or timed out,rc = %d\n",
+   rc);
+   dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+   return rc;
+   }
 
if (adapter->fw_done_rc) {
dev_err(dev, "Couldn't map long term buffer,rc = %d\n",
@@ -211,6 +251,7 @@ static void free_long_term_buff(struct ibmvnic_adapter 
*adapter,
 static int reset_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb)
 {
+   struct device *dev = >vdev->dev;
int rc;
 
memset(ltb->buff, 0, ltb->size);
@@ -219,10 +260,17 @@ static int reset_long_term_buff(struct ibmvnic_adapter 
*adapter,
rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
if (rc)
return rc;
-   wait_for_completion(>fw_done);
+
+   rc = ibmvnic_wait_for_completion(adapter, >fw_done,
+msecs_to_jiffies(1));
+   if (rc) {
+   dev_info(dev,
+"Reset failed, long term map request timed out or 
aborted\n");
+   return rc;
+   }
 
if (adapter->fw_done_rc) {
-   dev_info(>vdev->dev,
+   dev_info(dev,
 "Reset failed, attempting to free and reallocate 
buffer\n");
free_long_term_buff(adapter, ltb);
return alloc_long_term_buff(adapter, ltb, ltb->size);
@@ -949,7 +997,13 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
rc = ibmvnic_send_crq(adapter, );
if (rc)
return rc;
-   wait_for_completion(>fw_done);
+
+   rc = ibmvnic_wait_for_completion(adapter, >fw_done,
+msecs_to_jiffies(1));
+   if (rc) {
+   dev_err(dev, "Could not retrieve VPD size, rc = %d\n", rc);
+   return rc;
+   }
 
if (!adapter->vpd->len)
return -ENODATA;
@@ -987,7 +1041,15 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
adapter->vpd->buff = NULL;
return rc;
}
-   wait_for_completion(>fw_done);
+
+   rc = ibmvnic_wait_for_completion(adapter, >fw_done,
+msecs_to_jiffies(1));
+   if (rc) {
+

[PATCH net 1/4] ibmvnic: Fix completion structure initialization

2019-11-22 Thread Thomas Falcon

Fix multiple calls to init_completion for device completion
structures. Instead, initialize them during device probe and
reinitialize them later as needed.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index f59d9a8e35e2..48225297a5e2 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -176,7 +176,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
ltb->map_id = adapter->map_id;
adapter->map_id++;
 
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
rc = send_request_map(adapter, ltb->addr,
  ltb->size, ltb->map_id);
if (rc) {
@@ -215,7 +215,7 @@ static int reset_long_term_buff(struct ibmvnic_adapter 
*adapter,
 
memset(ltb->buff, 0, ltb->size);
 
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
if (rc)
return rc;
@@ -943,7 +943,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
if (adapter->vpd->buff)
len = adapter->vpd->len;
 
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
crq.get_vpd_size.cmd = GET_VPD_SIZE;
rc = ibmvnic_send_crq(adapter, );
@@ -1689,7 +1689,7 @@ static int __ibmvnic_set_mac(struct net_device *netdev, 
u8 *dev_addr)
crq.change_mac_addr.cmd = CHANGE_MAC_ADDR;
ether_addr_copy(_mac_addr.mac_addr[0], dev_addr);
 
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
rc = ibmvnic_send_crq(adapter, );
if (rc) {
rc = -EIO;
@@ -2316,7 +2316,7 @@ static int wait_for_reset(struct ibmvnic_adapter *adapter)
adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq;
adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq;
 
-   init_completion(>reset_done);
+   reinit_completion(>reset_done);
adapter->wait_for_reset = true;
rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
if (rc)
@@ -2332,7 +2332,7 @@ static int wait_for_reset(struct ibmvnic_adapter *adapter)
adapter->desired.rx_entries = adapter->fallback.rx_entries;
adapter->desired.tx_entries = adapter->fallback.tx_entries;
 
-   init_completion(>reset_done);
+   reinit_completion(>reset_done);
adapter->wait_for_reset = true;
rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
if (rc)
@@ -2603,7 +2603,7 @@ static void ibmvnic_get_ethtool_stats(struct net_device 
*dev,
cpu_to_be32(sizeof(struct ibmvnic_statistics));
 
/* Wait for data to be written */
-   init_completion(>stats_done);
+   reinit_completion(>stats_done);
rc = ibmvnic_send_crq(adapter, );
if (rc)
return;
@@ -4403,7 +4403,7 @@ static int send_query_phys_parms(struct ibmvnic_adapter 
*adapter)
memset(, 0, sizeof(crq));
crq.query_phys_parms.first = IBMVNIC_CRQ_CMD;
crq.query_phys_parms.cmd = QUERY_PHYS_PARMS;
-   init_completion(>fw_done);
+   reinit_completion(>fw_done);
rc = ibmvnic_send_crq(adapter, );
if (rc)
return rc;
@@ -4955,6 +4955,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
INIT_LIST_HEAD(>rwi_list);
spin_lock_init(>rwi_lock);
init_completion(>init_done);
+   init_completion(>fw_done);
+   init_completion(>reset_done);
+   init_completion(>stats_done);
clear_bit(0, >resetting);
 
do {
-- 
2.12.3

[PATCH net 0/4] ibmvnic: Harden device commands and queries

2019-11-22 Thread Thomas Falcon

This patch series fixes some shortcomings with the current
VNIC device command implementation. The first patch fixes
the initialization of driver completion structures used
for device commands. Additionally, all waits for device
commands are bounded with a timeout in the event that the
device does not respond or becomes inoperable. Finally,
serialize queries to retain the integrity of device return
codes.

Thomas Falcon (4):
  ibmvnic: Fix completion structure initialization
  ibmvnic: Terminate waiting device threads after loss of service
  ibmvnic: Bound waits for device queries
  ibmvnic: Serialize device queries

 drivers/net/ethernet/ibm/ibmvnic.c | 194 +++--
 drivers/net/ethernet/ibm/ibmvnic.h |   1 +
 2 files changed, 168 insertions(+), 27 deletions(-)

-- 
2.12.3

[PATCH net 2/4] ibmvnic: Terminate waiting device threads after loss of service

2019-11-22 Thread Thomas Falcon

If we receive a notification that the device has been deactivated
or removed, force a completion of all waiting threads.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 48225297a5e2..78a3ef70f1ef 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4500,6 +4500,15 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
case IBMVNIC_CRQ_XPORT_EVENT:
netif_carrier_off(netdev);
adapter->crq.active = false;
+   /* terminate any thread waiting for a response
+* from the device
+*/
+   if (!completion_done(>fw_done)) {
+   adapter->fw_done_rc = -EIO;
+   complete(>fw_done);
+   }
+   if (!completion_done(>stats_done))
+   complete(>stats_done);
if (test_bit(0, >resetting))
adapter->force_reset_recovery = true;
if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
-- 
2.12.3

Re: [RFC PATCH] powerpc/pseries/mobility: notify network peers after migration

2019-11-07 Thread Thomas Falcon




On 11/6/19 4:14 PM, Nathan Lynch wrote:

Hi Tom,

Thomas Falcon  writes:

After a migration, it is necessary to send a gratuitous ARP
from all running interfaces so that the rest of the network
is aware of its new location. However, some supported network
devices are unaware that they have been migrated. To avoid network
interruptions and other unwanted behavior, force a GARP on all
valid, running interfaces as part of the post_mobility_fixup
routine.

[...]


@@ -331,6 +334,8 @@ void post_mobility_fixup(void)
  {
int rc;
int activate_fw_token;
+   struct net_device *netdev;
+   struct net *net;
  
  	activate_fw_token = rtas_token("ibm,activate-firmware");

if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
@@ -371,6 +376,21 @@ void post_mobility_fixup(void)
/* Possibly switch to a new RFI flush type */
pseries_setup_rfi_flush();
  
+	/* need to force a gratuitous ARP on running interfaces */

+   rtnl_lock();
+   for_each_net(net) {
+   for_each_netdev(net, netdev) {
+   if (netif_device_present(netdev) &&
+   netif_running(netdev) &&
+   !(netdev->flags & (IFF_NOARP | IFF_LOOPBACK)))
+   call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+netdev);
+   call_netdevice_notifiers(NETDEV_RESEND_IGMP,
+netdev);
+   }
+   }
+   rtnl_unlock();
+

This isn't an outright nak, but this is not nice. It illustrates the
need to rethink the pseries partition migration code. There is no
mechanism for drivers and other interested code to prepare for a
migration or to adjust to the destination. So post_mobility_fixup() will
continue to grow into a fragile collection of calls into unrelated
subsystems until there is a better design -- either a pseries-specific
notification/callback mechanism, or something based on the pm framework.

My understanding is that this is needed specifically for ibmveth and,
unlike ibmvnic, the platform does not provide any notification to that
driver that a migration has occurred, right?


Correct, the ibmveth device, unlike ibmvnic, receives no signal or 
notification at all in the event of a partition migration, so it can not 
handle it or send a gratuitous ARP because from the driver's perspective 
nothing has changed.  As you've described, there is no existing notifier 
in the kernel to inform interested parties that the system has migrated 
or is about to migrate. Without adding the needed infrastructure to do 
that, I'm not sure how else to fix this.


Tom

Re: [RFC PATCH] powerpc/pseries/mobility: notify network peers after migration

2019-11-07 Thread Thomas Falcon




On 11/6/19 7:33 PM, Michael Ellerman wrote:

Hi Thomas,

Thomas Falcon  writes:

After a migration, it is necessary to send a gratuitous ARP
from all running interfaces so that the rest of the network
is aware of its new location. However, some supported network
devices are unaware that they have been migrated. To avoid network
interruptions and other unwanted behavior, force a GARP on all
valid, running interfaces as part of the post_mobility_fixup
routine.

Signed-off-by: Thomas Falcon 
---
  arch/powerpc/platforms/pseries/mobility.c | 20 
  1 file changed, 20 insertions(+)

This patch is in powerpc code, but it's doing networking stuff that I
don't really understand.

So I'd like an Ack from Dave or someone else in netdev land before I
merge it.


Thanks, I've already included netdev in the CC list. I'll wait and keep 
an eye out for any comments from that side.


Tom





cheers



diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index b571285f6c14..c1abc14cf2bb 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -17,6 +17,9 @@
  #include 
  #include 
  #include 
+#include 
+#include 
+#include 
  
  #include 

  #include 
@@ -331,6 +334,8 @@ void post_mobility_fixup(void)
  {
int rc;
int activate_fw_token;
+   struct net_device *netdev;
+   struct net *net;
  
  	activate_fw_token = rtas_token("ibm,activate-firmware");

if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
@@ -371,6 +376,21 @@ void post_mobility_fixup(void)
/* Possibly switch to a new RFI flush type */
pseries_setup_rfi_flush();
  
+	/* need to force a gratuitous ARP on running interfaces */

+   rtnl_lock();
+   for_each_net(net) {
+   for_each_netdev(net, netdev) {
+   if (netif_device_present(netdev) &&
+   netif_running(netdev) &&
+   !(netdev->flags & (IFF_NOARP | IFF_LOOPBACK)))
+   call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+netdev);
+   call_netdevice_notifiers(NETDEV_RESEND_IGMP,
+netdev);
+   }
+   }
+   rtnl_unlock();
+
return;
  }
  
--

2.12.3

[RFC PATCH v2] powerpc/pseries/mobility: notify network peers after migration

2019-11-06 Thread Thomas Falcon

After a migration, it is necessary to send a gratuitous ARP
from all running interfaces so that the rest of the network
is aware of its new location. However, some supported network
devices are unaware that they have been migrated. To avoid network
interruptions and other unwanted behavior, force a GARP on all
valid, running interfaces as part of the post_mobility_fixup
routine.

Signed-off-by: Thomas Falcon 
---
v2: fix missing brackets caught by Russell Currey
---
 arch/powerpc/platforms/pseries/mobility.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index b571285..dddc3c1 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -17,6 +17,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include 
 #include 
@@ -331,6 +334,8 @@ void post_mobility_fixup(void)
 {
int rc;
int activate_fw_token;
+   struct net_device *netdev;
+   struct net *net;
 
activate_fw_token = rtas_token("ibm,activate-firmware");
if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
@@ -371,6 +376,22 @@ void post_mobility_fixup(void)
/* Possibly switch to a new RFI flush type */
pseries_setup_rfi_flush();
 
+   /* need to force a gratuitous ARP on running interfaces */
+   rtnl_lock();
+   for_each_net(net) {
+   for_each_netdev(net, netdev) {
+   if (netif_device_present(netdev) &&
+   netif_running(netdev) &&
+   !(netdev->flags & (IFF_NOARP | IFF_LOOPBACK))) {
+   call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+netdev);
+   call_netdevice_notifiers(NETDEV_RESEND_IGMP,
+netdev);
+   }
+   }
+   }
+   rtnl_unlock();
+
return;
 }
 
-- 
1.8.3.1

Re: [RFC PATCH] powerpc/pseries/mobility: notify network peers after migration

2019-11-05 Thread Thomas Falcon




On 11/5/19 10:13 PM, Russell Currey wrote:

On Tue, 2019-11-05 at 18:06 -0600, Thomas Falcon wrote:

After a migration, it is necessary to send a gratuitous ARP
from all running interfaces so that the rest of the network
is aware of its new location. However, some supported network
devices are unaware that they have been migrated. To avoid network
interruptions and other unwanted behavior, force a GARP on all
valid, running interfaces as part of the post_mobility_fixup
routine.

Signed-off-by: Thomas Falcon 

Hi Thomas,


---
  arch/powerpc/platforms/pseries/mobility.c | 20 
  1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c
b/arch/powerpc/platforms/pseries/mobility.c
index b571285f6c14..c1abc14cf2bb 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -17,6 +17,9 @@
  #include 
  #include 
  #include 
+#include 
+#include 
+#include 
  
  #include 

  #include 
@@ -331,6 +334,8 @@ void post_mobility_fixup(void)
  {
int rc;
int activate_fw_token;
+   struct net_device *netdev;
+   struct net *net;
  
  	activate_fw_token = rtas_token("ibm,activate-firmware");

if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
@@ -371,6 +376,21 @@ void post_mobility_fixup(void)
/* Possibly switch to a new RFI flush type */
pseries_setup_rfi_flush();
  
+	/* need to force a gratuitous ARP on running interfaces */

+   rtnl_lock();
+   for_each_net(net) {
+   for_each_netdev(net, netdev) {
+   if (netif_device_present(netdev) &&
+   netif_running(netdev) &&
+   !(netdev->flags & (IFF_NOARP |
IFF_LOOPBACK)))
+   call_netdevice_notifiers(NETDEV_NOTIFY_
PEERS,
+netdev);

Without curly braces following the "if" statment, the second line
(below) will be executed unconditionally, which I assume with this
indentation isn't what you want.

(reported by snowpatch)

- Russell


Thanks for catching that! I'll fix that and send a v2 soon.

Tom



+   call_netdevice_notifiers(NETDEV_RESEND_
IGMP,
+netdev);
+   }
+   }
+   rtnl_unlock();
+
return;
  }

[RFC PATCH] powerpc/pseries/mobility: notify network peers after migration

2019-11-05 Thread Thomas Falcon

After a migration, it is necessary to send a gratuitous ARP
from all running interfaces so that the rest of the network
is aware of its new location. However, some supported network
devices are unaware that they have been migrated. To avoid network
interruptions and other unwanted behavior, force a GARP on all
valid, running interfaces as part of the post_mobility_fixup
routine.

Signed-off-by: Thomas Falcon 
---
 arch/powerpc/platforms/pseries/mobility.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index b571285f6c14..c1abc14cf2bb 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -17,6 +17,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include 
 #include 
@@ -331,6 +334,8 @@ void post_mobility_fixup(void)
 {
int rc;
int activate_fw_token;
+   struct net_device *netdev;
+   struct net *net;
 
activate_fw_token = rtas_token("ibm,activate-firmware");
if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
@@ -371,6 +376,21 @@ void post_mobility_fixup(void)
/* Possibly switch to a new RFI flush type */
pseries_setup_rfi_flush();
 
+   /* need to force a gratuitous ARP on running interfaces */
+   rtnl_lock();
+   for_each_net(net) {
+   for_each_netdev(net, netdev) {
+   if (netif_device_present(netdev) &&
+   netif_running(netdev) &&
+   !(netdev->flags & (IFF_NOARP | IFF_LOOPBACK)))
+   call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+netdev);
+   call_netdevice_notifiers(NETDEV_RESEND_IGMP,
+netdev);
+   }
+   }
+   rtnl_unlock();
+
return;
 }
 
-- 
2.12.3

Re: [PATCH 0/9] Fixes and Enablement of ibm,drc-info property

2019-11-05 Thread Thomas Falcon




On 11/5/19 9:24 AM, Tyrel Datwyler wrote:

Hi, just pointing out a few typos...

There was a previous effort to add support for the PAPR
architected ibm,drc-info property. This property provides a more
memory compact representation of a paritions Dynamic Reconfig

s/paritions/partition's

Connectors (DRC). These can otherwise be thought of as currently
partitioned, or available but yet to be partitioned system resources
such as cpus, memory, and physical/logical IOA devices.

The initial implementation proved buggy and was fully turned of by


s/turned of/turned off


disabling the bit in the appropriate CAS support vector. We now have
PowerVM firmware in the field that supports this new property, and
further to suppport partitions with 24TB+ of possible memory this

s/suppport/support

property is required to perform platform migration.

This serious fixs the short comings of the previous submission


Either "seriously fixes the shortcomings", or "fixes the serious 
shortcomings?"


Thanks,

Tom


in the areas of general implementation, cpu hotplug, and IOA hotplug.

Tyrel Datwyler (9):
   powerpc/pseries: Fix bad drc_index_start value parsing of drc-info
 entry
   powerpc/pseries: Fix drc-info mappings of logical cpus to drc-index
   powerpc/pseries: Add cpu DLPAR support for drc-info property
   PCI: rpaphp: Fix up pointer to first drc-info entry
   PCI: rpaphp: Don't rely on firmware feature to imply drc-info support
   PCI: rpaphp: Add drc-info support for hotplug slot registration
   PCI: rpaphp: annotate and correctly byte swap DRC properties
   PCI: rpaphp: Correctly match ibm,my-drc-index to drc-name when using
 drc-info
   powerpc/pseries: Enable support for ibm,drc-info property

  arch/powerpc/kernel/prom_init.c |   2 +-
  arch/powerpc/platforms/pseries/hotplug-cpu.c| 101 ---
  arch/powerpc/platforms/pseries/of_helpers.c |   8 +-
  arch/powerpc/platforms/pseries/pseries_energy.c |  23 ++---
  drivers/pci/hotplug/rpaphp_core.c   | 124 +---
  5 files changed, 187 insertions(+), 71 deletions(-)

Re: [PATCH 3/9] powerpc/pseries: Add cpu DLPAR support for drc-info property

2019-11-05 Thread Thomas Falcon




On 11/5/19 9:24 AM, Tyrel Datwyler wrote:

From: Tyrel Datwyler 

Older firmwares provided information about Dynamic Reconfig
Connectors (DRC) through several device tree properties, namely
ibm,drc-types, ibm,drc-indexes, ibm,drc-names, and
ibm,drc-power-domains. New firmwares have the ability to present this
same information in a much condensed format through a device tree
property called ibm,drc-info.

The existing cpu DLPAR hotplug code only understands the older DRC
property format when validating the drc-index of a cpu during a
hotplug add. This updates those code paths to use the ibm,drc-info
property, when present, instead for validation.

Signed-off-by: Tyrel Datwyler 
---
  arch/powerpc/platforms/pseries/hotplug-cpu.c | 101 ++-
  1 file changed, 85 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index bbda646..9ba006c 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -407,17 +407,58 @@ static bool dlpar_cpu_exists(struct device_node *parent, 
u32 drc_index)
return found;
  }

+static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
+{
+   struct property *info;
+   struct of_drc_info drc;
+   const __be32 *value;
+   int count, i, j;
+
+   info = of_find_property(parent, "ibm,drc-info", NULL);
+   if (!info)
+   return false;
+
+   value = of_prop_next_u32(info, NULL, );
+
+   /* First value of ibm,drc-info is number of drc-info records */
+   if (value)
+   value++;
+   else
+   return false;
+
+   for (i = 0; i < count; i++) {
+   if (of_read_drc_info_cell(, , ))
+   return false;
+
+   if (strncmp(drc.drc_type, "CPU", 3))
+   break;
+
+   if (drc_index > drc.last_drc_index)
+   continue;
+
+   for (j = 0; j < drc.num_sequential_elems; j++)
+   if (drc_index == (drc.drc_index_start + 
(drc.sequential_inc * j)))
+   return true;
+   }
+
+   return false;
+}
+
  static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
  {
bool found = false;
int rc, index;

-   index = 0;
+   if (of_find_property(parent, "ibm,drc-info", NULL))
+   return drc_info_valid_index(parent, drc_index);
+
+   index = 1;


Hi, this change was confusing to me until I continued reading the patch 
and saw the comment below regarding the first element of the 
ibm,drc-info property.  Would it be good to have a similar comment here too?




while (!found) {
u32 drc;

rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
index++, );
+


Another nitpick but this could be cleaned up.

Thanks,

Tom



if (rc)
break;

@@ -720,8 +761,11 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
  static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
  {
struct device_node *parent;
+   struct property *info;
int cpus_found = 0;
int index, rc;
+   int i, j;
+   u32 drc_index;

parent = of_find_node_by_path("/cpus");
if (!parent) {
@@ -730,24 +774,49 @@ static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 
cpus_to_add)
return -1;
}

-   /* Search the ibm,drc-indexes array for possible CPU drcs to
-* add. Note that the format of the ibm,drc-indexes array is
-* the number of entries in the array followed by the array
-* of drc values so we start looking at index = 1.
-*/
-   index = 1;
-   while (cpus_found < cpus_to_add) {
-   u32 drc;
+   info = of_find_property(parent, "ibm,drc-info", NULL);
+   if (info) {
+   struct of_drc_info drc;
+   const __be32 *value;
+   int count;

-   rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
-   index++, );
-   if (rc)
-   break;
+   value = of_prop_next_u32(info, NULL, );
+   if (value)
+   value++;

-   if (dlpar_cpu_exists(parent, drc))
-   continue;
+   for (i = 0; i < count; i++) {
+   of_read_drc_info_cell(, , );
+   if (strncmp(drc.drc_type, "CPU", 3))
+   break;
+
+   for (j = 0; j < drc.num_sequential_elems && cpus_found 
< cpus_to_add; j++) {
+   drc_index = drc.drc_index_start + 
(drc.sequential_inc * j);
+
+   if (dlpar_cpu_exists(parent,

Re: [PATCH] net/ibmvnic: Fix EOI when running in XIVE mode.

2019-10-11 Thread Thomas Falcon


On 10/11/19 12:52 AM, Cédric Le Goater wrote:

pSeries machines on POWER9 processors can run with the XICS (legacy)
interrupt mode or with the XIVE exploitation interrupt mode. These
interrupt contollers have different interfaces for interrupt
management : XICS uses hcalls and XIVE loads and stores on a page.
H_EOI being a XICS interface the enable_scrq_irq() routine can fail
when the machine runs in XIVE mode.

Fix that by calling the EOI handler of the interrupt chip.

Fixes: f23e0643cd0b ("ibmvnic: Clear pending interrupt after device reset")
Signed-off-by: Cédric Le Goater 
---


Thank you for this fix, Cedric!

Tom


  drivers/net/ethernet/ibm/ibmvnic.c | 8 +++-
  1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 2b073a3c0b84..f59d9a8e35e2 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2878,12 +2878,10 @@ static int enable_scrq_irq(struct ibmvnic_adapter 
*adapter,
  
  	if (test_bit(0, >resetting) &&

adapter->reset_reason == VNIC_RESET_MOBILITY) {
-   u64 val = (0xff00) | scrq->hw_irq;
+   struct irq_desc *desc = irq_to_desc(scrq->irq);
+   struct irq_chip *chip = irq_desc_get_chip(desc);
  
-		rc = plpar_hcall_norets(H_EOI, val);

-   if (rc)
-   dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
-   val, rc);
+   chip->irq_eoi(>irq_data);
}
  
  	rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,

[PATCH net] ibmvnic: Do not process reset during or after device removal

2019-08-27 Thread Thomas Falcon

Currently, the ibmvnic driver will not schedule device resets
if the device is being removed, but does not check the device
state before the reset is actually processed. This leads to a race
where a reset is scheduled with a valid device state but is
processed after the driver has been removed, resulting in an oops.

Fix this by checking the device state before processing a queued
reset event.

Reported-by: Abdul Haleem 
Tested-by: Abdul Haleem 
Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index cebd20f..fa4bb94 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1983,6 +1983,10 @@ static void __ibmvnic_reset(struct work_struct *work)
 
rwi = get_next_rwi(adapter);
while (rwi) {
+   if (adapter->state == VNIC_REMOVING ||
+   adapter->state == VNIC_REMOVED)
+   goto out;
+
if (adapter->force_reset_recovery) {
adapter->force_reset_recovery = false;
rc = do_hard_reset(adapter, rwi, reset_state);
@@ -2007,7 +2011,7 @@ static void __ibmvnic_reset(struct work_struct *work)
netdev_dbg(adapter->netdev, "Reset failed\n");
free_all_rwi(adapter);
}
-
+out:
adapter->resetting = false;
if (we_lock_rtnl)
rtnl_unlock();
-- 
1.8.3.1

[PATCH net-next v2] ibmveth: Allow users to update reported speed and duplex

2019-08-06 Thread Thomas Falcon

Reported ethtool link settings for the ibmveth driver are currently
hardcoded and no longer reflect the actual capabilities of supported
hardware. There is no interface designed for retrieving this information
from device firmware nor is there any way to update current settings
to reflect observed or expected link speeds.

To avoid breaking existing configurations, retain current values as
default settings but let users update them to match the expected
capabilities of underlying hardware if needed. This update would
allow the use of configurations that rely on certain link speed
settings, such as LACP. This patch is based on the implementation
in virtio_net.

Signed-off-by: Thomas Falcon 
---
v2: Updated default driver speed/duplex settings to avoid
breaking existing setups
---
 drivers/net/ethernet/ibm/ibmveth.c | 83 --
 drivers/net/ethernet/ibm/ibmveth.h |  3 ++
 2 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c 
b/drivers/net/ethernet/ibm/ibmveth.c
index d654c23..5dc634f 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -712,31 +712,68 @@ static int ibmveth_close(struct net_device *netdev)
return 0;
 }
 
-static int netdev_get_link_ksettings(struct net_device *dev,
-struct ethtool_link_ksettings *cmd)
+static bool
+ibmveth_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
 {
-   u32 supported, advertising;
-
-   supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
-   SUPPORTED_FIBRE);
-   advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
-   ADVERTISED_FIBRE);
-   cmd->base.speed = SPEED_1000;
-   cmd->base.duplex = DUPLEX_FULL;
-   cmd->base.port = PORT_FIBRE;
-   cmd->base.phy_address = 0;
-   cmd->base.autoneg = AUTONEG_ENABLE;
-
-   ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-   supported);
-   ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-   advertising);
+   struct ethtool_link_ksettings diff1 = *cmd;
+   struct ethtool_link_ksettings diff2 = {};
+
+   diff2.base.port = PORT_OTHER;
+   diff1.base.speed = 0;
+   diff1.base.duplex = 0;
+   diff1.base.cmd = 0;
+   diff1.base.link_mode_masks_nwords = 0;
+   ethtool_link_ksettings_zero_link_mode(, advertising);
+
+   return !memcmp(, , sizeof(diff1.base)) &&
+   bitmap_empty(diff1.link_modes.supported,
+__ETHTOOL_LINK_MODE_MASK_NBITS) &&
+   bitmap_empty(diff1.link_modes.advertising,
+__ETHTOOL_LINK_MODE_MASK_NBITS) &&
+   bitmap_empty(diff1.link_modes.lp_advertising,
+__ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static int ibmveth_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
+{
+   struct ibmveth_adapter *adapter = netdev_priv(dev);
+   u32 speed;
+   u8 duplex;
+
+   speed = cmd->base.speed;
+   duplex = cmd->base.duplex;
+   /* don't allow custom speed and duplex */
+   if (!ethtool_validate_speed(speed) ||
+   !ethtool_validate_duplex(duplex) ||
+   !ibmveth_validate_ethtool_cmd(cmd))
+   return -EINVAL;
+   adapter->speed = speed;
+   adapter->duplex = duplex;
 
return 0;
 }
 
-static void netdev_get_drvinfo(struct net_device *dev,
-  struct ethtool_drvinfo *info)
+static int ibmveth_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+   struct ibmveth_adapter *adapter = netdev_priv(dev);
+
+   cmd->base.speed = adapter->speed;
+   cmd->base.duplex = adapter->duplex;
+   cmd->base.port = PORT_OTHER;
+
+   return 0;
+}
+
+static void ibmveth_init_link_settings(struct ibmveth_adapter *adapter)
+{
+   adapter->duplex = DUPLEX_FULL;
+   adapter->speed = SPEED_1000;
+}
+
+static void ibmveth_get_drvinfo(struct net_device *dev,
+   struct ethtool_drvinfo *info)
 {
strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
strlcpy(info->version, ibmveth_driver_version, sizeof(info->version));
@@ -965,12 +1002,13 @@ static void ibmveth_get_ethtool_stats(struct net_device 
*dev,
 }
 
 static const struct ethtool_ops netdev_ethtool_ops = {
-   .get_drvinfo= netdev_get_drvinfo,
+   .get_drvinfo= ibmveth_get_drvinfo,
.get_link   = ethtool_op_get_link,
.get_strings= ibmveth_get_str

Re: [PATCH net-next] ibmveth: Allow users to update reported speed and duplex

2019-08-06 Thread Thomas Falcon




On 8/6/19 5:25 AM, Michael Ellerman wrote:

Thomas Falcon  writes:

Reported ethtool link settings for the ibmveth driver are currently
hardcoded and no longer reflect the actual capabilities of supported
hardware. There is no interface designed for retrieving this information
from device firmware nor is there any way to update current settings
to reflect observed or expected link speeds.

To avoid confusion, initially define speed and duplex as unknown and

Doesn't that risk break existing setups?



You're right, sorry for missing that.





allow the user to alter these settings to match the expected
capabilities of underlying hardware if needed. This update would allow
the use of configurations that rely on certain link speed settings,
such as LACP. This patch is based on the implementation in virtio_net.

Wouldn't it be safer to keep the current values as the default, and then
also allow them to be overridden by a motivated user.


That is a good compromise.  I will resend an updated version soon with 
that change.


Thanks!




cheers

[PATCH net-next] ibmveth: Allow users to update reported speed and duplex

2019-08-05 Thread Thomas Falcon

Reported ethtool link settings for the ibmveth driver are currently
hardcoded and no longer reflect the actual capabilities of supported
hardware. There is no interface designed for retrieving this information
from device firmware nor is there any way to update current settings
to reflect observed or expected link speeds.

To avoid confusion, initially define speed and duplex as unknown and
allow the user to alter these settings to match the expected
capabilities of underlying hardware if needed. This update would allow
the use of configurations that rely on certain link speed settings,
such as LACP. This patch is based on the implementation in virtio_net.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmveth.c | 83 --
 drivers/net/ethernet/ibm/ibmveth.h |  3 ++
 2 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c 
b/drivers/net/ethernet/ibm/ibmveth.c
index d654c23..77af9c2 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -712,31 +712,68 @@ static int ibmveth_close(struct net_device *netdev)
return 0;
 }
 
-static int netdev_get_link_ksettings(struct net_device *dev,
-struct ethtool_link_ksettings *cmd)
+static bool
+ibmveth_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
 {
-   u32 supported, advertising;
-
-   supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
-   SUPPORTED_FIBRE);
-   advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
-   ADVERTISED_FIBRE);
-   cmd->base.speed = SPEED_1000;
-   cmd->base.duplex = DUPLEX_FULL;
-   cmd->base.port = PORT_FIBRE;
-   cmd->base.phy_address = 0;
-   cmd->base.autoneg = AUTONEG_ENABLE;
-
-   ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-   supported);
-   ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-   advertising);
+   struct ethtool_link_ksettings diff1 = *cmd;
+   struct ethtool_link_ksettings diff2 = {};
+
+   diff2.base.port = PORT_OTHER;
+   diff1.base.speed = 0;
+   diff1.base.duplex = 0;
+   diff1.base.cmd = 0;
+   diff1.base.link_mode_masks_nwords = 0;
+   ethtool_link_ksettings_zero_link_mode(, advertising);
+
+   return !memcmp(, , sizeof(diff1.base)) &&
+   bitmap_empty(diff1.link_modes.supported,
+__ETHTOOL_LINK_MODE_MASK_NBITS) &&
+   bitmap_empty(diff1.link_modes.advertising,
+__ETHTOOL_LINK_MODE_MASK_NBITS) &&
+   bitmap_empty(diff1.link_modes.lp_advertising,
+__ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static int ibmveth_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
+{
+   struct ibmveth_adapter *adapter = netdev_priv(dev);
+   u32 speed;
+   u8 duplex;
+
+   speed = cmd->base.speed;
+   duplex = cmd->base.duplex;
+   /* don't allow custom speed and duplex */
+   if (!ethtool_validate_speed(speed) ||
+   !ethtool_validate_duplex(duplex) ||
+   !ibmveth_validate_ethtool_cmd(cmd))
+   return -EINVAL;
+   adapter->speed = speed;
+   adapter->duplex = duplex;
 
return 0;
 }
 
-static void netdev_get_drvinfo(struct net_device *dev,
-  struct ethtool_drvinfo *info)
+static int ibmveth_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+   struct ibmveth_adapter *adapter = netdev_priv(dev);
+
+   cmd->base.speed = adapter->speed;
+   cmd->base.duplex = adapter->duplex;
+   cmd->base.port = PORT_OTHER;
+
+   return 0;
+}
+
+static void ibmveth_init_link_settings(struct ibmveth_adapter *adapter)
+{
+   adapter->duplex = DUPLEX_UNKNOWN;
+   adapter->speed = SPEED_UNKNOWN;
+}
+
+static void ibmveth_get_drvinfo(struct net_device *dev,
+   struct ethtool_drvinfo *info)
 {
strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
strlcpy(info->version, ibmveth_driver_version, sizeof(info->version));
@@ -965,12 +1002,13 @@ static void ibmveth_get_ethtool_stats(struct net_device 
*dev,
 }
 
 static const struct ethtool_ops netdev_ethtool_ops = {
-   .get_drvinfo= netdev_get_drvinfo,
+   .get_drvinfo= ibmveth_get_drvinfo,
.get_link   = ethtool_op_get_link,
.get_strings= ibmveth_get_strings,
.get_sset_count = ibmveth_get_sset_count,
.get_ethtool_stats

Re: [PATCH net] net/ibmvnic: Report last valid speed and duplex values to ethtool

2019-06-27 Thread Thomas Falcon




On 6/27/19 12:57 PM, Andrew Lunn wrote:

On Thu, Jun 27, 2019 at 12:09:13PM -0500, Thomas Falcon wrote:

This patch resolves an issue with sensitive bonding modes
that require valid speed and duplex settings to function
properly. Currently, the adapter will report that device
speed and duplex is unknown if the communication link
with firmware is unavailable.

Dumb question. If you cannot communicate with the firmware, isn't the
device FUBAR? So setting the LACP port to disabled is the correct
things to do.

Andrew

Yes, I think that is correct too.  The problem is that the link is only 
down temporarily.  In this case - we are testing with a pseries logical 
partition - the partition is migrated to another server. The driver must 
wait for a signal from the hypervisor to resume operation with the new 
device.  Once it resumes, we see that the device reboots and gets 
correct speed settings, but the port flag (AD_LACP_PORT_ENABLED) is 
still cleared.


Tom

[PATCH net] net/ibmvnic: Report last valid speed and duplex values to ethtool

2019-06-27 Thread Thomas Falcon

This patch resolves an issue with sensitive bonding modes
that require valid speed and duplex settings to function
properly. Currently, the adapter will report that device
speed and duplex is unknown if the communication link
with firmware is unavailable. This decision can break LACP
configurations if the timing is right.

For example, if invalid speeds are reported, the slave
device's link state is set to a transitional "fail" state
and the LACP port is disabled. However, if valid speeds
are reported later but the link state has not been altered,
the LACP port will remain disabled. If the link state then
transitions back to "up" from "fail," it results in a state
such that the slave reports valid speed/duplex and is up,
but the LACP port will remain disabled.

Workaround this by reporting the last recorded speed
and duplex settings unless the device has never been
activated. In that case or when the hypervisor gives
invalid values, continue to report unknown speed or
duplex to ethtool.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 3da6800..7c14e33 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2276,10 +2276,8 @@ static int ibmvnic_get_link_ksettings(struct net_device 
*netdev,
int rc;
 
rc = send_query_phys_parms(adapter);
-   if (rc) {
-   adapter->speed = SPEED_UNKNOWN;
-   adapter->duplex = DUPLEX_UNKNOWN;
-   }
+   if (rc)
+   netdev_warn(netdev, "Device query of current speed and duplex 
settings failed; reported values may be stale.\n");
cmd->base.speed = adapter->speed;
cmd->base.duplex = adapter->duplex;
cmd->base.port = PORT_FIBRE;
@@ -4834,6 +4832,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
dev_set_drvdata(>dev, netdev);
adapter->vdev = dev;
adapter->netdev = netdev;
+   adapter->speed = SPEED_UNKNOWN;
+   adapter->duplex = DUPLEX_UNKNOWN;
 
ether_addr_copy(adapter->mac_addr, mac_addr_p);
ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
-- 
1.8.3.1

[PATCH net 0/3] ibmvnic: Fixes for device reset handling

2019-06-07 Thread Thomas Falcon

This series contains three unrelated fixes to issues seen during
device resets. The first patch fixes an error when the driver requests
to deactivate the link of an uninitialized device, resulting in a 
failure to reset. Next, a patch to fix multicast transmission 
failures seen after a driver reset. The final patch fixes mishandling
of memory allocation failures during device initialization, which
caused a kernel oops.

Thomas Falcon (3):
  ibmvnic: Do not close unopened driver during reset
  ibmvnic: Refresh device multicast list after reset
  ibmvnic: Fix unchecked return codes of memory allocations

 drivers/net/ethernet/ibm/ibmvnic.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

-- 
1.8.3.1

[PATCH net 3/3] ibmvnic: Fix unchecked return codes of memory allocations

2019-06-07 Thread Thomas Falcon

The return values for these memory allocations are unchecked,
which may cause an oops if the driver does not handle them after
a failure. Fix by checking the function's return code.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 9e9f409..3da6800 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -428,9 +428,10 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
if (rx_pool->buff_size != be64_to_cpu(size_array[i])) {
free_long_term_buff(adapter, _pool->long_term_buff);
rx_pool->buff_size = be64_to_cpu(size_array[i]);
-   alloc_long_term_buff(adapter, _pool->long_term_buff,
-rx_pool->size *
-rx_pool->buff_size);
+   rc = alloc_long_term_buff(adapter,
+ _pool->long_term_buff,
+ rx_pool->size *
+ rx_pool->buff_size);
} else {
rc = reset_long_term_buff(adapter,
  _pool->long_term_buff);
@@ -696,9 +697,9 @@ static int init_tx_pools(struct net_device *netdev)
return rc;
}
 
-   init_one_tx_pool(netdev, >tso_pool[i],
-IBMVNIC_TSO_BUFS,
-IBMVNIC_TSO_BUF_SZ);
+   rc = init_one_tx_pool(netdev, >tso_pool[i],
+ IBMVNIC_TSO_BUFS,
+ IBMVNIC_TSO_BUF_SZ);
if (rc) {
release_tx_pools(adapter);
return rc;
-- 
1.8.3.1

[PATCH net 1/3] ibmvnic: Do not close unopened driver during reset

2019-06-07 Thread Thomas Falcon

Check driver state before halting it during a reset. If the driver is
not running, do nothing. Otherwise, a request to deactivate a down link
can cause an error and the reset will fail.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 3da392b..bc2a912 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1745,7 +1745,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 
ibmvnic_cleanup(netdev);
 
-   if (adapter->reset_reason != VNIC_RESET_MOBILITY &&
+   if (reset_state == VNIC_OPEN &&
+   adapter->reset_reason != VNIC_RESET_MOBILITY &&
adapter->reset_reason != VNIC_RESET_FAILOVER) {
rc = __ibmvnic_close(netdev);
if (rc)
-- 
1.8.3.1

[PATCH net 2/3] ibmvnic: Refresh device multicast list after reset

2019-06-07 Thread Thomas Falcon

It was observed that multicast packets were no longer received after
a device reset.  The fix is to resend the current multicast list to
the backing device after recovery.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index bc2a912..9e9f409 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1845,6 +1845,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
return 0;
}
 
+   /* refresh device's multicast list */
+   ibmvnic_set_multi(netdev);
+
/* kick napi */
for (i = 0; i < adapter->req_rx_queues; i++)
napi_schedule(>napi[i]);
-- 
1.8.3.1

[PATCH net 2/2] ibmvnic: Fix non-atomic memory allocation in IRQ context

2018-12-10 Thread Thomas Falcon

ibmvnic_reset allocated new reset work item objects in a non-atomic
context. This can be called from a tasklet, generating the output below.
Allocate work items with the GFP_ATOMIC flag instead.

BUG: sleeping function called from invalid context at mm/slab.h:421
in_atomic(): 1, irqs_disabled(): 1, pid: 93, name: kworker/0:2
INFO: lockdep is turned off.
irq event stamp: 66049
hardirqs last  enabled at (66048): [] 
tasklet_action_common.isra.12+0x78/0x1c0
hardirqs last disabled at (66049): [] 
_raw_spin_lock_irqsave+0x48/0xf0
softirqs last  enabled at (66044): [] 
dev_deactivate_queue.constprop.28+0xc8/0x160
softirqs last disabled at (66045): [] 
call_do_softirq+0x14/0x24
CPU: 0 PID: 93 Comm: kworker/0:2 Kdump: loaded Not tainted 
4.20.0-rc6-1-g1b50a8f03706 #7
Workqueue: events linkwatch_event
Call Trace:
[c003fffe7ae0] [c0bc83e4] dump_stack+0xe8/0x164 (unreliable)
[c003fffe7b30] [c015ba0c] ___might_sleep+0x2dc/0x320
[c003fffe7bb0] [c0391514] kmem_cache_alloc_trace+0x3e4/0x440
[c003fffe7c30] [d5b2309c] ibmvnic_reset+0x16c/0x360 [ibmvnic]
[c003fffe7cc0] [d5b29834] ibmvnic_tasklet+0x1054/0x2010 [ibmvnic]
[c003fffe7e00] [c01224c8] tasklet_action_common.isra.12+0xd8/0x1c0
[c003fffe7e60] [c0bf1238] __do_softirq+0x1a8/0x64c
[c003fffe7f90] [c00306e0] call_do_softirq+0x14/0x24
[c003f3967980] [c001ba50] do_softirq_own_stack+0x60/0xb0
[c003f39679c0] [c01218a8] do_softirq+0xa8/0x100
[c003f39679f0] [c0121a74] __local_bh_enable_ip+0x174/0x180
[c003f3967a60] [c0bf003c] _raw_spin_unlock_bh+0x5c/0x80
[c003f3967a90] [c0a8ac78] 
dev_deactivate_queue.constprop.28+0xc8/0x160
[c003f3967ad0] [c0a8c8b0] dev_deactivate_many+0xd0/0x520
[c003f3967b70] [c0a8cd40] dev_deactivate+0x40/0x60
[c003f3967ba0] [c0a5e0c4] linkwatch_do_dev+0x74/0xd0
[c003f3967bd0] [c0a5e694] __linkwatch_run_queue+0x1a4/0x1f0
[c003f3967c30] [c0a5e728] linkwatch_event+0x48/0x60
[c003f3967c50] [c01444e8] process_one_work+0x238/0x710
[c003f3967d20] [c0144a48] worker_thread+0x88/0x4e0
[c003f3967db0] [c014e3a8] kthread+0x178/0x1c0
[c003f3967e20] [c000bfd0] ret_from_kernel_thread+0x5c/0x6c

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index ffc0cab05b0f..67cc6d9c8fd7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2055,7 +2055,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
}
}
 
-   rwi = kzalloc(sizeof(*rwi), GFP_KERNEL);
+   rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC);
if (!rwi) {
spin_unlock_irqrestore(>rwi_lock, flags);
ibmvnic_close(netdev);
-- 
2.12.3

[PATCH net 1/2] ibmvnic: Convert reset work item mutex to spin lock

2018-12-10 Thread Thomas Falcon

ibmvnic_reset can create and schedule a reset work item from
an IRQ context, so do not use a mutex, which can sleep. Convert
the reset work item mutex to a spin lock. Locking debugger generated
the trace output below.

BUG: sleeping function called from invalid context at kernel/locking/mutex.c:908
in_atomic(): 1, irqs_disabled(): 1, pid: 120, name: kworker/8:1
4 locks held by kworker/8:1/120:
 #0: 17c05720 ((wq_completion)"events"){+.+.}, at: 
process_one_work+0x188/0x710
 #1: ace90706 ((linkwatch_work).work){+.+.}, at: 
process_one_work+0x188/0x710
 #2: 7632871f (rtnl_mutex){+.+.}, at: rtnl_lock+0x30/0x50
 #3: fc36813a (&(>lock)->rlock){..-.}, at: 
ibmvnic_tasklet+0x88/0x2010 [ibmvnic]
irq event stamp: 26293
hardirqs last  enabled at (26292): [] 
tasklet_action_common.isra.12+0x78/0x1c0
hardirqs last disabled at (26293): [] 
_raw_spin_lock_irqsave+0x48/0xf0
softirqs last  enabled at (26288): [] 
dev_deactivate_queue.constprop.28+0xc8/0x160
softirqs last disabled at (26289): [] 
call_do_softirq+0x14/0x24
CPU: 8 PID: 120 Comm: kworker/8:1 Kdump: loaded Not tainted 4.20.0-rc6 #6
Workqueue: events linkwatch_event
Call Trace:
[c003fffa7a50] [c0bc83e4] dump_stack+0xe8/0x164 (unreliable)
[c003fffa7aa0] [c015ba0c] ___might_sleep+0x2dc/0x320
[c003fffa7b20] [c0be960c] __mutex_lock+0x8c/0xb40
[c003fffa7c30] [d6202ac8] ibmvnic_reset+0x78/0x330 [ibmvnic]
[c003fffa7cc0] [d62097f4] ibmvnic_tasklet+0x1054/0x2010 [ibmvnic]
[c003fffa7e00] [c01224c8] tasklet_action_common.isra.12+0xd8/0x1c0
[c003fffa7e60] [c0bf1238] __do_softirq+0x1a8/0x64c
[c003fffa7f90] [c00306e0] call_do_softirq+0x14/0x24
[c003f3f87980] [c001ba50] do_softirq_own_stack+0x60/0xb0
[c003f3f879c0] [c01218a8] do_softirq+0xa8/0x100
[c003f3f879f0] [c0121a74] __local_bh_enable_ip+0x174/0x180
[c003f3f87a60] [c0bf003c] _raw_spin_unlock_bh+0x5c/0x80
[c003f3f87a90] [c0a8ac78] 
dev_deactivate_queue.constprop.28+0xc8/0x160
[c003f3f87ad0] [c0a8c8b0] dev_deactivate_many+0xd0/0x520
[c003f3f87b70] [c0a8cd40] dev_deactivate+0x40/0x60
[c003f3f87ba0] [c0a5e0c4] linkwatch_do_dev+0x74/0xd0
[c003f3f87bd0] [c0a5e694] __linkwatch_run_queue+0x1a4/0x1f0
[c003f3f87c30] [c0a5e728] linkwatch_event+0x48/0x60
[c003f3f87c50] [c01444e8] process_one_work+0x238/0x710
[c003f3f87d20] [c0144a48] worker_thread+0x88/0x4e0
[c003f3f87db0] [c014e3a8] kthread+0x178/0x1c0
[c003f3f87e20] [c000bfd0] ret_from_kernel_thread+0x5c/0x6c

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 16 +---
 drivers/net/ethernet/ibm/ibmvnic.h |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index ed50b8dee44f..ffc0cab05b0f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1939,8 +1939,9 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
 static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
 {
struct ibmvnic_rwi *rwi;
+   unsigned long flags;
 
-   mutex_lock(>rwi_lock);
+   spin_lock_irqsave(>rwi_lock, flags);
 
if (!list_empty(>rwi_list)) {
rwi = list_first_entry(>rwi_list, struct ibmvnic_rwi,
@@ -1950,7 +1951,7 @@ static struct ibmvnic_rwi *get_next_rwi(struct 
ibmvnic_adapter *adapter)
rwi = NULL;
}
 
-   mutex_unlock(>rwi_lock);
+   spin_unlock_irqrestore(>rwi_lock, flags);
return rwi;
 }
 
@@ -2025,6 +2026,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
struct list_head *entry, *tmp_entry;
struct ibmvnic_rwi *rwi, *tmp;
struct net_device *netdev = adapter->netdev;
+   unsigned long flags;
int ret;
 
if (adapter->state == VNIC_REMOVING ||
@@ -2041,13 +2043,13 @@ static int ibmvnic_reset(struct ibmvnic_adapter 
*adapter,
goto err;
}
 
-   mutex_lock(>rwi_lock);
+   spin_lock_irqsave(>rwi_lock, flags);
 
list_for_each(entry, >rwi_list) {
tmp = list_entry(entry, struct ibmvnic_rwi, list);
if (tmp->reset_reason == reason) {
netdev_dbg(netdev, "Skipping matching reset\n");
-   mutex_unlock(>rwi_lock);
+   spin_unlock_irqrestore(>rwi_lock, flags);
ret = EBUSY;
goto err;
}
@@ -2055,7 +2057,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 
rwi = kzalloc(sizeof(*rwi), GFP_KERNEL);
if (!rwi) {
-   mutex_unlock(>rwi_lock);
+   spin_unlock_irqrestore(>rwi_loc

[PATCH net 0/2] net/ibmvnic: Fix reset work item locking bugs

2018-12-10 Thread Thomas Falcon

This patch set fixes issues with scheduling reset work items in
a tasklet context. Since ibmvnic_reset can called in an interrupt,
it should not use a mutex or allocate memory non-atomically.

Thomas Falcon (2):
  ibmvnic: Convert reset work item mutex to spin lock
  ibmvnic: Fix non-atomic memory allocation in IRQ context

 drivers/net/ethernet/ibm/ibmvnic.c | 18 ++
 drivers/net/ethernet/ibm/ibmvnic.h |  2 +-
 2 files changed, 11 insertions(+), 9 deletions(-)

-- 
2.12.3

[PATCH net 0/2] ibmvnic: Fix queue and buffer accounting errors

2018-11-21 Thread Thomas Falcon

This series includes two small fixes. The first resolves a typo bug
in the code to clean up unused RX buffers during device queue removal.
The second ensures that device queue memory is updated to reflect new
supported queue ring sizes after migration to other backing hardware.


Thomas Falcon (2):
  ibmvnic: Fix RX queue buffer cleanup
  ibmvnic: Update driver queues after change in ring size support

 drivers/net/ethernet/ibm/ibmvnic.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

-- 
1.8.3.1

[PATCH net 2/2] ibmvnic: Update driver queues after change in ring size support

2018-11-21 Thread Thomas Falcon

During device reset, queue memory is not being updated to accommodate
changes in ring buffer sizes supported by backing hardware. Track
any differences in ring buffer sizes following the reset and update
queue memory when possible.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 066897a..c0203a0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1737,6 +1737,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
struct ibmvnic_rwi *rwi, u32 reset_state)
 {
u64 old_num_rx_queues, old_num_tx_queues;
+   u64 old_num_rx_slots, old_num_tx_slots;
struct net_device *netdev = adapter->netdev;
int i, rc;
 
@@ -1748,6 +1749,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 
old_num_rx_queues = adapter->req_rx_queues;
old_num_tx_queues = adapter->req_tx_queues;
+   old_num_rx_slots = adapter->req_rx_add_entries_per_subcrq;
+   old_num_tx_slots = adapter->req_tx_entries_per_subcrq;
 
ibmvnic_cleanup(netdev);
 
@@ -1810,7 +1813,11 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (rc)
return rc;
} else if (adapter->req_rx_queues != old_num_rx_queues ||
-  adapter->req_tx_queues != old_num_tx_queues) {
+  adapter->req_tx_queues != old_num_tx_queues ||
+  adapter->req_rx_add_entries_per_subcrq !=
+   old_num_rx_slots ||
+  adapter->req_tx_entries_per_subcrq !=
+   old_num_tx_slots) {
release_rx_pools(adapter);
release_tx_pools(adapter);
release_napi(adapter);
-- 
1.8.3.1

[PATCH net 1/2] ibmvnic: Fix RX queue buffer cleanup

2018-11-21 Thread Thomas Falcon

The wrong index is used when cleaning up RX buffer objects during release
of RX queues. Update to use the correct index counter.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 27a6df3..066897a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -485,8 +485,8 @@ static void release_rx_pools(struct ibmvnic_adapter 
*adapter)
 
for (j = 0; j < rx_pool->size; j++) {
if (rx_pool->rx_buff[j].skb) {
-   dev_kfree_skb_any(rx_pool->rx_buff[i].skb);
-   rx_pool->rx_buff[i].skb = NULL;
+   dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
+   rx_pool->rx_buff[j].skb = NULL;
}
}
 
-- 
1.8.3.1

[PATCH net-next 7/8] ibmvnic: Set resetting state at earliest possible point

2018-05-23 Thread Thomas Falcon

Set device resetting state at the earliest possible point: as soon as a
reset is successfully scheduled. The reset state is toggled off when
all resets have been processed to completion.

Signed-off-by: Thomas Falcon <tlfal...@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index f26e1f8..ee51deb 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1919,7 +1919,6 @@ static void __ibmvnic_reset(struct work_struct *work)
netdev = adapter->netdev;
 
mutex_lock(>reset_lock);
-   adapter->resetting = true;
reset_state = adapter->state;
 
rwi = get_next_rwi(adapter);
@@ -1994,7 +1993,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
rwi->reset_reason = reason;
list_add_tail(>list, >rwi_list);
mutex_unlock(>rwi_lock);
-
+   adapter->resetting = true;
netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
schedule_work(>ibmvnic_reset);
 
-- 
2.7.5

[PATCH net-next 8/8] ibmvnic: Introduce hard reset recovery

2018-05-23 Thread Thomas Falcon

Introduce a recovery hard reset to handle reset failure as a result of
change of device context following a transport event, such as a
backing device failover or partition migration. These operations reset
the device context to its initial state. If this occurs during a reset,
any initialization commands are likely to fail with an invalid state
error as backing device firmware requests reinitialization.

When this happens, make one more attempt by performing a hard reset,
which frees any resources currently allocated and performs device
initialization. If a transport event occurs during a device reset, a
flag is set which will trigger a new hard reset following the
completionof the current reset event.

Signed-off-by: Thomas Falcon <tlfal...@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 101 +++--
 drivers/net/ethernet/ibm/ibmvnic.h |   1 +
 2 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index ee51deb..09f8e6b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1878,6 +1878,85 @@ static int do_reset(struct ibmvnic_adapter *adapter,
return 0;
 }
 
+static int do_hard_reset(struct ibmvnic_adapter *adapter,
+struct ibmvnic_rwi *rwi, u32 reset_state)
+{
+   struct net_device *netdev = adapter->netdev;
+   int rc;
+
+   netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n",
+  rwi->reset_reason);
+
+   netif_carrier_off(netdev);
+   adapter->reset_reason = rwi->reset_reason;
+
+   ibmvnic_cleanup(netdev);
+   release_resources(adapter);
+   release_sub_crqs(adapter, 0);
+   release_crq_queue(adapter);
+
+   /* remove the closed state so when we call open it appears
+* we are coming from the probed state.
+*/
+   adapter->state = VNIC_PROBED;
+
+   rc = init_crq_queue(adapter);
+   if (rc) {
+   netdev_err(adapter->netdev,
+  "Couldn't initialize crq. rc=%d\n", rc);
+   return rc;
+   }
+
+   rc = ibmvnic_init(adapter);
+   if (rc)
+   return rc;
+
+   /* If the adapter was in PROBE state prior to the reset,
+* exit here.
+*/
+   if (reset_state == VNIC_PROBED)
+   return 0;
+
+   rc = ibmvnic_login(netdev);
+   if (rc) {
+   adapter->state = VNIC_PROBED;
+   return 0;
+   }
+   /* netif_set_real_num_xx_queues needs to take rtnl lock here
+* unless wait_for_reset is set, in which case the rtnl lock
+* has already been taken before initializing the reset
+*/
+   if (!adapter->wait_for_reset) {
+   rtnl_lock();
+   rc = init_resources(adapter);
+   rtnl_unlock();
+   } else {
+   rc = init_resources(adapter);
+   }
+   if (rc)
+   return rc;
+
+   ibmvnic_disable_irqs(adapter);
+   adapter->state = VNIC_CLOSED;
+
+   if (reset_state == VNIC_CLOSED)
+   return 0;
+
+   rc = __ibmvnic_open(netdev);
+   if (rc) {
+   if (list_empty(>rwi_list))
+   adapter->state = VNIC_CLOSED;
+   else
+   adapter->state = reset_state;
+
+   return 0;
+   }
+
+   netif_carrier_on(netdev);
+
+   return 0;
+}
+
 static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
 {
struct ibmvnic_rwi *rwi;
@@ -1923,9 +2002,15 @@ static void __ibmvnic_reset(struct work_struct *work)
 
rwi = get_next_rwi(adapter);
while (rwi) {
-   rc = do_reset(adapter, rwi, reset_state);
+   if (adapter->force_reset_recovery) {
+   adapter->force_reset_recovery = false;
+   rc = do_hard_reset(adapter, rwi, reset_state);
+   } else {
+   rc = do_reset(adapter, rwi, reset_state);
+   }
kfree(rwi);
-   if (rc && rc != IBMVNIC_INIT_FAILED)
+   if (rc && rc != IBMVNIC_INIT_FAILED &&
+   !adapter->force_reset_recovery)
break;
 
rwi = get_next_rwi(adapter);
@@ -1951,9 +2036,9 @@ static void __ibmvnic_reset(struct work_struct *work)
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 enum ibmvnic_reset_reason reason)
 {
+   struct list_head *entry, *tmp_entry;
struct ibmvnic_rwi *rwi, *tmp;
struct net_device *netdev = adapter->netdev;
-   struct list_head *entry;
int ret;
 
if (adapter->state == VNIC_REMOVING ||
@@ -1989,7 +2074,13 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,

[PATCH net-next 6/8] ibmvnic: Create separate initialization routine for resets

2018-05-23 Thread Thomas Falcon

Instead of having one initialization routine for all cases, create
a separate, simpler function for standard initialization, such as during
device probe. Use the original initialization function to handle
device reset scenarios. The goal of this patch is to avoid having
a single, cluttered init function to handle all possible
scenarios.

Signed-off-by: Thomas Falcon <tlfal...@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 48 --
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index b1bbd5b..f26e1f8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -116,6 +116,7 @@ static void send_cap_queries(struct ibmvnic_adapter 
*adapter);
 static int init_sub_crqs(struct ibmvnic_adapter *);
 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
 static int ibmvnic_init(struct ibmvnic_adapter *);
+static int ibmvnic_reset_init(struct ibmvnic_adapter *);
 static void release_crq_queue(struct ibmvnic_adapter *);
 static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p);
 static int init_crq_queue(struct ibmvnic_adapter *adapter);
@@ -1807,7 +1808,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
return rc;
}
 
-   rc = ibmvnic_init(adapter);
+   rc = ibmvnic_reset_init(adapter);
if (rc)
return IBMVNIC_INIT_FAILED;
 
@@ -4571,7 +4572,7 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter)
return retrc;
 }
 
-static int ibmvnic_init(struct ibmvnic_adapter *adapter)
+static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
 {
struct device *dev = >vdev->dev;
unsigned long timeout = msecs_to_jiffies(3);
@@ -4630,6 +4631,49 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
return rc;
 }
 
+static int ibmvnic_init(struct ibmvnic_adapter *adapter)
+{
+   struct device *dev = >vdev->dev;
+   unsigned long timeout = msecs_to_jiffies(3);
+   int rc;
+
+   adapter->from_passive_init = false;
+
+   init_completion(>init_done);
+   adapter->init_done_rc = 0;
+   ibmvnic_send_crq_init(adapter);
+   if (!wait_for_completion_timeout(>init_done, timeout)) {
+   dev_err(dev, "Initialization sequence timed out\n");
+   return -1;
+   }
+
+   if (adapter->init_done_rc) {
+   release_crq_queue(adapter);
+   return adapter->init_done_rc;
+   }
+
+   if (adapter->from_passive_init) {
+   adapter->state = VNIC_OPEN;
+   adapter->from_passive_init = false;
+   return -1;
+   }
+
+   rc = init_sub_crqs(adapter);
+   if (rc) {
+   dev_err(dev, "Initialization of sub crqs failed\n");
+   release_crq_queue(adapter);
+   return rc;
+   }
+
+   rc = init_sub_crq_irqs(adapter);
+   if (rc) {
+   dev_err(dev, "Failed to initialize sub crq irqs\n");
+   release_crq_queue(adapter);
+   }
+
+   return rc;
+}
+
 static struct device_attribute dev_attr_failover;
 
 static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
-- 
2.7.5

[PATCH net-next 5/8] ibmvnic: Handle error case when setting link state

2018-05-23 Thread Thomas Falcon

If setting the link state is not successful, print a warning
with the resulting return code and return it to be handled
by the caller.

Signed-off-by: Thomas Falcon <tlfal...@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index f1f744e..b1bbd5b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -929,6 +929,10 @@ static int set_link_state(struct ibmvnic_adapter *adapter, 
u8 link_state)
/* Partuial success, delay and re-send */
mdelay(1000);
resend = true;
+   } else if (adapter->init_done_rc) {
+   netdev_warn(netdev, "Unable to set link state, rc=%d\n",
+   adapter->init_done_rc);
+   return adapter->init_done_rc;
}
} while (resend);
 
-- 
2.7.5

[PATCH net-next 4/8] ibmvnic: Return error code if init interrupted by transport event

2018-05-23 Thread Thomas Falcon

If device init is interrupted by a failover, set the init return
code so that it can be checked and handled appropriately by the
init routine.

Signed-off-by: Thomas Falcon <tlfal...@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 7083519..f1f744e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4249,7 +4249,10 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
dev_info(dev, "Partner initialized\n");
adapter->from_passive_init = true;
adapter->failover_pending = false;
-   complete(>init_done);
+   if (!completion_done(>init_done)) {
+   complete(>init_done);
+   adapter->init_done_rc = -EIO;
+   }
ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
break;
case IBMVNIC_CRQ_INIT_COMPLETE:
-- 
2.7.5

[PATCH net-next 2/8] ibmvnic: Introduce active CRQ state

2018-05-23 Thread Thomas Falcon

Introduce an "active" state for a IBM vNIC Command-Response Queue. A CRQ
is considered active once it has initialized or linked with its partner by
sending an initialization request and getting a successful response back
from the management partition.  Until this has happened, do not allow CRQ
commands to be sent other than the initialization request.

This change will avoid a protocol error in case of a device transport
event occurring during a initialization. When the driver receives a
transport event notification indicating that the backing hardware
has changed and needs reinitialization, any further commands other
than the initialization handshake with the VIOS management partition
will result in an invalid state error. Instead of sending a command
that will be returned with an error, print a warning and return an
error that will be handled by the caller.

Signed-off-by: Thomas Falcon <tlfal...@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 10 ++
 drivers/net/ethernet/ibm/ibmvnic.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index eabc1e4..e6a081c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3147,6 +3147,12 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter 
*adapter,
   (unsigned long int)cpu_to_be64(u64_crq[0]),
   (unsigned long int)cpu_to_be64(u64_crq[1]));
 
+   if (!adapter->crq.active &&
+   crq->generic.first != IBMVNIC_CRQ_INIT_CMD) {
+   dev_warn(dev, "Invalid request detected while CRQ is inactive, 
possible device state change during reset\n");
+   return -EINVAL;
+   }
+
/* Make sure the hypervisor sees the complete request */
mb();
 
@@ -4225,6 +4231,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
break;
case IBMVNIC_CRQ_INIT_COMPLETE:
dev_info(dev, "Partner initialization complete\n");
+   adapter->crq.active = true;
send_version_xchg(adapter);
break;
default:
@@ -4233,6 +4240,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
return;
case IBMVNIC_CRQ_XPORT_EVENT:
netif_carrier_off(netdev);
+   adapter->crq.active = false;
if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
dev_info(dev, "Migrated, re-enabling adapter\n");
ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
@@ -4420,6 +4428,7 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter 
*adapter)
/* Clean out the queue */
memset(crq->msgs, 0, PAGE_SIZE);
crq->cur = 0;
+   crq->active = false;
 
/* And re-open it again */
rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
@@ -4454,6 +4463,7 @@ static void release_crq_queue(struct ibmvnic_adapter 
*adapter)
 DMA_BIDIRECTIONAL);
free_page((unsigned long)crq->msgs);
crq->msgs = NULL;
+   crq->active = false;
 }
 
 static int init_crq_queue(struct ibmvnic_adapter *adapter)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 22391e8..edfc312 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -865,6 +865,7 @@ struct ibmvnic_crq_queue {
int size, cur;
dma_addr_t msg_token;
spinlock_t lock;
+   bool active;
 };
 
 union sub_crq {
-- 
2.7.5

[PATCH net-next 3/8] ibmvnic: Check CRQ command return codes

2018-05-23 Thread Thomas Falcon

Check whether CRQ command is successful before awaiting a response
from the management partition. If the command was not successful, the
driver may hang waiting for a response that will never come.

Signed-off-by: Thomas Falcon <tlfal...@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 51 +++---
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index e6a081c..7083519 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -109,8 +109,8 @@ static union sub_crq *ibmvnic_next_scrq(struct 
ibmvnic_adapter *,
struct ibmvnic_sub_crq_queue *);
 static int ibmvnic_poll(struct napi_struct *napi, int data);
 static void send_map_query(struct ibmvnic_adapter *adapter);
-static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
-static void send_request_unmap(struct ibmvnic_adapter *, u8);
+static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
+static int send_request_unmap(struct ibmvnic_adapter *, u8);
 static int send_login(struct ibmvnic_adapter *adapter);
 static void send_cap_queries(struct ibmvnic_adapter *adapter);
 static int init_sub_crqs(struct ibmvnic_adapter *);
@@ -172,6 +172,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
struct ibmvnic_long_term_buff *ltb, int size)
 {
struct device *dev = >vdev->dev;
+   int rc;
 
ltb->size = size;
ltb->buff = dma_alloc_coherent(dev, ltb->size, >addr,
@@ -185,8 +186,12 @@ static int alloc_long_term_buff(struct ibmvnic_adapter 
*adapter,
adapter->map_id++;
 
init_completion(>fw_done);
-   send_request_map(adapter, ltb->addr,
-ltb->size, ltb->map_id);
+   rc = send_request_map(adapter, ltb->addr,
+ ltb->size, ltb->map_id);
+   if (rc) {
+   dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+   return rc;
+   }
wait_for_completion(>fw_done);
 
if (adapter->fw_done_rc) {
@@ -215,10 +220,14 @@ static void free_long_term_buff(struct ibmvnic_adapter 
*adapter,
 static int reset_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb)
 {
+   int rc;
+
memset(ltb->buff, 0, ltb->size);
 
init_completion(>fw_done);
-   send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
+   rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
+   if (rc)
+   return rc;
wait_for_completion(>fw_done);
 
if (adapter->fw_done_rc) {
@@ -952,6 +961,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
struct device *dev = >vdev->dev;
union ibmvnic_crq crq;
int len = 0;
+   int rc;
 
if (adapter->vpd->buff)
len = adapter->vpd->len;
@@ -959,7 +969,9 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
init_completion(>fw_done);
crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
crq.get_vpd_size.cmd = GET_VPD_SIZE;
-   ibmvnic_send_crq(adapter, );
+   rc = ibmvnic_send_crq(adapter, );
+   if (rc)
+   return rc;
wait_for_completion(>fw_done);
 
if (!adapter->vpd->len)
@@ -992,7 +1004,12 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter 
*adapter)
crq.get_vpd.cmd = GET_VPD;
crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len);
-   ibmvnic_send_crq(adapter, );
+   rc = ibmvnic_send_crq(adapter, );
+   if (rc) {
+   kfree(adapter->vpd->buff);
+   adapter->vpd->buff = NULL;
+   return rc;
+   }
wait_for_completion(>fw_done);
 
return 0;
@@ -1691,6 +1708,7 @@ static int __ibmvnic_set_mac(struct net_device *netdev, 
struct sockaddr *p)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
struct sockaddr *addr = p;
union ibmvnic_crq crq;
+   int rc;
 
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
@@ -1701,7 +1719,9 @@ static int __ibmvnic_set_mac(struct net_device *netdev, 
struct sockaddr *p)
ether_addr_copy(_mac_addr.mac_addr[0], addr->sa_data);
 
init_completion(>fw_done);
-   ibmvnic_send_crq(adapter, );
+   rc = ibmvnic_send_crq(adapter, );
+   if (rc)
+   return rc;
wait_for_completion(>fw_done);
/* netdev->dev_addr is changed in handle_change_mac_rsp function */
return adapter->fw_done_rc ? -EIO : 0;
@@ -2365,6 +2385,7 @@

1 2 >

1 - 100 of 168 matches

Mail list logo