Re: [PATCH net v2] xen-netback: fix race condition on XenBus disconnect

2017-03-06 Thread Igor Druzhinin
On 06/03/17 08:58, Paul Durrant wrote:
>> -Original Message-
>> From: Igor Druzhinin [mailto:igor.druzhi...@citrix.com]
>> Sent: 03 March 2017 20:23
>> To: netdev@vger.kernel.org; xen-de...@lists.xenproject.org
>> Cc: Paul Durrant <paul.durr...@citrix.com>; jgr...@suse.com; Wei Liu
>> <wei.l...@citrix.com>; Igor Druzhinin <igor.druzhi...@citrix.com>
>> Subject: [PATCH net v2] xen-netback: fix race condition on XenBus
>> disconnect
>>
>> In some cases during XenBus disconnect event handling and subsequent
>> queue resource release there may be some TX handlers active on
>> other processors. Use RCU in order to synchronize with them.
>>
>> Signed-off-by: Igor Druzhinin <igor.druzhi...@citrix.com>
>> ---
>> v2:
>>  * Add protection for xenvif_get_ethtool_stats
>>  * Additional comments and fixes
>> ---
>>  drivers/net/xen-netback/interface.c | 29 ++---
>>  drivers/net/xen-netback/netback.c   |  2 +-
>>  drivers/net/xen-netback/xenbus.c| 20 ++--
>>  3 files changed, 33 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-
>> netback/interface.c
>> index a2d32676..266b7cd 100644
>> --- a/drivers/net/xen-netback/interface.c
>> +++ b/drivers/net/xen-netback/interface.c
>> @@ -164,13 +164,17 @@ static int xenvif_start_xmit(struct sk_buff *skb,
>> struct net_device *dev)
>>  {
>>  struct xenvif *vif = netdev_priv(dev);
>>  struct xenvif_queue *queue = NULL;
>> -unsigned int num_queues = vif->num_queues;
>> +unsigned int num_queues;
>>  u16 index;
>>  struct xenvif_rx_cb *cb;
>>
>>  BUG_ON(skb->dev != dev);
>>
>> -/* Drop the packet if queues are not set up */
>> +/* Drop the packet if queues are not set up.
>> + * This handler should be called inside an RCU read section
>> + * so we don't need to enter it here explicitly.
>> + */
>> +num_queues = rcu_dereference(vif)->num_queues;
>>  if (num_queues < 1)
>>  goto drop;
>>
>> @@ -221,18 +225,21 @@ static struct net_device_stats
>> *xenvif_get_stats(struct net_device *dev)
>>  {
>>  struct xenvif *vif = netdev_priv(dev);
>>  struct xenvif_queue *queue = NULL;
>> +unsigned int num_queues;
>>  u64 rx_bytes = 0;
>>  u64 rx_packets = 0;
>>  u64 tx_bytes = 0;
>>  u64 tx_packets = 0;
>>  unsigned int index;
>>
>> -spin_lock(>lock);
>> -if (vif->queues == NULL)
>> +rcu_read_lock();
>> +
>> +num_queues = rcu_dereference(vif)->num_queues;
>> +if (num_queues < 1)
>>  goto out;
> 
> Is this if clause worth it? All it does is jump over the for loop, which 
> would not be executed anyway, since the initial test (0 < 0) would fail.

Probably not needed here, but it does make it consistent with other
similar checks across the file. Just looks more descriptive.

> 
>>
>>  /* Aggregate tx and rx stats from each queue */
>> -for (index = 0; index < vif->num_queues; ++index) {
>> +for (index = 0; index < num_queues; ++index) {
>>  queue = >queues[index];
>>  rx_bytes += queue->stats.rx_bytes;
>>  rx_packets += queue->stats.rx_packets;
>> @@ -241,7 +248,7 @@ static struct net_device_stats
>> *xenvif_get_stats(struct net_device *dev)
>>  }
>>
>>  out:
>> -spin_unlock(>lock);
>> +rcu_read_unlock();
>>
>>  vif->dev->stats.rx_bytes = rx_bytes;
>>  vif->dev->stats.rx_packets = rx_packets;
>> @@ -377,10 +384,16 @@ static void xenvif_get_ethtool_stats(struct
>> net_device *dev,
>>   struct ethtool_stats *stats, u64 * data)
>>  {
>>  struct xenvif *vif = netdev_priv(dev);
>> -unsigned int num_queues = vif->num_queues;
>> +unsigned int num_queues;
>>  int i;
>>  unsigned int queue_index;
>>
>> +rcu_read_lock();
>> +
>> +num_queues = rcu_dereference(vif)->num_queues;
>> +if (num_queues < 1)
>> +goto out;
>> +
> 
> You have introduced a semantic change with the above if clause. The 
> xenvif_stats array was previously zeroed if num_queues < 1. It appears that 
> ethtool does actually allocate a zeroed array to pass in here, but I wonder 
> whether it is still safer to have this function zero it anyway. 

RE: [PATCH net v2] xen-netback: fix race condition on XenBus disconnect

2017-03-06 Thread Paul Durrant
> -Original Message-
> From: Igor Druzhinin [mailto:igor.druzhi...@citrix.com]
> Sent: 03 March 2017 20:23
> To: netdev@vger.kernel.org; xen-de...@lists.xenproject.org
> Cc: Paul Durrant <paul.durr...@citrix.com>; jgr...@suse.com; Wei Liu
> <wei.l...@citrix.com>; Igor Druzhinin <igor.druzhi...@citrix.com>
> Subject: [PATCH net v2] xen-netback: fix race condition on XenBus
> disconnect
> 
> In some cases during XenBus disconnect event handling and subsequent
> queue resource release there may be some TX handlers active on
> other processors. Use RCU in order to synchronize with them.
> 
> Signed-off-by: Igor Druzhinin <igor.druzhi...@citrix.com>
> ---
> v2:
>  * Add protection for xenvif_get_ethtool_stats
>  * Additional comments and fixes
> ---
>  drivers/net/xen-netback/interface.c | 29 ++---
>  drivers/net/xen-netback/netback.c   |  2 +-
>  drivers/net/xen-netback/xenbus.c| 20 ++--
>  3 files changed, 33 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-
> netback/interface.c
> index a2d32676..266b7cd 100644
> --- a/drivers/net/xen-netback/interface.c
> +++ b/drivers/net/xen-netback/interface.c
> @@ -164,13 +164,17 @@ static int xenvif_start_xmit(struct sk_buff *skb,
> struct net_device *dev)
>  {
>   struct xenvif *vif = netdev_priv(dev);
>   struct xenvif_queue *queue = NULL;
> - unsigned int num_queues = vif->num_queues;
> + unsigned int num_queues;
>   u16 index;
>   struct xenvif_rx_cb *cb;
> 
>   BUG_ON(skb->dev != dev);
> 
> - /* Drop the packet if queues are not set up */
> + /* Drop the packet if queues are not set up.
> +  * This handler should be called inside an RCU read section
> +  * so we don't need to enter it here explicitly.
> +  */
> + num_queues = rcu_dereference(vif)->num_queues;
>   if (num_queues < 1)
>   goto drop;
> 
> @@ -221,18 +225,21 @@ static struct net_device_stats
> *xenvif_get_stats(struct net_device *dev)
>  {
>   struct xenvif *vif = netdev_priv(dev);
>   struct xenvif_queue *queue = NULL;
> + unsigned int num_queues;
>   u64 rx_bytes = 0;
>   u64 rx_packets = 0;
>   u64 tx_bytes = 0;
>   u64 tx_packets = 0;
>   unsigned int index;
> 
> - spin_lock(>lock);
> - if (vif->queues == NULL)
> + rcu_read_lock();
> +
> + num_queues = rcu_dereference(vif)->num_queues;
> + if (num_queues < 1)
>   goto out;

Is this if clause worth it? All it does is jump over the for loop, which would 
not be executed anyway, since the initial test (0 < 0) would fail.

> 
>   /* Aggregate tx and rx stats from each queue */
> - for (index = 0; index < vif->num_queues; ++index) {
> + for (index = 0; index < num_queues; ++index) {
>   queue = >queues[index];
>   rx_bytes += queue->stats.rx_bytes;
>   rx_packets += queue->stats.rx_packets;
> @@ -241,7 +248,7 @@ static struct net_device_stats
> *xenvif_get_stats(struct net_device *dev)
>   }
> 
>  out:
> - spin_unlock(>lock);
> + rcu_read_unlock();
> 
>   vif->dev->stats.rx_bytes = rx_bytes;
>   vif->dev->stats.rx_packets = rx_packets;
> @@ -377,10 +384,16 @@ static void xenvif_get_ethtool_stats(struct
> net_device *dev,
>struct ethtool_stats *stats, u64 * data)
>  {
>   struct xenvif *vif = netdev_priv(dev);
> - unsigned int num_queues = vif->num_queues;
> + unsigned int num_queues;
>   int i;
>   unsigned int queue_index;
> 
> + rcu_read_lock();
> +
> + num_queues = rcu_dereference(vif)->num_queues;
> + if (num_queues < 1)
> + goto out;
> +

You have introduced a semantic change with the above if clause. The 
xenvif_stats array was previously zeroed if num_queues < 1. It appears that 
ethtool does actually allocate a zeroed array to pass in here, but I wonder 
whether it is still safer to have this function zero it anyway. 

>   for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
>   unsigned long accum = 0;
>   for (queue_index = 0; queue_index < num_queues;
> ++queue_index) {
> @@ -389,6 +402,8 @@ static void xenvif_get_ethtool_stats(struct
> net_device *dev,
>   }
>   data[i] = accum;
>   }
> +out:
> + rcu_read_unlock();
>  }
> 
>  static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 *
> data)
> diff --git a/drivers/net/xen-netback/netback.c b/

[PATCH net v2] xen-netback: fix race condition on XenBus disconnect

2017-03-03 Thread Igor Druzhinin
In some cases during XenBus disconnect event handling and subsequent
queue resource release there may be some TX handlers active on
other processors. Use RCU in order to synchronize with them.

Signed-off-by: Igor Druzhinin 
---
v2:
 * Add protection for xenvif_get_ethtool_stats
 * Additional comments and fixes
---
 drivers/net/xen-netback/interface.c | 29 ++---
 drivers/net/xen-netback/netback.c   |  2 +-
 drivers/net/xen-netback/xenbus.c| 20 ++--
 3 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/drivers/net/xen-netback/interface.c 
b/drivers/net/xen-netback/interface.c
index a2d32676..266b7cd 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -164,13 +164,17 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct 
net_device *dev)
 {
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
-   unsigned int num_queues = vif->num_queues;
+   unsigned int num_queues;
u16 index;
struct xenvif_rx_cb *cb;
 
BUG_ON(skb->dev != dev);
 
-   /* Drop the packet if queues are not set up */
+   /* Drop the packet if queues are not set up.
+* This handler should be called inside an RCU read section
+* so we don't need to enter it here explicitly.
+*/
+   num_queues = rcu_dereference(vif)->num_queues;
if (num_queues < 1)
goto drop;
 
@@ -221,18 +225,21 @@ static struct net_device_stats *xenvif_get_stats(struct 
net_device *dev)
 {
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
+   unsigned int num_queues;
u64 rx_bytes = 0;
u64 rx_packets = 0;
u64 tx_bytes = 0;
u64 tx_packets = 0;
unsigned int index;
 
-   spin_lock(>lock);
-   if (vif->queues == NULL)
+   rcu_read_lock();
+
+   num_queues = rcu_dereference(vif)->num_queues;
+   if (num_queues < 1)
goto out;
 
/* Aggregate tx and rx stats from each queue */
-   for (index = 0; index < vif->num_queues; ++index) {
+   for (index = 0; index < num_queues; ++index) {
queue = >queues[index];
rx_bytes += queue->stats.rx_bytes;
rx_packets += queue->stats.rx_packets;
@@ -241,7 +248,7 @@ static struct net_device_stats *xenvif_get_stats(struct 
net_device *dev)
}
 
 out:
-   spin_unlock(>lock);
+   rcu_read_unlock();
 
vif->dev->stats.rx_bytes = rx_bytes;
vif->dev->stats.rx_packets = rx_packets;
@@ -377,10 +384,16 @@ static void xenvif_get_ethtool_stats(struct net_device 
*dev,
 struct ethtool_stats *stats, u64 * data)
 {
struct xenvif *vif = netdev_priv(dev);
-   unsigned int num_queues = vif->num_queues;
+   unsigned int num_queues;
int i;
unsigned int queue_index;
 
+   rcu_read_lock();
+
+   num_queues = rcu_dereference(vif)->num_queues;
+   if (num_queues < 1)
+   goto out;
+
for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
unsigned long accum = 0;
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
@@ -389,6 +402,8 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
}
data[i] = accum;
}
+out:
+   rcu_read_unlock();
 }
 
 static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * 
data)
diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index f9bcf4a..62fa74d 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -214,7 +214,7 @@ static void xenvif_fatal_tx_err(struct xenvif *vif)
netdev_err(vif->dev, "fatal error; disabling device\n");
vif->disabled = true;
/* Disable the vif from queue 0's kthread */
-   if (vif->queues)
+   if (vif->num_queues > 0)
xenvif_kick_thread(>queues[0]);
 }
 
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d2d7cd9..a56d3ea 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -495,26 +495,26 @@ static void backend_disconnect(struct backend_info *be)
struct xenvif *vif = be->vif;
 
if (vif) {
+   unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
-   struct xenvif_queue *queues;
 
xen_unregister_watchers(vif);
 #ifdef CONFIG_DEBUG_FS
xenvif_debugfs_delif(vif);
 #endif /* CONFIG_DEBUG_FS */
xenvif_disconnect_data(vif);
-   for (queue_index = 0;
-queue_index < vif->num_queues;
-++queue_index)
-   xenvif_deinit_queue(>queues[queue_index]);
 
-   spin_lock(>lock);
-