Re: [PATCH v3] tuntap: add flow control to support back pressure

2014-04-14 Thread Steven Galgano
On 04/14/2014 09:31 AM, Michael S. Tsirkin wrote:
> On Mon, Apr 14, 2014 at 09:21:47AM -0400, Steven Galgano wrote:
>> Added optional per queue flow control support using IFF_FLOW_CONTROL. When 
>> the 
>> IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue flag to 
>> indicate that the queue should be stopped using netif_tx_stop_queue(), 
>> rather 
>> than discarding frames once full. After reading a frame from the respective 
>> stopped queue, a netif_tx_wake_queue() is issued to signal resource 
>> availability.
>>
>> The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This 
>> provides 
>> the flexibility to enable flow control on all, none or some queues when 
>> using 
>> IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL will apply 
>> to 
>> the single queue. No changes were made to the default drop frame policy.
>>
>> This change adds support for back pressure use cases.
>>
>> Reported-by: Brian Adamson 
>> Tested-by: Joseph Giovatto 
>> Signed-off-by: Steven Galgano 
>> ---
>> Version 3 patch reformatted commit message to be 80 columns max width. 
>>  Corrected Tested-By email address.
>> Version 2 patch added support for individual queues when applying flow
>> control instead of using 
>> netif_tx_stop_all_queues()/netif_tx_wake_all_queues().
> 
> Any plans to try and address the more material comments on v2?
> 
Yes. My apologies, I missed your response.

>>  drivers/net/tun.c   | 32 
>>  include/uapi/linux/if_tun.h |  2 ++
>>  2 files changed, 30 insertions(+), 4 deletions(-)
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index ee328ba..3d09f5a 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -137,7 +137,7 @@ struct tun_file {
>>  struct tun_struct __rcu *tun;
>>  struct net *net;
>>  struct fasync_struct *fasync;
>> -/* only used for fasnyc */
>> +/* used for fasnyc and flow control */
>>  unsigned int flags;
>>  union {
>>  u16 queue_index;
>> @@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
>> struct net_device *dev)
>>   * number of queues.
>>   */
>>  if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
>> -  >= dev->tx_queue_len)
>> -goto drop;
>> +>= dev->tx_queue_len) {
>> +if (tfile->flags & TUN_FLOW_CONTROL) {
>> +/* Resources unavailable stop queue */
>> +netif_tx_stop_queue(netdev_get_tx_queue(dev, txq));
>> +
>> +/* We won't see all dropped packets individually, so
>> + * over run error is more appropriate.
>> + */
>> +dev->stats.tx_fifo_errors++;
>> +} else {
>> +goto drop;
>> +}
>> +}
>>  
>>  if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
>>  goto drop;
>> @@ -1333,6 +1344,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, 
>> struct tun_file *tfile,
>>  DECLARE_WAITQUEUE(wait, current);
>>  struct sk_buff *skb;
>>  ssize_t ret = 0;
>> +struct netdev_queue *ntxq;
>>  
>>  tun_debug(KERN_INFO, tun, "tun_do_read\n");
>>  
>> @@ -1362,6 +1374,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, 
>> struct tun_file *tfile,
>>  continue;
>>  }
>>  
>> +ntxq = netdev_get_tx_queue(tun->dev, tfile->queue_index);
>> +
>> +if (tfile->flags & TUN_FLOW_CONTROL &&
>> +netif_tx_queue_stopped(ntxq))
>> +netif_tx_wake_queue(ntxq);
>> +
>>  ret = tun_put_user(tun, tfile, skb, iv, len);
>>  kfree_skb(skb);
>>  break;
>> @@ -1732,6 +1750,11 @@ static int tun_set_iff(struct net *net, struct file 
>> *file, struct ifreq *ifr)
>>  else
>>  tun->flags &= ~TUN_TAP_MQ;
>>  
>> +if (ifr->ifr_flags & IFF_FLOW_CONTROL)
>> +tfile->flags |= TUN_FLOW_CONTROL;
>> +else
>> +tfile->flags &= ~TUN_FLOW_CONTROL;
>> +
>>  /* Make sure persistent devices do not get stuck in
>>   * xoff state.
>>   */
>> @@ -1900,7 +1923,8 @@ static long __tun_chr_ioctl(

[PATCH v3] tuntap: add flow control to support back pressure

2014-04-14 Thread Steven Galgano
Added optional per queue flow control support using IFF_FLOW_CONTROL. When the 
IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue flag to 
indicate that the queue should be stopped using netif_tx_stop_queue(), rather 
than discarding frames once full. After reading a frame from the respective 
stopped queue, a netif_tx_wake_queue() is issued to signal resource 
availability.

The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This provides 
the flexibility to enable flow control on all, none or some queues when using 
IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL will apply to 
the single queue. No changes were made to the default drop frame policy.

This change adds support for back pressure use cases.

Reported-by: Brian Adamson 
Tested-by: Joseph Giovatto 
Signed-off-by: Steven Galgano 
---
Version 3 patch reformatted commit message to be 80 columns max width. 
 Corrected Tested-By email address.
Version 2 patch added support for individual queues when applying flow
control instead of using netif_tx_stop_all_queues()/netif_tx_wake_all_queues().

 drivers/net/tun.c   | 32 
 include/uapi/linux/if_tun.h |  2 ++
 2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ee328ba..3d09f5a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -137,7 +137,7 @@ struct tun_file {
struct tun_struct __rcu *tun;
struct net *net;
struct fasync_struct *fasync;
-   /* only used for fasnyc */
+   /* used for fasnyc and flow control */
unsigned int flags;
union {
u16 queue_index;
@@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * number of queues.
 */
if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
- >= dev->tx_queue_len)
-   goto drop;
+   >= dev->tx_queue_len) {
+   if (tfile->flags & TUN_FLOW_CONTROL) {
+   /* Resources unavailable stop queue */
+   netif_tx_stop_queue(netdev_get_tx_queue(dev, txq));
+
+   /* We won't see all dropped packets individually, so
+* over run error is more appropriate.
+*/
+   dev->stats.tx_fifo_errors++;
+   } else {
+   goto drop;
+   }
+   }
 
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
@@ -1333,6 +1344,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct 
tun_file *tfile,
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
ssize_t ret = 0;
+   struct netdev_queue *ntxq;
 
tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
@@ -1362,6 +1374,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, 
struct tun_file *tfile,
continue;
}
 
+   ntxq = netdev_get_tx_queue(tun->dev, tfile->queue_index);
+
+   if (tfile->flags & TUN_FLOW_CONTROL &&
+   netif_tx_queue_stopped(ntxq))
+   netif_tx_wake_queue(ntxq);
+
ret = tun_put_user(tun, tfile, skb, iv, len);
kfree_skb(skb);
break;
@@ -1732,6 +1750,11 @@ static int tun_set_iff(struct net *net, struct file 
*file, struct ifreq *ifr)
else
tun->flags &= ~TUN_TAP_MQ;
 
+   if (ifr->ifr_flags & IFF_FLOW_CONTROL)
+   tfile->flags |= TUN_FLOW_CONTROL;
+   else
+   tfile->flags &= ~TUN_FLOW_CONTROL;
+
/* Make sure persistent devices do not get stuck in
 * xoff state.
 */
@@ -1900,7 +1923,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
 * This is needed because we never checked for invalid flags on
 * TUNSETIFF. */
return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-   IFF_VNET_HDR | IFF_MULTI_QUEUE,
+   IFF_VNET_HDR | IFF_MULTI_QUEUE |
+   IFF_FLOW_CONTROL,
(unsigned int __user*)argp);
} else if (cmd == TUNSETQUEUE)
return tun_set_queue(file, &ifr);
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index e9502dd..bcf2790 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -36,6 +36,7 @@
 #define TUN_PERSIST0x0100  
 #define TUN_VNET_HDR   0x0200
 #define TUN_TAP_MQ  0x0400
+#define TUN_FLOW_CONTROL 0x0800
 
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
@@ -70,6 +71,7 @@
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_ATTACH_QUEUE 0x0200
 #define IFF_DETACH_QUEUE 0x0400

[PATCH v3] tuntap: add flow control to support back pressure

2014-04-14 Thread Steven Galgano
Added optional per queue flow control support using IFF_FLOW_CONTROL. When the 
IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue flag to 
indicate that the queue should be stopped using netif_tx_stop_queue(), rather 
than discarding frames once full. After reading a frame from the respective 
stopped queue, a netif_tx_wake_queue() is issued to signal resource 
availability.

The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This provides 
the flexibility to enable flow control on all, none or some queues when using 
IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL will apply to 
the single queue. No changes were made to the default drop frame policy.

This change adds support for back pressure use cases.

Reported-by: Brian Adamson brian.adam...@nrl.navy.mil
Tested-by: Joseph Giovatto jgiova...@adjacentlink.com
Signed-off-by: Steven Galgano sgalg...@adjacentlink.com
---
Version 3 patch reformatted commit message to be 80 columns max width. 
 Corrected Tested-By email address.
Version 2 patch added support for individual queues when applying flow
control instead of using netif_tx_stop_all_queues()/netif_tx_wake_all_queues().

 drivers/net/tun.c   | 32 
 include/uapi/linux/if_tun.h |  2 ++
 2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ee328ba..3d09f5a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -137,7 +137,7 @@ struct tun_file {
struct tun_struct __rcu *tun;
struct net *net;
struct fasync_struct *fasync;
-   /* only used for fasnyc */
+   /* used for fasnyc and flow control */
unsigned int flags;
union {
u16 queue_index;
@@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * number of queues.
 */
if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
- >= dev->tx_queue_len)
-   goto drop;
+   >= dev->tx_queue_len) {
+   if (tfile->flags & TUN_FLOW_CONTROL) {
+   /* Resources unavailable stop queue */
+   netif_tx_stop_queue(netdev_get_tx_queue(dev, txq));
+
+   /* We won't see all dropped packets individually, so
+* over run error is more appropriate.
+*/
+   dev->stats.tx_fifo_errors++;
+   } else {
+   goto drop;
+   }
+   }
 
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
@@ -1333,6 +1344,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct 
tun_file *tfile,
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
ssize_t ret = 0;
+   struct netdev_queue *ntxq;
 
tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
@@ -1362,6 +1374,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, 
struct tun_file *tfile,
continue;
}
 
+   ntxq = netdev_get_tx_queue(tun->dev, tfile->queue_index);
+
+   if (tfile->flags & TUN_FLOW_CONTROL &&
+   netif_tx_queue_stopped(ntxq))
+   netif_tx_wake_queue(ntxq);
+
ret = tun_put_user(tun, tfile, skb, iv, len);
kfree_skb(skb);
break;
@@ -1732,6 +1750,11 @@ static int tun_set_iff(struct net *net, struct file 
*file, struct ifreq *ifr)
else
tun->flags &= ~TUN_TAP_MQ;
 
+   if (ifr->ifr_flags & IFF_FLOW_CONTROL)
+   tfile->flags |= TUN_FLOW_CONTROL;
+   else
+   tfile->flags &= ~TUN_FLOW_CONTROL;
+
/* Make sure persistent devices do not get stuck in
 * xoff state.
 */
@@ -1900,7 +1923,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
 * This is needed because we never checked for invalid flags on
 * TUNSETIFF. */
return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-   IFF_VNET_HDR | IFF_MULTI_QUEUE,
+   IFF_VNET_HDR | IFF_MULTI_QUEUE |
+   IFF_FLOW_CONTROL,
(unsigned int __user*)argp);
} else if (cmd == TUNSETQUEUE)
return tun_set_queue(file, ifr);
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index e9502dd..bcf2790 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -36,6 +36,7 @@
 #define TUN_PERSIST0x0100  
 #define TUN_VNET_HDR   0x0200
 #define TUN_TAP_MQ  0x0400
+#define TUN_FLOW_CONTROL 0x0800
 
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
@@ -70,6 +71,7 @@
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_ATTACH_QUEUE 0x0200
 #define IFF_DETACH_QUEUE 0x0400

Re: [PATCH v3] tuntap: add flow control to support back pressure

2014-04-14 Thread Steven Galgano
On 04/14/2014 09:31 AM, Michael S. Tsirkin wrote:
 On Mon, Apr 14, 2014 at 09:21:47AM -0400, Steven Galgano wrote:
 Added optional per queue flow control support using IFF_FLOW_CONTROL. When 
 the 
 IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue flag to 
 indicate that the queue should be stopped using netif_tx_stop_queue(), 
 rather 
 than discarding frames once full. After reading a frame from the respective 
 stopped queue, a netif_tx_wake_queue() is issued to signal resource 
 availability.

 The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This 
 provides 
 the flexibility to enable flow control on all, none or some queues when 
 using 
 IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL will apply 
 to 
 the single queue. No changes were made to the default drop frame policy.

 This change adds support for back pressure use cases.

 Reported-by: Brian Adamson brian.adam...@nrl.navy.mil
 Tested-by: Joseph Giovatto jgiova...@adjacentlink.com
 Signed-off-by: Steven Galgano sgalg...@adjacentlink.com
 ---
 Version 3 patch reformatted commit message to be 80 columns max width. 
  Corrected Tested-By email address.
 Version 2 patch added support for individual queues when applying flow
 control instead of using 
 netif_tx_stop_all_queues()/netif_tx_wake_all_queues().
 
 Any plans to try and address the more material comments on v2?
 
Yes. My apologies, I missed your response.

  drivers/net/tun.c   | 32 
  include/uapi/linux/if_tun.h |  2 ++
  2 files changed, 30 insertions(+), 4 deletions(-)
 diff --git a/drivers/net/tun.c b/drivers/net/tun.c
 index ee328ba..3d09f5a 100644
 --- a/drivers/net/tun.c
 +++ b/drivers/net/tun.c
 @@ -137,7 +137,7 @@ struct tun_file {
  struct tun_struct __rcu *tun;
  struct net *net;
  struct fasync_struct *fasync;
 -/* only used for fasnyc */
 +/* used for fasnyc and flow control */
  unsigned int flags;
  union {
  u16 queue_index;
 @@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
 struct net_device *dev)
   * number of queues.
   */
  if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
 -  >= dev->tx_queue_len)
 -goto drop;
 +>= dev->tx_queue_len) {
 +if (tfile->flags & TUN_FLOW_CONTROL) {
 +/* Resources unavailable stop queue */
 +netif_tx_stop_queue(netdev_get_tx_queue(dev, txq));
 +
 +/* We won't see all dropped packets individually, so
 + * over run error is more appropriate.
 + */
 +dev->stats.tx_fifo_errors++;
 +} else {
 +goto drop;
 +}
 +}
  
  if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
  goto drop;
 @@ -1333,6 +1344,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, 
 struct tun_file *tfile,
  DECLARE_WAITQUEUE(wait, current);
  struct sk_buff *skb;
  ssize_t ret = 0;
 +struct netdev_queue *ntxq;
  
  tun_debug(KERN_INFO, tun, "tun_do_read\n");
  
 @@ -1362,6 +1374,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, 
 struct tun_file *tfile,
  continue;
  }
  
 +ntxq = netdev_get_tx_queue(tun->dev, tfile->queue_index);
 +
 +if (tfile->flags & TUN_FLOW_CONTROL &&
 +netif_tx_queue_stopped(ntxq))
 +netif_tx_wake_queue(ntxq);
 +
  ret = tun_put_user(tun, tfile, skb, iv, len);
  kfree_skb(skb);
  break;
 @@ -1732,6 +1750,11 @@ static int tun_set_iff(struct net *net, struct file 
 *file, struct ifreq *ifr)
  else
  tun->flags &= ~TUN_TAP_MQ;
  
 +if (ifr->ifr_flags & IFF_FLOW_CONTROL)
 +tfile->flags |= TUN_FLOW_CONTROL;
 +else
 +tfile->flags &= ~TUN_FLOW_CONTROL;
 +
  /* Make sure persistent devices do not get stuck in
   * xoff state.
   */
 @@ -1900,7 +1923,8 @@ static long __tun_chr_ioctl(struct file *file, 
 unsigned int cmd,
   * This is needed because we never checked for invalid flags on
   * TUNSETIFF. */
  return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
 -IFF_VNET_HDR | IFF_MULTI_QUEUE,
 +IFF_VNET_HDR | IFF_MULTI_QUEUE |
 +IFF_FLOW_CONTROL,
  (unsigned int __user*)argp);
  } else if (cmd == TUNSETQUEUE)
  return tun_set_queue(file, ifr);
 diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
 index e9502dd..bcf2790 100644
 --- a/include/uapi/linux/if_tun.h
 +++ b/include/uapi/linux/if_tun.h
 @@ -36,6 +36,7 @@
  #define TUN_PERSIST 0x0100  
  #define TUN_VNET_HDR0x0200
  #define TUN_TAP_MQ  0x0400
 +#define TUN_FLOW_CONTROL 0x0800

Re: [PATCH v2] tuntap: add flow control to support back pressure

2014-04-13 Thread Steven Galgano
On 04/13/2014 09:40 PM, David Miller wrote:
> From: Steven Galgano 
> Date: Sun, 13 Apr 2014 21:30:27 -0400
> 
>> Added optional per queue flow control support using IFF_FLOW_CONTROL. When 
>> the IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue 
>> flag to indicate that the queue should be stopped using 
>> netif_tx_stop_queue(), rather than discarding frames once full. After 
>> reading a frame from the respective stopped queue, a netif_tx_wake_queue() 
>> is issued to signal resource availability.
>>
>> The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This 
>> provides the flexibility to enable flow control on all, none or some queues 
>> when using IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL 
>> will apply to the single queue. No changes were made to the default drop 
>> frame policy.
>>
>> This change adds support for back pressure use cases.
>>
>> Reported-by: Brian Adamson 
>> Tested-by: Joseph Giovatto 
>> Signed-off-by: Steven Galgano 
> 
> Please format your commit messages to ~80 columns of text.
> 
> It won't be automatically formatted by GIT and in fact it looks ugly
> with all the wrapping in text based tools.
> 
Added optional per queue flow control support using IFF_FLOW_CONTROL. When the 
IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue flag to 
indicate that the queue should be stopped using netif_tx_stop_queue(), rather 
than discarding frames once full. After reading a frame from the respective 
stopped queue, a netif_tx_wake_queue() is issued to signal resource 
availability.

The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This provides 
the flexibility to enable flow control on all, none or some queues when using 
IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL will apply to 
the single queue. No changes were made to the default drop frame policy.

This change adds support for back pressure use cases.

Reported-by: Brian Adamson 
Tested-by: Joseph Giovatto 
Signed-off-by: Steven Galgano 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] tuntap: add flow control to support back pressure

2014-04-13 Thread Steven Galgano
Added optional per queue flow control support using IFF_FLOW_CONTROL. When the 
IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue flag to 
indicate that the queue should be stopped using netif_tx_stop_queue(), rather 
than discarding frames once full. After reading a frame from the respective 
stopped queue, a netif_tx_wake_queue() is issued to signal resource 
availability.

The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This provides 
the flexibility to enable flow control on all, none or some queues when using 
IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL will apply to 
the single queue. No changes were made to the default drop frame policy.

This change adds support for back pressure use cases.

Reported-by: Brian Adamson 
Tested-by: Joseph Giovatto 
Signed-off-by: Steven Galgano 
---
Previous version of patch did not respect individual queues when applying flow 
control using netif_tx_stop_all_queues()/netif_tx_wake_all_queues().

 drivers/net/tun.c   | 32 
 include/uapi/linux/if_tun.h |  2 ++
 2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ee328ba..3d09f5a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -137,7 +137,7 @@ struct tun_file {
struct tun_struct __rcu *tun;
struct net *net;
struct fasync_struct *fasync;
-   /* only used for fasnyc */
+   /* used for fasnyc and flow control */
unsigned int flags;
union {
u16 queue_index;
@@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * number of queues.
 */
if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
- >= dev->tx_queue_len)
-   goto drop;
+   >= dev->tx_queue_len) {
+   if (tfile->flags & TUN_FLOW_CONTROL) {
+   /* Resources unavailable stop queue */
+   netif_tx_stop_queue(netdev_get_tx_queue(dev, txq));
+
+   /* We won't see all dropped packets individually, so
+* over run error is more appropriate.
+*/
+   dev->stats.tx_fifo_errors++;
+   } else {
+   goto drop;
+   }
+   }
 
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
@@ -1333,6 +1344,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct 
tun_file *tfile,
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
ssize_t ret = 0;
+   struct netdev_queue *ntxq;
 
tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
@@ -1362,6 +1374,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, 
struct tun_file *tfile,
continue;
}
 
+   ntxq = netdev_get_tx_queue(tun->dev, tfile->queue_index);
+
+   if (tfile->flags & TUN_FLOW_CONTROL &&
+   netif_tx_queue_stopped(ntxq))
+   netif_tx_wake_queue(ntxq);
+
ret = tun_put_user(tun, tfile, skb, iv, len);
kfree_skb(skb);
break;
@@ -1732,6 +1750,11 @@ static int tun_set_iff(struct net *net, struct file 
*file, struct ifreq *ifr)
else
tun->flags &= ~TUN_TAP_MQ;
 
+   if (ifr->ifr_flags & IFF_FLOW_CONTROL)
+   tfile->flags |= TUN_FLOW_CONTROL;
+   else
+   tfile->flags &= ~TUN_FLOW_CONTROL;
+
/* Make sure persistent devices do not get stuck in
 * xoff state.
 */
@@ -1900,7 +1923,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
 * This is needed because we never checked for invalid flags on
 * TUNSETIFF. */
return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-   IFF_VNET_HDR | IFF_MULTI_QUEUE,
+   IFF_VNET_HDR | IFF_MULTI_QUEUE |
+   IFF_FLOW_CONTROL,
(unsigned int __user*)argp);
} else if (cmd == TUNSETQUEUE)
return tun_set_queue(file, &ifr);
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index e9502dd..bcf2790 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -36,6 +36,7 @@
 #define TUN_PERSIST0x0100  
 #define TUN_VNET_HDR   0x0200
 #define TUN_TAP_MQ  0x0400
+#define TUN_FLOW_CONTROL 0x0800
 
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
@@ -70,6 +71,7 @@
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_ATTACH_QUEUE 0x0200
 #define IFF_DETACH_QUEUE 0x0400
+#define IFF_FLOW_CONTROL 0x0010
 /* read-only flag */
 #define IFF_PERSIST0x0800

Re: [PATCH] tuntap: add flow control to support back pressure

2014-04-13 Thread Steven Galgano
behaved and consume packets in a timely manner:
> a misbehaving userspace operating a tun device can cause other
> tun devices and/or sockets to get blocked forever and prevent them
> from communicating with all destinations (not just the misbehaving one)
> as their wmem limit is exhausted.
> 
> It should be possible to reproduce with an old kernel and your userspace
> drivers, too - just stop the daemon temporarily.
> I realize that your daemon normally is well-behaved, and
> simply moves all incoming packets to the backend without
> delay, but I'd like to find a solution that addresses
> this without trusting userspace to be responsive.
> 
> 
> 
> 
> At the moment, for this use-case it's possible to limit the speed of tun
> interface using a non work conserving qdisc.  Make that match the speed
> of the backend device, and you get back basically the old behaviour
> without the security problem in that the rate is basically ensured
> by kernel and all packets queued are eventually consumed.
> 
> Does this solve the problem for you?
> 
> 
I do not believe this solves the problem.

The issue is with what happens when a queue is full. When multiqueue
support was introduced, the tun went from stopping the queue to dropping
packets.

In the back pressure use case, the application code using the character
device side of the tun interface stops reading frames in order to cause
the tun queue to backup and eventually stop. This leads to tx socket
buffer backup which provides back pressure to the applications sending
traffic over the tun interface.

The determination as to when to stop reading frames or when to throttle
back from reading frames is dynamic. It can be based on many things:
congestion on the backend media, power level, data rate, etc.

And the application response to back pressure can be as simple as just
blocking on a send() or more complex like dynamically throttling data
rates or changing compression ratios to reduce overall network load so
that when congestion clears there is less of a strain on the network.

Once the tun started dropping packets instead of stopping the queue, it
removed the ability for the application controlling the tun to apply its
own queue management policy: read frames and possibly discard or stop
reading frames. That is the problem, the fixed discard policy.

I'll be posting another version of the patch that allows you to specify
the flow control behavior on a queue by queue basis. This would let the
application decide what should happen once a queue is full: discard or
netif_tx_stop_queue().

I'm not hopeful this solution will be accepted but it might be helpful
to someone else.

>> BTW, in my initial noticing this issue, it _seemed_ that even the default 
>> interface pfifo_fast priority bands were not being properly enforced for the 
>> tap interface without the old flow control behavior?.  I need to do a little 
>> more "old vs new" comparison testing on this regard.
>>
>> best regards,
>>
>> Brian 
> 
> I think that as tun is never stopped, nothing is ever queued in qdisc.
> 
> 
>> On Apr 10, 2014, at 9:42 PM, Steven Galgano  
>> wrote:
>>
>>> On 04/10/2014 06:29 AM, Michael S. Tsirkin wrote:
>>>> On Wed, Apr 09, 2014 at 10:19:40PM -0400, Steven Galgano wrote:
>>>>> Add tuntap flow control support for use by back pressure routing 
>>>>> protocols. Setting the new TUNSETIFF flag IFF_FLOW_CONTROL, will signal 
>>>>> resources as unavailable when the tx queue limit is reached by issuing a 
>>>>> netif_tx_stop_all_queues() rather than discarding frames. A 
>>>>> netif_tx_wake_all_queues() is issued after reading a frame from the queue 
>>>>> to signal resource availability.
>>>>>
>>>>> Back pressure capability was previously supported by the legacy tun 
>>>>> default mode. This change restores that functionality, which was last 
>>>>> present in v3.7.
>>>>>
>>>>> Reported-by: Brian Adamson 
>>>>> Tested-by: Joseph Giovatto 
>>>>> Signed-off-by: Steven Galgano 
>>>>
>>>> I don't think it's a good idea.
>>>>
>>>> This trivial flow control really created more problems than it was worth.
>>>>
>>>> In particular this blocks all flows so it's trivially easy for one flow
>>>> to block and starve all others: just send a bunch of packets to loopback
>>>> destinations that get queued all over the place.
>>>>
>>>> Luckily it was never documented so we changed the default and nothing
>>>> seems to break, but we won't be so lucky if we add an explicit API.
>>>>
&g

Re: [PATCH] tuntap: add flow control to support back pressure

2014-04-13 Thread Steven Galgano
 that addresses
 this without trusting userspace to be responsive.
 
 
 
 
 At the moment, for this use-case it's possible to limit the speed of tun
 interface using a non work conserving qdisc.  Make that match the speed
 of the backend device, and you get back basically the old behaviour
 without the security problem in that the rate is basically ensured
 by kernel and all packets queued are eventually consumed.
 
 Does this solve the problem for you?
 
 
I do not believe this solves the problem.

The issue is with what happens when a queue is full. When multiqueue
support was introduced, the tun went from stopping the queue to dropping
packets.

In the back pressure use case, the application code using the character
device side of the tun interface stops reading frames in order to cause
the tun queue to backup and eventually stop. This leads to tx socket
buffer backup which provides back pressure to the applications sending
traffic over the tun interface.

The determination as to when to stop reading frames or when to throttle
back from reading frames is dynamic. It can be based on many things:
congestion on the backend media, power level, data rate, etc.

And the application response to back pressure can be as simple as just
blocking on a send() or more complex like dynamically throttling data
rates or changing compression ratios to reduce overall network load so
that when congestion clears there is less of a strain on the network.

Once the tun started dropping packets instead of stopping the queue, it
removed the ability for the application controlling the tun to apply its
own queue management policy: read frames and possibly discard or stop
reading frames. That is the problem, the fixed discard policy.

I'll be posting another version of the patch that allows you to specify
the flow control behavior on a queue by queue basis. This would let the
application decide what should happen once a queue is full: discard or
netif_tx_stop_queue().

I'm not hopeful this solution will be accepted but it might be helpful
to someone else.

 BTW, in my initial noticing this issue, it _seemed_ that even the default 
 interface pfifo_fast priority bands were not being properly enforced for the 
 tap interface without the old flow control behavior?.  I need to do a little 
 more old vs new comparison testing on this regard.

 best regards,

 Brian 
 
 I think that as tun is never stopped, nothing is ever queued in qdisc.
 
 
 On Apr 10, 2014, at 9:42 PM, Steven Galgano sgalg...@adjacentlink.com 
 wrote:

 On 04/10/2014 06:29 AM, Michael S. Tsirkin wrote:
 On Wed, Apr 09, 2014 at 10:19:40PM -0400, Steven Galgano wrote:
 Add tuntap flow control support for use by back pressure routing 
 protocols. Setting the new TUNSETIFF flag IFF_FLOW_CONTROL, will signal 
 resources as unavailable when the tx queue limit is reached by issuing a 
 netif_tx_stop_all_queues() rather than discarding frames. A 
 netif_tx_wake_all_queues() is issued after reading a frame from the queue 
 to signal resource availability.

 Back pressure capability was previously supported by the legacy tun 
 default mode. This change restores that functionality, which was last 
 present in v3.7.

 Reported-by: Brian Adamson brian.adam...@nrl.navy.mil
 Tested-by: Joseph Giovatto jgiova...@adjacentlink.com
 Signed-off-by: Steven Galgano sgalg...@adjacentlink.com

 I don't think it's a good idea.

 This trivial flow control really created more problems than it was worth.

 In particular this blocks all flows so it's trivially easy for one flow
 to block and starve all others: just send a bunch of packets to loopback
 destinations that get queued all over the place.

 Luckily it was never documented so we changed the default and nothing
 seems to break, but we won't be so lucky if we add an explicit API.


 One way to implement this would be with ubuf_info callback this is
 already invoked in most places where a packet might get stuck for a long
 time.  It's still incomplete though: this will prevent head of queue
 blocking literally forever, but a single bad flow can still degrade
 performance significantly.

 Another alternative is to try and isolate the flows that we
 can handle and throttle them.

 It's all fixable but we really need to fix the issues *before*
 exposing the interface to userspace.




 It was only after recent upgrades that we picked up a newer kernel and
 discovered the change to the default tun mode.

 The new default behavior has broken applications that depend on the
 legacy behavior. Although not documented, the legacy behavior was well
 known at least to those working in the back pressure research community.
 The default legacy mode was/is a valid use case although I am not sure
 it fits with recent multiqueue support.

 When back pressure protocols are running over a tun interface they
 require legacy flow control in order to communicate congestion detected
 on the physical media they are using. Multiqueues do not apply here

[PATCH v2] tuntap: add flow control to support back pressure

2014-04-13 Thread Steven Galgano
Added optional per queue flow control support using IFF_FLOW_CONTROL. When the 
IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue flag to 
indicate that the queue should be stopped using netif_tx_stop_queue(), rather 
than discarding frames once full. After reading a frame from the respective 
stopped queue, a netif_tx_wake_queue() is issued to signal resource 
availability.

The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This provides 
the flexibility to enable flow control on all, none or some queues when using 
IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL will apply to 
the single queue. No changes were made to the default drop frame policy.

This change adds support for back pressure use cases.

Reported-by: Brian Adamson brian.adam...@nrl.navy.mil
Tested-by: Joseph Giovatto jgiova...@djacentlink.com
Signed-off-by: Steven Galgano sgalg...@adjacentlink.com
---
Previous version of patch did not respect individual queues when applying flow 
control using netif_tx_stop_all_queues()/netif_tx_wake_all_queues().

 drivers/net/tun.c   | 32 
 include/uapi/linux/if_tun.h |  2 ++
 2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ee328ba..3d09f5a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -137,7 +137,7 @@ struct tun_file {
struct tun_struct __rcu *tun;
struct net *net;
struct fasync_struct *fasync;
-   /* only used for fasnyc */
+   /* used for fasnyc and flow control */
unsigned int flags;
union {
u16 queue_index;
@@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * number of queues.
 */
if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
- >= dev->tx_queue_len)
-   goto drop;
+   >= dev->tx_queue_len) {
+   if (tfile->flags & TUN_FLOW_CONTROL) {
+   /* Resources unavailable stop queue */
+   netif_tx_stop_queue(netdev_get_tx_queue(dev, txq));
+
+   /* We won't see all dropped packets individually, so
+* over run error is more appropriate.
+*/
+   dev->stats.tx_fifo_errors++;
+   } else {
+   goto drop;
+   }
+   }
 
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
@@ -1333,6 +1344,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct 
tun_file *tfile,
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
ssize_t ret = 0;
+   struct netdev_queue *ntxq;
 
tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
@@ -1362,6 +1374,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, 
struct tun_file *tfile,
continue;
}
 
+   ntxq = netdev_get_tx_queue(tun->dev, tfile->queue_index);
+
+   if (tfile->flags & TUN_FLOW_CONTROL &&
+   netif_tx_queue_stopped(ntxq))
+   netif_tx_wake_queue(ntxq);
+
ret = tun_put_user(tun, tfile, skb, iv, len);
kfree_skb(skb);
break;
@@ -1732,6 +1750,11 @@ static int tun_set_iff(struct net *net, struct file 
*file, struct ifreq *ifr)
else
tun->flags &= ~TUN_TAP_MQ;

+   if (ifr->ifr_flags & IFF_FLOW_CONTROL)
+   tfile->flags |= TUN_FLOW_CONTROL;
+   else
+   tfile->flags &= ~TUN_FLOW_CONTROL;
+
/* Make sure persistent devices do not get stuck in
 * xoff state.
 */
@@ -1900,7 +1923,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
 * This is needed because we never checked for invalid flags on
 * TUNSETIFF. */
return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-   IFF_VNET_HDR | IFF_MULTI_QUEUE,
+   IFF_VNET_HDR | IFF_MULTI_QUEUE |
+   IFF_FLOW_CONTROL,
(unsigned int __user*)argp);
} else if (cmd == TUNSETQUEUE)
return tun_set_queue(file, ifr);
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index e9502dd..bcf2790 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -36,6 +36,7 @@
 #define TUN_PERSIST0x0100  
 #define TUN_VNET_HDR   0x0200
 #define TUN_TAP_MQ  0x0400
+#define TUN_FLOW_CONTROL 0x0800
 
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
@@ -70,6 +71,7 @@
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_ATTACH_QUEUE 0x0200
 #define IFF_DETACH_QUEUE 0x0400
+#define IFF_FLOW_CONTROL 0x0010
 /* read-only flag */
 #define IFF_PERSIST0x0800
 #define IFF_NOFILTER

Re: [PATCH v2] tuntap: add flow control to support back pressure

2014-04-13 Thread Steven Galgano
On 04/13/2014 09:40 PM, David Miller wrote:
 From: Steven Galgano sgalg...@adjacentlink.com
 Date: Sun, 13 Apr 2014 21:30:27 -0400
 
 Added optional per queue flow control support using IFF_FLOW_CONTROL. When 
 the IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue 
 flag to indicate that the queue should be stopped using 
 netif_tx_stop_queue(), rather than discarding frames once full. After 
 reading a frame from the respective stopped queue, a netif_tx_wake_queue() 
 is issued to signal resource availability.

 The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This 
 provides the flexibility to enable flow control on all, none or some queues 
 when using IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL 
 will apply to the single queue. No changes were made to the default drop 
 frame policy.

 This change adds support for back pressure use cases.

 Reported-by: Brian Adamson brian.adam...@nrl.navy.mil
 Tested-by: Joseph Giovatto jgiova...@djacentlink.com
 Signed-off-by: Steven Galgano sgalg...@adjacentlink.com
 
 Please format your commit messages to ~80 columns of text.
 
 It won't be automatically formatted by GIT and in fact it looks ugly
 with all the wrapping in text based tools.
 
Added optional per queue flow control support using IFF_FLOW_CONTROL. When the 
IFF_FLOW_CONTROL TUNSETIFF flag is specified it will set a per queue flag to 
indicate that the queue should be stopped using netif_tx_stop_queue(), rather 
than discarding frames once full. After reading a frame from the respective 
stopped queue, a netif_tx_wake_queue() is issued to signal resource 
availability.

The per queue TUN_FLOW_CONTROL flag is stored in struct tun_file. This provides 
the flexibility to enable flow control on all, none or some queues when using 
IFF_MULTI_QUEUE. When not using IFF_MULTI_QUEUE, IFF_FLOW_CONTROL will apply to 
the single queue. No changes were made to the default drop frame policy.

This change adds support for back pressure use cases.

Reported-by: Brian Adamson brian.adam...@nrl.navy.mil
Tested-by: Joseph Giovatto jgiova...@djacentlink.com
Signed-off-by: Steven Galgano sgalg...@adjacentlink.com


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] tuntap: add flow control to support back pressure

2014-04-10 Thread Steven Galgano
On 04/10/2014 06:29 AM, Michael S. Tsirkin wrote:
> On Wed, Apr 09, 2014 at 10:19:40PM -0400, Steven Galgano wrote:
>> Add tuntap flow control support for use by back pressure routing protocols. 
>> Setting the new TUNSETIFF flag IFF_FLOW_CONTROL, will signal resources as 
>> unavailable when the tx queue limit is reached by issuing a 
>> netif_tx_stop_all_queues() rather than discarding frames. A 
>> netif_tx_wake_all_queues() is issued after reading a frame from the queue to 
>> signal resource availability.
>>
>> Back pressure capability was previously supported by the legacy tun default 
>> mode. This change restores that functionality, which was last present in 
>> v3.7.
>>
>> Reported-by: Brian Adamson 
>> Tested-by: Joseph Giovatto 
>> Signed-off-by: Steven Galgano 
> 
> I don't think it's a good idea.
> 
> This trivial flow control really created more problems than it was worth.
> 
> In particular this blocks all flows so it's trivially easy for one flow
> to block and starve all others: just send a bunch of packets to loopback
> destinations that get queued all over the place.
> 
> Luckily it was never documented so we changed the default and nothing
> seems to break, but we won't be so lucky if we add an explicit API.
> 
> 
> One way to implement this would be with ubuf_info callback this is
> already invoked in most places where a packet might get stuck for a long
> time.  It's still incomplete though: this will prevent head of queue
> blocking literally forever, but a single bad flow can still degrade
> performance significantly.
> 
> Another alternative is to try and isolate the flows that we
> can handle and throttle them.
> 
> It's all fixable but we really need to fix the issues *before*
> exposing the interface to userspace.
> 
> 
> 

It was only after recent upgrades that we picked up a newer kernel and
discovered the change to the default tun mode.

The new default behavior has broken applications that depend on the
legacy behavior. Although not documented, the legacy behavior was well
known at least to those working in the back pressure research community.
The default legacy mode was/is a valid use case although I am not sure
it fits with recent multiqueue support.

When back pressure protocols are running over a tun interface they
require legacy flow control in order to communicate congestion detected
on the physical media they are using. Multiqueues do not apply here.
These protocols only use one queue, so netif_tx_stop_all_queues() is the
necessary behavior.

I'm not tied to the idea of IFF_FLOW_CONTROL. I was aiming for middle
ground where an application controlling the tun interface can state it
wants the legacy flow control behavior understanding its limitations
concerning multiple queues.

What if we resurrect IFF_ONE_QUEUE and use that as a mechanism to
indicate legacy flow control. A tun instance initially configured with
IFF_ONE_QUEUE would not be allowed to attach or detach queues with
TUNSETQUEUE and any additional opens with the same device name would
fail. This mode would use the
netif_tx_stop_all_queues()/netif_tx_wake_all_queues() flow control
mechanism.

If a tun application wants the current default behavior with only a
single queue, it would not set the IFF_ONE_QUEUE flag. Not setting
IFF_MULTI_QUEUE would not imply IFF_ONE_QUEUE.

I'd be happy to implement this change if it is an acceptable solution.
This would allow multiqueue tun development to advance while still
supporting use cases dependent on legacy flow control.

-steve

>> ---
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index ee328ba..268130c 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
>> struct net_device *dev)
>>   * number of queues.
>>   */
>>  if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
>> -  >= dev->tx_queue_len)
>> -goto drop;
>> +>= dev->tx_queue_len) {
>> +if (tun->flags & TUN_FLOW_CONTROL) {
>> +/* Resources unavailable stop transmissions */
>> +netif_tx_stop_all_queues(dev);
>> +
>> +/* We won't see all dropped packets individually, so
>> + * over run error is more appropriate.
>> + */
>> +dev->stats.tx_fifo_errors++;
>> +} else {
>> +goto drop;
>> +}
>> +}
>>  
>>  if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
>>  goto drop;
>> @@ -1362,6 +1

Re: [PATCH] tuntap: add flow control to support back pressure

2014-04-10 Thread Steven Galgano
On 04/10/2014 06:29 AM, Michael S. Tsirkin wrote:
 On Wed, Apr 09, 2014 at 10:19:40PM -0400, Steven Galgano wrote:
 Add tuntap flow control support for use by back pressure routing protocols. 
 Setting the new TUNSETIFF flag IFF_FLOW_CONTROL, will signal resources as 
 unavailable when the tx queue limit is reached by issuing a 
 netif_tx_stop_all_queues() rather than discarding frames. A 
 netif_tx_wake_all_queues() is issued after reading a frame from the queue to 
 signal resource availability.

 Back pressure capability was previously supported by the legacy tun default 
 mode. This change restores that functionality, which was last present in 
 v3.7.

 Reported-by: Brian Adamson brian.adam...@nrl.navy.mil
 Tested-by: Joseph Giovatto jgiova...@adjacentlink.com
 Signed-off-by: Steven Galgano sgalg...@adjacentlink.com
 
 I don't think it's a good idea.
 
 This trivial flow control really created more problems than it was worth.
 
 In particular this blocks all flows so it's trivially easy for one flow
 to block and starve all others: just send a bunch of packets to loopback
 destinations that get queued all over the place.
 
 Luckily it was never documented so we changed the default and nothing
 seems to break, but we won't be so lucky if we add an explicit API.
 
 
 One way to implement this would be with ubuf_info callback this is
 already invoked in most places where a packet might get stuck for a long
 time.  It's still incomplete though: this will prevent head of queue
 blocking literally forever, but a single bad flow can still degrade
 performance significantly.
 
 Another alternative is to try and isolate the flows that we
 can handle and throttle them.
 
 It's all fixable but we really need to fix the issues *before*
 exposing the interface to userspace.
 
 
 

It was only after recent upgrades that we picked up a newer kernel and
discovered the change to the default tun mode.

The new default behavior has broken applications that depend on the
legacy behavior. Although not documented, the legacy behavior was well
known at least to those working in the back pressure research community.
The default legacy mode was/is a valid use case although I am not sure
it fits with recent multiqueue support.

When back pressure protocols are running over a tun interface they
require legacy flow control in order to communicate congestion detected
on the physical media they are using. Multiqueues do not apply here.
These protocols only use one queue, so netif_tx_stop_all_queues() is the
necessary behavior.

I'm not tied to the idea of IFF_FLOW_CONTROL. I was aiming for middle
ground where an application controlling the tun interface can state it
wants the legacy flow control behavior understanding its limitations
concerning multiple queues.

What if we resurrect IFF_ONE_QUEUE and use that as a mechanism to
indicate legacy flow control. A tun instance initially configured with
IFF_ONE_QUEUE would not be allowed to attach or detach queues with
TUNSETQUEUE and any additional opens with the same device name would
fail. This mode would use the
netif_tx_stop_all_queues()/netif_tx_wake_all_queues() flow control
mechanism.

If a tun application wants the current default behavior with only a
single queue, it would not set the IFF_ONE_QUEUE flag. Not setting
IFF_MULTI_QUEUE would not imply IFF_ONE_QUEUE.

I'd be happy to implement this change if it is an acceptable solution.
This would allow multiqueue tun development to advance while still
supporting use cases dependent on legacy flow control.

-steve

 ---
 diff --git a/drivers/net/tun.c b/drivers/net/tun.c
 index ee328ba..268130c 100644
 --- a/drivers/net/tun.c
 +++ b/drivers/net/tun.c
 @@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
 struct net_device *dev)
   * number of queues.
   */
  if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
 -  >= dev->tx_queue_len)
 -goto drop;
 +>= dev->tx_queue_len) {
 +if (tun->flags & TUN_FLOW_CONTROL) {
 +/* Resources unavailable stop transmissions */
 +netif_tx_stop_all_queues(dev);
 +
 +/* We won't see all dropped packets individually, so
 + * over run error is more appropriate.
 + */
 +dev->stats.tx_fifo_errors++;
 +} else {
 +goto drop;
 +}
 +}
  
  if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
  goto drop;
 @@ -1362,6 +1373,9 @@ static ssize_t tun_do_read(struct tun_struct *tun, 
 struct tun_file *tfile,
  continue;
  }
  
 +/* Wake in case resources previously signaled unavailable */
 +netif_tx_wake_all_queues(tun->dev);
 +
  ret = tun_put_user(tun, tfile, skb, iv, len);
  kfree_skb(skb);
  break;
 @@ -1550,6 +1564,9 @@ static int tun_flags

[PATCH] tuntap: add flow control to support back pressure

2014-04-09 Thread Steven Galgano
Add tuntap flow control support for use by back pressure routing protocols. 
Setting the new TUNSETIFF flag IFF_FLOW_CONTROL, will signal resources as 
unavailable when the tx queue limit is reached by issuing a 
netif_tx_stop_all_queues() rather than discarding frames. A 
netif_tx_wake_all_queues() is issued after reading a frame from the queue to 
signal resource availability.

Back pressure capability was previously supported by the legacy tun default 
mode. This change restores that functionality, which was last present in v3.7.

Reported-by: Brian Adamson 
Tested-by: Joseph Giovatto 
Signed-off-by: Steven Galgano 
---
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ee328ba..268130c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * number of queues.
 */
if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
- >= dev->tx_queue_len)
-   goto drop;
+   >= dev->tx_queue_len) {
+   if (tun->flags & TUN_FLOW_CONTROL) {
+   /* Resources unavailable stop transmissions */
+   netif_tx_stop_all_queues(dev);
+
+   /* We won't see all dropped packets individually, so
+* over run error is more appropriate.
+*/
+   dev->stats.tx_fifo_errors++;
+   } else {
+   goto drop;
+   }
+   }
 
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
@@ -1362,6 +1373,9 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct 
tun_file *tfile,
continue;
}
 
+   /* Wake in case resources previously signaled unavailable */
+   netif_tx_wake_all_queues(tun->dev);
+
ret = tun_put_user(tun, tfile, skb, iv, len);
kfree_skb(skb);
break;
@@ -1550,6 +1564,9 @@ static int tun_flags(struct tun_struct *tun)
if (tun->flags & TUN_PERSIST)
flags |= IFF_PERSIST;
 
+   if (tun->flags & TUN_FLOW_CONTROL)
+   flags |= IFF_FLOW_CONTROL;
+
return flags;
 }
 
@@ -1732,6 +1749,11 @@ static int tun_set_iff(struct net *net, struct file 
*file, struct ifreq *ifr)
else
tun->flags &= ~TUN_TAP_MQ;
 
+   if (ifr->ifr_flags & IFF_FLOW_CONTROL)
+   tun->flags |= TUN_FLOW_CONTROL;
+   else
+   tun->flags &= ~TUN_FLOW_CONTROL;
+
/* Make sure persistent devices do not get stuck in
 * xoff state.
 */
@@ -1900,7 +1922,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
 * This is needed because we never checked for invalid flags on
 * TUNSETIFF. */
return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-   IFF_VNET_HDR | IFF_MULTI_QUEUE,
+   IFF_VNET_HDR | IFF_MULTI_QUEUE |
+   IFF_FLOW_CONTROL,
(unsigned int __user*)argp);
} else if (cmd == TUNSETQUEUE)
return tun_set_queue(file, ifr);
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index e9502dd..bcf2790 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -36,6 +36,7 @@
 #define TUN_PERSIST0x0100  
 #define TUN_VNET_HDR   0x0200
 #define TUN_TAP_MQ  0x0400
+#define TUN_FLOW_CONTROL 0x0800
 
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
@@ -70,6 +71,7 @@
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_ATTACH_QUEUE 0x0200
 #define IFF_DETACH_QUEUE 0x0400
+#define IFF_FLOW_CONTROL 0x0010
 /* read-only flag */
 #define IFF_PERSIST0x0800
 #define IFF_NOFILTER   0x1000
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] tuntap: add flow control to support back pressure

2014-04-09 Thread Steven Galgano
Add tuntap flow control support for use by back pressure routing protocols. 
Setting the new TUNSETIFF flag IFF_FLOW_CONTROL, will signal resources as 
unavailable when the tx queue limit is reached by issuing a 
netif_tx_stop_all_queues() rather than discarding frames. A 
netif_tx_wake_all_queues() is issued after reading a frame from the queue to 
signal resource availability.

Back pressure capability was previously supported by the legacy tun default 
mode. This change restores that functionality, which was last present in v3.7.

Reported-by: Brian Adamson brian.adam...@nrl.navy.mil
Tested-by: Joseph Giovatto jgiova...@adjacentlink.com
Signed-off-by: Steven Galgano sgalg...@adjacentlink.com
---
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ee328ba..268130c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -783,8 +783,19 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * number of queues.
 */
if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
- >= dev->tx_queue_len)
-   goto drop;
+   >= dev->tx_queue_len) {
+   if (tun->flags & TUN_FLOW_CONTROL) {
+   /* Resources unavailable stop transmissions */
+   netif_tx_stop_all_queues(dev);
+
+   /* We won't see all dropped packets individually, so
+* over run error is more appropriate.
+*/
+   dev->stats.tx_fifo_errors++;
+   } else {
+   goto drop;
+   }
+   }
 
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
@@ -1362,6 +1373,9 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct 
tun_file *tfile,
continue;
}
 
+   /* Wake in case resources previously signaled unavailable */
+   netif_tx_wake_all_queues(tun->dev);
+
ret = tun_put_user(tun, tfile, skb, iv, len);
kfree_skb(skb);
break;
@@ -1550,6 +1564,9 @@ static int tun_flags(struct tun_struct *tun)
if (tun->flags & TUN_PERSIST)
flags |= IFF_PERSIST;
 
+   if (tun->flags & TUN_FLOW_CONTROL)
+   flags |= IFF_FLOW_CONTROL;
+
return flags;
 }
 
@@ -1732,6 +1749,11 @@ static int tun_set_iff(struct net *net, struct file 
*file, struct ifreq *ifr)
else
tun->flags &= ~TUN_TAP_MQ;
 
+   if (ifr->ifr_flags & IFF_FLOW_CONTROL)
+   tun->flags |= TUN_FLOW_CONTROL;
+   else
+   tun->flags &= ~TUN_FLOW_CONTROL;
+
/* Make sure persistent devices do not get stuck in
 * xoff state.
 */
@@ -1900,7 +1922,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
 * This is needed because we never checked for invalid flags on
 * TUNSETIFF. */
return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-   IFF_VNET_HDR | IFF_MULTI_QUEUE,
+   IFF_VNET_HDR | IFF_MULTI_QUEUE |
+   IFF_FLOW_CONTROL,
(unsigned int __user*)argp);
} else if (cmd == TUNSETQUEUE)
return tun_set_queue(file, ifr);
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index e9502dd..bcf2790 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -36,6 +36,7 @@
 #define TUN_PERSIST0x0100  
 #define TUN_VNET_HDR   0x0200
 #define TUN_TAP_MQ  0x0400
+#define TUN_FLOW_CONTROL 0x0800
 
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
@@ -70,6 +71,7 @@
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_ATTACH_QUEUE 0x0200
 #define IFF_DETACH_QUEUE 0x0400
+#define IFF_FLOW_CONTROL 0x0010
 /* read-only flag */
 #define IFF_PERSIST0x0800
 #define IFF_NOFILTER   0x1000
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/