Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-29 Thread Jason Wang



On 2016年06月28日 15:09, Michael S. Tsirkin wrote:

On Thu, Jun 23, 2016 at 01:14:07PM +0800, Jason Wang wrote:


On 2016年06月23日 02:18, Michael S. Tsirkin wrote:

On Fri, Jun 17, 2016 at 03:41:20AM +0300, Michael S. Tsirkin wrote:

Would it help to have ptr_ring_resize that gets an array of
rings and resizes them both to same length?

OK, here it is. Untested so far, and no skb wrapper.
Pls let me know whether this is what you had in mind.

Exactly what I want.

Thanks

Ok and this for skb_array

-->
skb_array: add wrappers for resizing

Signed-off-by: Michael S. Tsirkin 

--

diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
index c900708..7e01c1f 100644
--- a/include/linux/skb_array.h
+++ b/include/linux/skb_array.h
@@ -151,16 +151,24 @@ static inline int skb_array_init(struct skb_array *a, int 
size, gfp_t gfp)
return ptr_ring_init(&a->ring, size, 0, gfp);
  }
  
-void __skb_array_destroy_skb(void *ptr)

+static void __skb_array_destroy_skb(void *ptr)
  {
kfree_skb(ptr);
  }
  
-int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)

+static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
  {
return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb);
  }


Will split up the above tweak into another patch when reposting.

  
+static inline int skb_raay_resize_multiple(struct skb_array **rings, int nrings,


I think you mean 'skb_array_resize' here.


+  int size, gfp_t gfp)
+{
+   BUILD_BUG_ON(offsetof(struct skb_array, ring));
+   ptr_ring_resize_multiple((struct ptr_ring **)rings, nrings, size, gfp,
+__skb_array_destroy_skb);


This should be return ptr_ring_resize_multiple(...


+}
+
  static inline void skb_array_cleanup(struct skb_array *a)
  {
ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb);


With this, looks like there's no need for a new flag. Will repost the 
series with those two patches.


Thanks


Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-28 Thread Michael S. Tsirkin
On Thu, Jun 23, 2016 at 01:14:07PM +0800, Jason Wang wrote:
> 
> 
> On 2016年06月23日 02:18, Michael S. Tsirkin wrote:
> > On Fri, Jun 17, 2016 at 03:41:20AM +0300, Michael S. Tsirkin wrote:
> > > >Would it help to have ptr_ring_resize that gets an array of
> > > >rings and resizes them both to same length?
> > OK, here it is. Untested so far, and no skb wrapper.
> > Pls let me know whether this is what you had in mind.
> 
> Exactly what I want.
> 
> Thanks

Ok and this for skb_array

-->
skb_array: add wrappers for resizing

Signed-off-by: Michael S. Tsirkin 

--

diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
index c900708..7e01c1f 100644
--- a/include/linux/skb_array.h
+++ b/include/linux/skb_array.h
@@ -151,16 +151,24 @@ static inline int skb_array_init(struct skb_array *a, int 
size, gfp_t gfp)
return ptr_ring_init(&a->ring, size, 0, gfp);
 }
 
-void __skb_array_destroy_skb(void *ptr)
+static void __skb_array_destroy_skb(void *ptr)
 {
kfree_skb(ptr);
 }
 
-int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
+static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
 {
return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb);
 }
 
+static inline int skb_raay_resize_multiple(struct skb_array **rings, int 
nrings,
+  int size, gfp_t gfp)
+{
+   BUILD_BUG_ON(offsetof(struct skb_array, ring));
+   ptr_ring_resize_multiple((struct ptr_ring **)rings, nrings, size, gfp,
+__skb_array_destroy_skb);
+}
+
 static inline void skb_array_cleanup(struct skb_array *a)
 {
ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb);


Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-22 Thread Jason Wang



On 2016年06月23日 02:18, Michael S. Tsirkin wrote:

On Fri, Jun 17, 2016 at 03:41:20AM +0300, Michael S. Tsirkin wrote:

>Would it help to have ptr_ring_resize that gets an array of
>rings and resizes them both to same length?

OK, here it is. Untested so far, and no skb wrapper.
Pls let me know whether this is what you had in mind.


Exactly what I want.

Thanks


Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-22 Thread Michael S. Tsirkin
On Fri, Jun 17, 2016 at 03:41:20AM +0300, Michael S. Tsirkin wrote:
> Would it help to have ptr_ring_resize that gets an array of
> rings and resizes them both to same length?

OK, here it is. Untested so far, and no skb wrapper.
Pls let me know whether this is what you had in mind.

-->

ptr_ring: support resizing multiple queues

Signed-off-by: Michael S. Tsirkin 

---

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index a29b023..e576801 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -354,20 +354,14 @@ static inline int ptr_ring_init(struct ptr_ring *r, int 
size, int pad, gfp_t gfp
return 0;
 }
 
-static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
- void (*destroy)(void *))
+static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
+  int size, gfp_t gfp,
+  void (*destroy)(void *))
 {
-   unsigned long flags;
int producer = 0;
-   void **queue = __ptr_ring_init_queue_alloc(size, gfp);
void **old;
void *ptr;
 
-   if (!queue)
-   return -ENOMEM;
-
-   spin_lock_irqsave(&(r)->producer_lock, flags);
-
while ((ptr = ptr_ring_consume(r)))
if (producer < size)
queue[producer++] = ptr;
@@ -380,6 +374,23 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int 
size, gfp_t gfp,
old = r->queue;
r->queue = queue;
 
+   return old;
+}
+
+static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
+ void (*destroy)(void *))
+{
+   unsigned long flags;
+   void **queue = __ptr_ring_init_queue_alloc(size, gfp);
+   void **old;
+
+   if (!queue)
+   return -ENOMEM;
+
+   spin_lock_irqsave(&(r)->producer_lock, flags);
+
+   old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
+
spin_unlock_irqrestore(&(r)->producer_lock, flags);
 
kfree(old);
@@ -387,6 +398,49 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int 
size, gfp_t gfp,
return 0;
 }
 
+static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings,
+  int size,
+  gfp_t gfp, void (*destroy)(void *))
+{
+   unsigned long flags;
+   void ***queues;
+   int i;
+
+   queues = kmalloc(nrings * sizeof *queues, gfp);
+   if (!queues)
+   goto noqueues;
+
+   for (i = 0; i < nrings; ++i) {
+   queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
+   if (!queues[i])
+   goto nomem;
+   }
+
+   spin_lock_irqsave(&(rings[i])->producer_lock, flags);
+
+   for (i = 0; i < nrings; ++i)
+   queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
+ size, gfp, destroy);
+
+   spin_unlock_irqrestore(&(rings[i])->producer_lock, flags);
+
+   for (i = 0; i < nrings; ++i)
+   kfree(queues[i]);
+
+   kfree(queues);
+
+   return 0;
+
+nomem:
+   while (--i >= 0)
+   kfree(queues[i]);
+
+   kfree(queues);
+
+noqueues:
+   return -ENOMEM;
+}
+
 static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void 
*))
 {
void *ptr;
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index 26dc1d2..deb36af 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -17,6 +17,11 @@
 typedef pthread_spinlock_t  spinlock_t;
 
 typedef int gfp_t;
+static void *kmalloc(unsigned size, gfp_t gfp)
+{
+   return memalign(64, size);
+}
+
 static void *kzalloc(unsigned size, gfp_t gfp)
 {
void *p = memalign(64, size);


Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-17 Thread Jason Wang



On 2016年06月17日 08:41, Michael S. Tsirkin wrote:

On Wed, Jun 15, 2016 at 04:38:17PM +0800, Jason Wang wrote:

>We used to queue tx packets in sk_receive_queue, this is less
>efficient since it requires spinlocks to synchronize between producer
>and consumer.
>
>This patch tries to address this by:
>
>- introduce a new mode which will be only enabled with IFF_TX_ARRAY
>   set and switch from sk_receive_queue to a fixed size of skb
>   array with 256 entries in this mode.
>- introduce a new proto_ops peek_len which was used for peeking the
>   skb length.
>- implement a tun version of peek_len for vhost_net to use and convert
>   vhost_net to use peek_len if possible.
>
>Pktgen test shows about 18% improvement on guest receiving pps for small
>buffers:
>
>Before: ~122pps
>After : ~144pps
>
>The reason why I stick to new mode is because:
>
>- though resize is supported by skb array, in multiqueue mode, it's
>   not easy to recover from a partial success of queue resizing.
>- tx_queue_len is a user visible feature.
>
>Signed-off-by: Jason Wang

I still think it's wrong to add a new feature for this.
For example, why 256 entries?


It's the value of virtqueue size supported by qemu.


Queue len is user visible but it's there precisely for this
reason so people can tune queue for workload.


Right.



Would it help to have ptr_ring_resize that gets an array of
rings and resizes them both to same length?


Yes, that would be very helpful.


Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-16 Thread Michael S. Tsirkin
On Wed, Jun 15, 2016 at 04:38:17PM +0800, Jason Wang wrote:
> We used to queue tx packets in sk_receive_queue, this is less
> efficient since it requires spinlocks to synchronize between producer
> and consumer.
> 
> This patch tries to address this by:
> 
> - introduce a new mode which will be only enabled with IFF_TX_ARRAY
>   set and switch from sk_receive_queue to a fixed size of skb
>   array with 256 entries in this mode.
> - introduce a new proto_ops peek_len which was used for peeking the
>   skb length.
> - implement a tun version of peek_len for vhost_net to use and convert
>   vhost_net to use peek_len if possible.
> 
> Pktgen test shows about 18% improvement on guest receiving pps for small
> buffers:
> 
> Before: ~122pps
> After : ~144pps
> 
> The reason why I stick to new mode is because:
> 
> - though resize is supported by skb array, in multiqueue mode, it's
>   not easy to recover from a partial success of queue resizing.
> - tx_queue_len is a user visible feature.
> 
> Signed-off-by: Jason Wang 

I still think it's wrong to add a new feature for this.
For example, why 256 entries?
Queue len is user visible but it's there precisely for this
reason so people can tune queue for workload.

Would it help to have ptr_ring_resize that gets an array of
rings and resizes them both to same length?

> ---
> - The patch is based on [PATCH v8 0/5] skb_array: array based FIFO for skbs
> 
> Changes from V1:
> - switch to use skb array instead of a customized circular buffer
> - add non-blocking support
> - rename .peek to .peek_len
> - drop lockless peeking since tests show only a very minor improvement
> ---
>  drivers/net/tun.c   | 138 
> 
>  drivers/vhost/net.c |  16 -
>  include/linux/net.h |   1 +
>  include/uapi/linux/if_tun.h |   1 +
>  4 files changed, 143 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index e16487c..b22e475 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -71,6 +71,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/skb_array.h>
>  
>  #include 
>  
> @@ -130,6 +131,7 @@ struct tap_filter {
>  #define MAX_TAP_FLOWS  4096
>  
>  #define TUN_FLOW_EXPIRE (3 * HZ)
> +#define TUN_RING_SIZE 256
>  
>  struct tun_pcpu_stats {
>   u64 rx_packets;
> @@ -167,6 +169,7 @@ struct tun_file {
>   };
>   struct list_head next;
>   struct tun_struct *detached;
> + struct skb_array tx_array;
>  };
>  
>  struct tun_flow_entry {
> @@ -513,8 +516,15 @@ static struct tun_struct *tun_enable_queue(struct 
> tun_file *tfile)
>   return tun;
>  }
>  
> -static void tun_queue_purge(struct tun_file *tfile)
> +static void tun_queue_purge(struct tun_struct *tun, struct tun_file *tfile)
>  {
> + struct sk_buff *skb;
> +
> + if (tun->flags & IFF_TX_ARRAY) {
> + while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
> + kfree_skb(skb);
> + }
> +
>   skb_queue_purge(&tfile->sk.sk_receive_queue);
>   skb_queue_purge(&tfile->sk.sk_error_queue);
>  }
> @@ -545,7 +555,7 @@ static void __tun_detach(struct tun_file *tfile, bool 
> clean)
>   synchronize_net();
>   tun_flow_delete_by_queue(tun, tun->numqueues + 1);
>   /* Drop read queue */
> - tun_queue_purge(tfile);
> + tun_queue_purge(tun, tfile);
>   tun_set_real_num_queues(tun);
>   } else if (tfile->detached && clean) {
>   tun = tun_enable_queue(tfile);
> @@ -560,6 +570,8 @@ static void __tun_detach(struct tun_file *tfile, bool 
> clean)
>   tun->dev->reg_state == NETREG_REGISTERED)
>   unregister_netdevice(tun->dev);
>   }
> + if (tun && tun->flags & IFF_TX_ARRAY)
> + skb_array_cleanup(&tfile->tx_array);
>   sock_put(&tfile->sk);
>   }
>  }
> @@ -596,12 +608,12 @@ static void tun_detach_all(struct net_device *dev)
>   for (i = 0; i < n; i++) {
>   tfile = rtnl_dereference(tun->tfiles[i]);
>   /* Drop read queue */
> - tun_queue_purge(tfile);
> + tun_queue_purge(tun, tfile);
>   sock_put(&tfile->sk);
>   }
>   list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
>   tun_enable_queue(tfile);
> - tun_queue_purge(tfile);
> + tun_queue_purge(tun, tfile);
>   sock_put(&tfile->sk);
>   }
>   BUG_ON(tun->numdisabled != 0);
> @@ -642,6 +654,13 @@ static int tun_attach(struct tun_struct *tun, struct 
> file *file, bool skip_filte
>   if (!err)
>   goto out;
>   }
> +
> + if (!tfile->detached && tun->flags & IFF_TX_ARRAY &&
> + skb_array_init(&tfile->tx_array, TUN_RING_SIZE, GFP_KERNEL)) {
> + err = -ENOMEM;
> + goto out;
> + }
> +
>   tfile->queue_index = tun->numqueues;
>   

Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-16 Thread David Miller
From: Jason Wang 
Date: Wed, 15 Jun 2016 16:38:17 +0800

> We used to queue tx packets in sk_receive_queue, this is less
> efficient since it requires spinlocks to synchronize between producer
> and consumer.
> 
> This patch tries to address this by:
> 
> - introduce a new mode which will be only enabled with IFF_TX_ARRAY
>   set and switch from sk_receive_queue to a fixed size of skb
>   array with 256 entries in this mode.
> - introduce a new proto_ops peek_len which was used for peeking the
>   skb length.
> - implement a tun version of peek_len for vhost_net to use and convert
>   vhost_net to use peek_len if possible.
> 
> Pktgen test shows about 18% improvement on guest receiving pps for small
> buffers:
> 
> Before: ~122pps
> After : ~144pps
> 
> The reason why I stick to new mode is because:
> 
> - though resize is supported by skb array, in multiqueue mode, it's
>   not easy to recover from a partial success of queue resizing.
> - tx_queue_len is a user visible feature.
> 
> Signed-off-by: Jason Wang 

Michael, can you please review this, especially as this is the first
user of your new infrastructure :-)


Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-16 Thread Jason Wang


On 2016年06月15日 19:55, Jamal Hadi Salim wrote:
> On 16-06-15 07:52 AM, Jamal Hadi Salim wrote:
>> On 16-06-15 04:38 AM, Jason Wang wrote:
>>> We used to queue tx packets in sk_receive_queue, this is less
>>> efficient since it requires spinlocks to synchronize between producer
>> So this is more exercising the skb array improvements. For tun
>> it would be useful to see general performance numbers on user/kernel
>> crossing (i.e tun read/write).
>> If you have the cycles can you run such tests?
>>
> Ignore my message - you are running pktgen from a VM towards the host.

Actually it was the reverse: the tests were done from an external host to the VM.

Thanks

> So the numbers you posted are what i was interested in.
> Thanks for the good work.
>
> cheers,
> jamal
>



Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-15 Thread Jamal Hadi Salim
On 16-06-15 07:52 AM, Jamal Hadi Salim wrote:
> On 16-06-15 04:38 AM, Jason Wang wrote:
>> We used to queue tx packets in sk_receive_queue, this is less
>> efficient since it requires spinlocks to synchronize between producer

> 
> So this is more exercising the skb array improvements. For tun
> it would be useful to see general performance numbers on user/kernel
> crossing (i.e tun read/write).
> If you have the cycles can you run such tests?
> 

Ignore my message - you are running pktgen from a VM towards the host.
So the numbers you posted are what i was interested in.
Thanks for the good work.

cheers,
jamal



Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-15 Thread Jamal Hadi Salim
On 16-06-15 04:38 AM, Jason Wang wrote:
> We used to queue tx packets in sk_receive_queue, this is less
> efficient since it requires spinlocks to synchronize between producer
> and consumer.
> 
> This patch tries to address this by:
> 
> - introduce a new mode which will be only enabled with IFF_TX_ARRAY
>set and switch from sk_receive_queue to a fixed size of skb
>array with 256 entries in this mode.
> - introduce a new proto_ops peek_len which was used for peeking the
>skb length.
> - implement a tun version of peek_len for vhost_net to use and convert
>vhost_net to use peek_len if possible.
> 
> Pktgen test shows about 18% improvement on guest receiving pps for small
> buffers:
> 
> Before: ~122pps
> After : ~144pps
> 

So this is more exercising the skb array improvements. For tun
it would be useful to see general performance numbers on user/kernel
crossing (i.e tun read/write).
If you have the cycles can you run such tests?

cheers,
jamal





Re: [PATCH net-next V2] tun: introduce tx skb ring

2016-06-15 Thread kbuild test robot
Hi,

[auto build test ERROR on net-next/master]

url:
https://github.com/0day-ci/linux/commits/Jason-Wang/tun-introduce-tx-skb-ring/20160615-164041
config: x86_64-randconfig-s2-06151732 (attached as .config)
compiler: gcc-6 (Debian 6.1.1-1) 6.1.1 20160430
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All errors (new ones prefixed by >>):

>> drivers/net/tun.c:74:29: fatal error: linux/skb_array.h: No such file or 
>> directory
#include <linux/skb_array.h>
^
   compilation terminated.

vim +74 drivers/net/tun.c

68  #include 
69  #include 
70  #include 
71  #include 
72  #include 
73  #include 
  > 74  #include <linux/skb_array.h>
75  
76  #include 
77  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


[PATCH net-next V2] tun: introduce tx skb ring

2016-06-15 Thread Jason Wang
We used to queue tx packets in sk_receive_queue, this is less
efficient since it requires spinlocks to synchronize between producer
and consumer.

This patch tries to address this by:

- introduce a new mode which will be only enabled with IFF_TX_ARRAY
  set and switch from sk_receive_queue to a fixed size of skb
  array with 256 entries in this mode.
- introduce a new proto_ops peek_len which was used for peeking the
  skb length.
- implement a tun version of peek_len for vhost_net to use and convert
  vhost_net to use peek_len if possible.

Pktgen test shows about 18% improvement on guest receiving pps for small
buffers:

Before: ~122pps
After : ~144pps

The reason why I stick to new mode is because:

- though resize is supported by skb array, in multiqueue mode, it's
  not easy to recover from a partial success of queue resizing.
- tx_queue_len is a user visible feature.

Signed-off-by: Jason Wang 
---
- The patch is based on [PATCH v8 0/5] skb_array: array based FIFO for skbs

Changes from V1:
- switch to use skb array instead of a customized circular buffer
- add non-blocking support
- rename .peek to .peek_len
- drop lockless peeking since tests show only a very minor improvement
---
 drivers/net/tun.c   | 138 
 drivers/vhost/net.c |  16 -
 include/linux/net.h |   1 +
 include/uapi/linux/if_tun.h |   1 +
 4 files changed, 143 insertions(+), 13 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e16487c..b22e475 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -71,6 +71,7 @@
 #include 
 #include 
 #include 
+#include <linux/skb_array.h>
 
 #include 
 
@@ -130,6 +131,7 @@ struct tap_filter {
 #define MAX_TAP_FLOWS  4096
 
 #define TUN_FLOW_EXPIRE (3 * HZ)
+#define TUN_RING_SIZE 256
 
 struct tun_pcpu_stats {
u64 rx_packets;
@@ -167,6 +169,7 @@ struct tun_file {
};
struct list_head next;
struct tun_struct *detached;
+   struct skb_array tx_array;
 };
 
 struct tun_flow_entry {
@@ -513,8 +516,15 @@ static struct tun_struct *tun_enable_queue(struct tun_file 
*tfile)
return tun;
 }
 
-static void tun_queue_purge(struct tun_file *tfile)
+static void tun_queue_purge(struct tun_struct *tun, struct tun_file *tfile)
 {
+   struct sk_buff *skb;
+
+   if (tun->flags & IFF_TX_ARRAY) {
+   while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
+   kfree_skb(skb);
+   }
+
skb_queue_purge(&tfile->sk.sk_receive_queue);
skb_queue_purge(&tfile->sk.sk_error_queue);
 }
@@ -545,7 +555,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
synchronize_net();
tun_flow_delete_by_queue(tun, tun->numqueues + 1);
/* Drop read queue */
-   tun_queue_purge(tfile);
+   tun_queue_purge(tun, tfile);
tun_set_real_num_queues(tun);
} else if (tfile->detached && clean) {
tun = tun_enable_queue(tfile);
@@ -560,6 +570,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->dev->reg_state == NETREG_REGISTERED)
unregister_netdevice(tun->dev);
}
+   if (tun && tun->flags & IFF_TX_ARRAY)
+   skb_array_cleanup(&tfile->tx_array);
sock_put(&tfile->sk);
}
 }
@@ -596,12 +608,12 @@ static void tun_detach_all(struct net_device *dev)
for (i = 0; i < n; i++) {
tfile = rtnl_dereference(tun->tfiles[i]);
/* Drop read queue */
-   tun_queue_purge(tfile);
+   tun_queue_purge(tun, tfile);
sock_put(&tfile->sk);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
tun_enable_queue(tfile);
-   tun_queue_purge(tfile);
+   tun_queue_purge(tun, tfile);
sock_put(&tfile->sk);
}
BUG_ON(tun->numdisabled != 0);
@@ -642,6 +654,13 @@ static int tun_attach(struct tun_struct *tun, struct file 
*file, bool skip_filte
if (!err)
goto out;
}
+
+   if (!tfile->detached && tun->flags & IFF_TX_ARRAY &&
+   skb_array_init(&tfile->tx_array, TUN_RING_SIZE, GFP_KERNEL)) {
+   err = -ENOMEM;
+   goto out;
+   }
+
tfile->queue_index = tun->numqueues;
tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
rcu_assign_pointer(tfile->tun, tun);
@@ -891,8 +910,13 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, 
struct net_device *dev)
 
nf_reset(skb);
 
-   /* Enqueue packet */
-   skb_queue_tail(>socket.sk->sk_receive_queue, skb);
+   if (tun->flags & IFF_TX_ARRAY) {
+   if (skb_array_produce(>tx_array, skb))
+   goto drop;
+   } else {
+   /* Enqueue packet */
+   skb_queue_tail(>socket.sk->sk_receive_queue,