Re: [PATCH net-next RFC 2/2] tun: enable napi_gro_frags() for TUN/TAP driver

2017-09-06 Thread Eric Dumazet
On Tue, 2017-09-05 at 15:35 -0700, Petar Penkov wrote:
> Add a TUN/TAP receive mode that exercises the napi_gro_frags()
> interface. This mode is available only in TAP mode, as the interface
> expects packets with Ethernet headers.
> 


Hi Petar, thanks a lot for this work.

I must confess I have to retract one piece of feedback I gave while
reviewing your patches.


> + local_bh_disable();
> + data = napi_alloc_frag(fragsz);
> + local_bh_enable();
> + if (!data) {
> + err = -ENOMEM;
> + goto free;
> + }
> +
> + page = virt_to_page(data);
> + offset = offset_in_page(data);

These two lines above indeed trigger too many problems in the kernel.
(Like the one you tried to cover here
https://patchwork.kernel.org/patch/9927927/ )

For your next submission, please use the code you originally had:

page = virt_to_head_page(data);
offset = data - page_address(page);


Thanks !




[PATCH net-next RFC 2/2] tun: enable napi_gro_frags() for TUN/TAP driver

2017-09-05 Thread Petar Penkov
Add a TUN/TAP receive mode that exercises the napi_gro_frags()
interface. This mode is available only in TAP mode, as the interface
expects packets with Ethernet headers.

Furthermore, packets follow the layout of the iovec_iter that was
received. The first iovec is the linear data, and every one after the
first is a fragment. If there are more fragments than the max number,
drop the packet. Additionally, invoke eth_get_headlen() to exercise flow
dissector code and to verify that the header resides in the linear data.

The napi_gro_frags() mode requires setting the IFF_NAPI_FRAGS option.
This is imposed because this mode is intended for testing via tools like
syzkaller and packetdrill, and the increased flexibility it provides can
introduce security vulnerabilities.

Signed-off-by: Petar Penkov 
Cc: Eric Dumazet 
Cc: Mahesh Bandewar 
Cc: Willem de Bruijn 
Cc: da...@davemloft.net
Cc: ppen...@stanford.edu
---
 drivers/net/tun.c   | 135 ++--
 include/uapi/linux/if_tun.h |   1 +
 2 files changed, 130 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d5c824e3ec42..2ba9809ab6cd 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -75,6 +75,7 @@
 #include <linux/skb_array.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
+#include <linux/mutex.h>
 
 #include <linux/uaccess.h>
 
@@ -120,8 +121,15 @@ do { \
 #define TUN_VNET_LE 0x8000
 #define TUN_VNET_BE 0x4000
 
+#if IS_ENABLED(CONFIG_TUN_NAPI)
+#define TUN_FEATURES_EXTRA IFF_NAPI_FRAGS
+#else
+#define TUN_FEATURES_EXTRA 0
+#endif
+
 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
- IFF_MULTI_QUEUE)
+ IFF_MULTI_QUEUE | TUN_FEATURES_EXTRA)
+
 #define GOODCOPY_LEN 128
 
 #define FLT_EXACT_COUNT 8
@@ -173,6 +181,7 @@ struct tun_file {
unsigned int ifindex;
};
struct napi_struct napi;
+   struct mutex napi_mutex;/* Protects access to the above napi */
struct list_head next;
struct tun_struct *detached;
struct skb_array tx_array;
@@ -276,6 +285,7 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile)
netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
   NAPI_POLL_WEIGHT);
napi_enable(&tfile->napi);
+   mutex_init(&tfile->napi_mutex);
}
 }
 
@@ -291,6 +301,11 @@ static void tun_napi_del(struct tun_file *tfile)
netif_napi_del(&tfile->napi);
 }
 
+static bool tun_napi_frags_enabled(const struct tun_struct *tun)
+{
+   return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS;
+}
+
 #ifdef CONFIG_TUN_VNET_CROSS_LE
 static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
 {
@@ -1034,7 +1049,8 @@ static void tun_poll_controller(struct net_device *dev)
 * supports polling, which enables bridge devices in virt setups to
 * still use netconsole
 * If NAPI is enabled, however, we need to schedule polling for all
-* queues.
+* queues unless we are using napi_gro_frags(), which we call in
+* process context and not in NAPI context.
 */
 
if (IS_ENABLED(CONFIG_TUN_NAPI)) {
@@ -1042,6 +1058,9 @@ static void tun_poll_controller(struct net_device *dev)
struct tun_file *tfile;
int i;
 
+   if (tun_napi_frags_enabled(tun))
+   return;
+
rcu_read_lock();
for (i = 0; i < tun->numqueues; i++) {
tfile = rcu_dereference(tun->tfiles[i]);
@@ -1264,6 +1283,64 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
return mask;
 }
 
+static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
+   size_t len,
+   const struct iov_iter *it)
+{
+   struct sk_buff *skb;
+   size_t linear;
+   int err;
+   int i;
+
+   if (it->nr_segs > MAX_SKB_FRAGS + 1)
+   return ERR_PTR(-ENOMEM);
+
+   local_bh_disable();
+   skb = napi_get_frags(&tfile->napi);
+   local_bh_enable();
+   if (!skb)
+   return ERR_PTR(-ENOMEM);
+
+   linear = iov_iter_single_seg_count(it);
+   err = __skb_grow(skb, linear);
+   if (err)
+   goto free;
+
+   skb->len = len;
+   skb->data_len = len - linear;
+   skb->truesize += skb->data_len;
+
+   for (i = 1; i < it->nr_segs; i++) {
+   size_t fragsz = it->iov[i].iov_len;
+   unsigned long offset;
+   struct page *page;
+   void *data;
+
+   if (fragsz == 0 || fragsz > PAGE_SIZE) {
+   err = -EINVAL;
+   goto free;
+   }
+
+   local_bh_disable();
+   data =