On Thu, Sep 06, 2018 at 12:05:23PM +0800, Jason Wang wrote:
> This patch introduces a new tun/tap-specific msg_control:
> 
> #define TUN_MSG_UBUF 1
> #define TUN_MSG_PTR  2
> struct tun_msg_ctl {
>        int type;
>        void *ptr;
> };
> 
> This allows us to pass different kinds of msg_control through
> sendmsg(). The first supported type is ubuf (TUN_MSG_UBUF), which will
> be used by the existing vhost_net zerocopy code. The second is an XDP
> buff, which allows vhost_net to pass an XDP buff to TUN. This could be
> used to implement accepting an array of XDP buffs from vhost_net in
> the following patches.
> 
> Signed-off-by: Jason Wang <[email protected]>

At this point, do we want to just add a new sockopt for tap's
benefit? That seems cleaner than (ab)using sendmsg.

> ---
>  drivers/net/tap.c      | 18 ++++++++++++------
>  drivers/net/tun.c      |  6 +++++-
>  drivers/vhost/net.c    |  7 +++++--
>  include/linux/if_tun.h |  7 +++++++
>  4 files changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> index f0f7cd977667..7996ed7cbf18 100644
> --- a/drivers/net/tap.c
> +++ b/drivers/net/tap.c
> @@ -619,7 +619,7 @@ static inline struct sk_buff *tap_alloc_skb(struct sock 
> *sk, size_t prepad,
>  #define TAP_RESERVE HH_DATA_OFF(ETH_HLEN)
>  
>  /* Get packet from user space buffer */
> -static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m,
> +static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
>                           struct iov_iter *from, int noblock)
>  {
>       int good_linear = SKB_MAX_HEAD(TAP_RESERVE);
> @@ -663,7 +663,7 @@ static ssize_t tap_get_user(struct tap_queue *q, struct 
> msghdr *m,
>       if (unlikely(len < ETH_HLEN))
>               goto err;
>  
> -     if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
> +     if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
>               struct iov_iter i;
>  
>               copylen = vnet_hdr.hdr_len ?
> @@ -724,11 +724,11 @@ static ssize_t tap_get_user(struct tap_queue *q, struct 
> msghdr *m,
>       tap = rcu_dereference(q->tap);
>       /* copy skb_ubuf_info for callback when skb has no error */
>       if (zerocopy) {
> -             skb_shinfo(skb)->destructor_arg = m->msg_control;
> +             skb_shinfo(skb)->destructor_arg = msg_control;
>               skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
>               skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
> -     } else if (m && m->msg_control) {
> -             struct ubuf_info *uarg = m->msg_control;
> +     } else if (msg_control) {
> +             struct ubuf_info *uarg = msg_control;
>               uarg->callback(uarg, false);
>       }
>  
> @@ -1150,7 +1150,13 @@ static int tap_sendmsg(struct socket *sock, struct 
> msghdr *m,
>                      size_t total_len)
>  {
>       struct tap_queue *q = container_of(sock, struct tap_queue, sock);
> -     return tap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT);
> +     struct tun_msg_ctl *ctl = m->msg_control;
> +
> +     if (ctl && ctl->type != TUN_MSG_UBUF)
> +             return -EINVAL;
> +
> +     return tap_get_user(q, ctl ? ctl->ptr : NULL, &m->msg_iter,
> +                         m->msg_flags & MSG_DONTWAIT);
>  }
>  
>  static int tap_recvmsg(struct socket *sock, struct msghdr *m,
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index ff1cbf3ebd50..c839a4bdcbd9 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -2429,11 +2429,15 @@ static int tun_sendmsg(struct socket *sock, struct 
> msghdr *m, size_t total_len)
>       int ret;
>       struct tun_file *tfile = container_of(sock, struct tun_file, socket);
>       struct tun_struct *tun = tun_get(tfile);
> +     struct tun_msg_ctl *ctl = m->msg_control;
>  
>       if (!tun)
>               return -EBADFD;
>  
> -     ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
> +     if (ctl && ctl->type != TUN_MSG_UBUF)
> +             return -EINVAL;
> +
> +     ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
>                          m->msg_flags & MSG_DONTWAIT,
>                          m->msg_flags & MSG_MORE);
>       tun_put(tun);
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 4e656f89cb22..fb01ce6d981c 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -620,6 +620,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, 
> struct socket *sock)
>               .msg_controllen = 0,
>               .msg_flags = MSG_DONTWAIT,
>       };
> +     struct tun_msg_ctl ctl;
>       size_t len, total_len = 0;
>       int err;
>       struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
> @@ -664,8 +665,10 @@ static void handle_tx_zerocopy(struct vhost_net *net, 
> struct socket *sock)
>                       ubuf->ctx = nvq->ubufs;
>                       ubuf->desc = nvq->upend_idx;
>                       refcount_set(&ubuf->refcnt, 1);
> -                     msg.msg_control = ubuf;
> -                     msg.msg_controllen = sizeof(ubuf);
> +                     msg.msg_control = &ctl;
> +                     ctl.type = TUN_MSG_UBUF;
> +                     ctl.ptr = ubuf;
> +                     msg.msg_controllen = sizeof(ctl);
>                       ubufs = nvq->ubufs;
>                       atomic_inc(&ubufs->refcount);
>                       nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> index 3d2996dc7d85..ba46dced1f38 100644
> --- a/include/linux/if_tun.h
> +++ b/include/linux/if_tun.h
> @@ -19,6 +19,13 @@
>  
>  #define TUN_XDP_FLAG 0x1UL
>  
> +#define TUN_MSG_UBUF 1
> +#define TUN_MSG_PTR  2

Looks like TUN_MSG_PTR should be pushed out to a follow-up patch?

> +struct tun_msg_ctl {
> +     int type;
> +     void *ptr;
> +};
> +

type actually includes a size. Why not two short fields then?


>  #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
>  struct socket *tun_get_socket(struct file *);
>  struct ptr_ring *tun_get_tx_ring(struct file *file);
> -- 
> 2.17.1
_______________________________________________
Virtualization mailing list
[email protected]
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to