This patch implements a TUN-specific msg_control:

#define TUN_MSG_UBUF 1
#define TUN_MSG_PTR  2
struct tun_msg_ctl {
       int type;
       void *ptr;
};

The first supported type (TUN_MSG_UBUF) is ubuf, which is already used by
the vhost_net zerocopy code. The second (TUN_MSG_PTR) is an XDP buff, which
allows vhost_net to pass an XDP buff to TUN. This can be used to implement
accepting an array of XDP buffs from vhost_net in the following patches.

Signed-off-by: Jason Wang <jasow...@redhat.com>
---
 drivers/net/tun.c      | 91 +++++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/vhost/net.c    | 21 ++++++++++--
 include/linux/if_tun.h |  7 ++++
 3 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2560378..b586b3f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2387,18 +2387,134 @@ static void tun_sock_write_space(struct sock *sk)
        kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
 }
 
+/*
+ * Inject one packet, handed over as an xdp_buff, into the network stack.
+ *
+ * The buffer starts (at xdp->data_hard_start) with an int holding the
+ * buffer size given to build_skb(), immediately followed by the packet's
+ * struct virtio_net_hdr; the frame occupies [xdp->data, xdp->data_end).
+ *
+ * If an XDP program is attached, non-GSO frames are run through it via
+ * tun_do_xdp() and only XDP_PASS frames continue; GSO frames are instead
+ * converted to an skb first and run through do_xdp_generic().
+ *
+ * Returns 0 when the packet was delivered or taken by XDP, -ENOMEM if
+ * the skb could not be built, -EINVAL for a bad virtio-net header, or the
+ * error reported by tun_do_xdp().
+ */
+static int tun_xdp_one(struct tun_struct *tun,
+                      struct tun_file *tfile,
+                      struct xdp_buff *xdp)
+{
+       struct virtio_net_hdr *gso = xdp->data_hard_start + sizeof(int);
+       struct tun_pcpu_stats *stats;
+       struct bpf_prog *xdp_prog;
+       struct sk_buff *skb = NULL;
+       u32 rxhash = 0, act;
+       int buflen = *(int *)xdp->data_hard_start;
+       unsigned int len;
+       int err = 0;
+       bool skb_xdp = false;
+
+       preempt_disable();
+       rcu_read_lock();
+
+       xdp_prog = rcu_dereference(tun->xdp_prog);
+       if (xdp_prog) {
+               if (gso->gso_type) {
+                       /* GSO frames are deferred to generic XDP. */
+                       skb_xdp = true;
+                       goto build;
+               }
+               xdp_set_data_meta_invalid(xdp);
+               xdp->rxq = &tfile->xdp_rxq;
+               act = tun_do_xdp(tun, tfile, xdp_prog, xdp, &err);
+               if (err)
+                       goto out;
+               if (act != XDP_PASS)
+                       goto out;
+       }
+
+build:
+       skb = build_skb(xdp->data_hard_start, buflen);
+       if (!skb) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       skb_reserve(skb, xdp->data - xdp->data_hard_start);
+       skb_put(skb, xdp->data_end - xdp->data);
+
+       if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
+               this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
+               kfree_skb(skb);
+               err = -EINVAL;
+               goto out;
+       }
+
+       skb->protocol = eth_type_trans(skb, tun->dev);
+       skb_reset_network_header(skb);
+       skb_probe_transport_header(skb, 0);
+
+       /*
+        * Run generic XDP only once the skb has its length and protocol
+        * set. The verdict is kept out of err: XDP_PASS is a positive value
+        * and must not reach the caller as a return code.
+        * NOTE(review): assumes do_xdp_generic() disposes of the skb for any
+        * other verdict - confirm.
+        */
+       if (skb_xdp) {
+               act = do_xdp_generic(xdp_prog, skb);
+               if (act != XDP_PASS)
+                       goto out;
+       }
+
+       if (!rcu_dereference(tun->steering_prog))
+               rxhash = __skb_get_hash_symmetric(skb);
+
+       /* The stack owns the skb after netif_receive_skb(); save len first. */
+       len = skb->len;
+       netif_receive_skb(skb);
+
+       stats = get_cpu_ptr(tun->pcpu_stats);
+       u64_stats_update_begin(&stats->syncp);
+       stats->rx_packets++;
+       stats->rx_bytes += len;
+       u64_stats_update_end(&stats->syncp);
+       put_cpu_ptr(stats);
+
+       if (rxhash)
+               tun_flow_update(tun, rxhash, tfile);
+
+out:
+       rcu_read_unlock();
+       preempt_enable();
+
+       return err;
+}
+
 static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
        int ret;
        struct tun_file *tfile = container_of(sock, struct tun_file, socket);
        struct tun_struct *tun = tun_get(tfile);
+       struct tun_msg_ctl *ctl = m->msg_control;
 
        if (!tun)
                return -EBADFD;
 
-       ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
+       /* TUN_MSG_PTR carries a ready-made buffer; skip tun_get_user(). */
+       if (ctl && ctl->type == TUN_MSG_PTR) {
+               ret = tun_xdp_one(tun, tfile, ctl->ptr);
+               if (!ret)
+                       ret = total_len;
+               goto out;
+       }
+
+       ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
                           m->msg_flags & MSG_DONTWAIT,
                           m->msg_flags & MSG_MORE);
+out:
        tun_put(tun);
        return ret;
 }
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1209e84..0d84de6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -117,6 +117,7 @@ struct vhost_net_virtqueue {
        struct vhost_net_ubuf_ref *ubufs;
        struct ptr_ring *rx_ring;
        struct vhost_net_buf rxq;
+       struct xdp_buff xdp[VHOST_RX_BATCH];
 };
 
 struct vhost_net {
@@ -570,6 +571,7 @@ static void handle_tx_copy(struct vhost_net *net)
 {
        struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
        struct vhost_virtqueue *vq = &nvq->vq;
+       struct xdp_buff xdp;
        unsigned out, in;
        int head;
        struct msghdr msg = {
@@ -584,6 +586,7 @@ static void handle_tx_copy(struct vhost_net *net)
        size_t hdr_size;
        struct socket *sock;
        struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
+       struct tun_msg_ctl ctl;
        int sent_pkts = 0;
        s16 nheads = 0;
 
@@ -628,6 +631,14 @@ static void handle_tx_copy(struct vhost_net *net)
                vq->heads[nheads].id = cpu_to_vhost32(vq, head);
                vq->heads[nheads].len = 0;
 
+               err = vhost_net_build_xdp(nvq, &msg.msg_iter, &xdp);
+               if (!err) {
+                       ctl.type = TUN_MSG_PTR;
+                       ctl.ptr = &xdp;
+                       msg.msg_control = &ctl;
+               } else
+                       msg.msg_control = NULL;
+
                total_len += len;
                if (total_len < VHOST_NET_WEIGHT &&
                    vhost_has_more_pkts(net, vq)) {
@@ -734,16 +745,21 @@ static void handle_tx_zerocopy(struct vhost_net *net)
                /* use msg_control to pass vhost zerocopy ubuf info to skb */
                if (zcopy_used) {
                        struct ubuf_info *ubuf;
+                       struct tun_msg_ctl ctl;
+
                        ubuf = nvq->ubuf_info + nvq->upend_idx;
 
+                       ctl.type = TUN_MSG_UBUF;
+                       ctl.ptr = ubuf;
+
                        vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
                        vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
                        ubuf->callback = vhost_zerocopy_callback;
                        ubuf->ctx = nvq->ubufs;
                        ubuf->desc = nvq->upend_idx;
                        refcount_set(&ubuf->refcnt, 1);
-                       msg.msg_control = ubuf;
-                       msg.msg_controllen = sizeof(ubuf);
+                       msg.msg_control = &ctl;
+                       msg.msg_controllen = sizeof(ctl);
                        ubufs = nvq->ubufs;
                        atomic_inc(&ubufs->refcount);
                        nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
@@ -751,6 +767,7 @@ static void handle_tx_zerocopy(struct vhost_net *net)
                        msg.msg_control = NULL;
                        ubufs = NULL;
                }
+
                total_len += len;
                if (total_len < VHOST_NET_WEIGHT &&
                    vhost_has_more_pkts(net, vq)) {
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3d2996d..ba46dce 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -19,6 +19,13 @@
 
 #define TUN_XDP_FLAG 0x1UL
 
+#define TUN_MSG_UBUF 1 /* ptr is the zerocopy struct ubuf_info */
+#define TUN_MSG_PTR  2 /* ptr is a struct xdp_buff */
+struct tun_msg_ctl {
+       int type; /* one of the TUN_MSG_* values above */
+       void *ptr; /* payload; how it is interpreted depends on type */
+};
+
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
-- 
2.7.4

Reply via email to