This patch adds a simple flow director to the tun/tap device. It is just a
page that contains the hash-to-queue mapping, which can be changed by
user space. The backend (tap/macvtap) queries this table to get the
desired queue for a packet when it sends packets to userspace.

The page address is set through a new ioctl, TUNSETFD, and the page
stays pinned until the device exits or another page is specified.

Signed-off-by: Jason Wang <[email protected]>
---
 drivers/net/tun.c      |   63 ++++++++++++++++++++++++++++++++++++++++--------
 include/linux/if_tun.h |   10 ++++++++
 2 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7d22b4b..2efaf81 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -64,6 +64,7 @@
 #include <linux/nsproxy.h>
 #include <linux/virtio_net.h>
 #include <linux/rcupdate.h>
+#include <linux/highmem.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
@@ -109,6 +110,7 @@ struct tap_filter {
 };
 
 #define MAX_TAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)
+#define TAP_HASH_MASK  0xFF
 
 struct tun_file {
        struct sock sk;
@@ -128,6 +130,7 @@ struct tun_sock;
 
 struct tun_struct {
        struct tun_file         *tfiles[MAX_TAP_QUEUES];
+       struct page             *fd_page[1];
        unsigned int            numqueues;
        unsigned int            flags;
        uid_t                   owner;
@@ -156,7 +159,7 @@ static struct tun_file *tun_get_queue(struct net_device 
*dev,
        struct tun_struct *tun = netdev_priv(dev);
        struct tun_file *tfile = NULL;
        int numqueues = tun->numqueues;
-       __u32 rxq;
+       __u32 rxq, rxhash;
 
        BUG_ON(!rcu_read_lock_held());
 
@@ -168,6 +171,22 @@ static struct tun_file *tun_get_queue(struct net_device 
*dev,
                goto out;
        }
 
+       rxhash = skb_get_rxhash(skb);
+       if (rxhash) {
+               if (tun->fd_page[0]) {
+                       u16 *table = kmap_atomic(tun->fd_page[0]);
+                       rxq = table[rxhash & TAP_HASH_MASK];
+                       kunmap_atomic(table);
+                       if (rxq < numqueues) {
+                               tfile = rcu_dereference(tun->tfiles[rxq]);
+                               goto out;
+                       }
+               }
+               rxq = ((u64)rxhash * numqueues) >> 32;
+               tfile = rcu_dereference(tun->tfiles[rxq]);
+               goto out;
+       }
+
        if (likely(skb_rx_queue_recorded(skb))) {
                rxq = skb_get_rx_queue(skb);
 
@@ -178,14 +197,6 @@ static struct tun_file *tun_get_queue(struct net_device 
*dev,
                goto out;
        }
 
-       /* Check if we can use flow to select a queue */
-       rxq = skb_get_rxhash(skb);
-       if (rxq) {
-               u32 idx = ((u64)rxq * numqueues) >> 32;
-               tfile = rcu_dereference(tun->tfiles[idx]);
-               goto out;
-       }
-
        tfile = rcu_dereference(tun->tfiles[0]);
 out:
        return tfile;
@@ -1020,6 +1031,14 @@ out:
        return ret;
 }
 
+static void tun_destructor(struct net_device *dev)
+{
+       struct tun_struct *tun = netdev_priv(dev);
+       if (tun->fd_page[0])
+               put_page(tun->fd_page[0]);
+       free_netdev(dev);
+}
+
 static void tun_setup(struct net_device *dev)
 {
        struct tun_struct *tun = netdev_priv(dev);
@@ -1028,7 +1047,7 @@ static void tun_setup(struct net_device *dev)
        tun->group = -1;
 
        dev->ethtool_ops = &tun_ethtool_ops;
-       dev->destructor = free_netdev;
+       dev->destructor = tun_destructor;
 }
 
 /* Trivial set of netlink ops to allow deleting tun or tap
@@ -1230,6 +1249,7 @@ static int tun_set_iff(struct net *net, struct file 
*file, struct ifreq *ifr)
                tun = netdev_priv(dev);
                tun->dev = dev;
                tun->flags = flags;
+               tun->fd_page[0] = NULL;
 
                security_tun_dev_post_create(&tfile->sk);
 
@@ -1353,6 +1373,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
        struct net_device *dev = NULL;
        void __user* argp = (void __user*)arg;
        struct ifreq ifr;
+       struct tun_fd tfd;
        int ret;
 
        if (cmd == TUNSETIFF || cmd == TUNATTACHQUEUE || _IOC_TYPE(cmd) == 0x89)
@@ -1364,7 +1385,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
                 * This is needed because we never checked for invalid flags on
                 * TUNSETIFF. */
                return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-                               IFF_VNET_HDR | IFF_MULTI_QUEUE | IFF_RXHASH,
+                               IFF_VNET_HDR | IFF_MULTI_QUEUE | IFF_RXHASH |
+                               IFF_FD,
                                (unsigned int __user*)argp);
        }
 
@@ -1476,6 +1498,25 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
                ret = set_offload(tun, arg);
                break;
 
+       case TUNSETFD:
+               if (copy_from_user(&tfd, argp, sizeof(tfd)))
+                       ret = -EFAULT;
+               else {
+                       if (tun->fd_page[0]) {
+                               put_page(tun->fd_page[0]);
+                               tun->fd_page[0] = NULL;
+                       }
+
+                       /* put_page() in tun_destructor() */
+                       if (get_user_pages_fast(tfd.addr, 1, 0,
+                                               &tun->fd_page[0]) != 1)
+                               ret = -EFAULT;
+                       else
+                               ret = 0;
+               }
+
+               break;
+
        case SIOCGIFHWADDR:
                /* Get hw address */
                memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index a1f6f3f..726731d 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -36,6 +36,8 @@
 #define TUN_VNET_HDR   0x0200
 #define TUN_TAP_MQ      0x0400
 
+struct tun_fd;
+
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
 #define TUNSETDEBUG   _IOW('T', 201, int) 
@@ -56,6 +58,7 @@
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
 #define TUNATTACHQUEUE  _IOW('T', 217, int)
 #define TUNDETACHQUEUE  _IOW('T', 218, int)
+#define TUNSETFD        _IOW('T', 219, struct tun_fd)
 
 
 /* TUNSETIFF ifr flags */
@@ -67,6 +70,7 @@
 #define IFF_TUN_EXCL   0x8000
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_RXHASH      0x0200
+#define IFF_FD          0x0400
 
 /* Features for GSO (TUNSETOFFLOAD). */
 #define TUN_F_CSUM     0x01    /* You can hand me unchecksummed packets. */
@@ -97,6 +101,12 @@ struct tun_filter {
        __u8   addr[0][ETH_ALEN];
 };
 
+/* Programmable flow director */
+struct tun_fd {
+       unsigned long addr;
+       size_t size;
+};
+
 #ifdef __KERNEL__
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);

_______________________________________________
Virtualization mailing list
[email protected]
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to