[PATCH RFC v1 2/3] net: Abstracting out common routines from veth for use by vethtap
From: Sainath Grandhi <sainath.gran...@intel.com> Abstracting out common routines for link operations in veth implementation for use by vethtap interfaces Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/veth.c | 47 +-- include/linux/if_veth.h | 9 + 2 files changed, 42 insertions(+), 14 deletions(-) create mode 100644 include/linux/if_veth.h diff --git a/drivers/net/veth.c b/drivers/net/veth.c index f5438d0..a1b370d 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #define DRV_NAME "veth" @@ -29,12 +30,6 @@ struct pcpu_vstats { struct u64_stats_sync syncp; }; -struct veth_priv { - struct net_device __rcu *peer; - atomic64_t dropped; - unsignedrequested_headroom; -}; - /* * ethtool interface */ @@ -298,13 +293,12 @@ static const struct net_device_ops veth_netdev_ops = { NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) -static void veth_setup(struct net_device *dev) +void veth_common_setup(struct net_device *dev) { ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_PHONY_HEADROOM; dev->netdev_ops = _netdev_ops; @@ -325,6 +319,12 @@ static void veth_setup(struct net_device *dev) dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; } +static void veth_setup(struct net_device *dev) +{ + veth_common_setup(dev); + dev->priv_flags |= IFF_NO_QUEUE; +} + /* * netlink interface */ @@ -465,7 +465,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, return err; } -static void veth_dellink(struct net_device *dev, struct list_head *head) +void veth_dellink(struct net_device *dev, struct list_head *head) { struct veth_priv *priv; struct net_device *peer; @@ -503,21 +503,40 @@ static struct rtnl_link_ops veth_link_ops = { .kind = DRV_NAME, .priv_size = sizeof(struct veth_priv), .setup = veth_setup, 
- .validate = veth_validate, .newlink= veth_newlink, .dellink= veth_dellink, - .policy = veth_policy, - .maxtype= VETH_INFO_MAX, - .get_link_net = veth_get_link_net, }; +int veth_link_register(struct rtnl_link_ops *ops) +{ + /* common fields */ + ops->validate = veth_validate; + ops->policy = veth_policy; + ops->maxtype = VETH_INFO_MAX; + ops->get_link_net = veth_get_link_net; + + return rtnl_link_register(ops); +} + +void veth_link_ops_init(struct rtnl_link_ops *ops) +{ + /*common fields*/ + ops->validate = veth_validate; + ops->policy = veth_policy; + ops->maxtype= VETH_INFO_MAX; + ops->get_link_net = veth_get_link_net; +} /* * init/fini */ static __init int veth_init(void) { - return rtnl_link_register(_link_ops); + int err; + + err = veth_link_register(_link_ops); + + return err; } static __exit void veth_exit(void) diff --git a/include/linux/if_veth.h b/include/linux/if_veth.h new file mode 100644 index 000..b007891 --- /dev/null +++ b/include/linux/if_veth.h @@ -0,0 +1,9 @@ +struct veth_priv { + struct net_device __rcu *peer; + atomic64_t dropped; + unsigned intrequested_headroom; +}; + +void veth_common_setup(struct net_device *dev); +void veth_dellink(struct net_device *dev, struct list_head *head); +void veth_link_ops_init(struct rtnl_link_ops *ops); -- 2.7.4
[PATCH RFC v1 3/3] vethtap: veth based tap driver
From: Sainath Grandhi <sainath.gran...@intel.com> This patch adds a tap character device driver that is based on the veth network interface, called vethtap. This patchset allows vethtap device to be created ONLY as a peer interface to a veth network interface. It can be created in the following way, ip link add veth1 type veth peer name veth2 type vethtap With this packets on veth2 can be accessed using tap user space interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 1 + drivers/net/Makefile| 2 + drivers/net/{veth.c => veth_main.c} | 33 +- drivers/net/vethtap.c | 216 include/linux/if_veth.h | 4 + 5 files changed, 255 insertions(+), 1 deletion(-) rename drivers/net/{veth.c => veth_main.c} (94%) create mode 100644 drivers/net/vethtap.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index aba0d65..265853e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -323,6 +323,7 @@ config TUN_VNET_CROSS_LE config VETH tristate "Virtual ethernet pair device" + select TAP ---help--- This device is a local ethernet tunnel. Devices are created in pairs. 
When one end receives the packet it appears on its pair and vice diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 8dff900..7c63e69 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -32,6 +32,8 @@ obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o obj-$(CONFIG_VSOCKMON) += vsockmon.o +veth-objs := veth_main.o vethtap.o + # # Networking Drivers # diff --git a/drivers/net/veth.c b/drivers/net/veth_main.c similarity index 94% rename from drivers/net/veth.c rename to drivers/net/veth_main.c index a1b370d..fc91dd7 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth_main.c @@ -359,6 +359,9 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, unsigned char name_assign_type; struct ifinfomsg *ifmp; struct net *net; + struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; + char peer_type[8]; + struct rtnl_link_ops *link_ops; /* * create and register peer first @@ -393,17 +396,38 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, name_assign_type = NET_NAME_ENUM; } + link_ops = _link_ops; + if (tbp[IFLA_LINKINFO]) { + err = rtnl_nla_parse_ifla_info(linkinfo, + nla_data(tbp[IFLA_LINKINFO]), + nla_len(tbp[IFLA_LINKINFO]), + NULL); + + if (err < 0) + return err; + + if (linkinfo[IFLA_INFO_KIND]) { + nla_strlcpy(peer_type, linkinfo[IFLA_INFO_KIND], + sizeof(peer_type)); + if (!strncmp(peer_type, "vethtap", sizeof(peer_type))) + link_ops = _link_ops; + } + } + net = rtnl_link_get_net(src_net, tbp); if (IS_ERR(net)) return PTR_ERR(net); peer = rtnl_create_link(net, ifname, name_assign_type, - _link_ops, tbp); + link_ops, tbp); if (IS_ERR(peer)) { put_net(net); return PTR_ERR(peer); } + if (!strncmp(peer_type, "vethtap", sizeof(peer_type))) + link_ops->newlink(net, peer, tbp, NULL, NULL); + if (!ifmp || !tbp[IFLA_ADDRESS]) eth_hw_addr_random(peer); @@ -536,12 +560,19 @@ static __init int veth_init(void) err = veth_link_register(_link_ops); + if (err) + goto out1; + + err = vethtap_init(); + +out1: return err; } 
static __exit void veth_exit(void) { rtnl_link_unregister(_link_ops); + vethtap_exit(); } module_init(veth_init); diff --git a/drivers/net/vethtap.c b/drivers/net/vethtap.c new file mode 100644 index 000..922b3ea --- /dev/null +++ b/drivers/net/vethtap.c @@ -0,0 +1,216 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct vethtap_dev { + struct veth_priv veth; + struct tap_devtap; +}; + +/* Variables for dealing with vethtaps device numbers. + */ +static dev_t vethtap_major; + +static const void *vethtap_net_namespace(struct device *d) +{ + struct net_device *dev = to_net_dev(d->parent); + + return dev_net(dev); +} + +static struct class vethtap_class = { + .name = "vethtap", +
[PATCH RFC v1 1/3] net: Adding API to parse IFLA_LINKINFO attribute
From: Sainath Grandhi <sainath.gran...@intel.com> Adding rtnl_nla_parse_ifla_info as an exported symbol in rtnetlink.c helps other modules to parse IFLA_LINKINFO attribute Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- include/net/rtnetlink.h | 3 +++ net/core/rtnetlink.c| 8 2 files changed, 11 insertions(+) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 21837ca..cb15ddb 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -170,6 +170,9 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, struct netlink_ext_ack *exterr); +int rtnl_nla_parse_ifla_info(struct nlattr **tb, const struct nlattr *head, +int len, struct netlink_ext_ack *exterr); + #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) #endif diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a78fd61..0784b7d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1688,6 +1688,14 @@ int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, } EXPORT_SYMBOL(rtnl_nla_parse_ifla); +int rtnl_nla_parse_ifla_info(struct nlattr **tb, const struct nlattr *head, +int len, struct netlink_ext_ack *exterr) +{ + return nla_parse(tb, IFLA_INFO_MAX, head, len, ifla_info_policy, +exterr); +} +EXPORT_SYMBOL(rtnl_nla_parse_ifla_info); + struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net; -- 2.7.4
[PATCH RFC v1 0/3] Support for tap user-space access with veth interfaces
From: Sainath Grandhi <sainath.gran...@intel.com> This patchset adds a tap device driver for the veth virtual network interface. With this implementation, a tap character interface can be added only to the peer veth interface. Adding a tap interface to veth is for use cases that forward packets between the host and VMs. This eliminates the need for an additional software bridge. This can be extended to create both the peer interfaces as tap interfaces. These patches are a step in that direction. Sainath Grandhi (3): net: Adding API to parse IFLA_LINKINFO attribute net: Abstracting out common routines from veth for use by vethtap vethtap: veth based tap driver drivers/net/Kconfig | 1 + drivers/net/Makefile| 2 + drivers/net/{veth.c => veth_main.c} | 80 ++--- drivers/net/vethtap.c | 216 include/linux/if_veth.h | 13 +++ include/net/rtnetlink.h | 3 + net/core/rtnetlink.c| 8 ++ 7 files changed, 308 insertions(+), 15 deletions(-) rename drivers/net/{veth.c => veth_main.c} (89%) create mode 100644 drivers/net/vethtap.c create mode 100644 include/linux/if_veth.h -- 2.7.4
[patch iproute2] ipvtap: Adding support for ipvtap device management
This patch adds support for managing ipvtap devices using ip link. ipvtap support was added to Linux in commit 235a9d89da976e2975b3de9afc0bed7b72557983 --- ip/iplink_ipvlan.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ip/iplink_ipvlan.c b/ip/iplink_ipvlan.c index f7735f3..153aa2f 100644 --- a/ip/iplink_ipvlan.c +++ b/ip/iplink_ipvlan.c @@ -1,4 +1,4 @@ -/* iplink_ipvlan.c IPVLAN device support +/* iplink_ipvlan.c IPVLAN/IPVTAP device support * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -90,3 +90,11 @@ struct link_util ipvlan_link_util = { .print_opt = ipvlan_print_opt, .print_help = ipvlan_print_help, }; + +struct link_util ipvtap_link_util = { + .id = "ipvtap", + .maxattr= IFLA_IPVLAN_MAX, + .parse_opt = ipvlan_parse_opt, + .print_opt = ipvlan_print_opt, + .print_help = ipvlan_print_help, +}; -- 2.7.4
[PATCHv6 0/7] Refactor macvtap to re-use tap functionality by other virtual interfaces
Tap character devices can be implemented on other virtual interfaces like ipvlan, similar to macvtap. Source code for tap functionality in macvtap can be re-used for this purpose. This patch series splits macvtap source into two modules, macvtap and tap. This patch series also includes a patch for implementing tap character device driver based on the IP-VLAN network interface, called ipvtap. These patches are tested on x86 platform. Sainath Grandhi (7): tap: Refactoring macvtap.c tap: Renaming tap related APIs, data structures, macros tap: Tap character device creation/destroy API tap: Abstract type of virtual interface from tap implementation tap: Extending tap device create/destroy APIs tap: tap as an independent module ipvtap: IP-VLAN based tap driver drivers/net/Kconfig | 20 + drivers/net/Makefile |2 + drivers/net/ipvlan/Makefile |1 + drivers/net/ipvlan/ipvlan.h |7 + drivers/net/ipvlan/ipvlan_core.c |3 +- drivers/net/ipvlan/ipvlan_main.c | 27 +- drivers/net/ipvlan/ipvtap.c | 241 +++ drivers/net/macvlan.c|2 +- drivers/net/macvtap.c| 1229 ++-- drivers/net/tap.c| 1285 ++ drivers/vhost/Kconfig|2 +- drivers/vhost/net.c |3 +- include/linux/if_macvlan.h | 17 +- include/linux/if_tap.h | 75 +++ 14 files changed, 1706 insertions(+), 1208 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c create mode 100644 drivers/net/tap.c create mode 100644 include/linux/if_tap.h -- 2.7.4
[PATCHv6 1/7] tap: Refactoring macvtap.c
macvtap module has code for tap/queue management and link management. This patch splits the code into macvtap_main.c for link management and tap.c for tap/queue management. Functionality in tap.c can be re-used for implementing tap on other virtual interfaces. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Makefile | 2 + drivers/net/macvtap_main.c | 218 +++ drivers/net/{macvtap.c => tap.c} | 204 ++-- include/linux/if_macvtap.h | 10 ++ 4 files changed, 238 insertions(+), 196 deletions(-) create mode 100644 drivers/net/macvtap_main.c rename drivers/net/{macvtap.c => tap.c} (84%) create mode 100644 include/linux/if_macvtap.h diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7336cbd..19b03a9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o +macvtap-objs := macvtap_main.o tap.o + # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c new file mode 100644 index 000..96ffa60 --- /dev/null +++ b/drivers/net/macvtap_main.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * Variables for dealing with macvtaps device numbers. 
+ */ +static dev_t macvtap_major; +#define MACVTAP_NUM_DEVS (1U << MINORBITS) + +static const void *macvtap_net_namespace(struct device *d) +{ + struct net_device *dev = to_net_dev(d->parent); + return dev_net(dev); +} + +static struct class macvtap_class = { + .name = "macvtap", + .owner = THIS_MODULE, + .ns_type = _ns_type_operations, + .namespace = macvtap_net_namespace, +}; +static struct cdev macvtap_cdev; + +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ + NETIF_F_TSO6 | NETIF_F_UFO) + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err; + + INIT_LIST_HEAD(>queue_list); + + /* Since macvlan supports all offloads by default, make +* tap support all offloads also. +*/ + vlan->tap_features = TUN_OFFLOADS; + + err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + if (err) + return err; + + /* Don't put anything that may fail after macvlan_common_newlink +* because we can't undo what it does. 
+*/ + err = macvlan_common_newlink(src_net, dev, tb, data); + if (err) { + netdev_rx_handler_unregister(dev); + return err; + } + + return 0; +} + +static void macvtap_dellink(struct net_device *dev, + struct list_head *head) +{ + netdev_rx_handler_unregister(dev); + macvtap_del_queues(dev); + macvlan_dellink(dev, head); +} + +static void macvtap_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); + dev->tx_queue_len = TUN_READQ_SIZE; +} + +static struct rtnl_link_ops macvtap_link_ops __read_mostly = { + .kind = "macvtap", + .setup = macvtap_setup, + .newlink= macvtap_newlink, + .dellink= macvtap_dellink, +}; + +static int macvtap_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct macvlan_dev *vlan; + struct device *classdev; + dev_t devt; + int err; + char tap_name[IFNAMSIZ]; + + if (dev->rtnl_link_ops != _link_ops) + return NOTIFY_DONE; + + snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); + vlan = netdev_priv(dev); + + switch (event) { + case NETDEV_REGISTER: + /* Create the device node here after the network device has +* been registered but before register_netdevice has +* finished running. +*/ + err = macvtap_get_minor(vlan); + if (err) + return notifier_from_errno(err); + + devt = MKDEV(MAJOR(macvtap_major), vlan->minor); + classdev = device_create(_class, >dev, devt, +dev, tap_name); + if (IS_ERR(classdev)) { + macvtap_free_minor(vlan); + return notifier_from_errno(PTR_E
[PATCHv6 2/7] tap: Renaming tap related APIs, data structures, macros
Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.* Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 18 +-- drivers/net/tap.c | 332 ++--- drivers/vhost/net.c| 3 +- include/linux/if_macvlan.h | 17 +-- include/linux/if_macvtap.h | 10 -- include/linux/if_tap.h | 23 6 files changed, 202 insertions(+), 201 deletions(-) delete mode 100644 include/linux/if_macvtap.h create mode 100644 include/linux/if_tap.h diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 96ffa60..548f339 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ static int macvtap_newlink(struct net *src_net, */ vlan->tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); if (err) return err; @@ -82,7 +82,7 @@ static void macvtap_dellink(struct net_device *dev, struct list_head *head) { netdev_rx_handler_unregister(dev); - macvtap_del_queues(dev); + tap_del_queues(dev); macvlan_dellink(dev, head); } @@ -121,7 +121,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = macvtap_get_minor(vlan); + err = tap_get_minor(vlan); if (err) return notifier_from_errno(err); @@ -129,7 +129,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - macvtap_free_minor(vlan); + tap_free_minor(vlan); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -144,10 +144,10 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlan->minor); device_destroy(_class, devt); - macvtap_free_minor(vlan); + tap_free_minor(vlan); break; case NETDEV_CHANGE_TX_QUEUE_LEN: - if (macvtap_queue_resize(vlan)) + if (tap_queue_resize(vlan)) return NOTIFY_BAD; break; } @@ -159,7 +159,7 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations macvtap_fops; +extern struct file_operations tap_fops; static int macvtap_init(void) { int err; @@ -169,7 +169,7 @@ static int macvtap_init(void) if (err) goto out1; - cdev_init(_cdev, _fops); + cdev_init(_cdev, _fops); err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) goto out2; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 6f6228e..15ca2d5 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -24,16 +24,16 @@ #include /* - * A macvtap queue is the central object of this driver, it connects + * A tap queue is the central object of this driver, it connects * an open character device to a macvlan interface. There can be * multiple queues on one interface, which map back to queues * implemented in hardware on the underlying device. * - * macvtap_proto is used to allocate queues through the sock allocation + * tap_proto is used to allocate queues through the sock allocation * mechanism. 
* */ -struct macvtap_queue { +struct tap_queue { struct sock sk; struct socket sock; struct socket_wq wq; @@ -47,21 +47,21 @@ struct macvtap_queue { struct skb_array skb_array; }; -#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) -#define MACVTAP_VNET_LE 0x8000 -#define MACVTAP_VNET_BE 0x4000 +#define TAP_VNET_LE 0x8000 +#define TAP_VNET_BE 0x4000 #ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { - return q->flags & MACVTAP_VNET_BE ? false : + return q->flags & TAP_VNET_BE ? false : virtio_legacy_is_little_endian(); } -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +static long tap_get_vnet_
[PATCHv6 4/7] tap: Abstract type of virtual interface from tap implementation
The macvlan object is re-structured to hold tap related elements in a separate entity, tap_dev. Upon NETDEV_REGISTER device_event, tap_dev is registered with idr and fetched again on tap_open. A few of the tap functions are modified to accept tap_dev as an argument. The tap_dev object includes callbacks to be used by the underlying virtual interface to take care of tx and rx accounting. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvlan.c | 2 +- drivers/net/macvtap_main.c | 71 +--- drivers/net/tap.c | 264 - include/linux/if_tap.h | 57 +- 4 files changed, 229 insertions(+), 165 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cbfc1be..9261722 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1525,7 +1525,6 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ - ops->priv_size = sizeof(struct macvlan_dev); ops->validate = macvlan_validate; ops->maxtype= IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -1548,6 +1547,7 @@ static struct rtnl_link_ops macvlan_link_ops = { .newlink= macvlan_newlink, .dellink= macvlan_dellink, .get_link_net = macvlan_get_link_net, + .priv_size = sizeof(struct macvlan_dev), }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 215ab7a..0238df6 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -24,6 +24,11 @@ #include #include +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_devtap; +}; + /* * Variables for dealing with macvtaps device numbers. 
*/ @@ -46,22 +51,55 @@ static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + static int macvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_dev *vlantap = netdev_priv(dev); int err; - INIT_LIST_HEAD(>queue_list); + INIT_LIST_HEAD(>tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. 
*/ - vlan->tap_features = TUN_OFFLOADS; + vlantap->tap.tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); + /* Register callbacks for rx/tx drops accounting and updating +* net_device features +*/ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, >tap); if (err) return err; @@ -74,14 +112,18 @@ static int macvtap_newlink(struct net *src_net, return err; } + vlantap->tap.dev = vlantap->vlan.dev; + return 0; } static void macvtap_dellink(struct net_device *dev, struct list_head *head) { + struct macvtap_dev *vlantap = netdev_priv(dev); + netdev_rx_handler_unregister(dev); - tap_del_queues(dev); + tap_del_queues(>tap); macvlan_dellink(dev, head); } @@ -96,13 +138,14 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .setup = macvtap_setup, .newlink= macvtap_newlink, .dellink= macvtap_dellink, + .priv_size = sizeof(struct macvtap_dev), }; static int macvtap_device_event(struct notifier_block *unused,
[PATCHv6 7/7] ipvtap: IP-VLAN based tap driver
This patch adds a tap character device driver that is based on the IP-VLAN network interface, called ipvtap. An ipvtap device can be created in the same way as an ipvlan device, using 'type ipvtap', and then accessed using the tap user space interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 13 +++ drivers/net/Makefile | 1 + drivers/net/ipvlan/Makefile | 1 + drivers/net/ipvlan/ipvlan.h | 7 ++ drivers/net/ipvlan/ipvlan_core.c | 3 +- drivers/net/ipvlan/ipvlan_main.c | 27 +++-- drivers/net/ipvlan/ipvtap.c | 241 +++ 7 files changed, 280 insertions(+), 13 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5763503..823bc2f 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -166,6 +166,19 @@ config IPVLAN To compile this driver as a module, choose M here: the module will be called ipvlan. +config IPVTAP + tristate "IP-VLAN based tap driver" + depends on IPVLAN + depends on INET + select TAP + ---help--- + This adds a specialized tap character device driver that is based + on the IP-VLAN network interface, called ipvtap. An ipvtap device + can be added in the same way as a ipvlan device, using 'type + ipvtap', and then be accessed through the tap user space interface. + + To compile this driver as a module, choose M here: the module + will be called ipvtap. 
config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7dd86ca..98ed4d9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -7,6 +7,7 @@ # obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ +obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile index df79910..8a2c64d 100644 --- a/drivers/net/ipvlan/Makefile +++ b/drivers/net/ipvlan/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_IPVLAN) += ipvlan.o +obj-$(CONFIG_IPVTAP) += ipvtap.o ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 406ae4f..800a46c 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -135,4 +135,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, u16 proto); unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, bool mcast); +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]); +void ipvlan_link_delete(struct net_device *dev, struct list_head *head); +void ipvlan_link_setup(struct net_device *dev); +int ipvlan_link_register(struct rtnl_link_ops *ops); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 8ae335d..1f3295e 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -16,7 +16,7 @@ void ipvlan_init_secret(void) net_get_random_once(_jhash_secret, sizeof(ipvlan_jhash_secret)); } -static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, unsigned int len, bool success, bool mcast) { if (likely(success)) { @@ -33,6 
+33,7 @@ static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, this_cpu_inc(ipvlan->pcpu_stats->rx_errs); } } +EXPORT_SYMBOL_GPL(ipvlan_count_rx); static u8 ipvlan_get_v6_hash(const void *iaddr) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 95b18f4..aa8575c 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -496,8 +496,8 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb, return ret; } -static int ipvlan_link_new(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port; @@ -594,8 +594,9 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan_port_destroy(phy_dev); return err; } +EXPORT_SYMBOL_GPL(ipvlan_link_new); -static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) +void ipvlan_link_delete(struct net_dev
[PATCHv6 5/7] tap: Extending tap device create/destroy APIs
Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one type of virtual interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 6 +-- drivers/net/tap.c | 118 + include/linux/if_tap.h | 4 +- 3 files changed, 102 insertions(+), 26 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 0238df6..a4bfc10 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -163,7 +163,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. */ - err = tap_get_minor(>tap); + err = tap_get_minor(macvtap_major, >tap); if (err) return notifier_from_errno(err); @@ -171,7 +171,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -186,7 +186,7 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(_class, devt); - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(>tap)) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 7d3e8b1..71bbf0b 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -99,12 +99,17 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) + +static LIST_HEAD(major_list); + struct major_info { + struct rcu_head rcu; dev_t major; struct idr minor_idr; struct mutex minor_lock; const char *device_name; -} macvtap_major; + struct list_head next; +}; #define GOODCOPY_LEN 128 @@ -385,44 +390,89 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; } -int 
tap_get_minor(struct tap_dev *tap) +static struct major_info *tap_get_major(int major) +{ + struct major_info *tap_major; + + list_for_each_entry_rcu(tap_major, _list, next) { + if (tap_major->major == major) + return tap_major; + } + + return NULL; +} + +int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; + struct major_info *tap_major; + + rcu_read_lock(); + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) { + retval = -EINVAL; + goto unlock; + } - mutex_lock(_major.minor_lock); - retval = idr_alloc(_major.minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major->minor_lock); + retval = idr_alloc(_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); + +unlock: + rcu_read_unlock(); return retval < 0 ? retval : 0; } -void tap_free_minor(struct tap_dev *tap) +void tap_free_minor(dev_t major, struct tap_dev *tap) { - mutex_lock(_major.minor_lock); + struct major_info *tap_major; + + rcu_read_lock(); + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) { + goto unlock; + } + + mutex_lock(_major->minor_lock); if (tap->minor) { - idr_remove(_major.minor_idr, tap->minor); + idr_remove(_major->minor_idr, tap->minor); tap->minor = 0; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); + +unlock: + rcu_read_unlock(); } -static struct tap_dev *dev_get_by_tap_minor(int minor) +static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; + struct major_info *tap_major; - mutex_lock(_major.minor_lock); - tap = idr_find(_major.minor_idr, minor); + rcu_read_lock(); + tap_major = tap_get_major(major); + if (!tap_major) { + tap = NULL; +
[PATCHv6 6/7] tap: tap as an independent module
This patch makes tap a separate module for other types of virtual interfaces, for example, ipvlan to use. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 7 +++ drivers/net/Makefile | 3 +-- drivers/net/{macvtap_main.c => macvtap.c} | 0 drivers/net/tap.c | 11 +++ drivers/vhost/Kconfig | 2 +- include/linux/if_tap.h| 4 ++-- 6 files changed, 22 insertions(+), 5 deletions(-) rename drivers/net/{macvtap_main.c => macvtap.c} (100%) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index a993cbe..5763503 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -135,6 +135,7 @@ config MACVTAP tristate "MAC-VLAN based tap driver" depends on MACVLAN depends on INET + select TAP help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -287,6 +288,12 @@ config TUN If you don't know what to use this for, you don't need it. +config TAP + tristate + ---help--- + This option is selected by any driver implementing tap user space + interface for a virtual interface to re-use core tap functionality. 
+ config TUN_VNET_CROSS_LE bool "Support for cross-endian vnet headers on little-endian kernels" default n diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 19b03a9..7dd86ca 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o @@ -29,8 +30,6 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o -macvtap-objs := macvtap_main.o tap.o - # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap.c similarity index 100% rename from drivers/net/macvtap_main.c rename to drivers/net/macvtap.c diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 71bbf0b..35b55a2 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -312,6 +312,7 @@ void tap_del_queues(struct tap_dev *tap) /* guarantee that any future tap_set_queue will fail */ tap->numvtaps = MAX_TAP_QUEUES; } +EXPORT_SYMBOL_GPL(tap_del_queues); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { @@ -389,6 +390,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } +EXPORT_SYMBOL_GPL(tap_handle_frame); static struct major_info *tap_get_major(int major) { @@ -428,6 +430,7 @@ int tap_get_minor(dev_t major, struct tap_dev *tap) rcu_read_unlock(); return retval < 0 ? 
retval : 0; } +EXPORT_SYMBOL_GPL(tap_get_minor); void tap_free_minor(dev_t major, struct tap_dev *tap) { @@ -449,6 +452,7 @@ void tap_free_minor(dev_t major, struct tap_dev *tap) unlock: rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(tap_free_minor); static struct tap_dev *dev_get_by_tap_file(int major, int minor) { @@ -1210,6 +1214,7 @@ int tap_queue_resize(struct tap_dev *tap) kfree(arrays); return ret; } +EXPORT_SYMBOL_GPL(tap_queue_resize); static int tap_list_add(dev_t major, const char *device_name) { @@ -1257,6 +1262,7 @@ int tap_create_cdev(struct cdev *tap_cdev, out1: return err; } +EXPORT_SYMBOL_GPL(tap_create_cdev); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { @@ -1272,3 +1278,8 @@ void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) } } } +EXPORT_SYMBOL_GPL(tap_destroy_cdev); + +MODULE_AUTHOR("Arnd Bergmann <a...@arndb.de>"); +MODULE_AUTHOR("Sainath Grandhi <sainath.gran...@intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 40764ec..cfdecea 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -1,6 +1,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" - depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) + depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) select VHOST ---help--- This kernel module can be loaded in host kernel to accelerate diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 362e71c..3482c3c 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -1,7 +1,7 @@ #ifndef _LINUX_IF_TAP_H_ #define _LINUX_IF_TAP_H_ -#if IS_ENABLED(CONFIG_MACVTAP) +#if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); #el
[PATCHv6 3/7] tap: Tap character device creation/destroy API
This patch provides tap device create/destroy APIs in tap.c. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 30 +++--- drivers/net/tap.c | 62 ++ include/linux/if_tap.h | 3 +++ 3 files changed, 63 insertions(+), 32 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 548f339..215ab7a 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -28,7 +28,6 @@ * Variables for dealing with macvtaps device numbers. */ static dev_t macvtap_major; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) static const void *macvtap_net_namespace(struct device *d) { @@ -159,57 +158,46 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations tap_fops; static int macvtap_init(void) { int err; - err = alloc_chrdev_region(_major, 0, - MACVTAP_NUM_DEVS, "macvtap"); - if (err) - goto out1; + err = tap_create_cdev(_cdev, _major, "macvtap"); - cdev_init(_cdev, _fops); - err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) - goto out2; + goto out1; err = class_register(_class); if (err) - goto out3; + goto out2; err = register_netdevice_notifier(_notifier_block); if (err) - goto out4; + goto out3; err = macvlan_link_register(_link_ops); if (err) - goto out5; + goto out4; return 0; -out5: - unregister_netdevice_notifier(_notifier_block); out4: - class_unregister(_class); + unregister_netdevice_notifier(_notifier_block); out3: - cdev_del(_cdev); + class_unregister(_class); out2: - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + tap_destroy_cdev(macvtap_major, _cdev); out1: return err; } module_init(macvtap_init); -extern struct idr minor_idr; static void macvtap_exit(void) { rtnl_link_unregister(_link_ops); unregister_netdevice_notifier(_notifier_block); class_unregister(_class); - cdev_del(_cdev); - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); - idr_destroy(_idr); + 
tap_destroy_cdev(macvtap_major, _cdev); } module_exit(macvtap_exit); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 15ca2d5..04ba978 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -123,8 +123,12 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) -static DEFINE_MUTEX(minor_lock); -DEFINE_IDR(minor_idr); +struct major_info { + dev_t major; + struct idr minor_idr; + struct mutex minor_lock; + const char *device_name; +} macvtap_major; #define GOODCOPY_LEN 128 @@ -413,26 +417,26 @@ int tap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - mutex_lock(_lock); - retval = idr_alloc(_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major.minor_lock); + retval = idr_alloc(_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { vlan->minor = retval; } else if (retval == -ENOSPC) { netdev_err(vlan->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return retval < 0 ? 
retval : 0; } void tap_free_minor(struct macvlan_dev *vlan) { - mutex_lock(_lock); + mutex_lock(_major.minor_lock); if (vlan->minor) { - idr_remove(_idr, vlan->minor); + idr_remove(_major.minor_idr, vlan->minor); vlan->minor = 0; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); } static struct net_device *dev_get_by_tap_minor(int minor) @@ -440,13 +444,13 @@ static struct net_device *dev_get_by_tap_minor(int minor) struct net_device *dev = NULL; struct macvlan_dev *vlan; - mutex_lock(_lock); - vlan = idr_find(_idr, minor); + mutex_lock(_major.minor_lock); + vlan = idr_find(_major.minor_idr, minor); if (vlan) { dev = vlan->dev; dev_hold(dev); } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return dev; } @@ -1184,3 +1188,39 @@ int tap_queue_resize(struct macvlan_dev *vlan) kfree(arrays); return ret; } + +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name) +{ + int err; + + err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); +
[PATCHv5 0/7] Refactor macvtap to re-use tap functionality by other virtual interfaces
Tap character devices can be implemented on other virtual interfaces like ipvlan, similar to macvtap. Source code for tap functionality in macvtap can be re-used for this purpose. This patch series splits macvtap source into two modules, macvtap and tap. This patch series also includes a patch for implementing tap character device driver based on the IP-VLAN network interface, called ipvtap. These patches are tested on x86 platform. Sainath Grandhi (7): tap: Refactoring macvtap.c tap: Renaming tap related APIs, data structures, macros tap: Tap character device creation/destroy API tap: Abstract type of virtual interface from tap implementation tap: Extending tap device create/destroy APIs tap: tap as an independent module ipvtap: IP-VLAN based tap driver drivers/net/Kconfig | 20 + drivers/net/Makefile |2 + drivers/net/ipvlan/Makefile |1 + drivers/net/ipvlan/ipvlan.h |7 + drivers/net/ipvlan/ipvlan_core.c |5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +- drivers/net/ipvlan/ipvtap.c | 241 drivers/net/macvlan.c|2 +- drivers/net/macvtap.c| 1229 ++-- drivers/net/tap.c| 1268 ++ drivers/vhost/Kconfig|2 +- drivers/vhost/net.c |3 +- include/linux/if_macvlan.h | 17 +- include/linux/if_tap.h | 75 +++ 14 files changed, 1690 insertions(+), 1209 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c create mode 100644 drivers/net/tap.c create mode 100644 include/linux/if_tap.h -- 2.7.4
[PATCHv5 3/7] tap: Tap character device creation/destroy API
This patch provides tap device create/destroy APIs in tap.c. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 30 +++--- drivers/net/tap.c | 62 ++ include/linux/if_tap.h | 3 +++ 3 files changed, 63 insertions(+), 32 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 548f339..215ab7a 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -28,7 +28,6 @@ * Variables for dealing with macvtaps device numbers. */ static dev_t macvtap_major; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) static const void *macvtap_net_namespace(struct device *d) { @@ -159,57 +158,46 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations tap_fops; static int macvtap_init(void) { int err; - err = alloc_chrdev_region(_major, 0, - MACVTAP_NUM_DEVS, "macvtap"); - if (err) - goto out1; + err = tap_create_cdev(_cdev, _major, "macvtap"); - cdev_init(_cdev, _fops); - err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) - goto out2; + goto out1; err = class_register(_class); if (err) - goto out3; + goto out2; err = register_netdevice_notifier(_notifier_block); if (err) - goto out4; + goto out3; err = macvlan_link_register(_link_ops); if (err) - goto out5; + goto out4; return 0; -out5: - unregister_netdevice_notifier(_notifier_block); out4: - class_unregister(_class); + unregister_netdevice_notifier(_notifier_block); out3: - cdev_del(_cdev); + class_unregister(_class); out2: - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + tap_destroy_cdev(macvtap_major, _cdev); out1: return err; } module_init(macvtap_init); -extern struct idr minor_idr; static void macvtap_exit(void) { rtnl_link_unregister(_link_ops); unregister_netdevice_notifier(_notifier_block); class_unregister(_class); - cdev_del(_cdev); - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); - idr_destroy(_idr); + 
tap_destroy_cdev(macvtap_major, _cdev); } module_exit(macvtap_exit); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 15ca2d5..04ba978 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -123,8 +123,12 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) -static DEFINE_MUTEX(minor_lock); -DEFINE_IDR(minor_idr); +struct major_info { + dev_t major; + struct idr minor_idr; + struct mutex minor_lock; + const char *device_name; +} macvtap_major; #define GOODCOPY_LEN 128 @@ -413,26 +417,26 @@ int tap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - mutex_lock(_lock); - retval = idr_alloc(_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major.minor_lock); + retval = idr_alloc(_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { vlan->minor = retval; } else if (retval == -ENOSPC) { netdev_err(vlan->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return retval < 0 ? 
retval : 0; } void tap_free_minor(struct macvlan_dev *vlan) { - mutex_lock(_lock); + mutex_lock(_major.minor_lock); if (vlan->minor) { - idr_remove(_idr, vlan->minor); + idr_remove(_major.minor_idr, vlan->minor); vlan->minor = 0; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); } static struct net_device *dev_get_by_tap_minor(int minor) @@ -440,13 +444,13 @@ static struct net_device *dev_get_by_tap_minor(int minor) struct net_device *dev = NULL; struct macvlan_dev *vlan; - mutex_lock(_lock); - vlan = idr_find(_idr, minor); + mutex_lock(_major.minor_lock); + vlan = idr_find(_major.minor_idr, minor); if (vlan) { dev = vlan->dev; dev_hold(dev); } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return dev; } @@ -1184,3 +1188,39 @@ int tap_queue_resize(struct macvlan_dev *vlan) kfree(arrays); return ret; } + +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name) +{ + int err; + + err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); +
[PATCHv5 4/7] tap: Abstract type of virtual interface from tap implementation
macvlan object is re-structured to hold tap related elements in a separate entity, tap_dev. Upon NETDEV_REGISTER device_event, tap_dev is registered with idr and fetched again on tap_open. A few of the tap functions are modified to accept tap_dev as an argument. tap_dev object includes callbacks to be used by underlying virtual interface to take care of tx and rx accounting. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvlan.c | 2 +- drivers/net/macvtap_main.c | 71 +--- drivers/net/tap.c | 264 - include/linux/if_tap.h | 57 +- 4 files changed, 229 insertions(+), 165 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 20b3fdf2..79383f9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1526,7 +1526,6 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ - ops->priv_size = sizeof(struct macvlan_dev); ops->validate = macvlan_validate; ops->maxtype= IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -1549,6 +1548,7 @@ static struct rtnl_link_ops macvlan_link_ops = { .newlink= macvlan_newlink, .dellink= macvlan_dellink, .get_link_net = macvlan_get_link_net, + .priv_size = sizeof(struct macvlan_dev), }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 215ab7a..0238df6 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -24,6 +24,11 @@ #include #include +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_devtap; +}; + /* * Variables for dealing with macvtaps device numbers. 
*/ @@ -46,22 +51,55 @@ static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + static int macvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_dev *vlantap = netdev_priv(dev); int err; - INIT_LIST_HEAD(>queue_list); + INIT_LIST_HEAD(>tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. 
*/ - vlan->tap_features = TUN_OFFLOADS; + vlantap->tap.tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); + /* Register callbacks for rx/tx drops accounting and updating +* net_device features +*/ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, >tap); if (err) return err; @@ -74,14 +112,18 @@ static int macvtap_newlink(struct net *src_net, return err; } + vlantap->tap.dev = vlantap->vlan.dev; + return 0; } static void macvtap_dellink(struct net_device *dev, struct list_head *head) { + struct macvtap_dev *vlantap = netdev_priv(dev); + netdev_rx_handler_unregister(dev); - tap_del_queues(dev); + tap_del_queues(>tap); macvlan_dellink(dev, head); } @@ -96,13 +138,14 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .setup = macvtap_setup, .newlink= macvtap_newlink, .dellink= macvtap_dellink, + .priv_size = sizeof(struct macvtap_dev), }; static int macvtap_device_event(struct notifier_block *unused,
[PATCHv5 2/7] tap: Renaming tap related APIs, data structures, macros
Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.* Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 18 +-- drivers/net/tap.c | 332 ++--- drivers/vhost/net.c| 3 +- include/linux/if_macvlan.h | 17 +-- include/linux/if_macvtap.h | 10 -- include/linux/if_tap.h | 23 6 files changed, 202 insertions(+), 201 deletions(-) delete mode 100644 include/linux/if_macvtap.h create mode 100644 include/linux/if_tap.h diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 96ffa60..548f339 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ static int macvtap_newlink(struct net *src_net, */ vlan->tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); if (err) return err; @@ -82,7 +82,7 @@ static void macvtap_dellink(struct net_device *dev, struct list_head *head) { netdev_rx_handler_unregister(dev); - macvtap_del_queues(dev); + tap_del_queues(dev); macvlan_dellink(dev, head); } @@ -121,7 +121,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = macvtap_get_minor(vlan); + err = tap_get_minor(vlan); if (err) return notifier_from_errno(err); @@ -129,7 +129,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - macvtap_free_minor(vlan); + tap_free_minor(vlan); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -144,10 +144,10 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlan->minor); device_destroy(_class, devt); - macvtap_free_minor(vlan); + tap_free_minor(vlan); break; case NETDEV_CHANGE_TX_QUEUE_LEN: - if (macvtap_queue_resize(vlan)) + if (tap_queue_resize(vlan)) return NOTIFY_BAD; break; } @@ -159,7 +159,7 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations macvtap_fops; +extern struct file_operations tap_fops; static int macvtap_init(void) { int err; @@ -169,7 +169,7 @@ static int macvtap_init(void) if (err) goto out1; - cdev_init(_cdev, _fops); + cdev_init(_cdev, _fops); err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) goto out2; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 6f6228e..15ca2d5 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -24,16 +24,16 @@ #include /* - * A macvtap queue is the central object of this driver, it connects + * A tap queue is the central object of this driver, it connects * an open character device to a macvlan interface. There can be * multiple queues on one interface, which map back to queues * implemented in hardware on the underlying device. * - * macvtap_proto is used to allocate queues through the sock allocation + * tap_proto is used to allocate queues through the sock allocation * mechanism. 
* */ -struct macvtap_queue { +struct tap_queue { struct sock sk; struct socket sock; struct socket_wq wq; @@ -47,21 +47,21 @@ struct macvtap_queue { struct skb_array skb_array; }; -#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) -#define MACVTAP_VNET_LE 0x8000 -#define MACVTAP_VNET_BE 0x4000 +#define TAP_VNET_LE 0x8000 +#define TAP_VNET_BE 0x4000 #ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { - return q->flags & MACVTAP_VNET_BE ? false : + return q->flags & TAP_VNET_BE ? false : virtio_legacy_is_little_endian(); } -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +static long tap_get_vnet_
[PATCHv5 5/7] tap: Extending tap device create/destroy APIs
Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one type of virtual interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 6 +-- drivers/net/tap.c | 101 +++-- include/linux/if_tap.h | 4 +- 3 files changed, 85 insertions(+), 26 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 0238df6..a4bfc10 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -163,7 +163,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. */ - err = tap_get_minor(>tap); + err = tap_get_minor(macvtap_major, >tap); if (err) return notifier_from_errno(err); @@ -171,7 +171,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -186,7 +186,7 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(_class, devt); - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(>tap)) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 7d3e8b1..b7cdc90 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -99,12 +99,17 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) + +static LIST_HEAD(major_list); + struct major_info { + struct rcu_head rcu; dev_t major; struct idr minor_idr; struct mutex minor_lock; const char *device_name; -} macvtap_major; + struct list_head next; +}; #define GOODCOPY_LEN 128 @@ -385,44 +390,72 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; } -int 
tap_get_minor(struct tap_dev *tap) +static struct major_info *tap_get_major(int major) +{ + struct major_info *tap_major; + + list_for_each_entry_rcu(tap_major, _list, next) { + if (tap_major->major == major) + return tap_major; + } + + return NULL; +} + +int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; + struct major_info *tap_major; - mutex_lock(_major.minor_lock); - retval = idr_alloc(_major.minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return -EINVAL; + + mutex_lock(_major->minor_lock); + retval = idr_alloc(_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); return retval < 0 ? retval : 0; } -void tap_free_minor(struct tap_dev *tap) +void tap_free_minor(dev_t major, struct tap_dev *tap) { - mutex_lock(_major.minor_lock); + struct major_info *tap_major; + + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return; + + mutex_lock(_major->minor_lock); if (tap->minor) { - idr_remove(_major.minor_idr, tap->minor); + idr_remove(_major->minor_idr, tap->minor); tap->minor = 0; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); } -static struct tap_dev *dev_get_by_tap_minor(int minor) +static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; + struct major_info *tap_major; + + tap_major = tap_get_major(major); + if (!tap_major) + return NULL; - mutex_lock(_major.minor_lock); - tap = idr_find(_major.minor_idr, minor); + mutex_lock(_major->minor_lock); + tap = idr_find(_major->minor_idr, minor); if (tap) { dev = tap->dev; dev_hold(dev); } - mutex_unlock(_majo
[PATCHv5 1/7] tap: Refactoring macvtap.c
macvtap module has code for tap/queue management and link management. This patch splits the code into macvtap_main.c for link management and tap.c for tap/queue management. Functionality in tap.c can be re-used for implementing tap on other virtual interfaces. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Makefile | 2 + drivers/net/macvtap_main.c | 218 +++ drivers/net/{macvtap.c => tap.c} | 204 ++-- include/linux/if_macvtap.h | 10 ++ 4 files changed, 238 insertions(+), 196 deletions(-) create mode 100644 drivers/net/macvtap_main.c rename drivers/net/{macvtap.c => tap.c} (84%) create mode 100644 include/linux/if_macvtap.h diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7336cbd..19b03a9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o +macvtap-objs := macvtap_main.o tap.o + # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c new file mode 100644 index 000..96ffa60 --- /dev/null +++ b/drivers/net/macvtap_main.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * Variables for dealing with macvtaps device numbers. 
+ */ +static dev_t macvtap_major; +#define MACVTAP_NUM_DEVS (1U << MINORBITS) + +static const void *macvtap_net_namespace(struct device *d) +{ + struct net_device *dev = to_net_dev(d->parent); + return dev_net(dev); +} + +static struct class macvtap_class = { + .name = "macvtap", + .owner = THIS_MODULE, + .ns_type = _ns_type_operations, + .namespace = macvtap_net_namespace, +}; +static struct cdev macvtap_cdev; + +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ + NETIF_F_TSO6 | NETIF_F_UFO) + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err; + + INIT_LIST_HEAD(>queue_list); + + /* Since macvlan supports all offloads by default, make +* tap support all offloads also. +*/ + vlan->tap_features = TUN_OFFLOADS; + + err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + if (err) + return err; + + /* Don't put anything that may fail after macvlan_common_newlink +* because we can't undo what it does. 
+*/ + err = macvlan_common_newlink(src_net, dev, tb, data); + if (err) { + netdev_rx_handler_unregister(dev); + return err; + } + + return 0; +} + +static void macvtap_dellink(struct net_device *dev, + struct list_head *head) +{ + netdev_rx_handler_unregister(dev); + macvtap_del_queues(dev); + macvlan_dellink(dev, head); +} + +static void macvtap_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); + dev->tx_queue_len = TUN_READQ_SIZE; +} + +static struct rtnl_link_ops macvtap_link_ops __read_mostly = { + .kind = "macvtap", + .setup = macvtap_setup, + .newlink= macvtap_newlink, + .dellink= macvtap_dellink, +}; + +static int macvtap_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct macvlan_dev *vlan; + struct device *classdev; + dev_t devt; + int err; + char tap_name[IFNAMSIZ]; + + if (dev->rtnl_link_ops != _link_ops) + return NOTIFY_DONE; + + snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); + vlan = netdev_priv(dev); + + switch (event) { + case NETDEV_REGISTER: + /* Create the device node here after the network device has +* been registered but before register_netdevice has +* finished running. +*/ + err = macvtap_get_minor(vlan); + if (err) + return notifier_from_errno(err); + + devt = MKDEV(MAJOR(macvtap_major), vlan->minor); + classdev = device_create(_class, >dev, devt, +dev, tap_name); + if (IS_ERR(classdev)) { + macvtap_free_minor(vlan); + return notifier_from_errno(PTR_E
[PATCHv5 7/7] ipvtap: IP-VLAN based tap driver
This patch adds a tap character device driver that is based on the IP-VLAN network interface, called ipvtap. An ipvtap device can be created in the same way as an ipvlan device, using 'type ipvtap', and then accessed using the tap user space interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 13 +++ drivers/net/Makefile | 1 + drivers/net/ipvlan/Makefile | 1 + drivers/net/ipvlan/ipvlan.h | 7 ++ drivers/net/ipvlan/ipvlan_core.c | 5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +++-- drivers/net/ipvlan/ipvtap.c | 241 +++ 7 files changed, 281 insertions(+), 14 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8f6d21b4..fe83dc1 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -166,6 +166,19 @@ config IPVLAN To compile this driver as a module, choose M here: the module will be called ipvlan. +config IPVTAP + tristate "IP-VLAN based tap driver" + depends on IPVLAN + depends on INET + select TAP + ---help--- + This adds a specialized tap character device driver that is based + on the IP-VLAN network interface, called ipvtap. An ipvtap device + can be added in the same way as a ipvlan device, using 'type + ipvtap', and then be accessed through the tap user space interface. + + To compile this driver as a module, choose M here: the module + will be called ipvtap. 
config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7dd86ca..98ed4d9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -7,6 +7,7 @@ # obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ +obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile index df79910..8a2c64d 100644 --- a/drivers/net/ipvlan/Makefile +++ b/drivers/net/ipvlan/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_IPVLAN) += ipvlan.o +obj-$(CONFIG_IPVTAP) += ipvtap.o ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index dbfbb33..4362d88 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -133,4 +133,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, u16 proto); unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, bool mcast); +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]); +void ipvlan_link_delete(struct net_device *dev, struct list_head *head); +void ipvlan_link_setup(struct net_device *dev); +int ipvlan_link_register(struct rtnl_link_ops *ops); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 83ce74a..9af16ab 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -16,8 +16,8 @@ void ipvlan_init_secret(void) net_get_random_once(_jhash_secret, sizeof(ipvlan_jhash_secret)); } -static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, - unsigned int len, bool success, bool mcast) +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, 
bool mcast) { if (!ipvlan) return; @@ -36,6 +36,7 @@ static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, this_cpu_inc(ipvlan->pcpu_stats->rx_errs); } } +EXPORT_SYMBOL_GPL(ipvlan_count_rx); static u8 ipvlan_get_v6_hash(const void *iaddr) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 8b0f993..ed750e2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -494,8 +494,8 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb, return ret; } -static int ipvlan_link_new(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port; @@ -567,8 +567,9 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan_port_destroy(phy_dev); return err; } +EXPORT_SYMBOL_GPL(ipvlan_link_new); -static void ipvlan_link_delete(struct net_dev
[PATCHv5 6/7] tap: tap as an independent module
This patch makes tap a separate module so that other types of virtual interfaces, for example ipvlan, can use it. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 7 +++ drivers/net/Makefile | 3 +-- drivers/net/{macvtap_main.c => macvtap.c} | 0 drivers/net/tap.c | 11 +++ drivers/vhost/Kconfig | 2 +- include/linux/if_tap.h| 4 ++-- 6 files changed, 22 insertions(+), 5 deletions(-) rename drivers/net/{macvtap_main.c => macvtap.c} (100%) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 95c32f2..8f6d21b4 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -135,6 +135,7 @@ config MACVTAP tristate "MAC-VLAN based tap driver" depends on MACVLAN depends on INET + select TAP help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -284,6 +285,12 @@ config TUN If you don't know what to use this for, you don't need it. +config TAP + tristate + ---help--- + This option is selected by any driver implementing tap user space + interface for a virtual interface to re-use core tap functionality. 
+ config TUN_VNET_CROSS_LE bool "Support for cross-endian vnet headers on little-endian kernels" default n diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 19b03a9..7dd86ca 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o @@ -29,8 +30,6 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o -macvtap-objs := macvtap_main.o tap.o - # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap.c similarity index 100% rename from drivers/net/macvtap_main.c rename to drivers/net/macvtap.c diff --git a/drivers/net/tap.c b/drivers/net/tap.c index b7cdc90..a0ed508 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -312,6 +312,7 @@ void tap_del_queues(struct tap_dev *tap) /* guarantee that any future tap_set_queue will fail */ tap->numvtaps = MAX_TAP_QUEUES; } +EXPORT_SYMBOL_GPL(tap_del_queues); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { @@ -389,6 +390,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } +EXPORT_SYMBOL_GPL(tap_handle_frame); static struct major_info *tap_get_major(int major) { @@ -422,6 +424,7 @@ int tap_get_minor(dev_t major, struct tap_dev *tap) mutex_unlock(_major->minor_lock); return retval < 0 ? 
retval : 0; } +EXPORT_SYMBOL_GPL(tap_get_minor); void tap_free_minor(dev_t major, struct tap_dev *tap) { @@ -438,6 +441,7 @@ void tap_free_minor(dev_t major, struct tap_dev *tap) } mutex_unlock(_major->minor_lock); } +EXPORT_SYMBOL_GPL(tap_free_minor); static struct tap_dev *dev_get_by_tap_file(int major, int minor) { @@ -1193,6 +1197,7 @@ int tap_queue_resize(struct tap_dev *tap) kfree(arrays); return ret; } +EXPORT_SYMBOL_GPL(tap_queue_resize); static int tap_list_add(dev_t major, const char *device_name) { @@ -1240,6 +1245,7 @@ int tap_create_cdev(struct cdev *tap_cdev, out1: return err; } +EXPORT_SYMBOL_GPL(tap_create_cdev); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { @@ -1255,3 +1261,8 @@ void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) list_del_rcu(_major->next); kfree_rcu(tap_major, rcu); } +EXPORT_SYMBOL_GPL(tap_destroy_cdev); + +MODULE_AUTHOR("Arnd Bergmann <a...@arndb.de>"); +MODULE_AUTHOR("Sainath Grandhi <sainath.gran...@intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 40764ec..cfdecea 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -1,6 +1,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" - depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) + depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) select VHOST ---help--- This kernel module can be loaded in host kernel to accelerate diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 362e71c..3482c3c 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -1,7 +1,7 @@ #ifndef _LINUX_IF_TAP_H_ #define _LINUX_IF_TAP_H_ -#if IS_ENABLED(CONFIG_
[PATCHv4 2/7] tap: Renaming tap related APIs, data structures, macros
Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.* Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 18 +-- drivers/net/tap.c | 332 ++--- drivers/vhost/net.c| 3 +- include/linux/if_macvlan.h | 17 +-- include/linux/if_macvtap.h | 10 -- include/linux/if_tap.h | 23 6 files changed, 202 insertions(+), 201 deletions(-) delete mode 100644 include/linux/if_macvtap.h create mode 100644 include/linux/if_tap.h diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 96ffa60..548f339 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ static int macvtap_newlink(struct net *src_net, */ vlan->tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); if (err) return err; @@ -82,7 +82,7 @@ static void macvtap_dellink(struct net_device *dev, struct list_head *head) { netdev_rx_handler_unregister(dev); - macvtap_del_queues(dev); + tap_del_queues(dev); macvlan_dellink(dev, head); } @@ -121,7 +121,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = macvtap_get_minor(vlan); + err = tap_get_minor(vlan); if (err) return notifier_from_errno(err); @@ -129,7 +129,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - macvtap_free_minor(vlan); + tap_free_minor(vlan); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -144,10 +144,10 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlan->minor); device_destroy(_class, devt); - macvtap_free_minor(vlan); + tap_free_minor(vlan); break; case NETDEV_CHANGE_TX_QUEUE_LEN: - if (macvtap_queue_resize(vlan)) + if (tap_queue_resize(vlan)) return NOTIFY_BAD; break; } @@ -159,7 +159,7 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations macvtap_fops; +extern struct file_operations tap_fops; static int macvtap_init(void) { int err; @@ -169,7 +169,7 @@ static int macvtap_init(void) if (err) goto out1; - cdev_init(_cdev, _fops); + cdev_init(_cdev, _fops); err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) goto out2; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index e192d25..ec35efe 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -24,16 +24,16 @@ #include /* - * A macvtap queue is the central object of this driver, it connects + * A tap queue is the central object of this driver, it connects * an open character device to a macvlan interface. There can be * multiple queues on one interface, which map back to queues * implemented in hardware on the underlying device. * - * macvtap_proto is used to allocate queues through the sock allocation + * tap_proto is used to allocate queues through the sock allocation * mechanism. 
* */ -struct macvtap_queue { +struct tap_queue { struct sock sk; struct socket sock; struct socket_wq wq; @@ -47,21 +47,21 @@ struct macvtap_queue { struct skb_array skb_array; }; -#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) -#define MACVTAP_VNET_LE 0x8000 -#define MACVTAP_VNET_BE 0x4000 +#define TAP_VNET_LE 0x8000 +#define TAP_VNET_BE 0x4000 #ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { - return q->flags & MACVTAP_VNET_BE ? false : + return q->flags & TAP_VNET_BE ? false : virtio_legacy_is_little_endian(); } -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +static long tap_get_vnet_
[PATCHv4 3/7] tap: Tap character device creation/destroy API
This patch provides tap device create/destroy APIs in tap.c. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 30 +++--- drivers/net/tap.c | 62 ++ include/linux/if_tap.h | 3 +++ 3 files changed, 63 insertions(+), 32 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 548f339..215ab7a 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -28,7 +28,6 @@ * Variables for dealing with macvtaps device numbers. */ static dev_t macvtap_major; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) static const void *macvtap_net_namespace(struct device *d) { @@ -159,57 +158,46 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations tap_fops; static int macvtap_init(void) { int err; - err = alloc_chrdev_region(_major, 0, - MACVTAP_NUM_DEVS, "macvtap"); - if (err) - goto out1; + err = tap_create_cdev(_cdev, _major, "macvtap"); - cdev_init(_cdev, _fops); - err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) - goto out2; + goto out1; err = class_register(_class); if (err) - goto out3; + goto out2; err = register_netdevice_notifier(_notifier_block); if (err) - goto out4; + goto out3; err = macvlan_link_register(_link_ops); if (err) - goto out5; + goto out4; return 0; -out5: - unregister_netdevice_notifier(_notifier_block); out4: - class_unregister(_class); + unregister_netdevice_notifier(_notifier_block); out3: - cdev_del(_cdev); + class_unregister(_class); out2: - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + tap_destroy_cdev(macvtap_major, _cdev); out1: return err; } module_init(macvtap_init); -extern struct idr minor_idr; static void macvtap_exit(void) { rtnl_link_unregister(_link_ops); unregister_netdevice_notifier(_notifier_block); class_unregister(_class); - cdev_del(_cdev); - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); - idr_destroy(_idr); + 
tap_destroy_cdev(macvtap_major, _cdev); } module_exit(macvtap_exit); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index ec35efe..3084806 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -123,8 +123,12 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) -static DEFINE_MUTEX(minor_lock); -DEFINE_IDR(minor_idr); +struct major_info { + dev_t major; + struct idr minor_idr; + struct mutex minor_lock; + const char *device_name; +} macvtap_major; #define GOODCOPY_LEN 128 @@ -413,26 +417,26 @@ int tap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - mutex_lock(_lock); - retval = idr_alloc(_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major.minor_lock); + retval = idr_alloc(_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { vlan->minor = retval; } else if (retval == -ENOSPC) { netdev_err(vlan->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return retval < 0 ? 
retval : 0; } void tap_free_minor(struct macvlan_dev *vlan) { - mutex_lock(_lock); + mutex_lock(_major.minor_lock); if (vlan->minor) { - idr_remove(_idr, vlan->minor); + idr_remove(_major.minor_idr, vlan->minor); vlan->minor = 0; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); } static struct net_device *dev_get_by_tap_minor(int minor) @@ -440,13 +444,13 @@ static struct net_device *dev_get_by_tap_minor(int minor) struct net_device *dev = NULL; struct macvlan_dev *vlan; - mutex_lock(_lock); - vlan = idr_find(_idr, minor); + mutex_lock(_major.minor_lock); + vlan = idr_find(_major.minor_idr, minor); if (vlan) { dev = vlan->dev; dev_hold(dev); } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return dev; } @@ -1184,3 +1188,39 @@ int tap_queue_resize(struct macvlan_dev *vlan) kfree(arrays); return ret; } + +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name) +{ + int err; + + err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); +
[PATCHv4 4/7] tap: Abstract type of virtual interface from tap implementation
The macvlan object is restructured to hold tap-related elements in a separate entity, tap_dev. Upon a NETDEV_REGISTER device_event, tap_dev is registered with the idr and fetched again on tap_open. A few of the tap functions are modified to accept tap_dev as an argument. The tap_dev object includes callbacks to be used by the underlying virtual interface to take care of tx and rx accounting. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvlan.c | 2 +- drivers/net/macvtap_main.c | 71 +--- drivers/net/tap.c | 264 - include/linux/if_tap.h | 57 +- 4 files changed, 229 insertions(+), 165 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 20b3fdf2..79383f9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1526,7 +1526,6 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ - ops->priv_size = sizeof(struct macvlan_dev); ops->validate = macvlan_validate; ops->maxtype= IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -1549,6 +1548,7 @@ static struct rtnl_link_ops macvlan_link_ops = { .newlink= macvlan_newlink, .dellink= macvlan_dellink, .get_link_net = macvlan_get_link_net, + .priv_size = sizeof(struct macvlan_dev), }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 215ab7a..0238df6 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -24,6 +24,11 @@ #include #include +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_devtap; +}; + /* * Variables for dealing with macvtaps device numbers. 
*/ @@ -46,22 +51,55 @@ static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + static int macvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_dev *vlantap = netdev_priv(dev); int err; - INIT_LIST_HEAD(>queue_list); + INIT_LIST_HEAD(>tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. 
*/ - vlan->tap_features = TUN_OFFLOADS; + vlantap->tap.tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); + /* Register callbacks for rx/tx drops accounting and updating +* net_device features +*/ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, >tap); if (err) return err; @@ -74,14 +112,18 @@ static int macvtap_newlink(struct net *src_net, return err; } + vlantap->tap.dev = vlantap->vlan.dev; + return 0; } static void macvtap_dellink(struct net_device *dev, struct list_head *head) { + struct macvtap_dev *vlantap = netdev_priv(dev); + netdev_rx_handler_unregister(dev); - tap_del_queues(dev); + tap_del_queues(>tap); macvlan_dellink(dev, head); } @@ -96,13 +138,14 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .setup = macvtap_setup, .newlink= macvtap_newlink, .dellink= macvtap_dellink, + .priv_size = sizeof(struct macvtap_dev), }; static int macvtap_device_event(struct notifier_block *unused,
[PATCHv4 1/7] tap: Refactoring macvtap.c
macvtap module has code for tap/queue management and link management. This patch splits the code into macvtap_main.c for link management and tap.c for tap/queue management. Functionality in tap.c can be re-used for implementing tap on other virtual interfaces. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Makefile | 2 + drivers/net/macvtap_main.c | 218 +++ drivers/net/{macvtap.c => tap.c} | 204 ++-- include/linux/if_macvtap.h | 10 ++ 4 files changed, 238 insertions(+), 196 deletions(-) create mode 100644 drivers/net/macvtap_main.c rename drivers/net/{macvtap.c => tap.c} (84%) create mode 100644 include/linux/if_macvtap.h diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7336cbd..19b03a9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o +macvtap-objs := macvtap_main.o tap.o + # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c new file mode 100644 index 000..96ffa60 --- /dev/null +++ b/drivers/net/macvtap_main.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * Variables for dealing with macvtaps device numbers. 
+ */ +static dev_t macvtap_major; +#define MACVTAP_NUM_DEVS (1U << MINORBITS) + +static const void *macvtap_net_namespace(struct device *d) +{ + struct net_device *dev = to_net_dev(d->parent); + return dev_net(dev); +} + +static struct class macvtap_class = { + .name = "macvtap", + .owner = THIS_MODULE, + .ns_type = _ns_type_operations, + .namespace = macvtap_net_namespace, +}; +static struct cdev macvtap_cdev; + +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ + NETIF_F_TSO6 | NETIF_F_UFO) + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err; + + INIT_LIST_HEAD(>queue_list); + + /* Since macvlan supports all offloads by default, make +* tap support all offloads also. +*/ + vlan->tap_features = TUN_OFFLOADS; + + err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + if (err) + return err; + + /* Don't put anything that may fail after macvlan_common_newlink +* because we can't undo what it does. 
+*/ + err = macvlan_common_newlink(src_net, dev, tb, data); + if (err) { + netdev_rx_handler_unregister(dev); + return err; + } + + return 0; +} + +static void macvtap_dellink(struct net_device *dev, + struct list_head *head) +{ + netdev_rx_handler_unregister(dev); + macvtap_del_queues(dev); + macvlan_dellink(dev, head); +} + +static void macvtap_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); + dev->tx_queue_len = TUN_READQ_SIZE; +} + +static struct rtnl_link_ops macvtap_link_ops __read_mostly = { + .kind = "macvtap", + .setup = macvtap_setup, + .newlink= macvtap_newlink, + .dellink= macvtap_dellink, +}; + +static int macvtap_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct macvlan_dev *vlan; + struct device *classdev; + dev_t devt; + int err; + char tap_name[IFNAMSIZ]; + + if (dev->rtnl_link_ops != _link_ops) + return NOTIFY_DONE; + + snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); + vlan = netdev_priv(dev); + + switch (event) { + case NETDEV_REGISTER: + /* Create the device node here after the network device has +* been registered but before register_netdevice has +* finished running. +*/ + err = macvtap_get_minor(vlan); + if (err) + return notifier_from_errno(err); + + devt = MKDEV(MAJOR(macvtap_major), vlan->minor); + classdev = device_create(_class, >dev, devt, +dev, tap_name); + if (IS_ERR(classdev)) { + macvtap_free_minor(vlan); + return notifier_from_errno(PTR_E
[PATCHv4 5/7] tap: Extending tap device create/destroy APIs
Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one type of virtual interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 6 +-- drivers/net/tap.c | 101 +++-- include/linux/if_tap.h | 4 +- 3 files changed, 85 insertions(+), 26 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 0238df6..a4bfc10 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -163,7 +163,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. */ - err = tap_get_minor(>tap); + err = tap_get_minor(macvtap_major, >tap); if (err) return notifier_from_errno(err); @@ -171,7 +171,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -186,7 +186,7 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(_class, devt); - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(>tap)) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 5d9c534..427a113 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -99,12 +99,17 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) + +static LIST_HEAD(major_list); + struct major_info { + struct rcu_head rcu; dev_t major; struct idr minor_idr; struct mutex minor_lock; const char *device_name; -} macvtap_major; + struct list_head next; +}; #define GOODCOPY_LEN 128 @@ -385,44 +390,72 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; } -int 
tap_get_minor(struct tap_dev *tap) +static struct major_info *tap_get_major(int major) +{ + struct major_info *tap_major; + + list_for_each_entry_rcu(tap_major, _list, next) { + if (tap_major->major == major) + return tap_major; + } + + return NULL; +} + +int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; + struct major_info *tap_major; - mutex_lock(_major.minor_lock); - retval = idr_alloc(_major.minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return -EINVAL; + + mutex_lock(_major->minor_lock); + retval = idr_alloc(_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); return retval < 0 ? retval : 0; } -void tap_free_minor(struct tap_dev *tap) +void tap_free_minor(dev_t major, struct tap_dev *tap) { - mutex_lock(_major.minor_lock); + struct major_info *tap_major; + + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return; + + mutex_lock(_major->minor_lock); if (tap->minor) { - idr_remove(_major.minor_idr, tap->minor); + idr_remove(_major->minor_idr, tap->minor); tap->minor = 0; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); } -static struct tap_dev *dev_get_by_tap_minor(int minor) +static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; + struct major_info *tap_major; + + tap_major = tap_get_major(major); + if (!tap_major) + return NULL; - mutex_lock(_major.minor_lock); - tap = idr_find(_major.minor_idr, minor); + mutex_lock(_major->minor_lock); + tap = idr_find(_major->minor_idr, minor); if (tap) { dev = tap->dev; dev_hold(dev); } - mutex_unlock(_majo
[PATCHv4 7/7] ipvtap: IP-VLAN based tap driver
This patch adds a tap character device driver that is based on the IP-VLAN network interface, called ipvtap. An ipvtap device can be created in the same way as an ipvlan device, using 'type ipvtap', and then accessed using the tap user space interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 13 +++ drivers/net/Makefile | 1 + drivers/net/ipvlan/Makefile | 1 + drivers/net/ipvlan/ipvlan.h | 7 ++ drivers/net/ipvlan/ipvlan_core.c | 5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +++-- drivers/net/ipvlan/ipvtap.c | 241 +++ 7 files changed, 281 insertions(+), 14 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1c88437..d07b5f5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -166,6 +166,19 @@ config IPVLAN To compile this driver as a module, choose M here: the module will be called ipvlan. +config IPVTAP + tristate "IP-VLAN based tap driver" + depends on IPVLAN + depends on INET + depends on TAP + ---help--- + This adds a specialized tap character device driver that is based + on the IP-VLAN network interface, called ipvtap. An ipvtap device + can be added in the same way as a ipvlan device, using 'type + ipvtap', and then be accessed through the tap user space interface. + + To compile this driver as a module, choose M here: the module + will be called ipvtap. 
config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7dd86ca..98ed4d9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -7,6 +7,7 @@ # obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ +obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile index df79910..8a2c64d 100644 --- a/drivers/net/ipvlan/Makefile +++ b/drivers/net/ipvlan/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_IPVLAN) += ipvlan.o +obj-$(CONFIG_IPVTAP) += ipvtap.o ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index dbfbb33..4362d88 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -133,4 +133,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, u16 proto); unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, bool mcast); +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]); +void ipvlan_link_delete(struct net_device *dev, struct list_head *head); +void ipvlan_link_setup(struct net_device *dev); +int ipvlan_link_register(struct rtnl_link_ops *ops); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 83ce74a..9af16ab 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -16,8 +16,8 @@ void ipvlan_init_secret(void) net_get_random_once(_jhash_secret, sizeof(ipvlan_jhash_secret)); } -static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, - unsigned int len, bool success, bool mcast) +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, 
bool mcast) { if (!ipvlan) return; @@ -36,6 +36,7 @@ static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, this_cpu_inc(ipvlan->pcpu_stats->rx_errs); } } +EXPORT_SYMBOL_GPL(ipvlan_count_rx); static u8 ipvlan_get_v6_hash(const void *iaddr) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 8b0f993..ed750e2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -494,8 +494,8 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb, return ret; } -static int ipvlan_link_new(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port; @@ -567,8 +567,9 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan_port_destroy(phy_dev); return err; } +EXPORT_SYMBOL_GPL(ipvlan_link_new); -static void ipvlan_link_delete(struct net_dev
[PATCHv4 6/7] tap: tap as an independent module
This patch makes tap a separate module for other types of virtual interfaces, for example, ipvlan to use. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 15 +++ drivers/net/Makefile | 3 +-- drivers/net/{macvtap_main.c => macvtap.c} | 0 drivers/net/tap.c | 11 +++ drivers/vhost/Kconfig | 2 +- include/linux/if_tap.h| 4 ++-- 6 files changed, 30 insertions(+), 5 deletions(-) rename drivers/net/{macvtap_main.c => macvtap.c} (100%) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 95c32f2..1c88437 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -135,6 +135,7 @@ config MACVTAP tristate "MAC-VLAN based tap driver" depends on MACVLAN depends on INET + depends on TAP help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -284,6 +285,20 @@ config TUN If you don't know what to use this for, you don't need it. +config TAP +tristate "TAP module support for virtual interfaces" +---help--- + TAP module serves two purposes. This can be used as library of functions + for virtual interfaces to implement tap functionality. + + This module also includes character device file and socket operations + that can be used by virtual interface implementing tap. + + To compile this driver as a module, choose M here: the module + will be called tap. + + If you don't know what to use this for, you don't need it. 
+ config TUN_VNET_CROSS_LE bool "Support for cross-endian vnet headers on little-endian kernels" default n diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 19b03a9..7dd86ca 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o @@ -29,8 +30,6 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o -macvtap-objs := macvtap_main.o tap.o - # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap.c similarity index 100% rename from drivers/net/macvtap_main.c rename to drivers/net/macvtap.c diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 427a113..b4266c8 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -312,6 +312,7 @@ void tap_del_queues(struct tap_dev *tap) /* guarantee that any future tap_set_queue will fail */ tap->numvtaps = MAX_TAP_QUEUES; } +EXPORT_SYMBOL_GPL(tap_del_queues); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { @@ -389,6 +390,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } +EXPORT_SYMBOL_GPL(tap_handle_frame); static struct major_info *tap_get_major(int major) { @@ -422,6 +424,7 @@ int tap_get_minor(dev_t major, struct tap_dev *tap) mutex_unlock(_major->minor_lock); return retval < 0 ? 
retval : 0; } +EXPORT_SYMBOL_GPL(tap_get_minor); void tap_free_minor(dev_t major, struct tap_dev *tap) { @@ -438,6 +441,7 @@ void tap_free_minor(dev_t major, struct tap_dev *tap) } mutex_unlock(_major->minor_lock); } +EXPORT_SYMBOL_GPL(tap_free_minor); static struct tap_dev *dev_get_by_tap_file(int major, int minor) { @@ -1193,6 +1197,7 @@ int tap_queue_resize(struct tap_dev *tap) kfree(arrays); return ret; } +EXPORT_SYMBOL_GPL(tap_queue_resize); static int tap_list_add(dev_t major, const char *device_name) { @@ -1240,6 +1245,7 @@ int tap_create_cdev(struct cdev *tap_cdev, out1: return err; } +EXPORT_SYMBOL_GPL(tap_create_cdev); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { @@ -1255,3 +1261,8 @@ void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) list_del_rcu(_major->next); kfree_rcu(tap_major, rcu); } +EXPORT_SYMBOL_GPL(tap_destroy_cdev); + +MODULE_AUTHOR("Arnd Bergmann <a...@arndb.de>"); +MODULE_AUTHOR("Sainath Grandhi <sainath.gran...@intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 40764ec..cfdecea 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -1,6 +1,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" - depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) + depends on NET && EVENTFD && (TUN
[PATCHv4 0/7] Refactor macvtap to re-use tap functionality by other virtual interfaces
Tap character devices can be implemented on other virtual interfaces like ipvlan, similar to macvtap. Source code for tap functionality in macvtap can be re-used for this purpose. This patch series splits macvtap source into two modules, macvtap and tap. This patch series also includes a patch for implementing tap character device driver based on the IP-VLAN network interface, called ipvtap. These patches are tested on x86 platform. Sainath Grandhi (7): tap: Refactoring macvtap.c tap: Renaming tap related APIs, data structures, macros tap: Tap character device creation/destroy API tap: Abstract type of virtual interface from tap implementation tap: Extending tap device create/destroy APIs tap: tap as an independent module ipvtap: IP-VLAN based tap driver drivers/net/Kconfig | 28 + drivers/net/Makefile |2 + drivers/net/ipvlan/Makefile |1 + drivers/net/ipvlan/ipvlan.h |7 + drivers/net/ipvlan/ipvlan_core.c |5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +- drivers/net/ipvlan/ipvtap.c | 241 drivers/net/macvlan.c|2 +- drivers/net/macvtap.c| 1229 ++-- drivers/net/tap.c| 1268 ++ drivers/vhost/Kconfig|2 +- drivers/vhost/net.c |3 +- include/linux/if_macvlan.h | 17 +- include/linux/if_tap.h | 75 +++ 14 files changed, 1698 insertions(+), 1209 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c create mode 100644 drivers/net/tap.c create mode 100644 include/linux/if_tap.h -- 2.7.4
[PATCHv3 1/7] tap: Refactoring macvtap.c
macvtap module has code for tap/queue management and link management. This patch splits the code into macvtap_main.c for link management and tap.c for tap/queue management. Functionality in tap.c can be re-used for implementing tap on other virtual interfaces. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Makefile | 2 + drivers/net/macvtap_main.c | 218 +++ drivers/net/{macvtap.c => tap.c} | 204 ++-- include/linux/if_macvtap.h | 10 ++ 4 files changed, 238 insertions(+), 196 deletions(-) create mode 100644 drivers/net/macvtap_main.c rename drivers/net/{macvtap.c => tap.c} (84%) create mode 100644 include/linux/if_macvtap.h diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7336cbd..19b03a9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o +macvtap-objs := macvtap_main.o tap.o + # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c new file mode 100644 index 000..96ffa60 --- /dev/null +++ b/drivers/net/macvtap_main.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * Variables for dealing with macvtaps device numbers. 
+ */ +static dev_t macvtap_major; +#define MACVTAP_NUM_DEVS (1U << MINORBITS) + +static const void *macvtap_net_namespace(struct device *d) +{ + struct net_device *dev = to_net_dev(d->parent); + return dev_net(dev); +} + +static struct class macvtap_class = { + .name = "macvtap", + .owner = THIS_MODULE, + .ns_type = _ns_type_operations, + .namespace = macvtap_net_namespace, +}; +static struct cdev macvtap_cdev; + +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ + NETIF_F_TSO6 | NETIF_F_UFO) + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err; + + INIT_LIST_HEAD(>queue_list); + + /* Since macvlan supports all offloads by default, make +* tap support all offloads also. +*/ + vlan->tap_features = TUN_OFFLOADS; + + err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + if (err) + return err; + + /* Don't put anything that may fail after macvlan_common_newlink +* because we can't undo what it does. 
+*/ + err = macvlan_common_newlink(src_net, dev, tb, data); + if (err) { + netdev_rx_handler_unregister(dev); + return err; + } + + return 0; +} + +static void macvtap_dellink(struct net_device *dev, + struct list_head *head) +{ + netdev_rx_handler_unregister(dev); + macvtap_del_queues(dev); + macvlan_dellink(dev, head); +} + +static void macvtap_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); + dev->tx_queue_len = TUN_READQ_SIZE; +} + +static struct rtnl_link_ops macvtap_link_ops __read_mostly = { + .kind = "macvtap", + .setup = macvtap_setup, + .newlink= macvtap_newlink, + .dellink= macvtap_dellink, +}; + +static int macvtap_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct macvlan_dev *vlan; + struct device *classdev; + dev_t devt; + int err; + char tap_name[IFNAMSIZ]; + + if (dev->rtnl_link_ops != _link_ops) + return NOTIFY_DONE; + + snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); + vlan = netdev_priv(dev); + + switch (event) { + case NETDEV_REGISTER: + /* Create the device node here after the network device has +* been registered but before register_netdevice has +* finished running. +*/ + err = macvtap_get_minor(vlan); + if (err) + return notifier_from_errno(err); + + devt = MKDEV(MAJOR(macvtap_major), vlan->minor); + classdev = device_create(_class, >dev, devt, +dev, tap_name); + if (IS_ERR(classdev)) { + macvtap_free_minor(vlan); + return notifier_from_errno(PTR_E
[PATCHv3 4/7] tap: Abstract type of virtual interface from tap implementation
macvlan object is re-structured to hold tap related elements in a separate entity, tap_dev. Upon NETDEV_REGISTER device_event, tap_dev is registered with idr and fetched again on tap_open. Few of the tap functions are modified to accepted tap_dev as argument. tap_dev object includes callbacks to be used by underlying virtual interface to take care of tx and rx accounting. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvlan.c | 2 +- drivers/net/macvtap_main.c | 71 +--- drivers/net/tap.c | 264 - include/linux/if_tap.h | 57 +- 4 files changed, 229 insertions(+), 165 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 20b3fdf2..79383f9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1526,7 +1526,6 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ - ops->priv_size = sizeof(struct macvlan_dev); ops->validate = macvlan_validate; ops->maxtype= IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -1549,6 +1548,7 @@ static struct rtnl_link_ops macvlan_link_ops = { .newlink= macvlan_newlink, .dellink= macvlan_dellink, .get_link_net = macvlan_get_link_net, + .priv_size = sizeof(struct macvlan_dev), }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 694e385..dd6f4e4 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -24,6 +24,11 @@ #include #include +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_devtap; +}; + /* * Variables for dealing with macvtaps device numbers. 
*/ @@ -46,22 +51,55 @@ static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + static int macvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_dev *vlantap = netdev_priv(dev); int err; - INIT_LIST_HEAD(>queue_list); + INIT_LIST_HEAD(>tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. 
*/ - vlan->tap_features = TUN_OFFLOADS; + vlantap->tap.tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); + /* Register callbacks for rx/tx drops accounting and updating +* net_device features +*/ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, >tap); if (err) return err; @@ -74,14 +112,18 @@ static int macvtap_newlink(struct net *src_net, return err; } + vlantap->tap.dev = vlantap->vlan.dev; + return 0; } static void macvtap_dellink(struct net_device *dev, struct list_head *head) { + struct macvtap_dev *vlantap = netdev_priv(dev); + netdev_rx_handler_unregister(dev); - tap_del_queues(dev); + tap_del_queues(>tap); macvlan_dellink(dev, head); } @@ -96,13 +138,14 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .setup = macvtap_setup, .newlink= macvtap_newlink, .dellink= macvtap_dellink, + .priv_size = sizeof(struct macvtap_dev), }; static int macvtap_device_event(struct notifier_block *unused,
[PATCHv3 2/7] tap: Renaming tap related APIs, data structures, macros
Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.* Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 18 +-- drivers/net/tap.c | 332 ++--- drivers/vhost/net.c| 3 +- include/linux/if_macvlan.h | 17 +-- include/linux/if_macvtap.h | 10 -- include/linux/if_tap.h | 23 6 files changed, 202 insertions(+), 201 deletions(-) delete mode 100644 include/linux/if_macvtap.h create mode 100644 include/linux/if_tap.h diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 96ffa60..548f339 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ static int macvtap_newlink(struct net *src_net, */ vlan->tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); if (err) return err; @@ -82,7 +82,7 @@ static void macvtap_dellink(struct net_device *dev, struct list_head *head) { netdev_rx_handler_unregister(dev); - macvtap_del_queues(dev); + tap_del_queues(dev); macvlan_dellink(dev, head); } @@ -121,7 +121,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = macvtap_get_minor(vlan); + err = tap_get_minor(vlan); if (err) return notifier_from_errno(err); @@ -129,7 +129,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - macvtap_free_minor(vlan); + tap_free_minor(vlan); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -144,10 +144,10 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlan->minor); device_destroy(_class, devt); - macvtap_free_minor(vlan); + tap_free_minor(vlan); break; case NETDEV_CHANGE_TX_QUEUE_LEN: - if (macvtap_queue_resize(vlan)) + if (tap_queue_resize(vlan)) return NOTIFY_BAD; break; } @@ -159,7 +159,7 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations macvtap_fops; +extern struct file_operations tap_fops; static int macvtap_init(void) { int err; @@ -169,7 +169,7 @@ static int macvtap_init(void) if (err) goto out1; - cdev_init(_cdev, _fops); + cdev_init(_cdev, _fops); err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) goto out2; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index e192d25..ec35efe 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -24,16 +24,16 @@ #include /* - * A macvtap queue is the central object of this driver, it connects + * A tap queue is the central object of this driver, it connects * an open character device to a macvlan interface. There can be * multiple queues on one interface, which map back to queues * implemented in hardware on the underlying device. * - * macvtap_proto is used to allocate queues through the sock allocation + * tap_proto is used to allocate queues through the sock allocation * mechanism. 
* */ -struct macvtap_queue { +struct tap_queue { struct sock sk; struct socket sock; struct socket_wq wq; @@ -47,21 +47,21 @@ struct macvtap_queue { struct skb_array skb_array; }; -#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) -#define MACVTAP_VNET_LE 0x8000 -#define MACVTAP_VNET_BE 0x4000 +#define TAP_VNET_LE 0x8000 +#define TAP_VNET_BE 0x4000 #ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { - return q->flags & MACVTAP_VNET_BE ? false : + return q->flags & TAP_VNET_BE ? false : virtio_legacy_is_little_endian(); } -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +static long tap_get_vnet_
[PATCHv3 0/7] Refactor macvtap to re-use tap functionality by other virtual interfaces
Tap character devices can be implemented on other virtual interfaces like ipvlan, similar to macvtap. Source code for tap functionality in macvtap can be re-used for this purpose. This patch series splits macvtap source into two modules, macvtap and tap. This patch series also includes a patch for implementing tap character device driver based on the IP-VLAN network interface, called ipvtap. These patches are tested on x86 platform. Sainath Grandhi (7): tap: Refactoring macvtap.c tap: Renaming tap related APIs, data structures, macros tap: Tap character device creation/destroy API tap: Abstract type of virtual interface from tap implementation tap: Extending tap device create/destroy APIs tap: tap as an independent module ipvtap: IP-VLAN based tap driver drivers/net/Kconfig | 28 + drivers/net/Makefile |2 + drivers/net/ipvlan/Makefile |1 + drivers/net/ipvlan/ipvlan.h |7 + drivers/net/ipvlan/ipvlan_core.c |5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +- drivers/net/ipvlan/ipvtap.c | 241 drivers/net/macvlan.c|2 +- drivers/net/macvtap.c| 1229 ++--- drivers/net/tap.c| 1261 ++ drivers/vhost/Kconfig|2 +- drivers/vhost/net.c |3 +- include/linux/if_macvlan.h | 17 +- include/linux/if_tap.h | 75 +++ 14 files changed, 1691 insertions(+), 1209 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c create mode 100644 drivers/net/tap.c create mode 100644 include/linux/if_tap.h -- 2.7.4
[PATCHv3 5/7] tap: Extending tap device create/destroy APIs
Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one type of virtual interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 6 +-- drivers/net/tap.c | 98 +++--- include/linux/if_tap.h | 4 +- 3 files changed, 80 insertions(+), 28 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index dd6f4e4..50fe993 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -163,7 +163,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. */ - err = tap_get_minor(>tap); + err = tap_get_minor(macvtap_major, >tap); if (err) return notifier_from_errno(err); @@ -171,7 +171,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -186,7 +186,7 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(_class, devt); - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(>tap)) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index ede436a..1219ee9 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -99,12 +99,16 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) + +static LIST_HEAD(major_list); + struct major_info { dev_t major; struct idr minor_idr; struct mutex minor_lock; const char *device_name; -} macvtap_major; + struct list_head next; +}; #define GOODCOPY_LEN 128 @@ -385,44 +389,73 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; } -int tap_get_minor(struct 
tap_dev *tap) +static struct major_info *tap_get_major(int major) +{ + struct major_info *tap_major, *tmp; + + list_for_each_entry_safe(tap_major, tmp, _list, next) { + if (tap_major->major == major) { + return tap_major; + } + } + + return NULL; +} + +int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; + struct major_info *tap_major; + + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return -EINVAL; - mutex_lock(_major.minor_lock); - retval = idr_alloc(_major.minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major->minor_lock); + retval = idr_alloc(_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); return retval < 0 ? retval : 0; } -void tap_free_minor(struct tap_dev *tap) +void tap_free_minor(dev_t major, struct tap_dev *tap) { - mutex_lock(_major.minor_lock); + struct major_info *tap_major; + + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return; + + mutex_lock(_major->minor_lock); if (tap->minor) { - idr_remove(_major.minor_idr, tap->minor); + idr_remove(_major->minor_idr, tap->minor); tap->minor = 0; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); } -static struct tap_dev *dev_get_by_tap_minor(int minor) +static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; + struct major_info *tap_major; + + tap_major = tap_get_major(major); + if (!tap_major) + return NULL; - mutex_lock(_major.minor_lock); - tap = idr_find(_major.minor_idr, minor); + mutex_lock(_major->minor_lock); + tap = idr_find(_major->minor_idr, minor); if (tap) { dev = tap->dev; dev_hold(dev); } - mutex_unlock(_majo
[PATCHv3 6/7] tap: tap as an independent module
This patch makes tap a separate module for other types of virtual interfaces, for example, ipvlan to use. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 15 +++ drivers/net/Makefile | 3 +-- drivers/net/{macvtap_main.c => macvtap.c} | 0 drivers/net/tap.c | 11 +++ drivers/vhost/Kconfig | 2 +- include/linux/if_tap.h| 4 ++-- 6 files changed, 30 insertions(+), 5 deletions(-) rename drivers/net/{macvtap_main.c => macvtap.c} (100%) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 95c32f2..1c88437 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -135,6 +135,7 @@ config MACVTAP tristate "MAC-VLAN based tap driver" depends on MACVLAN depends on INET + depends on TAP help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -284,6 +285,20 @@ config TUN If you don't know what to use this for, you don't need it. +config TAP +tristate "TAP module support for virtual interfaces" +---help--- + TAP module serves two purposes. This can be used as library of functions + for virtual interfaces to implement tap functionality. + + This module also includes character device file and socket operations + that can be used by virtual interface implementing tap. + + To compile this driver as a module, choose M here: the module + will be called tap. + + If you don't know what to use this for, you don't need it. 
+ config TUN_VNET_CROSS_LE bool "Support for cross-endian vnet headers on little-endian kernels" default n diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 19b03a9..7dd86ca 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o @@ -29,8 +30,6 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o -macvtap-objs := macvtap_main.o tap.o - # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap.c similarity index 100% rename from drivers/net/macvtap_main.c rename to drivers/net/macvtap.c diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 1219ee9..9f49280 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -311,6 +311,7 @@ void tap_del_queues(struct tap_dev *tap) /* guarantee that any future tap_set_queue will fail */ tap->numvtaps = MAX_TAP_QUEUES; } +EXPORT_SYMBOL_GPL(tap_del_queues); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { @@ -388,6 +389,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } +EXPORT_SYMBOL_GPL(tap_handle_frame); static struct major_info *tap_get_major(int major) { @@ -422,6 +424,7 @@ int tap_get_minor(dev_t major, struct tap_dev *tap) mutex_unlock(_major->minor_lock); return retval < 0 ? 
retval : 0; } +EXPORT_SYMBOL_GPL(tap_get_minor); void tap_free_minor(dev_t major, struct tap_dev *tap) { @@ -438,6 +441,7 @@ void tap_free_minor(dev_t major, struct tap_dev *tap) } mutex_unlock(_major->minor_lock); } +EXPORT_SYMBOL_GPL(tap_free_minor); static struct tap_dev *dev_get_by_tap_file(int major, int minor) { @@ -1193,6 +1197,7 @@ int tap_queue_resize(struct tap_dev *tap) kfree(arrays); return ret; } +EXPORT_SYMBOL_GPL(tap_queue_resize); static int tap_list_add(dev_t major, const char *device_name) { @@ -1235,6 +1240,7 @@ int tap_create_cdev(struct cdev *tap_cdev, out1: return err; } +EXPORT_SYMBOL_GPL(tap_create_cdev); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { @@ -1248,3 +1254,8 @@ void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) unregister_chrdev_region(major, TAP_NUM_DEVS); idr_destroy(_major->minor_idr); } +EXPORT_SYMBOL_GPL(tap_destroy_cdev); + +MODULE_AUTHOR("Arnd Bergmann <a...@arndb.de>"); +MODULE_AUTHOR("Sainath Grandhi <sainath.gran...@intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 40764ec..cfdecea 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -1,6 +1,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" - depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) + depends on NET &&
[PATCHv3 3/7] tap: Tap character device creation/destroy API
This patch provides tap device create/destroy APIs in tap.c. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 30 +++--- drivers/net/tap.c | 62 ++ include/linux/if_tap.h | 3 +++ 3 files changed, 63 insertions(+), 32 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 548f339..694e385 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -28,7 +28,6 @@ * Variables for dealing with macvtaps device numbers. */ static dev_t macvtap_major; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) static const void *macvtap_net_namespace(struct device *d) { @@ -159,57 +158,46 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations tap_fops; static int macvtap_init(void) { int err; - err = alloc_chrdev_region(_major, 0, - MACVTAP_NUM_DEVS, "macvtap"); - if (err) - goto out1; + err = tap_create_cdev(_cdev, _major, "macvtap"); - cdev_init(_cdev, _fops); - err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) - goto out2; + goto out1; err = class_register(_class); if (err) - goto out3; + goto out2; err = register_netdevice_notifier(_notifier_block); if (err) - goto out4; + goto out3; err = macvlan_link_register(_link_ops); if (err) - goto out5; + goto out4; return 0; -out5: - unregister_netdevice_notifier(_notifier_block); out4: - class_unregister(_class); + unregister_netdevice_notifier(_notifier_block); out3: - cdev_del(_cdev); + class_unregister(_class); out2: - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + cdev_del(_cdev); out1: return err; } module_init(macvtap_init); -extern struct idr minor_idr; static void macvtap_exit(void) { rtnl_link_unregister(_link_ops); unregister_netdevice_notifier(_notifier_block); class_unregister(_class); - cdev_del(_cdev); - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); - idr_destroy(_idr); + tap_destroy_cdev(macvtap_major, 
_cdev); } module_exit(macvtap_exit); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index ec35efe..ec7ebed 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -123,8 +123,12 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) -static DEFINE_MUTEX(minor_lock); -DEFINE_IDR(minor_idr); +struct major_info { + dev_t major; + struct idr minor_idr; + struct mutex minor_lock; + const char *device_name; +} macvtap_major; #define GOODCOPY_LEN 128 @@ -413,26 +417,26 @@ int tap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - mutex_lock(_lock); - retval = idr_alloc(_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major.minor_lock); + retval = idr_alloc(_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { vlan->minor = retval; } else if (retval == -ENOSPC) { netdev_err(vlan->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return retval < 0 ? retval : 0; } void tap_free_minor(struct macvlan_dev *vlan) { - mutex_lock(_lock); + mutex_lock(_major.minor_lock); if (vlan->minor) { - idr_remove(_idr, vlan->minor); + idr_remove(_major.minor_idr, vlan->minor); vlan->minor = 0; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); } static struct net_device *dev_get_by_tap_minor(int minor) @@ -440,13 +444,13 @@ static struct net_device *dev_get_by_tap_minor(int minor) struct net_device *dev = NULL; struct macvlan_dev *vlan; - mutex_lock(_lock); - vlan = idr_find(_idr, minor); + mutex_lock(_major.minor_lock); + vlan = idr_find(_major.minor_idr, minor); if (vlan) { dev = vlan->dev; dev_hold(dev); } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return dev; } @@ -1184,3 +1188,39 @@ int tap_queue_resize(struct macvlan_dev *vlan) kfree(arrays); return ret; } + +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name) +{ + int err; + + err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); + if 
(err) +
[PATCHv3 7/7] ipvtap: IP-VLAN based tap driver
This patch adds a tap character device driver that is based on the IP-VLAN network interface, called ipvtap. An ipvtap device can be created in the same way as an ipvlan device, using 'type ipvtap', and then accessed using the tap user space interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 13 +++ drivers/net/Makefile | 1 + drivers/net/ipvlan/Makefile | 1 + drivers/net/ipvlan/ipvlan.h | 7 ++ drivers/net/ipvlan/ipvlan_core.c | 5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +++-- drivers/net/ipvlan/ipvtap.c | 241 +++ 7 files changed, 281 insertions(+), 14 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1c88437..d07b5f5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -166,6 +166,19 @@ config IPVLAN To compile this driver as a module, choose M here: the module will be called ipvlan. +config IPVTAP + tristate "IP-VLAN based tap driver" + depends on IPVLAN + depends on INET + depends on TAP + ---help--- + This adds a specialized tap character device driver that is based + on the IP-VLAN network interface, called ipvtap. An ipvtap device + can be added in the same way as a ipvlan device, using 'type + ipvtap', and then be accessed through the tap user space interface. + + To compile this driver as a module, choose M here: the module + will be called ipvtap. 
config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7dd86ca..98ed4d9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -7,6 +7,7 @@ # obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ +obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile index df79910..8a2c64d 100644 --- a/drivers/net/ipvlan/Makefile +++ b/drivers/net/ipvlan/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_IPVLAN) += ipvlan.o +obj-$(CONFIG_IPVTAP) += ipvtap.o ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index dbfbb33..4362d88 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -133,4 +133,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, u16 proto); unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, bool mcast); +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]); +void ipvlan_link_delete(struct net_device *dev, struct list_head *head); +void ipvlan_link_setup(struct net_device *dev); +int ipvlan_link_register(struct rtnl_link_ops *ops); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 83ce74a..9af16ab 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -16,8 +16,8 @@ void ipvlan_init_secret(void) net_get_random_once(_jhash_secret, sizeof(ipvlan_jhash_secret)); } -static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, - unsigned int len, bool success, bool mcast) +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, 
bool mcast) { if (!ipvlan) return; @@ -36,6 +36,7 @@ static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, this_cpu_inc(ipvlan->pcpu_stats->rx_errs); } } +EXPORT_SYMBOL_GPL(ipvlan_count_rx); static u8 ipvlan_get_v6_hash(const void *iaddr) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 8b0f993..ed750e2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -494,8 +494,8 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb, return ret; } -static int ipvlan_link_new(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port; @@ -567,8 +567,9 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan_port_destroy(phy_dev); return err; } +EXPORT_SYMBOL_GPL(ipvlan_link_new); -static void ipvlan_link_delete(struct net_dev
[PATCHv3 1/7] TAP: Refactoring macvtap.c
macvtap module has code for tap/queue management and link management. This patch splits the code into macvtap_main.c for link management and tap.c for tap/queue management. Functionality in tap.c can be re-used for implementing tap on other virtual interfaces. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Makefile | 2 + drivers/net/macvtap_main.c | 218 +++ drivers/net/{macvtap.c => tap.c} | 204 ++-- include/linux/if_macvtap.h | 10 ++ 4 files changed, 238 insertions(+), 196 deletions(-) create mode 100644 drivers/net/macvtap_main.c rename drivers/net/{macvtap.c => tap.c} (84%) create mode 100644 include/linux/if_macvtap.h diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7336cbd..19b03a9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o +macvtap-objs := macvtap_main.o tap.o + # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c new file mode 100644 index 000..96ffa60 --- /dev/null +++ b/drivers/net/macvtap_main.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * Variables for dealing with macvtaps device numbers. 
+ */ +static dev_t macvtap_major; +#define MACVTAP_NUM_DEVS (1U << MINORBITS) + +static const void *macvtap_net_namespace(struct device *d) +{ + struct net_device *dev = to_net_dev(d->parent); + return dev_net(dev); +} + +static struct class macvtap_class = { + .name = "macvtap", + .owner = THIS_MODULE, + .ns_type = _ns_type_operations, + .namespace = macvtap_net_namespace, +}; +static struct cdev macvtap_cdev; + +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ + NETIF_F_TSO6 | NETIF_F_UFO) + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err; + + INIT_LIST_HEAD(>queue_list); + + /* Since macvlan supports all offloads by default, make +* tap support all offloads also. +*/ + vlan->tap_features = TUN_OFFLOADS; + + err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + if (err) + return err; + + /* Don't put anything that may fail after macvlan_common_newlink +* because we can't undo what it does. 
+*/ + err = macvlan_common_newlink(src_net, dev, tb, data); + if (err) { + netdev_rx_handler_unregister(dev); + return err; + } + + return 0; +} + +static void macvtap_dellink(struct net_device *dev, + struct list_head *head) +{ + netdev_rx_handler_unregister(dev); + macvtap_del_queues(dev); + macvlan_dellink(dev, head); +} + +static void macvtap_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); + dev->tx_queue_len = TUN_READQ_SIZE; +} + +static struct rtnl_link_ops macvtap_link_ops __read_mostly = { + .kind = "macvtap", + .setup = macvtap_setup, + .newlink= macvtap_newlink, + .dellink= macvtap_dellink, +}; + +static int macvtap_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct macvlan_dev *vlan; + struct device *classdev; + dev_t devt; + int err; + char tap_name[IFNAMSIZ]; + + if (dev->rtnl_link_ops != _link_ops) + return NOTIFY_DONE; + + snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); + vlan = netdev_priv(dev); + + switch (event) { + case NETDEV_REGISTER: + /* Create the device node here after the network device has +* been registered but before register_netdevice has +* finished running. +*/ + err = macvtap_get_minor(vlan); + if (err) + return notifier_from_errno(err); + + devt = MKDEV(MAJOR(macvtap_major), vlan->minor); + classdev = device_create(_class, >dev, devt, +dev, tap_name); + if (IS_ERR(classdev)) { + macvtap_free_minor(vlan); + return notifier_from_errno(PTR_E
[PATCHv3 4/7] TAP: Abstract type of virtual interface from tap implementation
macvlan object is re-structured to hold tap related elements in a separate entity, tap_dev. Upon NETDEV_REGISTER device_event, tap_dev is registered with idr and fetched again on tap_open. A few of the tap functions are modified to accept tap_dev as an argument. tap_dev object includes callbacks to be used by underlying virtual interface to take care of tx and rx accounting. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvlan.c | 2 +- drivers/net/macvtap_main.c | 71 +--- drivers/net/tap.c | 264 - include/linux/if_tap.h | 57 +- 4 files changed, 229 insertions(+), 165 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 20b3fdf2..79383f9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1526,7 +1526,6 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ - ops->priv_size = sizeof(struct macvlan_dev); ops->validate = macvlan_validate; ops->maxtype= IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -1549,6 +1548,7 @@ static struct rtnl_link_ops macvlan_link_ops = { .newlink= macvlan_newlink, .dellink= macvlan_dellink, .get_link_net = macvlan_get_link_net, + .priv_size = sizeof(struct macvlan_dev), }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 694e385..dd6f4e4 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -24,6 +24,11 @@ #include #include +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_devtap; +}; + /* * Variables for dealing with macvtaps device numbers. 
*/ @@ -46,22 +51,55 @@ static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = >vlan; + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + static int macvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_dev *vlantap = netdev_priv(dev); int err; - INIT_LIST_HEAD(>queue_list); + INIT_LIST_HEAD(>tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. 
*/ - vlan->tap_features = TUN_OFFLOADS; + vlantap->tap.tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); + /* Register callbacks for rx/tx drops accounting and updating +* net_device features +*/ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, >tap); if (err) return err; @@ -74,14 +112,18 @@ static int macvtap_newlink(struct net *src_net, return err; } + vlantap->tap.dev = vlantap->vlan.dev; + return 0; } static void macvtap_dellink(struct net_device *dev, struct list_head *head) { + struct macvtap_dev *vlantap = netdev_priv(dev); + netdev_rx_handler_unregister(dev); - tap_del_queues(dev); + tap_del_queues(>tap); macvlan_dellink(dev, head); } @@ -96,13 +138,14 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .setup = macvtap_setup, .newlink= macvtap_newlink, .dellink= macvtap_dellink, + .priv_size = sizeof(struct macvtap_dev), }; static int macvtap_device_event(struct notifier_block *unused,
[PATCHv3 0/7] Refactor macvtap to re-use tap functionality by other virtual interfaces
Tap character devices can be implemented on other virtual interfaces like ipvlan, similar to macvtap. Source code for tap functionality in macvtap can be re-used for this purpose. This patch series splits macvtap source into two modules, macvtap and tap. This patch series also includes a patch for implementing tap character device driver based on the IP-VLAN network interface, called ipvtap. These patches are tested on x86 platform. Sainath Grandhi (7): TAP: Refactoring macvtap.c TAP: Renaming tap related APIs, data structures, macros TAP: Tap character device creation/destroy API TAP: Abstract type of virtual interface from tap implementation TAP: Extending tap device create/destroy APIs TAP: tap as an independent module IPVTAP: IP-VLAN based tap driver drivers/net/Kconfig | 28 + drivers/net/Makefile |2 + drivers/net/ipvlan/Makefile |1 + drivers/net/ipvlan/ipvlan.h |7 + drivers/net/ipvlan/ipvlan_core.c |5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +- drivers/net/ipvlan/ipvtap.c | 241 drivers/net/macvlan.c|2 +- drivers/net/macvtap.c| 1229 ++--- drivers/net/tap.c| 1261 ++ drivers/vhost/Kconfig|2 +- drivers/vhost/net.c |3 +- include/linux/if_macvlan.h | 17 +- include/linux/if_tap.h | 75 +++ 14 files changed, 1691 insertions(+), 1209 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c create mode 100644 drivers/net/tap.c create mode 100644 include/linux/if_tap.h -- 2.7.4
[PATCHv3 6/7] TAP: tap as an independent module
This patch makes tap a separate module for other types of virtual interfaces, for example, ipvlan to use. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 15 +++ drivers/net/Makefile | 3 +-- drivers/net/{macvtap_main.c => macvtap.c} | 0 drivers/net/tap.c | 11 +++ drivers/vhost/Kconfig | 2 +- include/linux/if_tap.h| 4 ++-- 6 files changed, 30 insertions(+), 5 deletions(-) rename drivers/net/{macvtap_main.c => macvtap.c} (100%) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 95c32f2..1c88437 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -135,6 +135,7 @@ config MACVTAP tristate "MAC-VLAN based tap driver" depends on MACVLAN depends on INET + depends on TAP help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -284,6 +285,20 @@ config TUN If you don't know what to use this for, you don't need it. +config TAP +tristate "TAP module support for virtual interfaces" +---help--- + TAP module serves two purposes. This can be used as library of functions + for virtual interfaces to implement tap functionality. + + This module also includes character device file and socket operations + that can be used by virtual interface implementing tap. + + To compile this driver as a module, choose M here: the module + will be called tap. + + If you don't know what to use this for, you don't need it. 
+ config TUN_VNET_CROSS_LE bool "Support for cross-endian vnet headers on little-endian kernels" default n diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 19b03a9..7dd86ca 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o @@ -29,8 +30,6 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o -macvtap-objs := macvtap_main.o tap.o - # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap.c similarity index 100% rename from drivers/net/macvtap_main.c rename to drivers/net/macvtap.c diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 1219ee9..9f49280 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -311,6 +311,7 @@ void tap_del_queues(struct tap_dev *tap) /* guarantee that any future tap_set_queue will fail */ tap->numvtaps = MAX_TAP_QUEUES; } +EXPORT_SYMBOL_GPL(tap_del_queues); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { @@ -388,6 +389,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } +EXPORT_SYMBOL_GPL(tap_handle_frame); static struct major_info *tap_get_major(int major) { @@ -422,6 +424,7 @@ int tap_get_minor(dev_t major, struct tap_dev *tap) mutex_unlock(_major->minor_lock); return retval < 0 ? 
retval : 0; } +EXPORT_SYMBOL_GPL(tap_get_minor); void tap_free_minor(dev_t major, struct tap_dev *tap) { @@ -438,6 +441,7 @@ void tap_free_minor(dev_t major, struct tap_dev *tap) } mutex_unlock(_major->minor_lock); } +EXPORT_SYMBOL_GPL(tap_free_minor); static struct tap_dev *dev_get_by_tap_file(int major, int minor) { @@ -1193,6 +1197,7 @@ int tap_queue_resize(struct tap_dev *tap) kfree(arrays); return ret; } +EXPORT_SYMBOL_GPL(tap_queue_resize); static int tap_list_add(dev_t major, const char *device_name) { @@ -1235,6 +1240,7 @@ int tap_create_cdev(struct cdev *tap_cdev, out1: return err; } +EXPORT_SYMBOL_GPL(tap_create_cdev); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { @@ -1248,3 +1254,8 @@ void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) unregister_chrdev_region(major, TAP_NUM_DEVS); idr_destroy(_major->minor_idr); } +EXPORT_SYMBOL_GPL(tap_destroy_cdev); + +MODULE_AUTHOR("Arnd Bergmann <a...@arndb.de>"); +MODULE_AUTHOR("Sainath Grandhi <sainath.gran...@intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 40764ec..cfdecea 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -1,6 +1,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" - depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) + depends on NET &&
[PATCHv3 2/7] TAP: Renaming tap related APIs, data structures, macros
Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.* Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 18 +-- drivers/net/tap.c | 332 ++--- drivers/vhost/net.c| 3 +- include/linux/if_macvlan.h | 17 +-- include/linux/if_macvtap.h | 10 -- include/linux/if_tap.h | 23 6 files changed, 202 insertions(+), 201 deletions(-) delete mode 100644 include/linux/if_macvtap.h create mode 100644 include/linux/if_tap.h diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 96ffa60..548f339 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ static int macvtap_newlink(struct net *src_net, */ vlan->tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); if (err) return err; @@ -82,7 +82,7 @@ static void macvtap_dellink(struct net_device *dev, struct list_head *head) { netdev_rx_handler_unregister(dev); - macvtap_del_queues(dev); + tap_del_queues(dev); macvlan_dellink(dev, head); } @@ -121,7 +121,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = macvtap_get_minor(vlan); + err = tap_get_minor(vlan); if (err) return notifier_from_errno(err); @@ -129,7 +129,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - macvtap_free_minor(vlan); + tap_free_minor(vlan); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -144,10 +144,10 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlan->minor); device_destroy(_class, devt); - macvtap_free_minor(vlan); + tap_free_minor(vlan); break; case NETDEV_CHANGE_TX_QUEUE_LEN: - if (macvtap_queue_resize(vlan)) + if (tap_queue_resize(vlan)) return NOTIFY_BAD; break; } @@ -159,7 +159,7 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations macvtap_fops; +extern struct file_operations tap_fops; static int macvtap_init(void) { int err; @@ -169,7 +169,7 @@ static int macvtap_init(void) if (err) goto out1; - cdev_init(_cdev, _fops); + cdev_init(_cdev, _fops); err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) goto out2; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index e192d25..ec35efe 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -24,16 +24,16 @@ #include /* - * A macvtap queue is the central object of this driver, it connects + * A tap queue is the central object of this driver, it connects * an open character device to a macvlan interface. There can be * multiple queues on one interface, which map back to queues * implemented in hardware on the underlying device. * - * macvtap_proto is used to allocate queues through the sock allocation + * tap_proto is used to allocate queues through the sock allocation * mechanism. 
* */ -struct macvtap_queue { +struct tap_queue { struct sock sk; struct socket sock; struct socket_wq wq; @@ -47,21 +47,21 @@ struct macvtap_queue { struct skb_array skb_array; }; -#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) -#define MACVTAP_VNET_LE 0x8000 -#define MACVTAP_VNET_BE 0x4000 +#define TAP_VNET_LE 0x8000 +#define TAP_VNET_BE 0x4000 #ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { - return q->flags & MACVTAP_VNET_BE ? false : + return q->flags & TAP_VNET_BE ? false : virtio_legacy_is_little_endian(); } -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +static long tap_get_vnet_
[PATCHv3 3/7] TAP: Tap character device creation/destroy API
This patch provides tap device create/destroy APIs in tap.c. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 30 +++--- drivers/net/tap.c | 62 ++ include/linux/if_tap.h | 3 +++ 3 files changed, 63 insertions(+), 32 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 548f339..694e385 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -28,7 +28,6 @@ * Variables for dealing with macvtaps device numbers. */ static dev_t macvtap_major; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) static const void *macvtap_net_namespace(struct device *d) { @@ -159,57 +158,46 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations tap_fops; static int macvtap_init(void) { int err; - err = alloc_chrdev_region(_major, 0, - MACVTAP_NUM_DEVS, "macvtap"); - if (err) - goto out1; + err = tap_create_cdev(_cdev, _major, "macvtap"); - cdev_init(_cdev, _fops); - err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) - goto out2; + goto out1; err = class_register(_class); if (err) - goto out3; + goto out2; err = register_netdevice_notifier(_notifier_block); if (err) - goto out4; + goto out3; err = macvlan_link_register(_link_ops); if (err) - goto out5; + goto out4; return 0; -out5: - unregister_netdevice_notifier(_notifier_block); out4: - class_unregister(_class); + unregister_netdevice_notifier(_notifier_block); out3: - cdev_del(_cdev); + class_unregister(_class); out2: - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + cdev_del(_cdev); out1: return err; } module_init(macvtap_init); -extern struct idr minor_idr; static void macvtap_exit(void) { rtnl_link_unregister(_link_ops); unregister_netdevice_notifier(_notifier_block); class_unregister(_class); - cdev_del(_cdev); - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); - idr_destroy(_idr); + tap_destroy_cdev(macvtap_major, 
_cdev); } module_exit(macvtap_exit); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index ec35efe..ec7ebed 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -123,8 +123,12 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) -static DEFINE_MUTEX(minor_lock); -DEFINE_IDR(minor_idr); +struct major_info { + dev_t major; + struct idr minor_idr; + struct mutex minor_lock; + const char *device_name; +} macvtap_major; #define GOODCOPY_LEN 128 @@ -413,26 +417,26 @@ int tap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - mutex_lock(_lock); - retval = idr_alloc(_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major.minor_lock); + retval = idr_alloc(_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { vlan->minor = retval; } else if (retval == -ENOSPC) { netdev_err(vlan->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return retval < 0 ? retval : 0; } void tap_free_minor(struct macvlan_dev *vlan) { - mutex_lock(_lock); + mutex_lock(_major.minor_lock); if (vlan->minor) { - idr_remove(_idr, vlan->minor); + idr_remove(_major.minor_idr, vlan->minor); vlan->minor = 0; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); } static struct net_device *dev_get_by_tap_minor(int minor) @@ -440,13 +444,13 @@ static struct net_device *dev_get_by_tap_minor(int minor) struct net_device *dev = NULL; struct macvlan_dev *vlan; - mutex_lock(_lock); - vlan = idr_find(_idr, minor); + mutex_lock(_major.minor_lock); + vlan = idr_find(_major.minor_idr, minor); if (vlan) { dev = vlan->dev; dev_hold(dev); } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return dev; } @@ -1184,3 +1188,39 @@ int tap_queue_resize(struct macvlan_dev *vlan) kfree(arrays); return ret; } + +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name) +{ + int err; + + err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); + if 
(err) +
[PATCHv3 7/7] IPVTAP: IP-VLAN based tap driver
This patch adds a tap character device driver that is based on the IP-VLAN network interface, called ipvtap. An ipvtap device can be created in the same way as an ipvlan device, using 'type ipvtap', and then accessed using the tap user space interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 13 +++ drivers/net/Makefile | 1 + drivers/net/ipvlan/Makefile | 1 + drivers/net/ipvlan/ipvlan.h | 7 ++ drivers/net/ipvlan/ipvlan_core.c | 5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +++-- drivers/net/ipvlan/ipvtap.c | 241 +++ 7 files changed, 281 insertions(+), 14 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1c88437..d07b5f5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -166,6 +166,19 @@ config IPVLAN To compile this driver as a module, choose M here: the module will be called ipvlan. +config IPVTAP + tristate "IP-VLAN based tap driver" + depends on IPVLAN + depends on INET + depends on TAP + ---help--- + This adds a specialized tap character device driver that is based + on the IP-VLAN network interface, called ipvtap. An ipvtap device + can be added in the same way as a ipvlan device, using 'type + ipvtap', and then be accessed through the tap user space interface. + + To compile this driver as a module, choose M here: the module + will be called ipvtap. 
config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7dd86ca..98ed4d9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -7,6 +7,7 @@ # obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ +obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile index df79910..8a2c64d 100644 --- a/drivers/net/ipvlan/Makefile +++ b/drivers/net/ipvlan/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_IPVLAN) += ipvlan.o +obj-$(CONFIG_IPVTAP) += ipvtap.o ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index dbfbb33..4362d88 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -133,4 +133,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, u16 proto); unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, bool mcast); +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]); +void ipvlan_link_delete(struct net_device *dev, struct list_head *head); +void ipvlan_link_setup(struct net_device *dev); +int ipvlan_link_register(struct rtnl_link_ops *ops); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 83ce74a..9af16ab 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -16,8 +16,8 @@ void ipvlan_init_secret(void) net_get_random_once(_jhash_secret, sizeof(ipvlan_jhash_secret)); } -static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, - unsigned int len, bool success, bool mcast) +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, 
bool mcast) { if (!ipvlan) return; @@ -36,6 +36,7 @@ static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, this_cpu_inc(ipvlan->pcpu_stats->rx_errs); } } +EXPORT_SYMBOL_GPL(ipvlan_count_rx); static u8 ipvlan_get_v6_hash(const void *iaddr) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 8b0f993..ed750e2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -494,8 +494,8 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb, return ret; } -static int ipvlan_link_new(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port; @@ -567,8 +567,9 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan_port_destroy(phy_dev); return err; } +EXPORT_SYMBOL_GPL(ipvlan_link_new); -static void ipvlan_link_delete(struct net_dev
[PATCHv3 5/7] TAP: Extending tap device create/destroy APIs
Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one type of virtual interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 6 +-- drivers/net/tap.c | 98 +++--- include/linux/if_tap.h | 4 +- 3 files changed, 80 insertions(+), 28 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index dd6f4e4..50fe993 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -163,7 +163,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. */ - err = tap_get_minor(>tap); + err = tap_get_minor(macvtap_major, >tap); if (err) return notifier_from_errno(err); @@ -171,7 +171,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -186,7 +186,7 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(_class, devt); - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(>tap)) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index ede436a..1219ee9 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -99,12 +99,16 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) + +static LIST_HEAD(major_list); + struct major_info { dev_t major; struct idr minor_idr; struct mutex minor_lock; const char *device_name; -} macvtap_major; + struct list_head next; +}; #define GOODCOPY_LEN 128 @@ -385,44 +389,73 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; } -int tap_get_minor(struct 
tap_dev *tap) +static struct major_info *tap_get_major(int major) +{ + struct major_info *tap_major, *tmp; + + list_for_each_entry_safe(tap_major, tmp, _list, next) { + if (tap_major->major == major) { + return tap_major; + } + } + + return NULL; +} + +int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; + struct major_info *tap_major; + + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return -EINVAL; - mutex_lock(_major.minor_lock); - retval = idr_alloc(_major.minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major->minor_lock); + retval = idr_alloc(_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); return retval < 0 ? retval : 0; } -void tap_free_minor(struct tap_dev *tap) +void tap_free_minor(dev_t major, struct tap_dev *tap) { - mutex_lock(_major.minor_lock); + struct major_info *tap_major; + + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return; + + mutex_lock(_major->minor_lock); if (tap->minor) { - idr_remove(_major.minor_idr, tap->minor); + idr_remove(_major->minor_idr, tap->minor); tap->minor = 0; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); } -static struct tap_dev *dev_get_by_tap_minor(int minor) +static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; + struct major_info *tap_major; + + tap_major = tap_get_major(major); + if (!tap_major) + return NULL; - mutex_lock(_major.minor_lock); - tap = idr_find(_major.minor_idr, minor); + mutex_lock(_major->minor_lock); + tap = idr_find(_major->minor_idr, minor); if (tap) { dev = tap->dev; dev_hold(dev); } - mutex_unlock(_majo
[PATCHv2 1/7] TAP: Refactoring macvtap.c
macvtap module has code for tap/queue management and link management. This patch splits the code into macvtap_main.c for link management and tap.c for tap/queue management. Functionality in tap.c can be re-used for implementing tap on other virtual interfaces. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Makefile | 2 + drivers/net/macvtap_main.c | 218 +++ drivers/net/{macvtap.c => tap.c} | 204 ++-- include/linux/if_macvtap.h | 10 ++ 4 files changed, 238 insertions(+), 196 deletions(-) create mode 100644 drivers/net/macvtap_main.c rename drivers/net/{macvtap.c => tap.c} (84%) create mode 100644 include/linux/if_macvtap.h diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7336cbd..19b03a9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o +macvtap-objs := macvtap_main.o tap.o + # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c new file mode 100644 index 000..96ffa60 --- /dev/null +++ b/drivers/net/macvtap_main.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * Variables for dealing with macvtaps device numbers. 
+ */ +static dev_t macvtap_major; +#define MACVTAP_NUM_DEVS (1U << MINORBITS) + +static const void *macvtap_net_namespace(struct device *d) +{ + struct net_device *dev = to_net_dev(d->parent); + return dev_net(dev); +} + +static struct class macvtap_class = { + .name = "macvtap", + .owner = THIS_MODULE, + .ns_type = _ns_type_operations, + .namespace = macvtap_net_namespace, +}; +static struct cdev macvtap_cdev; + +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ + NETIF_F_TSO6 | NETIF_F_UFO) + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err; + + INIT_LIST_HEAD(>queue_list); + + /* Since macvlan supports all offloads by default, make +* tap support all offloads also. +*/ + vlan->tap_features = TUN_OFFLOADS; + + err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + if (err) + return err; + + /* Don't put anything that may fail after macvlan_common_newlink +* because we can't undo what it does. 
+*/ + err = macvlan_common_newlink(src_net, dev, tb, data); + if (err) { + netdev_rx_handler_unregister(dev); + return err; + } + + return 0; +} + +static void macvtap_dellink(struct net_device *dev, + struct list_head *head) +{ + netdev_rx_handler_unregister(dev); + macvtap_del_queues(dev); + macvlan_dellink(dev, head); +} + +static void macvtap_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); + dev->tx_queue_len = TUN_READQ_SIZE; +} + +static struct rtnl_link_ops macvtap_link_ops __read_mostly = { + .kind = "macvtap", + .setup = macvtap_setup, + .newlink= macvtap_newlink, + .dellink= macvtap_dellink, +}; + +static int macvtap_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct macvlan_dev *vlan; + struct device *classdev; + dev_t devt; + int err; + char tap_name[IFNAMSIZ]; + + if (dev->rtnl_link_ops != _link_ops) + return NOTIFY_DONE; + + snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); + vlan = netdev_priv(dev); + + switch (event) { + case NETDEV_REGISTER: + /* Create the device node here after the network device has +* been registered but before register_netdevice has +* finished running. +*/ + err = macvtap_get_minor(vlan); + if (err) + return notifier_from_errno(err); + + devt = MKDEV(MAJOR(macvtap_major), vlan->minor); + classdev = device_create(_class, >dev, devt, +dev, tap_name); + if (IS_ERR(classdev)) { + macvtap_free_minor(vlan); + return notifier_from_errno(PTR_E
[PATCHv2 0/7] Refactor macvtap to re-use tap functionality by other virtual interfaces
Tap character devices can be implemented on other virtual interfaces like ipvlan, similar to macvtap. Source code for tap functionality in macvtap can be re-used for this purpose. This patch series splits macvtap source into two modules, macvtap and tap. This patch series also includes a patch for implementing tap character device driver based on the IP-VLAN network interface, called ipvtap. These patches are tested on x86 platform. Sainath Grandhi (7): TAP: Refactoring macvtap.c TAP: Renaming tap related APIs, data structures, macros TAP: Tap character device creation/destroy API TAP: Abstract type of virtual interface from tap implementation TAP: Extending tap device create/destroy APIs TAP: tap as an independent module IPVTAP: IP-VLAN based tap driver drivers/net/Kconfig | 28 + drivers/net/Makefile |2 + drivers/net/ipvlan/Makefile |1 + drivers/net/ipvlan/ipvlan.h |7 + drivers/net/ipvlan/ipvlan_core.c |5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +- drivers/net/ipvlan/ipvtap.c | 238 +++ drivers/net/macvlan.c|2 +- drivers/net/macvtap.c| 1226 ++-- drivers/net/tap.c| 1262 ++ drivers/vhost/Kconfig|2 +- drivers/vhost/net.c |3 +- include/linux/if_macvlan.h | 17 +- include/linux/if_tap.h | 75 +++ 14 files changed, 1686 insertions(+), 1209 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c create mode 100644 drivers/net/tap.c create mode 100644 include/linux/if_tap.h -- 2.7.4
[PATCHv2 4/7] TAP: Abstract type of virtual interface from tap implementation
macvlan object is re-structured to hold tap related elements in a separate entity, tap_dev. Upon NETDEV_REGISTER device_event, tap_dev is registered with idr and fetched again on tap_open. A few of the tap functions are modified to accept tap_dev as an argument. tap_dev object includes callbacks to be used by underlying virtual interface to take care of tx and rx accounting. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvlan.c | 2 +- drivers/net/macvtap_main.c | 68 +--- drivers/net/tap.c | 264 - include/linux/if_tap.h | 57 +- 4 files changed, 226 insertions(+), 165 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 20b3fdf2..79383f9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1526,7 +1526,6 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ - ops->priv_size = sizeof(struct macvlan_dev); ops->validate = macvlan_validate; ops->maxtype= IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -1549,6 +1548,7 @@ static struct rtnl_link_ops macvlan_link_ops = { .newlink= macvlan_newlink, .dellink= macvlan_dellink, .get_link_net = macvlan_get_link_net, + .priv_size = sizeof(struct macvlan_dev), }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 32ad560..6326a82 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -24,6 +24,11 @@ #include #include +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_devtap; +}; + /* * Variables for dealing with macvtaps device numbers. 
*/ @@ -46,22 +51,52 @@ static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvlan_dev *vlan = (struct macvlan_dev *)container_of(tap, struct macvtap_dev, tap); + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvlan_dev *vlan = (struct macvlan_dev *)container_of(tap, struct macvtap_dev, tap); + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvlan_dev *vlan = (struct macvlan_dev *)container_of(tap, struct macvtap_dev, tap); + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + static int macvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_dev *vlantap = netdev_priv(dev); int err; - INIT_LIST_HEAD(>queue_list); + INIT_LIST_HEAD(>tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. 
*/ - vlan->tap_features = TUN_OFFLOADS; + vlantap->tap.tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); + /* Register callbacks for rx/tx drops accounting and updating +* net_device features +*/ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, >tap); if (err) return err; @@ -74,14 +109,18 @@ static int macvtap_newlink(struct net *src_net, return err; } + vlantap->tap.dev = vlantap->vlan.dev; + return 0; } static void macvtap_dellink(struct net_device *dev, struct list_head *head) { + struct macvtap_dev *vlantap = netdev_priv(dev); + netdev_rx_handler_unregister(dev); - tap_del_queues(dev); + tap_del_queues(>tap); macvlan_dellink(dev, head); } @@ -96,13 +135,14 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .setup = macvtap_setup, .newlink= macvtap_newlink, .dellink= macvtap_dellink, + .priv_size = sizeof(struct macvtap_dev), }; static int macvtap_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = net
[PATCHv2 7/7] IPVTAP: IP-VLAN based tap driver
This patch adds a tap character device driver that is based on the IP-VLAN network interface, called ipvtap. An ipvtap device can be created in the same way as an ipvlan device, using 'type ipvtap', and then accessed using the tap user space interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 13 +++ drivers/net/Makefile | 1 + drivers/net/ipvlan/Makefile | 1 + drivers/net/ipvlan/ipvlan.h | 7 ++ drivers/net/ipvlan/ipvlan_core.c | 5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +++-- drivers/net/ipvlan/ipvtap.c | 238 +++ 7 files changed, 278 insertions(+), 14 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1c88437..d07b5f5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -166,6 +166,19 @@ config IPVLAN To compile this driver as a module, choose M here: the module will be called ipvlan. +config IPVTAP + tristate "IP-VLAN based tap driver" + depends on IPVLAN + depends on INET + depends on TAP + ---help--- + This adds a specialized tap character device driver that is based + on the IP-VLAN network interface, called ipvtap. An ipvtap device + can be added in the same way as a ipvlan device, using 'type + ipvtap', and then be accessed through the tap user space interface. + + To compile this driver as a module, choose M here: the module + will be called ipvtap. 
config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7dd86ca..98ed4d9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -7,6 +7,7 @@ # obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ +obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile index df79910..8a2c64d 100644 --- a/drivers/net/ipvlan/Makefile +++ b/drivers/net/ipvlan/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_IPVLAN) += ipvlan.o +obj-$(CONFIG_IPVTAP) += ipvtap.o ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index dbfbb33..4362d88 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -133,4 +133,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, u16 proto); unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, bool mcast); +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]); +void ipvlan_link_delete(struct net_device *dev, struct list_head *head); +void ipvlan_link_setup(struct net_device *dev); +int ipvlan_link_register(struct rtnl_link_ops *ops); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 83ce74a..9af16ab 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -16,8 +16,8 @@ void ipvlan_init_secret(void) net_get_random_once(_jhash_secret, sizeof(ipvlan_jhash_secret)); } -static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, - unsigned int len, bool success, bool mcast) +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, 
bool mcast) { if (!ipvlan) return; @@ -36,6 +36,7 @@ static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, this_cpu_inc(ipvlan->pcpu_stats->rx_errs); } } +EXPORT_SYMBOL_GPL(ipvlan_count_rx); static u8 ipvlan_get_v6_hash(const void *iaddr) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 8b0f993..ed750e2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -494,8 +494,8 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb, return ret; } -static int ipvlan_link_new(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port; @@ -567,8 +567,9 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan_port_destroy(phy_dev); return err; } +EXPORT_SYMBOL_GPL(ipvlan_link_new); -static void ipvlan_link_delete(struct net_dev
[PATCHv2 3/7] TAP: Tap character device creation/destroy API
This patch provides tap device create/destroy APIs in tap.c. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 29 +++-- drivers/net/tap.c | 63 ++ include/linux/if_tap.h | 5 +++- 3 files changed, 65 insertions(+), 32 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 548f339..32ad560 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -28,7 +28,6 @@ * Variables for dealing with macvtaps device numbers. */ static dev_t macvtap_major; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) static const void *macvtap_net_namespace(struct device *d) { @@ -159,43 +158,35 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations tap_fops; static int macvtap_init(void) { int err; - err = alloc_chrdev_region(_major, 0, - MACVTAP_NUM_DEVS, "macvtap"); - if (err) - goto out1; + err = tap_create_cdev(_cdev, _major, "macvtap"); - cdev_init(_cdev, _fops); - err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) - goto out2; + goto out1; err = class_register(_class); if (err) - goto out3; + goto out2; err = register_netdevice_notifier(_notifier_block); if (err) - goto out4; + goto out3; err = macvlan_link_register(_link_ops); if (err) - goto out5; + goto out4; return 0; -out5: - unregister_netdevice_notifier(_notifier_block); out4: - class_unregister(_class); + unregister_netdevice_notifier(_notifier_block); out3: - cdev_del(_cdev); + class_unregister(_class); out2: - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + cdev_del(_cdev); out1: return err; } @@ -207,9 +198,7 @@ static void macvtap_exit(void) rtnl_link_unregister(_link_ops); unregister_netdevice_notifier(_notifier_block); class_unregister(_class); - cdev_del(_cdev); - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); - idr_destroy(_idr); + tap_destroy_cdev(macvtap_major, _cdev); } module_exit(macvtap_exit); diff 
--git a/drivers/net/tap.c b/drivers/net/tap.c index d0807c2..774ef33 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -123,8 +123,12 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) -static DEFINE_MUTEX(minor_lock); -DEFINE_IDR(minor_idr); +struct major_info { + dev_t major; + struct idr minor_idr; + struct mutex minor_lock; + const char *device_name; +} macvtap_major; #define GOODCOPY_LEN 128 @@ -413,26 +417,26 @@ int tap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - mutex_lock(_lock); - retval = idr_alloc(_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major.minor_lock); + retval = idr_alloc(_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { vlan->minor = retval; } else if (retval == -ENOSPC) { netdev_err(vlan->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return retval < 0 ? retval : 0; } void tap_free_minor(struct macvlan_dev *vlan) { - mutex_lock(_lock); + mutex_lock(_major.minor_lock); if (vlan->minor) { - idr_remove(_idr, vlan->minor); + idr_remove(_major.minor_idr, vlan->minor); vlan->minor = 0; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); } static struct net_device *dev_get_by_tap_minor(int minor) @@ -440,13 +444,13 @@ static struct net_device *dev_get_by_tap_minor(int minor) struct net_device *dev = NULL; struct macvlan_dev *vlan; - mutex_lock(_lock); - vlan = idr_find(_idr, minor); + mutex_lock(_major.minor_lock); + vlan = idr_find(_major.minor_idr, minor); if (vlan) { dev = vlan->dev; dev_hold(dev); } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return dev; } @@ -1184,3 +1188,40 @@ int tap_queue_resize(struct macvlan_dev *vlan) kfree(arrays); return ret; } + +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name) +{ + int err; + + err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); + + if (err) + goto out1; + + cdev_init(
[PATCHv2 2/7] TAP: Renaming tap related APIs, data structures, macros
Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.* Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 18 +-- drivers/net/tap.c | 332 ++--- drivers/vhost/net.c| 3 +- include/linux/if_macvlan.h | 17 +-- include/linux/if_macvtap.h | 10 -- include/linux/if_tap.h | 23 6 files changed, 202 insertions(+), 201 deletions(-) delete mode 100644 include/linux/if_macvtap.h create mode 100644 include/linux/if_tap.h diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 96ffa60..548f339 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ static int macvtap_newlink(struct net *src_net, */ vlan->tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); if (err) return err; @@ -82,7 +82,7 @@ static void macvtap_dellink(struct net_device *dev, struct list_head *head) { netdev_rx_handler_unregister(dev); - macvtap_del_queues(dev); + tap_del_queues(dev); macvlan_dellink(dev, head); } @@ -121,7 +121,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = macvtap_get_minor(vlan); + err = tap_get_minor(vlan); if (err) return notifier_from_errno(err); @@ -129,7 +129,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - macvtap_free_minor(vlan); + tap_free_minor(vlan); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -144,10 +144,10 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlan->minor); device_destroy(_class, devt); - macvtap_free_minor(vlan); + tap_free_minor(vlan); break; case NETDEV_CHANGE_TX_QUEUE_LEN: - if (macvtap_queue_resize(vlan)) + if (tap_queue_resize(vlan)) return NOTIFY_BAD; break; } @@ -159,7 +159,7 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations macvtap_fops; +extern struct file_operations tap_fops; static int macvtap_init(void) { int err; @@ -169,7 +169,7 @@ static int macvtap_init(void) if (err) goto out1; - cdev_init(_cdev, _fops); + cdev_init(_cdev, _fops); err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) goto out2; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 8f12a39..d0807c2 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -24,16 +24,16 @@ #include /* - * A macvtap queue is the central object of this driver, it connects + * A tap queue is the central object of this driver, it connects * an open character device to a macvlan interface. There can be * multiple queues on one interface, which map back to queues * implemented in hardware on the underlying device. * - * macvtap_proto is used to allocate queues through the sock allocation + * tap_proto is used to allocate queues through the sock allocation * mechanism. 
* */ -struct macvtap_queue { +struct tap_queue { struct sock sk; struct socket sock; struct socket_wq wq; @@ -47,21 +47,21 @@ struct macvtap_queue { struct skb_array skb_array; }; -#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) -#define MACVTAP_VNET_LE 0x8000 -#define MACVTAP_VNET_BE 0x4000 +#define TAP_VNET_LE 0x8000 +#define TAP_VNET_BE 0x4000 #ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { - return q->flags & MACVTAP_VNET_BE ? false : + return q->flags & TAP_VNET_BE ? false : virtio_legacy_is_little_endian(); } -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +static long tap_get_vnet_
[PATCHv2 6/7] TAP: tap as an independent module
This patch makes tap a separate module for other types of virtual interfaces, for example, ipvlan to use. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 15 +++ drivers/net/Makefile | 3 +-- drivers/net/{macvtap_main.c => macvtap.c} | 1 - drivers/net/tap.c | 11 +++ drivers/vhost/Kconfig | 2 +- include/linux/if_tap.h| 4 ++-- 6 files changed, 30 insertions(+), 6 deletions(-) rename drivers/net/{macvtap_main.c => macvtap.c} (99%) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 95c32f2..1c88437 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -135,6 +135,7 @@ config MACVTAP tristate "MAC-VLAN based tap driver" depends on MACVLAN depends on INET + depends on TAP help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -284,6 +285,20 @@ config TUN If you don't know what to use this for, you don't need it. +config TAP +tristate "TAP module support for virtual interfaces" +---help--- + TAP module serves two purposes. This can be used as library of functions + for virtual interfaces to implement tap functionality. + + This module also includes character device file and socket operations + that can be used by virtual interface implementing tap. + + To compile this driver as a module, choose M here: the module + will be called tap. + + If you don't know what to use this for, you don't need it. 
+ config TUN_VNET_CROSS_LE bool "Support for cross-endian vnet headers on little-endian kernels" default n diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 19b03a9..7dd86ca 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o @@ -29,8 +30,6 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o -macvtap-objs := macvtap_main.o tap.o - # # Networking Drivers # diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap.c similarity index 99% rename from drivers/net/macvtap_main.c rename to drivers/net/macvtap.c index 3f047b4..3efed94 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap.c @@ -232,7 +232,6 @@ static int macvtap_init(void) } module_init(macvtap_init); -extern struct idr minor_idr; static void macvtap_exit(void) { rtnl_link_unregister(_link_ops); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 7f38dbe..32066dd 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -311,6 +311,7 @@ void tap_del_queues(struct tap_dev *tap) /* guarantee that any future tap_set_queue will fail */ tap->numvtaps = MAX_TAP_QUEUES; } +EXPORT_SYMBOL_GPL(tap_del_queues); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { @@ -388,6 +389,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } +EXPORT_SYMBOL_GPL(tap_handle_frame); static struct major_info *tap_get_major(int major) { @@ -422,6 +424,7 @@ int tap_get_minor(dev_t major, struct tap_dev *tap) mutex_unlock(_major->minor_lock); return retval < 0 ? 
retval : 0; } +EXPORT_SYMBOL_GPL(tap_get_minor); void tap_free_minor(dev_t major, struct tap_dev *tap) { @@ -438,6 +441,7 @@ void tap_free_minor(dev_t major, struct tap_dev *tap) } mutex_unlock(_major->minor_lock); } +EXPORT_SYMBOL_GPL(tap_free_minor); static struct tap_dev *dev_get_by_tap_file(int major, int minor) { @@ -1193,6 +1197,7 @@ int tap_queue_resize(struct tap_dev *tap) kfree(arrays); return ret; } +EXPORT_SYMBOL_GPL(tap_queue_resize); static int tap_list_add(dev_t major, const char *device_name) { @@ -1236,6 +1241,7 @@ int tap_create_cdev(struct cdev *tap_cdev, out1: return err; } +EXPORT_SYMBOL_GPL(tap_create_cdev); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { @@ -1249,3 +1255,8 @@ void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) unregister_chrdev_region(major, TAP_NUM_DEVS); idr_destroy(_major->minor_idr); } +EXPORT_SYMBOL_GPL(tap_destroy_cdev); + +MODULE_AUTHOR("Arnd Bergmann <a...@arndb.de>"); +MODULE_AUTHOR("Sainath Grandhi <sainath.gran...@intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 40764ec..cfdecea 100644 --- a/drivers/vhost/
[PATCHv2 5/7] TAP: Extending tap device create/destroy APIs
Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one type of virtual interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 6 +-- drivers/net/tap.c | 98 +++--- include/linux/if_tap.h | 4 +- 3 files changed, 80 insertions(+), 28 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 6326a82..3f047b4 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -160,7 +160,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. */ - err = tap_get_minor(>tap); + err = tap_get_minor(macvtap_major, >tap); if (err) return notifier_from_errno(err); @@ -168,7 +168,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -183,7 +183,7 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(_class, devt); - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(>tap)) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 43d9d54..7f38dbe 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -99,12 +99,16 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) + +static LIST_HEAD(major_list); + struct major_info { dev_t major; struct idr minor_idr; struct mutex minor_lock; const char *device_name; -} macvtap_major; + struct list_head next; +}; #define GOODCOPY_LEN 128 @@ -385,44 +389,73 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; } -int tap_get_minor(struct 
tap_dev *tap) +static struct major_info *tap_get_major(int major) +{ + struct major_info *tap_major, *tmp; + + list_for_each_entry_safe(tap_major, tmp, _list, next) { + if (tap_major->major == major) { + return tap_major; + } + } + + return NULL; +} + +int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; + struct major_info *tap_major; + + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return -EINVAL; - mutex_lock(_major.minor_lock); - retval = idr_alloc(_major.minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major->minor_lock); + retval = idr_alloc(_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); return retval < 0 ? retval : 0; } -void tap_free_minor(struct tap_dev *tap) +void tap_free_minor(dev_t major, struct tap_dev *tap) { - mutex_lock(_major.minor_lock); + struct major_info *tap_major; + + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) + return; + + mutex_lock(_major->minor_lock); if (tap->minor) { - idr_remove(_major.minor_idr, tap->minor); + idr_remove(_major->minor_idr, tap->minor); tap->minor = 0; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); } -static struct tap_dev *dev_get_by_tap_minor(int minor) +static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; + struct major_info *tap_major; + + tap_major = tap_get_major(major); + if (!tap_major) + return NULL; - mutex_lock(_major.minor_lock); - tap = idr_find(_major.minor_idr, minor); + mutex_lock(_major->minor_lock); + tap = idr_find(_major->minor_idr, minor); if (tap) { dev = tap->dev; dev_hold(dev); } - mutex_unlock(_majo
[PATCHv2 0/7] Refactor macvtap to re-use tap functionality by other virtual interfaces
Tap character devices can be implemented on other virtual interfaces like ipvlan, similar to macvtap. Source code for tap functionality in macvtap can be re-used for this purpose. This patch series splits macvtap source into two modules, macvtap and tap. This patch series also includes a patch for implementing tap character device driver based on the IP-VLAN network interface, called ipvtap. These patches are tested on x86 platform. Sainath Grandhi (7): TAP: Refactoring macvtap.c TAP: Renaming tap related APIs, data structures, macros TAP: Tap character device creation/destroy API TAP: Abstract type of virtual interface from tap implementation TAP: Extending tap device create/destroy APIs TAP: tap as an independent module IPVTAP: IP-VLAN based tap driver drivers/net/Kconfig | 28 + drivers/net/Makefile |2 + drivers/net/ipvlan/Makefile |1 + drivers/net/ipvlan/ipvlan.h |7 + drivers/net/ipvlan/ipvlan_core.c |5 +- drivers/net/ipvlan/ipvlan_main.c | 27 +- drivers/net/ipvlan/ipvtap.c | 238 +++ drivers/net/macvlan.c|2 +- drivers/net/macvtap.c| 1226 ++-- drivers/net/tap.c| 1262 ++ drivers/vhost/Kconfig|2 +- drivers/vhost/net.c |3 +- include/linux/if_macvlan.h | 17 +- include/linux/if_tap.h | 75 +++ 14 files changed, 1686 insertions(+), 1209 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c create mode 100644 drivers/net/tap.c create mode 100644 include/linux/if_tap.h -- 2.7.4
[PATCHv1 1/7] TAP: Refactoring macvtap.c
macvtap module has code for tap/queue management and link management. This patch splits the code into macvtap_main.c for link management and tap.c for tap/queue management. Functionality in tap.c can be re-used for implementing tap on other virtual interfaces. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> Tested-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Makefile |2 + drivers/net/macvtap.c | 1374 drivers/net/macvtap_main.c | 218 +++ drivers/net/tap.c | 1186 ++ include/linux/if_macvtap.h | 10 + 5 files changed, 1416 insertions(+), 1374 deletions(-) delete mode 100644 drivers/net/macvtap.c create mode 100644 drivers/net/macvtap_main.c create mode 100644 drivers/net/tap.c create mode 100644 include/linux/if_macvtap.h diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7336cbd..19b03a9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o +macvtap-objs := macvtap_main.o tap.o + # # Networking Drivers # diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c deleted file mode 100644 index 5c26653..000 --- a/drivers/net/macvtap.c +++ /dev/null @@ -1,1374 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -/* - * A macvtap queue is the central object of this driver, it connects - * an open character device to a macvlan interface. There can be - * multiple queues on one interface, which map back to queues - * implemented in hardware on the underlying device. - * - * macvtap_proto is used to allocate queues through the sock allocation - * mechanism. 
- * - */ -struct macvtap_queue { - struct sock sk; - struct socket sock; - struct socket_wq wq; - int vnet_hdr_sz; - struct macvlan_dev __rcu *vlan; - struct file *file; - unsigned int flags; - u16 queue_index; - bool enabled; - struct list_head next; - struct skb_array skb_array; -}; - -#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) - -#define MACVTAP_VNET_LE 0x8000 -#define MACVTAP_VNET_BE 0x4000 - -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) -{ - return q->flags & MACVTAP_VNET_BE ? false : - virtio_legacy_is_little_endian(); -} - -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) -{ - int s = !!(q->flags & MACVTAP_VNET_BE); - - if (put_user(s, sp)) - return -EFAULT; - - return 0; -} - -static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp) -{ - int s; - - if (get_user(s, sp)) - return -EFAULT; - - if (s) - q->flags |= MACVTAP_VNET_BE; - else - q->flags &= ~MACVTAP_VNET_BE; - - return 0; -} -#else -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) -{ - return virtio_legacy_is_little_endian(); -} - -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp) -{ - return -EINVAL; -} - -static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool macvtap_is_little_endian(struct macvtap_queue *q) -{ - return q->flags & MACVTAP_VNET_LE || - macvtap_legacy_is_little_endian(q); -} - -static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) -{ - return __virtio16_to_cpu(macvtap_is_little_endian(q), val); -} - -static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val) -{ - return __cpu_to_virtio16(macvtap_is_little_endian(q), val); -} - -static struct proto macvtap_proto = { - .name = "macvtap", - .owner = THIS_MODULE, - .obj_size = sizeof (struct macvtap_queue), -}; - -/* - * 
Variables for dealing with macvtaps device numbers. - */ -static dev_t macvtap_major; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) -static DEFINE_MUTEX(minor_lock); -static DEFINE_IDR(minor_idr); - -#define GOODCOPY_LEN 128 -static const void *macvtap_net_namespace(struct device *d) -{ - struct net_device *dev = to_net_dev(d->parent); - return dev_net(dev); -} - -static struct class macvtap_class = { - .name = "macvtap", - .owner = THIS_MODULE, - .ns_type = _ns_type_operations, - .namespace = macvtap_net_namespace, -}; -static struct cdev macvtap_cdev; - -static const struct proto_ops macvtap_socket_ops; - -#define TUN_OFFLOADS (NETIF_F_HW_CSU
[PATCHv1 4/7] TAP: Abstract type of virtual interface from tap implementation
macvlan object is re-structured to hold tap related elements in a separate entity, tap_dev. Upon NETDEV_REGISTER device_event, tap_dev is registered with idr and fetched again on tap_open. Few of the tap functions are modified to accepted tap_dev as argument. tap_dev object includes callbacks to be used by underlying virtual interface to take care of tx and rx accounting. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> Tested-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvlan.c | 2 +- drivers/net/macvtap_main.c | 68 +--- drivers/net/tap.c | 264 - include/linux/if_tap.h | 59 +- 4 files changed, 227 insertions(+), 166 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 20b3fdf2..79383f9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1526,7 +1526,6 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ - ops->priv_size = sizeof(struct macvlan_dev); ops->validate = macvlan_validate; ops->maxtype= IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -1549,6 +1548,7 @@ static struct rtnl_link_ops macvlan_link_ops = { .newlink= macvlan_newlink, .dellink= macvlan_dellink, .get_link_net = macvlan_get_link_net, + .priv_size = sizeof(struct macvlan_dev), }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 32ad560..6326a82 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -24,6 +24,11 @@ #include #include +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_devtap; +}; + /* * Variables for dealing with macvtaps device numbers. 
*/ @@ -46,22 +51,52 @@ static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvlan_dev *vlan = (struct macvlan_dev *)container_of(tap, struct macvtap_dev, tap); + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvlan_dev *vlan = (struct macvlan_dev *)container_of(tap, struct macvtap_dev, tap); + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvlan_dev *vlan = (struct macvlan_dev *)container_of(tap, struct macvtap_dev, tap); + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + static int macvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_dev *vlantap = netdev_priv(dev); int err; - INIT_LIST_HEAD(>queue_list); + INIT_LIST_HEAD(>tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. 
*/ - vlan->tap_features = TUN_OFFLOADS; + vlantap->tap.tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); + /* Register callbacks for rx/tx drops accounting and updating +* net_device features +*/ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, >tap); if (err) return err; @@ -74,14 +109,18 @@ static int macvtap_newlink(struct net *src_net, return err; } + vlantap->tap.dev = vlantap->vlan.dev; + return 0; } static void macvtap_dellink(struct net_device *dev, struct list_head *head) { + struct macvtap_dev *vlantap = netdev_priv(dev); + netdev_rx_handler_unregister(dev); - tap_del_queues(dev); + tap_del_queues(>tap); macvlan_dellink(dev, head); } @@ -96,13 +135,14 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .setup = macvtap_setup, .newlink= macvtap_newlink, .dellink= macvtap_dellink, + .priv_size = sizeof(struct macvtap_dev), }; static int macvtap_device_event(struct notifier_block *unused,
[PATCHv1 0/7] Refactor macvtap to re-use tap functionality by other virtual interfaces
Tap character devices can be implemented on other virtual interfaces like ipvlan, similar to macvtap. Source code for tap functionality in macvtap can be re-used for this purpose. This patch series splits macvtap source into two modules, macvtap and tap. This patch series also includes a patch for implementing tap character device driver based on the IP-VLAN network interface, called ipvtap. Sainath Grandhi (7): TAP: Refactoring macvtap.c TAP: Renaming tap related APIs, data structures, macros TAP: Tap character device creation/destroy API TAP: Abstract type of virtual interface from tap implementation TAP: Extending tap device create/destroy APIs TAP: tap as an independent module IPVTAP: IP-VLAN based tap driver drivers/net/Kconfig | 26 + drivers/net/Makefile |2 + drivers/net/ipvlan/Makefile |1 + drivers/net/ipvlan/ipvlan.h |7 + drivers/net/ipvlan/ipvlan_core.c |5 +- drivers/net/ipvlan/ipvlan_main.c | 37 +- drivers/net/ipvlan/ipvtap.c | 238 +++ drivers/net/macvlan.c|2 +- drivers/net/macvtap.c| 1227 ++-- drivers/net/tap.c| 1276 ++ drivers/vhost/net.c |3 +- include/linux/if_macvlan.h |4 +- include/linux/if_tap.h | 63 ++ 13 files changed, 1691 insertions(+), 1200 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c create mode 100644 drivers/net/tap.c create mode 100644 include/linux/if_tap.h -- 2.7.4
[PATCHv1 2/7] TAP: Renaming tap related APIs, data structures, macros
Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.* Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> Tested-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 18 +-- drivers/net/tap.c | 332 ++--- drivers/vhost/net.c| 3 +- include/linux/if_macvlan.h | 4 +- include/linux/if_macvtap.h | 10 -- include/linux/if_tap.h | 11 ++ 6 files changed, 190 insertions(+), 188 deletions(-) delete mode 100644 include/linux/if_macvtap.h create mode 100644 include/linux/if_tap.h diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 96ffa60..548f339 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ static int macvtap_newlink(struct net *src_net, */ vlan->tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); if (err) return err; @@ -82,7 +82,7 @@ static void macvtap_dellink(struct net_device *dev, struct list_head *head) { netdev_rx_handler_unregister(dev); - macvtap_del_queues(dev); + tap_del_queues(dev); macvlan_dellink(dev, head); } @@ -121,7 +121,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = macvtap_get_minor(vlan); + err = tap_get_minor(vlan); if (err) return notifier_from_errno(err); @@ -129,7 +129,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - macvtap_free_minor(vlan); + tap_free_minor(vlan); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -144,10 +144,10 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlan->minor); device_destroy(_class, devt); - macvtap_free_minor(vlan); + tap_free_minor(vlan); break; case NETDEV_CHANGE_TX_QUEUE_LEN: - if (macvtap_queue_resize(vlan)) + if (tap_queue_resize(vlan)) return NOTIFY_BAD; break; } @@ -159,7 +159,7 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations macvtap_fops; +extern struct file_operations tap_fops; static int macvtap_init(void) { int err; @@ -169,7 +169,7 @@ static int macvtap_init(void) if (err) goto out1; - cdev_init(_cdev, _fops); + cdev_init(_cdev, _fops); err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) goto out2; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 8f12a39..d0807c2 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -24,16 +24,16 @@ #include /* - * A macvtap queue is the central object of this driver, it connects + * A tap queue is the central object of this driver, it connects * an open character device to a macvlan interface. There can be * multiple queues on one interface, which map back to queues * implemented in hardware on the underlying device. * - * macvtap_proto is used to allocate queues through the sock allocation + * tap_proto is used to allocate queues through the sock allocation * mechanism. 
* */ -struct macvtap_queue { +struct tap_queue { struct sock sk; struct socket sock; struct socket_wq wq; @@ -47,21 +47,21 @@ struct macvtap_queue { struct skb_array skb_array; }; -#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) -#define MACVTAP_VNET_LE 0x8000 -#define MACVTAP_VNET_BE 0x4000 +#define TAP_VNET_LE 0x8000 +#define TAP_VNET_BE 0x4000 #ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { - return q->flags & MACVTAP_VNET_BE ? false : + return q->flags & TAP_VNET_BE ? false : virtio_legacy_is_little_endian(); } -static long macvtap_get_vnet_be(struct ma
[PATCHv1 3/7] TAP: Tap character device creation/destroy API
This patch provides tap device create/destroy APIs in tap.c. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> Tested-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 29 +++-- drivers/net/tap.c | 64 ++ include/linux/if_tap.h | 3 +++ 3 files changed, 65 insertions(+), 31 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 548f339..32ad560 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -28,7 +28,6 @@ * Variables for dealing with macvtaps device numbers. */ static dev_t macvtap_major; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) static const void *macvtap_net_namespace(struct device *d) { @@ -159,43 +158,35 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations tap_fops; static int macvtap_init(void) { int err; - err = alloc_chrdev_region(_major, 0, - MACVTAP_NUM_DEVS, "macvtap"); - if (err) - goto out1; + err = tap_create_cdev(_cdev, _major, "macvtap"); - cdev_init(_cdev, _fops); - err = cdev_add(_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) - goto out2; + goto out1; err = class_register(_class); if (err) - goto out3; + goto out2; err = register_netdevice_notifier(_notifier_block); if (err) - goto out4; + goto out3; err = macvlan_link_register(_link_ops); if (err) - goto out5; + goto out4; return 0; -out5: - unregister_netdevice_notifier(_notifier_block); out4: - class_unregister(_class); + unregister_netdevice_notifier(_notifier_block); out3: - cdev_del(_cdev); + class_unregister(_class); out2: - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + cdev_del(_cdev); out1: return err; } @@ -207,9 +198,7 @@ static void macvtap_exit(void) rtnl_link_unregister(_link_ops); unregister_netdevice_notifier(_notifier_block); class_unregister(_class); - cdev_del(_cdev); - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); - idr_destroy(_idr); + 
tap_destroy_cdev(macvtap_major, _cdev); } module_exit(macvtap_exit); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index d0807c2..52692d2 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -123,8 +123,12 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) -static DEFINE_MUTEX(minor_lock); -DEFINE_IDR(minor_idr); +struct major_info { + dev_t major; + struct idr minor_idr; + struct mutex minor_lock; + const char *device_name; +} macvtap_major; #define GOODCOPY_LEN 128 @@ -413,26 +417,26 @@ int tap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - mutex_lock(_lock); - retval = idr_alloc(_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(_major.minor_lock); + retval = idr_alloc(_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { vlan->minor = retval; } else if (retval == -ENOSPC) { netdev_err(vlan->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return retval < 0 ? 
retval : 0; } void tap_free_minor(struct macvlan_dev *vlan) { - mutex_lock(_lock); + mutex_lock(_major.minor_lock); if (vlan->minor) { - idr_remove(_idr, vlan->minor); + idr_remove(_major.minor_idr, vlan->minor); vlan->minor = 0; } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); } static struct net_device *dev_get_by_tap_minor(int minor) @@ -440,13 +444,13 @@ static struct net_device *dev_get_by_tap_minor(int minor) struct net_device *dev = NULL; struct macvlan_dev *vlan; - mutex_lock(_lock); - vlan = idr_find(_idr, minor); + mutex_lock(_major.minor_lock); + vlan = idr_find(_major.minor_idr, minor); if (vlan) { dev = vlan->dev; dev_hold(dev); } - mutex_unlock(_lock); + mutex_unlock(_major.minor_lock); return dev; } @@ -1184,3 +1188,41 @@ int tap_queue_resize(struct macvlan_dev *vlan) kfree(arrays); return ret; } + +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name) +{ + int err; + + err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); + + if (
[PATCHv1 7/7] IPVTAP: IP-VLAN based tap driver
This patch adds a tap character device driver that is based on the IP-VLAN network interface, called ipvtap. An ipvtap device can be created in the same way as an ipvlan device, using 'type ipvtap', and then accessed using the tap user space interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> Tested-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig | 12 ++ drivers/net/Makefile | 1 + drivers/net/ipvlan/Makefile | 1 + drivers/net/ipvlan/ipvlan.h | 7 ++ drivers/net/ipvlan/ipvlan_core.c | 5 +- drivers/net/ipvlan/ipvlan_main.c | 37 +++--- drivers/net/ipvlan/ipvtap.c | 238 +++ 7 files changed, 282 insertions(+), 19 deletions(-) create mode 100644 drivers/net/ipvlan/ipvtap.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 280380d..ddfb30a 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -165,6 +165,18 @@ config IPVLAN To compile this driver as a module, choose M here: the module will be called ipvlan. +config IPVTAP +tristate "IP-VLAN based tap driver" +depends on IPVLAN +depends on INET +help + This adds a specialized tap character device driver that is based + on the IP-VLAN network interface, called ipvtap. An ipvtap device + can be added in the same way as a ipvlan device, using 'type + ipvtap', and then be accessed through the tap user space interface. + + To compile this driver as a module, choose M here: the module + will be called macvtap. 
config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7dd86ca..98ed4d9 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -7,6 +7,7 @@ # obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ +obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile index df79910..8a2c64d 100644 --- a/drivers/net/ipvlan/Makefile +++ b/drivers/net/ipvlan/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_IPVLAN) += ipvlan.o +obj-$(CONFIG_IPVTAP) += ipvtap.o ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index dbfbb33..4362d88 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -133,4 +133,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, u16 proto); unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, bool mcast); +int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]); +void ipvlan_link_delete(struct net_device *dev, struct list_head *head); +void ipvlan_link_setup(struct net_device *dev); +int ipvlan_link_register(struct rtnl_link_ops *ops); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 83ce74a..9af16ab 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -16,8 +16,8 @@ void ipvlan_init_secret(void) net_get_random_once(_jhash_secret, sizeof(ipvlan_jhash_secret)); } -static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, - unsigned int len, bool success, bool mcast) +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, +unsigned int len, bool success, 
bool mcast) { if (!ipvlan) return; @@ -36,6 +36,7 @@ static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, this_cpu_inc(ipvlan->pcpu_stats->rx_errs); } } +EXPORT_SYMBOL_GPL(ipvlan_count_rx); static u8 ipvlan_get_v6_hash(const void *iaddr) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 8b0f993..666a05d 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -13,14 +13,14 @@ static u32 ipvl_nf_hook_refcnt = 0; static struct nf_hook_ops ipvl_nfops[] __read_mostly = { { - .hook = ipvlan_nf_input, - .pf = NFPROTO_IPV4, + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = INT_MAX, }, { - .hook = ipvlan_nf_input, - .pf = NFPROTO_IPV6, + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = INT_MAX,
[PATCHv1 6/7] TAP: tap as an independent module
This patch makes tap a separate module for other types of virtual interfaces, for example, ipvlan to use. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> Tested-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/Kconfig| 14 +++ drivers/net/Makefile | 3 +- drivers/net/macvtap.c | 247 + drivers/net/macvtap_main.c | 247 - drivers/net/tap.c | 10 ++ 5 files changed, 272 insertions(+), 249 deletions(-) create mode 100644 drivers/net/macvtap.c delete mode 100644 drivers/net/macvtap_main.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 95c32f2..280380d 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -284,6 +284,20 @@ config TUN If you don't know what to use this for, you don't need it. +config TAP +tristate "TAP module support for virtual interfaces" +---help--- + TAP module serves two purposes. This can be used as library of functions + for virtual interfaces to implement tap functionality. + + This module also includes character device file and socket operations + that can be used by virtual interface implementing tap. + + To compile this driver as a module, choose M here: the module + will be called tap. + + If you don't know what to use this for, you don't need it. 
+ config TUN_VNET_CROSS_LE bool "Support for cross-endian vnet headers on little-endian kernels" default n diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 19b03a9..7dd86ca 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o @@ -29,8 +30,6 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o -macvtap-objs := macvtap_main.o tap.o - # # Networking Drivers # diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c new file mode 100644 index 000..3f047b4 --- /dev/null +++ b/drivers/net/macvtap.c @@ -0,0 +1,247 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_devtap; +}; + +/* + * Variables for dealing with macvtaps device numbers. 
+ */ +static dev_t macvtap_major; + +static const void *macvtap_net_namespace(struct device *d) +{ + struct net_device *dev = to_net_dev(d->parent); + return dev_net(dev); +} + +static struct class macvtap_class = { + .name = "macvtap", + .owner = THIS_MODULE, + .ns_type = _ns_type_operations, + .namespace = macvtap_net_namespace, +}; +static struct cdev macvtap_cdev; + +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ + NETIF_F_TSO6 | NETIF_F_UFO) + +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvlan_dev *vlan = (struct macvlan_dev *)container_of(tap, struct macvtap_dev, tap); + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvlan_dev *vlan = (struct macvlan_dev *)container_of(tap, struct macvtap_dev, tap); + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvlan_dev *vlan = (struct macvlan_dev *)container_of(tap, struct macvtap_dev, tap); + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct macvtap_dev *vlantap = netdev_priv(dev); + int err; + + INIT_LIST_HEAD(>tap.queue_list); + + /* Since macvlan supports all offloads by default, make +* tap support all offloads also. +*/ + vlantap->tap.tap_features = TUN_OFFLOADS; + + /* Register callbacks for rx/tx drops accounting and updating +* net_device features +*/ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, >tap); +
[PATCHv1 5/7] TAP: Extending tap device create/destroy APIs
Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one type of virtual interface. Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> Tested-by: Sainath Grandhi <sainath.gran...@intel.com> --- drivers/net/macvtap_main.c | 6 +-- drivers/net/tap.c | 110 - include/linux/if_tap.h | 4 +- 3 files changed, 93 insertions(+), 27 deletions(-) diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 6326a82..3f047b4 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -160,7 +160,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. */ - err = tap_get_minor(>tap); + err = tap_get_minor(macvtap_major, >tap); if (err) return notifier_from_errno(err); @@ -168,7 +168,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(_class, >dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(>dev.kobj, >kobj, @@ -183,7 +183,7 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(>dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(_class, devt); - tap_free_minor(>tap); + tap_free_minor(macvtap_major, >tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(>tap)) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 6306ab9..1d5bcf3 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -99,12 +99,16 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) + +LIST_HEAD(major_list); + struct major_info { dev_t major; struct idr minor_idr; struct mutex minor_lock; const char *device_name; -} macvtap_major; + struct list_head next; +}; #define GOODCOPY_LEN 128 @@ -385,44 +389,81 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) return 
RX_HANDLER_CONSUMED; } -int tap_get_minor(struct tap_dev *tap) +int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; + struct major_info *tap_major, *tmp; + bool found = false; - mutex_lock(_major.minor_lock); - retval = idr_alloc(_major.minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); + list_for_each_entry_safe(tap_major, tmp, _list, next) { + if (tap_major->major == MAJOR(major)) { + found = true; + break; + } + } + + if (!found) + return -EINVAL; + + mutex_lock(_major->minor_lock); + retval = idr_alloc(_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); return retval < 0 ? retval : 0; } -void tap_free_minor(struct tap_dev *tap) +void tap_free_minor(dev_t major, struct tap_dev *tap) { - mutex_lock(_major.minor_lock); + struct major_info *tap_major, *tmp; + bool found = false; + + list_for_each_entry_safe(tap_major, tmp, _list, next) { + if (tap_major->major == MAJOR(major)) { + found = true; + break; + } + } + + if (!found) + return; + + mutex_lock(_major->minor_lock); if (tap->minor) { - idr_remove(_major.minor_idr, tap->minor); + idr_remove(_major->minor_idr, tap->minor); tap->minor = 0; } - mutex_unlock(_major.minor_lock); + mutex_unlock(_major->minor_lock); } -static struct tap_dev *dev_get_by_tap_minor(int minor) +static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; + struct major_info *tap_major, *tmp; + bool found = false; - mutex_lock(_major.minor_lock); - tap = idr_find(_major.minor_idr, minor); + list_for_each_entry_safe(tap_major, tmp, _list, next) { + if (tap_major->major == major) { +