few superficial comments inline.
On Fri, Jan 6, 2017 at 2:33 PM, Sainath Grandhi <sainath.gran...@intel.com> wrote: > This patch adds a tap character device driver that is based on the > IP-VLAN network interface, called ipvtap. An ipvtap device can be created > in the same way as an ipvlan device, using 'type ipvtap', and then accessed > using the tap user space interface. > > Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com> > Tested-by: Sainath Grandhi <sainath.gran...@intel.com> > --- > drivers/net/Kconfig | 12 ++ > drivers/net/Makefile | 1 + > drivers/net/ipvlan/Makefile | 1 + > drivers/net/ipvlan/ipvlan.h | 7 ++ > drivers/net/ipvlan/ipvlan_core.c | 5 +- > drivers/net/ipvlan/ipvlan_main.c | 37 +++--- > drivers/net/ipvlan/ipvtap.c | 238 > +++++++++++++++++++++++++++++++++++++++ > 7 files changed, 282 insertions(+), 19 deletions(-) > create mode 100644 drivers/net/ipvlan/ipvtap.c > > diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig > index 280380d..ddfb30a 100644 > --- a/drivers/net/Kconfig > +++ b/drivers/net/Kconfig > @@ -165,6 +165,18 @@ config IPVLAN > To compile this driver as a module, choose M here: the module > will be called ipvlan. > > +config IPVTAP > + tristate "IP-VLAN based tap driver" > + depends on IPVLAN > + depends on INET > + help > + This adds a specialized tap character device driver that is based > + on the IP-VLAN network interface, called ipvtap. An ipvtap device > + can be added in the same way as a ipvlan device, using 'type > + ipvtap', and then be accessed through the tap user space interface. > + > + To compile this driver as a module, choose M here: the module > + will be called macvtap. > > config VXLAN > tristate "Virtual eXtensible Local Area Network (VXLAN)" > diff --git a/drivers/net/Makefile b/drivers/net/Makefile > index 7dd86ca..98ed4d9 100644 > --- a/drivers/net/Makefile > +++ b/drivers/net/Makefile > @@ -7,6 +7,7 @@ > # > obj-$(CONFIG_BONDING) += bonding/ > obj-$(CONFIG_IPVLAN) += ipvlan/ > +obj-$(CONFIG_IPVTAP) += ipvlan/ > obj-$(CONFIG_DUMMY) += dummy.o > obj-$(CONFIG_EQUALIZER) += eql.o > obj-$(CONFIG_IFB) += ifb.o > diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile > index df79910..8a2c64d 100644 > --- a/drivers/net/ipvlan/Makefile > +++ b/drivers/net/ipvlan/Makefile > @@ -3,5 +3,6 @@ > # > > obj-$(CONFIG_IPVLAN) += ipvlan.o > +obj-$(CONFIG_IPVTAP) += ipvtap.o > > ipvlan-objs := ipvlan_core.o ipvlan_main.o > diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h > index dbfbb33..4362d88 100644 > --- a/drivers/net/ipvlan/ipvlan.h > +++ b/drivers/net/ipvlan/ipvlan.h > @@ -133,4 +133,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, > struct sk_buff *skb, > u16 proto); > unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, > const struct nf_hook_state *state); > +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, > + unsigned int len, bool success, bool mcast); > +int ipvlan_link_new(struct net *src_net, struct net_device *dev, > + struct nlattr *tb[], struct nlattr *data[]); > +void ipvlan_link_delete(struct net_device *dev, struct list_head *head); > +void ipvlan_link_setup(struct net_device *dev); > +int ipvlan_link_register(struct rtnl_link_ops *ops); > #endif /* __IPVLAN_H */ > diff --git a/drivers/net/ipvlan/ipvlan_core.c > b/drivers/net/ipvlan/ipvlan_core.c > index 83ce74a..9af16ab 100644 > --- a/drivers/net/ipvlan/ipvlan_core.c > +++ b/drivers/net/ipvlan/ipvlan_core.c > @@ -16,8 +16,8 @@ void ipvlan_init_secret(void) > net_get_random_once(&ipvlan_jhash_secret, > sizeof(ipvlan_jhash_secret)); > } > > -static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, > - unsigned int len, bool success, bool mcast) > +void ipvlan_count_rx(const struct ipvl_dev *ipvlan, > + unsigned int len, bool success, bool mcast) > { > if (!ipvlan) > return; > @@ -36,6 +36,7 @@ static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, > this_cpu_inc(ipvlan->pcpu_stats->rx_errs); > } > } > +EXPORT_SYMBOL_GPL(ipvlan_count_rx); Why export, isn't just removing 'static' enough? > > static u8 ipvlan_get_v6_hash(const void *iaddr) > { > diff --git a/drivers/net/ipvlan/ipvlan_main.c > b/drivers/net/ipvlan/ipvlan_main.c > index 8b0f993..666a05d 100644 > --- a/drivers/net/ipvlan/ipvlan_main.c > +++ b/drivers/net/ipvlan/ipvlan_main.c > @@ -13,14 +13,14 @@ static u32 ipvl_nf_hook_refcnt = 0; > > static struct nf_hook_ops ipvl_nfops[] __read_mostly = { > { > - .hook = ipvlan_nf_input, > - .pf = NFPROTO_IPV4, > + .hook = ipvlan_nf_input, > + .pf = NFPROTO_IPV4, spurious? > .hooknum = NF_INET_LOCAL_IN, > .priority = INT_MAX, > }, > { > - .hook = ipvlan_nf_input, > - .pf = NFPROTO_IPV6, > + .hook = ipvlan_nf_input, > + .pf = NFPROTO_IPV6, spurious?? > .hooknum = NF_INET_LOCAL_IN, > .priority = INT_MAX, > }, > @@ -398,7 +398,7 @@ static int ipvlan_hard_header(struct sk_buff *skb, struct > net_device *dev, > } > > static const struct header_ops ipvlan_header_ops = { > - .create = ipvlan_hard_header, > + .create = ipvlan_hard_header, spurious??? > .parse = eth_header_parse, > .cache = eth_header_cache, > .cache_update = eth_header_cache_update, > @@ -494,8 +494,8 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb, > return ret; > } > > -static int ipvlan_link_new(struct net *src_net, struct net_device *dev, > - struct nlattr *tb[], struct nlattr *data[]) > +int ipvlan_link_new(struct net *src_net, struct net_device *dev, > + struct nlattr *tb[], struct nlattr *data[]) > { > struct ipvl_dev *ipvlan = netdev_priv(dev); > struct ipvl_port *port; > @@ -567,8 +567,9 @@ static int ipvlan_link_new(struct net *src_net, struct > net_device *dev, > ipvlan_port_destroy(phy_dev); > return err; > } > +EXPORT_SYMBOL_GPL(ipvlan_link_new); > > -static void ipvlan_link_delete(struct net_device *dev, struct list_head > *head) > +void ipvlan_link_delete(struct net_device *dev, struct list_head *head) > { > struct ipvl_dev *ipvlan = netdev_priv(dev); > struct ipvl_addr *addr, *next; > @@ -583,8 +584,9 @@ static void ipvlan_link_delete(struct net_device *dev, > struct list_head *head) > unregister_netdevice_queue(dev, head); > netdev_upper_dev_unlink(ipvlan->phy_dev, dev); > } > +EXPORT_SYMBOL_GPL(ipvlan_link_delete); > > -static void ipvlan_link_setup(struct net_device *dev) > +void ipvlan_link_setup(struct net_device *dev) > { > ether_setup(dev); > > @@ -595,6 +597,7 @@ static void ipvlan_link_setup(struct net_device *dev) > dev->header_ops = &ipvlan_header_ops; > dev->ethtool_ops = &ipvlan_ethtool_ops; > } > +EXPORT_SYMBOL_GPL(ipvlan_link_setup); > > static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = > { > @@ -605,22 +608,22 @@ static struct rtnl_link_ops ipvlan_link_ops = { > .kind = "ipvlan", > .priv_size = sizeof(struct ipvl_dev), > > - .get_size = ipvlan_nl_getsize, > - .policy = ipvlan_nl_policy, > - .validate = ipvlan_nl_validate, > - .fill_info = ipvlan_nl_fillinfo, > - .changelink = ipvlan_nl_changelink, > - .maxtype = IFLA_IPVLAN_MAX, > - > .setup = ipvlan_link_setup, > .newlink = ipvlan_link_new, > .dellink = ipvlan_link_delete, > }; > > -static int ipvlan_link_register(struct rtnl_link_ops *ops) > +int ipvlan_link_register(struct rtnl_link_ops *ops) > { > + ops->get_size = ipvlan_nl_getsize; > + ops->policy = ipvlan_nl_policy; > + ops->validate = ipvlan_nl_validate; > + ops->fill_info = ipvlan_nl_fillinfo; > + ops->changelink = ipvlan_nl_changelink; > + ops->maxtype = IFLA_IPVLAN_MAX; > return rtnl_link_register(ops); > } > +EXPORT_SYMBOL_GPL(ipvlan_link_register); > > static int ipvlan_device_event(struct notifier_block *unused, > unsigned long event, void *ptr) > diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c > new file mode 100644 > index 0000000..0422cdf > --- /dev/null > +++ b/drivers/net/ipvlan/ipvtap.c > @@ -0,0 +1,238 @@ > +#include <linux/etherdevice.h> > +#include "ipvlan.h" > +#include <linux/if_vlan.h> > +#include <linux/if_tap.h> > +#include <linux/interrupt.h> > +#include <linux/nsproxy.h> > +#include <linux/compat.h> > +#include <linux/if_tun.h> > +#include <linux/module.h> > +#include <linux/skbuff.h> > +#include <linux/cache.h> > +#include <linux/sched.h> > +#include <linux/types.h> > +#include <linux/slab.h> > +#include <linux/wait.h> > +#include <linux/cdev.h> > +#include <linux/idr.h> > +#include <linux/fs.h> > +#include <linux/uio.h> > + > +#include <net/net_namespace.h> > +#include <net/rtnetlink.h> > +#include <net/sock.h> > +#include <linux/virtio_net.h> > + > +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ > + NETIF_F_TSO6 | NETIF_F_UFO) > + > +static dev_t ipvtap_major; > +static struct cdev ipvtap_cdev; > + > +static const void *ipvtap_net_namespace(struct device *d) > +{ > + struct net_device *dev = to_net_dev(d->parent); > + return dev_net(dev); > +} > + > +static struct class ipvtap_class = { > + .name = "ipvtap", > + .owner = THIS_MODULE, > + .ns_type = &net_ns_type_operations, > + .namespace = ipvtap_net_namespace, > +}; > + > +struct ipvtap_dev { > + struct ipvl_dev vlan; > + struct tap_dev tap; > +}; > + > +static void ipvtap_count_tx_dropped(struct tap_dev *tap) > +{ > + struct ipvl_dev *vlan = (struct ipvl_dev *)container_of(tap, struct > ipvtap_dev, tap); > + > + this_cpu_inc(vlan->pcpu_stats->tx_drps); > +} > + > +static void ipvtap_count_rx_dropped(struct tap_dev *tap) > +{ > + struct ipvl_dev *vlan = (struct ipvl_dev *)container_of(tap, struct > ipvtap_dev, tap); > + > + ipvlan_count_rx(vlan, 0, 0, 0); > +} > + > +static void ipvtap_update_features(struct tap_dev *tap, > + netdev_features_t features) > +{ > + struct ipvl_dev *vlan = (struct ipvl_dev *)container_of(tap, struct > ipvtap_dev, tap); > + > + vlan->sfeatures = features; > + netdev_update_features(vlan->dev); > +} > + > +static int ipvtap_newlink(struct net *src_net, > + struct net_device *dev, > + struct nlattr *tb[], > + struct nlattr *data[]) > +{ > + struct ipvtap_dev *vlantap = netdev_priv(dev); > + int err; > + > + INIT_LIST_HEAD(&vlantap->tap.queue_list); > + > + /* Since macvlan supports all offloads by default, make > + * tap support all offloads also. > + */ > + vlantap->tap.tap_features = TUN_OFFLOADS; > + vlantap->tap.count_tx_dropped = ipvtap_count_tx_dropped; > + vlantap->tap.update_features = ipvtap_update_features; > + vlantap->tap.count_rx_dropped = ipvtap_count_rx_dropped; > + > + err = netdev_rx_handler_register(dev, tap_handle_frame, > &vlantap->tap); > + if (err) > + return err; > + > + /* Don't put anything that may fail after macvlan_common_newlink > + * because we can't undo what it does. > + */ > + err = ipvlan_link_new(src_net, dev, tb, data); > + if (err) { > + netdev_rx_handler_unregister(dev); > + return err; > + } > + > + vlantap->tap.dev = vlantap->vlan.dev; > + > + return err; > +} > + > +static void ipvtap_dellink(struct net_device *dev, > + struct list_head *head) > +{ > + struct ipvtap_dev *vlan = netdev_priv(dev); > + > + netdev_rx_handler_unregister(dev); > + tap_del_queues(&vlan->tap); > + ipvlan_link_delete(dev, head); > +} > + > +static void ipvtap_setup(struct net_device *dev) > +{ > + ipvlan_link_setup(dev); > + dev->tx_queue_len = TUN_READQ_SIZE; > + dev->priv_flags &= ~IFF_NO_QUEUE; > +} > + > +static struct rtnl_link_ops ipvtap_link_ops __read_mostly = { > + .kind = "ipvtap", > + .setup = ipvtap_setup, > + .newlink = ipvtap_newlink, > + .dellink = ipvtap_dellink, > + .priv_size = sizeof(struct ipvtap_dev), > +}; > + > +static int ipvtap_device_event(struct notifier_block *unused, > + unsigned long event, void *ptr) > +{ > + struct net_device *dev = netdev_notifier_info_to_dev(ptr); > + struct ipvtap_dev *vlantap; > + struct device *classdev; > + dev_t devt; > + int err; > + char tap_name[IFNAMSIZ]; > + > + if (dev->rtnl_link_ops != &ipvtap_link_ops) > + return NOTIFY_DONE; > + > + snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); > + vlantap = netdev_priv(dev); > + > + switch (event) { > + case NETDEV_REGISTER: > + /* Create the device node here after the network device has > + * been registered but before register_netdevice has > + * finished running. > + */ > + err = tap_get_minor(ipvtap_major, &vlantap->tap); > + if (err) > + return notifier_from_errno(err); > + > + devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor); > + classdev = device_create(&ipvtap_class, &dev->dev, devt, > + dev, tap_name); > + if (IS_ERR(classdev)) { > + tap_free_minor(ipvtap_major, &vlantap->tap); > + return notifier_from_errno(PTR_ERR(classdev)); > + } > + err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj, > + tap_name); > + if (err) > + return notifier_from_errno(err); > + break; > + case NETDEV_UNREGISTER: > + /* vlan->minor == 0 if NETDEV_REGISTER above failed */ > + if (vlantap->tap.minor == 0) > + break; > + sysfs_remove_link(&dev->dev.kobj, tap_name); > + devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor); > + device_destroy(&ipvtap_class, devt); > + tap_free_minor(ipvtap_major, &vlantap->tap); > + break; > + case NETDEV_CHANGE_TX_QUEUE_LEN: > + if (tap_queue_resize(&vlantap->tap)) > + return NOTIFY_BAD; > + break; > + } > + > + return NOTIFY_DONE; > +} > + > +static struct notifier_block ipvtap_notifier_block __read_mostly = { > + .notifier_call = ipvtap_device_event, > +}; > + > +static int ipvtap_init(void) > +{ > + int err; > + > + err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap"); > + > + if (err) > + goto out1; > + > + err = class_register(&ipvtap_class); > + if (err) > + goto out2; > + > + err = register_netdevice_notifier(&ipvtap_notifier_block); > + if (err) > + goto out3; > + > + err = ipvlan_link_register(&ipvtap_link_ops); > + if (err) > + goto out4; > + > + return 0; > + > +out4: > + unregister_netdevice_notifier(&ipvtap_notifier_block); > +out3: > + class_unregister(&ipvtap_class); > +out2: > + cdev_del(&ipvtap_cdev); > +out1: > + return err; > +} > +module_init(ipvtap_init); > + > +static void ipvtap_exit(void) > +{ > + rtnl_link_unregister(&ipvtap_link_ops); > + unregister_netdevice_notifier(&ipvtap_notifier_block); > + class_unregister(&ipvtap_class); > + tap_destroy_cdev(ipvtap_major, &ipvtap_cdev); > +} > +module_exit(ipvtap_exit); > +MODULE_ALIAS_RTNL_LINK("ipvtap"); > +MODULE_AUTHOR("Arnd Bergmann <a...@arndb.de>"); > +MODULE_LICENSE("GPL"); > -- > 2.7.4 >