From: Anton Ivanov <antiv...@cisco.com>

Support for multi-packet vector IO - multiple packets read in one syscall
and (optionally) written in one syscall.

Support for (optional) queueing on EAGAIN/ENOBUFS - this applies only to
socket transports. Sorry TAP, -EYOULOSE - it will remain slower than any
socket transport for a very long time, because sendmmsg/recvmmsg is
supported only for sockets, not for tap fds.
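For reference, a minimal userspace sketch of the pattern described above: a
burst of packets is pulled in with a single recvmmsg() call, pushed back out
with a single sendmmsg() call, and anything the host refuses (EAGAIN/ENOBUFS)
stays queued for a later flush. This is an illustration only, not part of the
patch; the names (rx_burst, tx_flush) and the BURST/PKT_SIZE values are
arbitrary.

/*
 * Illustrative sketch only - not the in-kernel implementation.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#define BURST           64
#define PKT_SIZE        1536

static char bufs[BURST][PKT_SIZE];
static struct iovec iovs[BURST];
static struct mmsghdr msgs[BURST];

/* Receive up to BURST packets in a single syscall; returns packets read. */
static int rx_burst(int fd)
{
        int i, n;

        for (i = 0; i < BURST; i++) {
                iovs[i].iov_base = bufs[i];
                iovs[i].iov_len = PKT_SIZE;
                memset(&msgs[i].msg_hdr, 0, sizeof(msgs[i].msg_hdr));
                msgs[i].msg_hdr.msg_iov = &iovs[i];
                msgs[i].msg_hdr.msg_iovlen = 1;
        }
        n = recvmmsg(fd, msgs, BURST, MSG_DONTWAIT, NULL);
        return n < 0 ? 0 : n;
}

/*
 * Transmit 'count' queued packets in a single syscall; returns how many
 * remain queued - everything on EAGAIN/ENOBUFS, the tail on a short send,
 * nothing on an unrecoverable error (drop the lot).
 */
static int tx_flush(int fd, int count)
{
        int sent = sendmmsg(fd, msgs, count, 0);

        if (sent < 0)
                return (errno == EAGAIN || errno == ENOBUFS) ? count : 0;
        memmove(msgs, msgs + sent, (count - sent) * sizeof(msgs[0]));
        return count - sent;
}

The kernel-side code added below (net_extra_kern.c/net_extra_user.c) applies
the same idea per interface, using a ring of mmsghdr entries guarded by
head/tail spinlocks instead of a flat array.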
Should work with legacy UML; thoroughly tested only with the epoll-based IRQ
controller.

Minimal host kernel version for RX - 2.6.32
Minimal host kernel version for TX - 3.0 - optional, config option
UML_NET_VECTOR_TX

Tested on Debian 7.0/Ubuntu 12.x LTS hosts, which have the relevant syscalls
but do not have the corresponding glibc wrapper for TX (this is why it is
invoked as a direct syscall). Tested thoroughly with Debian and OpenWRT
guests across a range of kernels (3.2, 3.3, 3.4, 3.8, 3.12).

Signed-off-by: Anton Ivanov <antiv...@cisco.com>
---
 arch/um/Kconfig.net               |    9 ++
 arch/um/drivers/Makefile          |    2 +-
 arch/um/drivers/net_extra_kern.c  |  308 +++++++++++++++++++++++++++++++++++
 arch/um/drivers/net_extra_user.c  |  317 ++++++++++++++++++++++++++++++++++++
 arch/um/drivers/net_kern.c        |   63 +++++---
 arch/um/include/asm/irq.h         |   26 +--
 arch/um/include/shared/net_kern.h |   31 ++++
 arch/um/include/shared/net_user.h |   24 +++
 arch/um/kernel/irq.c              |    5 +
 9 files changed, 752 insertions(+), 33 deletions(-)
 create mode 100644 arch/um/drivers/net_extra_kern.c
 create mode 100644 arch/um/drivers/net_extra_user.c

diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
index 820a56f..e4a7cf2 100644
--- a/arch/um/Kconfig.net
+++ b/arch/um/Kconfig.net
@@ -21,6 +21,15 @@ config UML_NET
 	  enable at least one of the following transport options to actually
 	  make use of UML networking.
 
+config UML_NET_VECTOR_TX
+	bool "Vector transmit in network devices"
+	depends on UML_NET
+	help
+	  Accelerate network IO by using the sendmmsg() Linux syscall. This
+	  option requires the host running UML to run at least Linux 3.0.
+	  Presently the acceleration is only for forwarding (including firewall,
+	  NAT, etc.), where it yields a 25%+ improvement in packet rates and throughput.
+
 config UML_NET_ETHERTAP
 	bool "Ethertap transport"
 	depends on UML_NET
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index e7582e1..836baaf 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -10,7 +10,7 @@ slip-objs := slip_kern.o slip_user.o
 slirp-objs := slirp_kern.o slirp_user.o
 daemon-objs := daemon_kern.o daemon_user.o
 umcast-objs := umcast_kern.o umcast_user.o
-net-objs := net_kern.o net_user.o
+net-objs := net_kern.o net_user.o net_extra_user.o net_extra_kern.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
 hostaudio-objs := hostaudio_kern.o
 ubd-objs := ubd_kern.o ubd_user.o
diff --git a/arch/um/drivers/net_extra_kern.c b/arch/um/drivers/net_extra_kern.c
new file mode 100644
index 0000000..5ee6f9b
--- /dev/null
+++ b/arch/um/drivers/net_extra_kern.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2012 - 2014 Cisco Systems
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2001 Lennert Buytenhek (buyt...@gnu.org) and
+ * James Leu (j...@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ * Licensed under the GPL.
+ */ + +#include <linux/bootmem.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/inetdevice.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include "init.h" +#include "irq_kern.h" +#include "irq_user.h" +#include "mconsole_kern.h" +#include "net_kern.h" +#include "net_user.h" + +#define DRIVER_NAME "uml-netdev" + +/* + These are wrappers around key kernel side functions so we can + invoke them from the user side of our Schizofreniac self + +*/ + +extern spinlock_t uml_sigio_lock; +extern int in_epoll_loop; + +static DEFINE_SPINLOCK(net_queue_list); + +static struct mmsg_queue_info * pending_queue = NULL; + +void uml_net_destroy_skb(void * skb) +{ + if (skb) { + kfree_skb((struct sk_buff *) skb); + } +} + +void * uml_net_build_skb (void * dev) +{ + struct uml_net_private *lp = netdev_priv((struct net_device *) dev); + struct sk_buff * skb; + + skb = dev_alloc_skb(lp->max_packet + 32); + if (skb) { + /* add some tunneling space just in case, we usually do not need it as we use vector IO */ + skb_reserve(skb,32); + skb->dev = dev; + skb_put(skb, lp->max_packet); + skb_reset_mac_header(skb); + skb->ip_summed = CHECKSUM_NONE; + } else { + printk("Failed Atomic SKB Allocation, will drop\n"); + } + return skb; +} + +void * uml_net_skb_data (void * skb) { + if (skb) { + return ((struct sk_buff *) skb)->data; + } else { + printk("hole in vector!!!\n"); + return NULL; + } +} + +int uml_net_advance_head( struct mmsg_queue_info * queue_info, int advance) +{ + int queue_depth; + queue_info->head = + (queue_info->head + advance) + % queue_info->max_depth; + + /* caller is already holding the head_lock */ + + spin_lock(&queue_info->tail_lock); + queue_info->queue_depth -= advance; + + /* we are at 0, use this to + * reset head and tail so we can use max size vectors + */ + if (queue_info->queue_depth == 0) { + queue_info->head = 0; + queue_info->tail = 0; + } + queue_depth = queue_info->queue_depth; + spin_unlock(&queue_info->tail_lock); + return queue_depth; +} + +/* +* This is called by enqueuers which should hold the +* head lock already +*/ + +int uml_net_advance_tail( struct mmsg_queue_info * queue_info, int advance) +{ + int queue_depth; + queue_info->tail = + (queue_info->tail + advance) + % queue_info->max_depth; + spin_lock(&queue_info->head_lock); + queue_info->queue_depth += advance; + queue_depth = queue_info->queue_depth; + spin_unlock(&queue_info->head_lock); + return queue_depth; +} + +/* +* Generic vector enqueue with support for forming headers using transport +* specific callback. 
Allows GRE, L2TPv3, RAW (and potentially when ported) +* daemon to use a common enqueue procedure in vector mode +*/ + +int uml_net_enqueue ( + struct mmsg_queue_info * queue_info, + struct sk_buff * skb, + struct uml_net_private *lp, + void (*form_header)(void * header, struct sk_buff * skb, struct uml_net_private * lp), + void * remote_addr, + int remote_addr_size) +{ + + int queue_depth; + struct sk_buff * mmsg_clone; + struct mmsghdr * mmsg_send_vector; + void ** skb_send_vector; + struct iovec * iov; + + if (!queue_info) { + /* someone passed us a NULL queue */ + return 0; + } + + spin_lock(&queue_info->tail_lock); + spin_lock(&queue_info->head_lock); + queue_depth = queue_info->queue_depth; + spin_unlock(&queue_info->head_lock); + + if (queue_depth < queue_info->max_depth) { + mmsg_clone = skb_clone(skb, GFP_ATOMIC); + if (mmsg_clone) { + + skb_send_vector = queue_info->skb_send_vector; + skb_send_vector += queue_info->tail; + + (* skb_send_vector) = mmsg_clone; + + mmsg_send_vector = queue_info->mmsg_send_vector; + mmsg_send_vector += queue_info->tail; + + iov = mmsg_send_vector->msg_hdr.msg_iov; + + if (iov) { + mmsg_send_vector->msg_hdr.msg_name = remote_addr; + mmsg_send_vector->msg_hdr.msg_namelen = remote_addr_size; + if (form_header != NULL) { + (* form_header)(iov->iov_base, skb, lp); + iov++; + } + iov->iov_base = skb->data; + iov->iov_len = skb->len; + + queue_depth = uml_net_advance_tail(queue_info, 1); + } else { + printk("no iov, cannot enqueue\n"); + } + } else { + printk("cloning failed\n"); + } + } + spin_unlock(&queue_info->tail_lock); + return queue_depth; +} + +static int send_mmsg_queue(struct mmsg_queue_info * queue_info, int queue_depth) +{ + int fd = queue_info->fd; + struct mmsghdr * send_from; + void ** skb_send_vector; + int result = 0, send_len, skb_index, allowed_drop = 0; + + if (! 
queue_info) { + /* someone passed a null queue, should not occur */ + return 0; + } + + if (spin_trylock(&queue_info->head_lock)) { + if (spin_trylock(&queue_info->tail_lock)) { + /* update queue_depth to current value */ + queue_depth = queue_info->queue_depth; + spin_unlock(&queue_info->tail_lock); + if (queue_depth > 0) { + send_len = queue_depth; + send_from = queue_info->mmsg_send_vector; + send_from += queue_info->head; + if (send_len + queue_info->head > queue_info->max_depth) { + send_len = queue_info->max_depth - queue_info->head; + } + if (send_len > 0) { + result = net_sendmmsg( + fd, send_from, send_len, 0 + ); + } + if (result < 0) { + printk("error %i in multisend\n", result); + result = send_len; /* drop the lot */ + } + if (result > 0) { + if (result != send_len) { + /* we need to drop a few, exponentially increasing + * drop bucket in use + */ + result += allowed_drop; + allowed_drop += allowed_drop * 2 + 1; + if (result > send_len) { + /* do not drop beyond requested size */ + result = send_len; + } + } else { + /* clear drop bucket size */ + allowed_drop = 0; + } + skb_send_vector = queue_info->skb_send_vector; + skb_send_vector += queue_info->head; + for (skb_index = 0; skb_index < send_len; skb_index++) { + uml_net_destroy_skb(* skb_send_vector); + (* skb_send_vector) = NULL; /* just in case */ + skb_send_vector ++ ; + } + queue_depth = uml_net_advance_head(queue_info, result); + } + } + } + spin_unlock(&queue_info->head_lock); + } + return queue_depth; +} + +int uml_net_flush_mmsg_queue( + struct mmsg_queue_info * queue_info, int queue_depth) +{ + int old_queue_depth; + + if (queue_depth >= (queue_info->max_depth - 1)) { + /* queue full, flush some regardless */ + queue_depth = send_mmsg_queue(queue_info, queue_depth); + } + if ((queue_depth > 0) && (spin_trylock(¨_sigio_lock))) { + /* unconditional flush, non zero queue - not in epoll loop so not forwarding */ + if (!(in_epoll_loop)) { + while (queue_depth > 0) { + queue_depth = send_mmsg_queue(queue_info, queue_depth); + } + } + spin_unlock(¨_sigio_lock); + } + + /* we are forwarding (most likely) - check if there is a pending queue, if there is a + * pending queue, flush it, then put the current queue as pending + */ + + spin_lock(&net_queue_list); + if ((pending_queue) && (pending_queue != queue_info)) { + old_queue_depth = send_mmsg_queue(pending_queue, 1); + while (old_queue_depth > 0) { + old_queue_depth = + send_mmsg_queue(pending_queue, old_queue_depth); + } + } + if (queue_depth) { + pending_queue = queue_info; + } else { + pending_queue = NULL; + } + spin_unlock(&net_queue_list); + + return queue_depth; +} + +/* +* this is invoked out of the IRQ IO event loop to flush pending +* packets on "current" interface +*/ + +void flush_pending_netio(void) { + int result; + spin_lock(&net_queue_list); + if (pending_queue) { + do { + result = send_mmsg_queue(pending_queue, 1); + } while (result > 0); + } + pending_queue = NULL; + spin_unlock(&net_queue_list); +} diff --git a/arch/um/drivers/net_extra_user.c b/arch/um/drivers/net_extra_user.c new file mode 100644 index 0000000..1037899 --- /dev/null +++ b/arch/um/drivers/net_extra_user.c @@ -0,0 +1,317 @@ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Licensed under the GPL + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdarg.h> +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include "net_user.h" +#include "os.h" +#include "um_malloc.h" + +/* +* Principles of 
operation: +* +* EVERYTHING here is built to tolerate a failed memory allocation. +* If either a header buffer or a data buffer (taken from skb->data) +* is NULL the read will fail and the packet will be dropped. This +* is the normal behaviour of recvmsg and recvmmsg functions - if a +* particular iov_base == NULL and its corresponding iov_baselen is +* 0 we truncate and/or drop the packet altogether. +* +* On the negative side this means that we have to do a few more +* checks for NULL here and there. On the positive side this means +* that the whole thing is more robust including under low +* memory conditions. +* +* There is one special case which we need to handle as a result of +* this - any header verification functions should return "broken +* header" on hitting a NULL. This will in turn invoke the applicable +* packet drop logic. +* +* Any changes should follow this overall design. +* +* Side effect - none of these need to use the shared (and mutexed) +* drop skb. This is surplus to reqs, the normal recvm(m)msg drop +* mechanics will drop it. +*/ + +int net_readv(int fd, void *iov, int iovcnt) +{ + int n; + + CATCH_EINTR(n = readv(fd, iov, iovcnt)); + if ((n < 0) && (errno == EAGAIN)) + return 0; + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_recvfrom2(int fd, void *buf, int len, void *src_addr, int *addrlen) +{ + int n; + + CATCH_EINTR(n = recvfrom(fd, buf, len, 0, src_addr, addrlen)); + if (n < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_writev(int fd, void *iov, int iovcnt) +{ + int n; + + CATCH_EINTR(n = writev(fd, iov, iovcnt)); + + if ((n < 0) && ((errno == EAGAIN) || (errno == ENOBUFS))) + return 0; + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_sendmessage(int fd, void *msg, int flags) +{ + int n; + + CATCH_EINTR(n = sendmsg(fd, msg, flags)); + if (n < 0) { + if ((errno == EAGAIN) || (errno == ENOBUFS)) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} +int net_recvmessage(int fd, void *msg, int flags) +{ + int n; + + CATCH_EINTR(n = recvmsg(fd, msg, flags)); + if (n < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_recvmmsg(int fd, void *msgvec, unsigned int vlen, + unsigned int flags, struct timespec *timeout) +{ + int n; + + CATCH_EINTR(n = recvmmsg(fd, msgvec, vlen, flags, timeout)); + if (n < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_sendmmsg(int fd, void *msgvec, unsigned int vlen, + unsigned int flags) +{ + int n; + +#ifdef HAS_SENDMMSG + + /* has proper sendmmsg */ + + CATCH_EINTR(n = sendmmsg(fd, msgvec, vlen, flags)); +#else + + /* no glibc wrapper for sendmmsg - Ubuntu LTS 12.04, Debian 7.x */ + + CATCH_EINTR(n = syscall(__NR_sendmmsg, fd, msgvec, vlen, flags)); +#endif + if (n < 0) { + if ((errno == EAGAIN) || (errno == ENOBUFS)) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +void destroy_skb_vector(void ** vector, int size) +{ + int i; + void ** tofree = vector; + + for (i=0;i<size;i++) { + if ( * vector) { + uml_net_destroy_skb(* vector); + } + vector ++; + } + kfree(tofree); +} + +void destroy_mmsg_vector(void * mmsgvector, int size, int free_iov_base) +{ + struct mmsghdr * vector = (struct mmsghdr *) mmsgvector; + struct iovec * iov; + int i; + if (vector) { + for (i = 0; i < size; i++) { + iov = 
vector->msg_hdr.msg_iov; + if (iov) { + if (free_iov_base) { + kfree(iov->iov_base); + } + kfree(iov); + } + vector ++; + } + kfree(mmsgvector); + } else { + printk("NULL mmsg vector in destroy, should not occur\n"); + } +} + +void * build_skbuf_vector(int size, void * dev) +{ + int i; + void **result, **vector; + result = uml_kmalloc(size * sizeof(void *), UM_GFP_KERNEL); + vector = result; + if (vector) { + for (i = 0; i < size; i++) { + * vector = uml_net_build_skb(dev); + vector++; + } + } + return result; +} + +void rebuild_skbuf_vector(void ** skbvec, int size, void * dev) +{ + int i; + if (skbvec) { + for (i = 0; i < size; i++) { + * skbvec = uml_net_build_skb(dev); + skbvec++; + } + } +} + +void repair_mmsg (void *vec, int iovsize, int header_size) +{ + struct mmsghdr * msgvec = (struct mmsghdr *) vec; + struct iovec * iov; + if (! msgvec->msg_hdr.msg_iov) { + msgvec->msg_hdr.msg_iov = uml_kmalloc(sizeof(struct iovec) * iovsize, UM_GFP_KERNEL); + } + iov = msgvec->msg_hdr.msg_iov; + if (iov) { + if (! iov->iov_base) { + iov->iov_base=uml_kmalloc(header_size, UM_GFP_KERNEL); + } + if (iov->iov_base) { + /* put correct header size just in case - we may have had a short frame */ + iov->iov_len = header_size; + } else { + printk("failed to allocate a header buffer, will cause a packet drop later\n"); + iov->iov_len = 0; + } + } +} + +void * build_mmsg_vector(int size, int iovsize) +{ + int i; + struct mmsghdr *msgvec, *result; + struct iovec * iov; + + result = uml_kmalloc(sizeof(struct mmsghdr) * size, UM_GFP_KERNEL); + msgvec = result; + if (msgvec) { + memset(msgvec, '\0', sizeof(struct mmsghdr) * size); + for ( i = 0; i < size; i++) { + iov = uml_kmalloc(sizeof(struct iovec) * iovsize, UM_GFP_KERNEL); + msgvec->msg_hdr.msg_iov=iov; + if (iov) { + memset(iov, '\0', sizeof(struct iovec) * iovsize); + msgvec->msg_hdr.msg_iovlen=iovsize; + } else { + printk("failed to allocate iov\n"); + msgvec->msg_hdr.msg_iovlen=0; /* silent drop on receive, no xmit */ + } + msgvec++; + } + } + return result; +} + +void add_header_buffers(void * msgvec, int size, int header_size) +{ + int i; + struct iovec * iov; + struct mmsghdr * mmsgvec = (struct mmsghdr *) msgvec; + for ( i = 0; i < size; i++) { + iov = mmsgvec->msg_hdr.msg_iov; + if (iov) { + iov->iov_base=uml_kmalloc(header_size, UM_GFP_KERNEL); + if (iov->iov_base) { + iov->iov_len = header_size; + } else { + printk("failed to allocate a header buffer, will cause a packet drop later\n"); + iov->iov_len = 0; + } + } + mmsgvec++; + } +} + +/* NOTE - this is only for offset = 0 or 1, other cases are unhandled!!! 
*/ + +void add_skbuffs(void * msgvec, void ** skbvec, int size, int skb_size, int offset) { + int i; + struct iovec * iov; + struct mmsghdr * mmsgvec = (struct mmsghdr *) msgvec; + for ( i = 0; i < size; i++) { + /* + This heavily relies on all IOVs being present, if the initial allocation + fails it must clean up and switch to "normal" per-packet receive instead + Later allocations of skbufs can fail - this will result in short reads + and skips + + */ + iov = mmsgvec->msg_hdr.msg_iov; + if (iov) { + iov += offset; + iov->iov_base=uml_net_skb_data(* skbvec); + if (iov->iov_base) { + iov->iov_len = skb_size; + } else { + printk("NULL SKB will drop\n"); + iov->iov_len = 0; + } + } else { + printk("NULL IOV will drop\n"); + } + mmsgvec++; + skbvec++; + } +} diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 64d8426..2889804 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2012 - 2014 Cisco Systems * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Copyright (C) 2001 Lennert Buytenhek (buyt...@gnu.org) and * James Leu (j...@mindspring.net). @@ -29,6 +30,7 @@ static DEFINE_SPINLOCK(opened_lock); static LIST_HEAD(opened); +static int rr_counter = 0; /* * The drop_skb is used when we can't allocate an skb. The @@ -42,6 +44,7 @@ static DEFINE_SPINLOCK(drop_lock); static struct sk_buff *drop_skb; static int drop_max; + static int update_drop_skb(int max) { struct sk_buff *new; @@ -77,24 +80,38 @@ static int uml_net_rx(struct net_device *dev) struct sk_buff *skb; /* If we can't allocate memory, try again next round. */ - skb = dev_alloc_skb(lp->max_packet); - if (skb == NULL) { - drop_skb->dev = dev; - /* Read a packet into drop_skb and don't do anything with it. */ - (*lp->read)(lp->fd, drop_skb, lp); - dev->stats.rx_dropped++; + if (lp->options & UML_NET_USE_SKB_READ) { + /* we expect a full formed, well behaved skb from zero copy drivers here */ + skb = (*lp->skb_read)(lp); + if (skb == NULL) { return 0; - } - - skb->dev = dev; - skb_put(skb, lp->max_packet); - skb_reset_mac_header(skb); - pkt_len = (*lp->read)(lp->fd, skb, lp); - - if (pkt_len > 0) { + } + pkt_len = skb->len; + } else { + skb = dev_alloc_skb(lp->max_packet + 32); + if (skb == NULL) { + drop_skb->dev = dev; + /* Read a packet into drop_skb and don't do anything with it. */ + (*lp->read)(lp->fd, drop_skb, lp); + dev->stats.rx_dropped++; + return 0; + } + + skb_reserve(skb,32); + skb->dev = dev; + skb_put(skb, lp->max_packet); + skb_reset_mac_header(skb); + + // Mark that virtual devices cannot provide required checksum. 
+ skb->ip_summed = CHECKSUM_NONE; + pkt_len = (*lp->read)(lp->fd, skb, lp); + if (pkt_len > 0) { skb_trim(skb, pkt_len); skb->protocol = (*lp->protocol)(skb); + } + } + if (pkt_len > 0) { dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; netif_rx(skb); @@ -192,8 +209,9 @@ static int uml_net_close(struct net_device *dev) struct uml_net_private *lp = netdev_priv(dev); netif_stop_queue(dev); + deactivate_fd(lp->fd, dev->irq); - um_free_irq(dev->irq, dev); + free_irq(dev->irq, dev); if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user); lp->fd = -1; @@ -216,7 +234,6 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&lp->lock, flags); len = (*lp->write)(lp->fd, skb, lp); - skb_tx_timestamp(skb); if (len == skb->len) { dev->stats.tx_packets++; @@ -273,14 +290,13 @@ static void uml_net_poll_controller(struct net_device *dev) static void uml_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); - strlcpy(info->version, "42", sizeof(info->version)); + strcpy(info->driver, DRIVER_NAME); + strcpy(info->version, "42"); } static const struct ethtool_ops uml_net_ethtool_ops = { .get_drvinfo = uml_net_get_drvinfo, .get_link = ethtool_op_get_link, - .get_ts_info = ethtool_op_get_ts_info, }; static void uml_net_user_timer_expire(unsigned long _conn) @@ -447,6 +463,7 @@ static void eth_configure(int n, void *init, char *mac, * These just fill in a data structure, so there's no failure * to be worried about. */ + dev->ethtool_ops = ¨_net_ethtool_ops; (*transport->kern->init)(dev, init); *lp = ((struct uml_net_private) @@ -459,7 +476,9 @@ static void eth_configure(int n, void *init, char *mac, .open = transport->user->open, .close = transport->user->close, .remove = transport->user->remove, + .options = transport->kern->options, .read = transport->kern->read, + .skb_read = transport->kern->skb_read, .write = transport->kern->write, .add_address = transport->user->add_address, .delete_address = transport->user->delete_address }); @@ -475,9 +494,9 @@ static void eth_configure(int n, void *init, char *mac, dev->mtu = transport->user->mtu; dev->netdev_ops = ¨_netdev_ops; - dev->ethtool_ops = ¨_net_ethtool_ops; dev->watchdog_timeo = (HZ >> 1); - dev->irq = UM_ETH_IRQ; + dev->irq = UM_ETH_BASE_IRQ + (rr_counter % UM_ETH_IRQ_RR); + rr_counter++; err = update_drop_skb(lp->max_packet); if (err) @@ -829,7 +848,7 @@ static void close_devices(void) spin_lock(&opened_lock); list_for_each(ele, &opened) { lp = list_entry(ele, struct uml_net_private, list); - um_free_irq(lp->dev->irq, lp->dev); + free_irq(lp->dev->irq, lp->dev); if ((lp->close != NULL) && (lp->fd >= 0)) (*lp->close)(lp->fd, &lp->user); if (lp->remove != NULL) diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h index 4a2037f..be9128b 100644 --- a/arch/um/include/asm/irq.h +++ b/arch/um/include/asm/irq.h @@ -1,21 +1,27 @@ + #ifndef __UM_IRQ_H #define __UM_IRQ_H +#define UM_ETH_IRQ_RR 32 + #define TIMER_IRQ 0 #define UMN_IRQ 1 #define CONSOLE_IRQ 2 #define CONSOLE_WRITE_IRQ 3 #define UBD_IRQ 4 -#define UM_ETH_IRQ 5 -#define SSL_IRQ 6 -#define SSL_WRITE_IRQ 7 -#define ACCEPT_IRQ 8 -#define MCONSOLE_IRQ 9 -#define WINCH_IRQ 10 -#define SIGIO_WRITE_IRQ 11 -#define TELNETD_IRQ 12 -#define XTERM_IRQ 13 -#define RANDOM_IRQ 14 +#define UM_ETH_BASE_IRQ 5 + +#define UM_END_ETH_IRQ UM_ETH_BASE_IRQ + UM_ETH_IRQ_RR + +#define SSL_IRQ UM_END_ETH_IRQ + 1 +#define SSL_WRITE_IRQ UM_END_ETH_IRQ + 2 +#define ACCEPT_IRQ UM_END_ETH_IRQ 
+ 3 +#define MCONSOLE_IRQ UM_END_ETH_IRQ + 4 +#define WINCH_IRQ UM_END_ETH_IRQ + 5 +#define SIGIO_WRITE_IRQ UM_END_ETH_IRQ + 6 +#define TELNETD_IRQ UM_END_ETH_IRQ + 7 +#define XTERM_IRQ UM_END_ETH_IRQ + 8 +#define RANDOM_IRQ UM_END_ETH_IRQ + 9 #define LAST_IRQ RANDOM_IRQ #define NR_IRQS (LAST_IRQ + 1) diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h index 012ac87..1e64658 100644 --- a/arch/um/include/shared/net_kern.h +++ b/arch/um/include/shared/net_kern.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2012 - 2014 Cisco Systems * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -13,6 +14,8 @@ #include <linux/list.h> #include <linux/workqueue.h> +#define UML_NET_USE_SKB_READ 1 + struct uml_net { struct list_head list; struct net_device *dev; @@ -28,6 +31,7 @@ struct uml_net_private { struct work_struct work; int fd; + unsigned int options; unsigned char mac[ETH_ALEN]; int max_packet; unsigned short (*protocol)(struct sk_buff *); @@ -36,6 +40,7 @@ struct uml_net_private { void (*remove)(void *); int (*read)(int, struct sk_buff *skb, struct uml_net_private *); int (*write)(int, struct sk_buff *skb, struct uml_net_private *); + struct sk_buff * (*skb_read)(struct uml_net_private *); void (*add_address)(unsigned char *, unsigned char *, void *); void (*delete_address)(unsigned char *, unsigned char *, void *); @@ -47,6 +52,8 @@ struct net_kern_info { unsigned short (*protocol)(struct sk_buff *); int (*read)(int, struct sk_buff *skb, struct uml_net_private *); int (*write)(int, struct sk_buff *skb, struct uml_net_private *); + struct sk_buff * (*skb_read)(struct uml_net_private *); + unsigned int options; }; struct transport { @@ -59,11 +66,35 @@ struct transport { const int setup_size; }; +struct mmsg_queue_info { + int fd; + struct mmsghdr * mmsg_send_vector; + void ** skb_send_vector; + int queue_depth, head, tail, max_depth; + spinlock_t head_lock; + spinlock_t tail_lock; +}; + extern struct net_device *ether_init(int); extern unsigned short ether_protocol(struct sk_buff *); extern int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out, char **gate_addr); extern void register_transport(struct transport *new); extern unsigned short eth_protocol(struct sk_buff *skb); +extern struct sk_buff *my_build_skb(void * head, void *data, unsigned int frag_size); + +extern void flush_pending_netio(void); + +extern int uml_net_advance_tail( struct mmsg_queue_info * queue_info, int advance); +extern int uml_net_advance_head( struct mmsg_queue_info * queue_info, int advance); +extern int uml_net_flush_mmsg_queue(struct mmsg_queue_info * queue_info, int queue_depth); + +extern int uml_net_enqueue ( + struct mmsg_queue_info * queue_info, + struct sk_buff * skb, + struct uml_net_private *lp, + void (*form_header)(void * header, struct sk_buff * skb, struct uml_net_private * lp), + void * remote_addr, + int remote_addr_size); #endif diff --git a/arch/um/include/shared/net_user.h b/arch/um/include/shared/net_user.h index 3dabbe1..4b46f37 100644 --- a/arch/um/include/shared/net_user.h +++ b/arch/um/include/shared/net_user.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2012 - 2014 Cisco Systems * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -38,10 +39,15 @@ extern void tap_check_ips(char *gate_addr, unsigned char *eth_addr); extern void read_output(int fd, char *output_out, int len); extern int net_read(int fd, void *buf, int len); +extern int net_readv(int fd, void *iov, 
int iovcnt);
 extern int net_recvfrom(int fd, void *buf, int len);
+extern int net_recvfrom2(int fd, void *buf, int len, void *src_addr, int *addrlen);
 extern int net_write(int fd, void *buf, int len);
+extern int net_writev(int fd, void *iov, int iovcnt);
 extern int net_send(int fd, void *buf, int len);
 extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len);
+extern int net_sendmessage(int fd, void *msg, int flags);
+extern int net_recvmessage(int fd, void *msg, int flags);
 
 extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg);
 extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg);
@@ -50,4 +56,22 @@ extern char *split_if_spec(char *str, ...);
 
 extern int dev_netmask(void *d, void *m);
 
+
+extern void uml_net_destroy_skb(void * skb);
+extern void * uml_net_build_skb (void * dev);
+extern void * uml_net_skb_data (void * skb);
+
+extern void add_skbuffs(void * msgvec, void ** skbvec, int size, int skb_size, int offset);
+extern void add_header_buffers(void * msgvec, int size, int header_size);
+extern void * build_mmsg_vector(int size, int iovsize);
+extern void rebuild_skbuf_vector(void ** skbvec, int size, void * dev);
+extern void * build_skbuf_vector(int size, void * dev);
+extern int net_recvmmsg(int fd, void *msgvec, unsigned int vlen,
+	unsigned int flags, struct timespec *timeout);
+extern int net_sendmmsg(int fd, void *msgvec, unsigned int vlen,
+	unsigned int flags);
+extern void repair_mmsg (void *msgvec, int iovsize, int header_size);
+extern void destroy_skb_vector(void ** vector, int size);
+extern void destroy_mmsg_vector(void * mmsgvector, int size, int free_iov_base);
+
 #endif
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 2869160..a67a551 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -17,6 +17,7 @@
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
+#include <net_kern.h>
 
 /*
  * We are on the "kernel side" so we cannot pick up the sys/epoll.h
@@ -136,6 +137,10 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 		spin_unlock_irqrestore(&uml_sigio_lock, flags);
 	}
 
+#ifdef CONFIG_UML_NET_VECTOR_TX
+	flush_pending_netio();
+#endif
+
 	/* This needs a better way - it slows down the event loop */
 	free_irqs();
 
-- 
1.7.10.4