Hi Richard, hi list, some notes on this patchset: the multi-packet RX portion has been stable and in use for quite a while (nearly a year).
The TX portion is new, and it looks like it has some issues which did not show up before I ported it to a more recent kernel (I would not be surprised if I introduced them when porting this from 3.3.y/OpenWRT to 3.12.y/Stock). If I do not figure out what exactly is going on with TX by the end of next week, I am going to resubmit an older version of this patch (and its corresponding transports) which has only multi-packet RX (which is stable) and leave TX for a later incremental patch. A. On 29/08/14 08:05, anton.iva...@kot-begemot.co.uk wrote: > From: Anton Ivanov <antiv...@cisco.com> > > Support for multi-packet vector IO - multiple packets > read in one syscall and written in one syscall. Should work with > legacy UML, thoroughly tested only for the epoll based IRQ controller > > Minimal host kernel version for RX - 2.6.32 > Minimal host kernel version for TX - 3.0 > > Tested on Debian 7.0/Ubuntu 12.x LTS which have the relevant > syscalls, but do not have the appropriate glibc routine for TX > (this is why it is a direct syscall). 
> > Signed-off-by: Anton Ivanov <antiv...@cisco.com> > --- > arch/um/drivers/Makefile | 2 +- > arch/um/drivers/net_kern.c | 63 > ++++++++++++++++++++++++------------- > arch/um/include/asm/irq.h | 26 +++++++++------ > arch/um/include/shared/net_kern.h | 24 ++++++++++++++ > arch/um/include/shared/net_user.h | 24 ++++++++++++++ > arch/um/kernel/irq.c | 3 ++ > 6 files changed, 109 insertions(+), 33 deletions(-) > > diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile > index e7582e1..836baaf 100644 > --- a/arch/um/drivers/Makefile > +++ b/arch/um/drivers/Makefile > @@ -10,7 +10,7 @@ slip-objs := slip_kern.o slip_user.o > slirp-objs := slirp_kern.o slirp_user.o > daemon-objs := daemon_kern.o daemon_user.o > umcast-objs := umcast_kern.o umcast_user.o > -net-objs := net_kern.o net_user.o > +net-objs := net_kern.o net_user.o net_extra_user.o net_extra_kern.o > mconsole-objs := mconsole_kern.o mconsole_user.o > hostaudio-objs := hostaudio_kern.o > ubd-objs := ubd_kern.o ubd_user.o > diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c > index 64d8426..1d253fa 100644 > --- a/arch/um/drivers/net_kern.c > +++ b/arch/um/drivers/net_kern.c > @@ -1,4 +1,5 @@ > /* > + * Copyright (C) 2012 - 2014 Cisco Systems > * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > * Copyright (C) 2001 Lennert Buytenhek (buyt...@gnu.org) and > * James Leu (j...@mindspring.net). > @@ -29,6 +30,7 @@ > > static DEFINE_SPINLOCK(opened_lock); > static LIST_HEAD(opened); > +static int rr_counter = 0; > > /* > * The drop_skb is used when we can't allocate an skb. The > @@ -42,6 +44,7 @@ static DEFINE_SPINLOCK(drop_lock); > static struct sk_buff *drop_skb; > static int drop_max; > > + > static int update_drop_skb(int max) > { > struct sk_buff *new; > @@ -77,24 +80,38 @@ static int uml_net_rx(struct net_device *dev) > struct sk_buff *skb; > > /* If we can't allocate memory, try again next round. 
*/ > - skb = dev_alloc_skb(lp->max_packet); > - if (skb == NULL) { > - drop_skb->dev = dev; > - /* Read a packet into drop_skb and don't do anything with it. */ > - (*lp->read)(lp->fd, drop_skb, lp); > - dev->stats.rx_dropped++; > + if (lp->options & UML_NET_USE_SKB_READ) { > + /* we expect a full formed, well behaved skb from zero copy drivers > here */ > + skb = (*lp->skb_read)(lp); > + if (skb == NULL) { > return 0; > - } > - > - skb->dev = dev; > - skb_put(skb, lp->max_packet); > - skb_reset_mac_header(skb); > - pkt_len = (*lp->read)(lp->fd, skb, lp); > - > - if (pkt_len > 0) { > + } > + pkt_len = skb->len; > + } else { > + skb = dev_alloc_skb(lp->max_packet + 32); > + if (skb == NULL) { > + drop_skb->dev = dev; > + /* Read a packet into drop_skb and don't do anything with > it. */ > + (*lp->read)(lp->fd, drop_skb, lp); > + dev->stats.rx_dropped++; > + return 0; > + } > + > + skb_reserve(skb,32); > + skb->dev = dev; > + skb_put(skb, lp->max_packet); > + skb_reset_mac_header(skb); > + > + // Mark that virtual devices cannot provide required checksum. 
> + skb->ip_summed = CHECKSUM_NONE; > + pkt_len = (*lp->read)(lp->fd, skb, lp); > + if (pkt_len > 0) { > skb_trim(skb, pkt_len); > skb->protocol = (*lp->protocol)(skb); > + } > + } > > + if (pkt_len > 0) { > dev->stats.rx_bytes += skb->len; > dev->stats.rx_packets++; > netif_rx(skb); > @@ -192,8 +209,9 @@ static int uml_net_close(struct net_device *dev) > struct uml_net_private *lp = netdev_priv(dev); > > netif_stop_queue(dev); > + deactivate_fd(lp->fd, dev->irq); > > - um_free_irq(dev->irq, dev); > + free_irq(dev->irq, dev); > if (lp->close != NULL) > (*lp->close)(lp->fd, &lp->user); > lp->fd = -1; > @@ -216,7 +234,6 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct > net_device *dev) > spin_lock_irqsave(&lp->lock, flags); > > len = (*lp->write)(lp->fd, skb, lp); > - skb_tx_timestamp(skb); > > if (len == skb->len) { > dev->stats.tx_packets++; > @@ -273,14 +290,13 @@ static void uml_net_poll_controller(struct net_device > *dev) > static void uml_net_get_drvinfo(struct net_device *dev, > struct ethtool_drvinfo *info) > { > - strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); > - strlcpy(info->version, "42", sizeof(info->version)); > + strcpy(info->driver, DRIVER_NAME); > + strcpy(info->version, "42"); > } > > static const struct ethtool_ops uml_net_ethtool_ops = { > .get_drvinfo = uml_net_get_drvinfo, > .get_link = ethtool_op_get_link, > - .get_ts_info = ethtool_op_get_ts_info, > }; > > static void uml_net_user_timer_expire(unsigned long _conn) > @@ -447,6 +463,7 @@ static void eth_configure(int n, void *init, char *mac, > * These just fill in a data structure, so there's no failure > * to be worried about. 
> */ > + dev->ethtool_ops = &uml_net_ethtool_ops; > (*transport->kern->init)(dev, init); > > *lp = ((struct uml_net_private) > @@ -459,7 +476,9 @@ static void eth_configure(int n, void *init, char *mac, > .open = transport->user->open, > .close = transport->user->close, > .remove = transport->user->remove, > + .options = transport->kern->options, > .read = transport->kern->read, > + .skb_read = transport->kern->skb_read, > .write = transport->kern->write, > .add_address = transport->user->add_address, > .delete_address = transport->user->delete_address }); > @@ -475,9 +494,9 @@ static void eth_configure(int n, void *init, char *mac, > > dev->mtu = transport->user->mtu; > dev->netdev_ops = &uml_netdev_ops; > - dev->ethtool_ops = &uml_net_ethtool_ops; > dev->watchdog_timeo = (HZ >> 1); > - dev->irq = UM_ETH_IRQ; > + dev->irq = UM_ETH_BASE_IRQ + (rr_counter % UM_ETH_IRQ_RR); > + rr_counter++; > > err = update_drop_skb(lp->max_packet); > if (err) > @@ -829,7 +848,7 @@ static void close_devices(void) > spin_lock(&opened_lock); > list_for_each(ele, &opened) { > lp = list_entry(ele, struct uml_net_private, list); > - um_free_irq(lp->dev->irq, lp->dev); > + free_irq(lp->dev->irq, lp->dev); > if ((lp->close != NULL) && (lp->fd >= 0)) > (*lp->close)(lp->fd, &lp->user); > if (lp->remove != NULL) > diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h > index 4a2037f..be9128b 100644 > --- a/arch/um/include/asm/irq.h > +++ b/arch/um/include/asm/irq.h > @@ -1,21 +1,27 @@ > + > #ifndef __UM_IRQ_H > #define __UM_IRQ_H > > +#define UM_ETH_IRQ_RR 32 > + > #define TIMER_IRQ 0 > #define UMN_IRQ 1 > #define CONSOLE_IRQ 2 > #define CONSOLE_WRITE_IRQ 3 > #define UBD_IRQ 4 > -#define UM_ETH_IRQ 5 > -#define SSL_IRQ 6 > -#define SSL_WRITE_IRQ 7 > -#define ACCEPT_IRQ 8 > -#define MCONSOLE_IRQ 9 > -#define WINCH_IRQ 10 > -#define SIGIO_WRITE_IRQ 11 > -#define TELNETD_IRQ 12 > -#define XTERM_IRQ 13 > -#define RANDOM_IRQ 14 > +#define UM_ETH_BASE_IRQ 5 > + > +#define UM_END_ETH_IRQ 
UM_ETH_BASE_IRQ + UM_ETH_IRQ_RR > + > +#define SSL_IRQ UM_END_ETH_IRQ + 1 > +#define SSL_WRITE_IRQ UM_END_ETH_IRQ + 2 > +#define ACCEPT_IRQ UM_END_ETH_IRQ + 3 > +#define MCONSOLE_IRQ UM_END_ETH_IRQ + 4 > +#define WINCH_IRQ UM_END_ETH_IRQ + 5 > +#define SIGIO_WRITE_IRQ UM_END_ETH_IRQ + 6 > +#define TELNETD_IRQ UM_END_ETH_IRQ + 7 > +#define XTERM_IRQ UM_END_ETH_IRQ + 8 > +#define RANDOM_IRQ UM_END_ETH_IRQ + 9 > > #define LAST_IRQ RANDOM_IRQ > #define NR_IRQS (LAST_IRQ + 1) > diff --git a/arch/um/include/shared/net_kern.h > b/arch/um/include/shared/net_kern.h > index 012ac87..2229126 100644 > --- a/arch/um/include/shared/net_kern.h > +++ b/arch/um/include/shared/net_kern.h > @@ -1,4 +1,5 @@ > /* > + * Copyright (C) 2012 - 2014 Cisco Systems > * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > * Licensed under the GPL > */ > @@ -13,6 +14,8 @@ > #include <linux/list.h> > #include <linux/workqueue.h> > > +#define UML_NET_USE_SKB_READ 1 > + > struct uml_net { > struct list_head list; > struct net_device *dev; > @@ -28,6 +31,7 @@ struct uml_net_private { > > struct work_struct work; > int fd; > + unsigned int options; > unsigned char mac[ETH_ALEN]; > int max_packet; > unsigned short (*protocol)(struct sk_buff *); > @@ -36,6 +40,7 @@ struct uml_net_private { > void (*remove)(void *); > int (*read)(int, struct sk_buff *skb, struct uml_net_private *); > int (*write)(int, struct sk_buff *skb, struct uml_net_private *); > + struct sk_buff * (*skb_read)(struct uml_net_private *); > > void (*add_address)(unsigned char *, unsigned char *, void *); > void (*delete_address)(unsigned char *, unsigned char *, void *); > @@ -47,6 +52,8 @@ struct net_kern_info { > unsigned short (*protocol)(struct sk_buff *); > int (*read)(int, struct sk_buff *skb, struct uml_net_private *); > int (*write)(int, struct sk_buff *skb, struct uml_net_private *); > + struct sk_buff * (*skb_read)(struct uml_net_private *); > + unsigned int options; > }; > > struct transport { > @@ -59,11 
+66,28 @@ struct transport { > const int setup_size; > }; > > +struct mmsg_queue_info { > + int fd; > + struct mmsghdr * mmsg_send_vector; > + void ** skb_send_vector; > + int queue_depth, head, tail, max_depth; > + spinlock_t head_lock; > + spinlock_t tail_lock; > + unsigned int queue_fsm; > +}; > + > extern struct net_device *ether_init(int); > extern unsigned short ether_protocol(struct sk_buff *); > extern int tap_setup_common(char *str, char *type, char **dev_name, > char **mac_out, char **gate_addr); > extern void register_transport(struct transport *new); > extern unsigned short eth_protocol(struct sk_buff *skb); > +extern struct sk_buff *my_build_skb(void * head, void *data, unsigned int > frag_size); > + > +extern void flush_pending_netio(void); > + > +extern int uml_net_advance_tail( struct mmsg_queue_info * queue_info, int > advance); > +extern int uml_net_advance_head( struct mmsg_queue_info * queue_info, int > advance); > +extern int uml_net_flush_mmsg_queue(struct mmsg_queue_info * queue_info, int > queue_depth); > > #endif > diff --git a/arch/um/include/shared/net_user.h > b/arch/um/include/shared/net_user.h > index 3dabbe1..4b46f37 100644 > --- a/arch/um/include/shared/net_user.h > +++ b/arch/um/include/shared/net_user.h > @@ -1,4 +1,5 @@ > /* > + * Copyright (C) 2012 - 2014 Cisco Systems > * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > * Licensed under the GPL > */ > @@ -38,10 +39,15 @@ extern void tap_check_ips(char *gate_addr, unsigned char > *eth_addr); > extern void read_output(int fd, char *output_out, int len); > > extern int net_read(int fd, void *buf, int len); > +extern int net_readv(int fd, void *iov, int iovcnt); > extern int net_recvfrom(int fd, void *buf, int len); > +extern int net_recvfrom2(int fd, void *buf, int len, void *src_addr, int > *addrlen); > extern int net_write(int fd, void *buf, int len); > +extern int net_writev(int fd, void *iov, int iovcnt); > extern int net_send(int fd, void *buf, int len); 
> extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len); > +extern int net_sendmessage(int fd, void *msg, int flags); > +extern int net_recvmessage(int fd, void *msg, int flags); > > extern void open_addr(unsigned char *addr, unsigned char *netmask, void > *arg); > extern void close_addr(unsigned char *addr, unsigned char *netmask, void > *arg); > @@ -50,4 +56,22 @@ extern char *split_if_spec(char *str, ...); > > extern int dev_netmask(void *d, void *m); > > + > +extern void uml_net_destroy_skb(void * skb); > +extern void * uml_net_build_skb (void * dev); > +extern void * uml_net_skb_data (void * skb); > + > +extern void add_skbuffs(void * msgvec, void ** skbvec, int size, int > skb_size, int offset); > +extern void add_header_buffers(void * msgvec, int size, int header_size); > +extern void * build_mmsg_vector(int size, int iovsize); > +extern void rebuild_skbuf_vector(void ** skbvec, int size, void * dev); > +extern void * build_skbuf_vector(int size, void * dev); > +extern int net_recvmmsg(int fd, void *msgvec, unsigned int vlen, > + unsigned int flags, struct timespec *timeout); > +extern int net_sendmmsg(int fd, void *msgvec, unsigned int vlen, > + unsigned int flags); > +extern void repair_mmsg (void *msgvec, int iovsize, int header_size); > +extern void destroy_skb_vector(void ** vector, int size); > +extern void destroy_mmsg_vector(void * mmsgvector, int size, int > free_iov_base); > + > #endif > diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c > index 5d7ee49e..f4c6fb1 100644 > --- a/arch/um/kernel/irq.c > +++ b/arch/um/kernel/irq.c > @@ -17,6 +17,7 @@ > #include <as-layout.h> > #include <kern_util.h> > #include <os.h> > +#include <net_kern.h> > > /* > * We are on the "kernel side" so we cannot pick up the sys/epoll.h > @@ -136,6 +137,8 @@ void sigio_handler(int sig, struct siginfo *unused_si, > struct uml_pt_regs *regs) > spin_unlock_irqrestore(&uml_sigio_lock, flags); > } > > + flush_pending_netio(); > + > /* This needs a better 
way - it slows down the event loop */ > > free_irqs(); ------------------------------------------------------------------------------ Slashdot TV. Video for Nerds. Stuff that matters. http://tv.slashdot.org/ _______________________________________________ User-mode-linux-devel mailing list User-mode-linux-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel