[uml-devel] PATCH v5-redo

2017-10-06 Thread anton . ivanov
This is a resubmit of the patch from earlier today. I had
forgotten to include all files - apologies.

A.



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


[uml-devel] [PATCH v5-redo 1/3] Epoll based IRQ controller

2017-10-06 Thread anton . ivanov
From: Anton Ivanov 

1. Removes the need to walk the IRQ/Device list to determine
who triggered the IRQ.
2. Improves scalability (up to several times performance
improvement for cases with 10s of devices).
3. Improves UML baseline IO performance for one disk + one NIC
use case by up to 10%.
4. Introduces write poll triggered IRQs.
5. Prerequisite for introducing the high performance mmsg
(recvmmsg/sendmmsg) family of functions in network IO.
6. Fixes RNG shutdown which was leaking a file descriptor

Signed-off-by: Anton Ivanov 
---
 arch/um/drivers/chan_kern.c   |  53 +
 arch/um/drivers/line.c|   2 +-
 arch/um/drivers/random.c  |  11 +-
 arch/um/drivers/ubd_kern.c|   4 +-
 arch/um/include/shared/irq_user.h |  12 +-
 arch/um/include/shared/os.h   |  17 +-
 arch/um/kernel/irq.c  | 460 --
 arch/um/os-Linux/irq.c| 202 +
 8 files changed, 444 insertions(+), 317 deletions(-)

diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index acbe6c67afba..05588f9466c7 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -171,56 +171,19 @@ int enable_chan(struct line *line)
return err;
 }
 
-/* Items are added in IRQ context, when free_irq can't be called, and
- * removed in process context, when it can.
- * This handles interrupt sources which disappear, and which need to
- * be permanently disabled.  This is discovered in IRQ context, but
- * the freeing of the IRQ must be done later.
- */
-static DEFINE_SPINLOCK(irqs_to_free_lock);
-static LIST_HEAD(irqs_to_free);
-
-void free_irqs(void)
-{
-   struct chan *chan;
-   LIST_HEAD(list);
-   struct list_head *ele;
-   unsigned long flags;
-
-   spin_lock_irqsave(&irqs_to_free_lock, flags);
-   list_splice_init(&irqs_to_free, &list);
-   spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-
-   list_for_each(ele, &list) {
-   chan = list_entry(ele, struct chan, free_list);
-
-   if (chan->input && chan->enabled)
-   um_free_irq(chan->line->driver->read_irq, chan);
-   if (chan->output && chan->enabled)
-   um_free_irq(chan->line->driver->write_irq, chan);
-   chan->enabled = 0;
-   }
-}
-
 static void close_one_chan(struct chan *chan, int delay_free_irq)
 {
-   unsigned long flags;
-
if (!chan->opened)
return;
 
-   if (delay_free_irq) {
-   spin_lock_irqsave(&irqs_to_free_lock, flags);
-   list_add(&chan->free_list, &irqs_to_free);
-   spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-   }
-   else {
-   if (chan->input && chan->enabled)
-   um_free_irq(chan->line->driver->read_irq, chan);
-   if (chan->output && chan->enabled)
-   um_free_irq(chan->line->driver->write_irq, chan);
-   chan->enabled = 0;
-   }
+/* we can safely call free now - it will be marked
+ *  as free and freed once the IRQ stopped processing
+ */
+   if (chan->input && chan->enabled)
+   um_free_irq(chan->line->driver->read_irq, chan);
+   if (chan->output && chan->enabled)
+   um_free_irq(chan->line->driver->write_irq, chan);
+   chan->enabled = 0;
if (chan->ops->close != NULL)
(*chan->ops->close)(chan->fd, chan->data);
 
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 366e57f5e8d6..8d80b27502e6 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct 
line *line, void *data)
if (err)
return err;
if (output)
-   err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+   err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
 line_write_interrupt, IRQF_SHARED,
 driver->write_irq_name, data);
return err;
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 37c51a6be690..778a0e52d5a5 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -154,7 +155,14 @@ static int __init rng_init (void)
 /*
  * rng_cleanup - shutdown RNG module
  */
-static void __exit rng_cleanup (void)
+
+static void cleanup(void)
+{
+   free_irq_by_fd(random_fd);
+   os_close_file(random_fd);
+}
+
+static void __exit rng_cleanup(void)
 {
os_close_file(random_fd);
misc_deregister (&rng_miscdev);
@@ -162,6 +170,7 @@ static void __exit rng_cleanup (void)
 
 module_init (rng_init);
 module_exit (rng_cleanup);
+__uml_exitcall(cleanup);
 
 MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver");
 MODULE_LICENSE("GPL");
diff --git 

[uml-devel] [PATCH v5-redo 2/3] High Performance Vector Network Driver

2017-10-06 Thread anton . ivanov
From: Anton Ivanov 

1. Provides infrastructure for vector IO using recvmmsg/sendmmsg.
1.1. Multi-message read.
1.2. Multi-message write.
1.3. Optimized queue support for multi-packet enqueue/dequeue.
1.4. BQL/DQL support.
2. Implements several transports as well as support
for direct wiring of PWEs to NIC. Allows direct connection of VMs
to host, other VMs and network devices with no switch in use.
2.1. Raw socket >4 times faster than existing pcap based transport
2.2. Tap transport using socket RX and tap xmit. >3 times faster
RX than existing tap, 10+ times faster on TX.
2.3. GRE transport - direct wiring to GRE PWE, >3 times faster RX
than any existing transport.
2.4. L2TPv3 transport - direct wiring to L2TPv3 PWE, > 3 times
faster RX than any existing transport.
2.5. TX in all cases shows only minor improvement, but consumes
significantly less CPU under load.
3. Tuning and performance related information via ethtool.
4. Rudimentary BPF support - used in tap only to avoid software
looping
5. Scatter Gather support.
6. VNET and checksum offload support for raw socket transport.

Signed-off-by: Anton Ivanov 
---
 arch/um/Kconfig.net |   11 +
 arch/um/drivers/Makefile|4 +-
 arch/um/drivers/net_kern.c  |4 +-
 arch/um/drivers/vector_kern.c   | 1531 +++
 arch/um/drivers/vector_kern.h   |  128 +++
 arch/um/drivers/vector_transports.c |  430 ++
 arch/um/drivers/vector_user.c   |  528 
 arch/um/drivers/vector_user.h   |   90 ++
 arch/um/include/asm/irq.h   |   12 +
 arch/um/include/shared/net_kern.h   |2 +
 10 files changed, 2737 insertions(+), 3 deletions(-)
 create mode 100644 arch/um/drivers/vector_kern.c
 create mode 100644 arch/um/drivers/vector_kern.h
 create mode 100644 arch/um/drivers/vector_transports.c
 create mode 100644 arch/um/drivers/vector_user.c
 create mode 100644 arch/um/drivers/vector_user.h

diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
index 820a56f00332..0516dc76e6aa 100644
--- a/arch/um/Kconfig.net
+++ b/arch/um/Kconfig.net
@@ -108,6 +108,17 @@ config UML_NET_DAEMON
 more than one without conflict.  If you don't need UML networking,
 say N.
 
+config UML_NET_VECTOR
+   bool "Vector I/O high performance network devices"
+   depends on UML_NET
+   help
+   This User-Mode Linux network driver uses multi-message send
+   and receive functions. The host running the UML guest must have
+   a linux kernel version above 3.0 and a libc version > 2.13.
+   This driver provides tap, raw, gre and l2tpv3 network transports
+   with up to 4 times higher network throughput than the UML network
+   drivers.
+
 config UML_NET_VDE
bool "VDE transport"
depends on UML_NET
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index e7582e1d248c..16b3cebddafb 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -9,6 +9,7 @@
 slip-objs := slip_kern.o slip_user.o
 slirp-objs := slirp_kern.o slirp_user.o
 daemon-objs := daemon_kern.o daemon_user.o
+vector-objs := vector_kern.o vector_user.o vector_transports.o
 umcast-objs := umcast_kern.o umcast_user.o
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
@@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
 obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
 obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
 obj-$(CONFIG_UML_NET_DAEMON) += daemon.o 
+obj-$(CONFIG_UML_NET_VECTOR) += vector.o
 obj-$(CONFIG_UML_NET_VDE) += vde.o
 obj-$(CONFIG_UML_NET_MCAST) += umcast.o
 obj-$(CONFIG_UML_NET_PCAP) += pcap.o
@@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
 
 # pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o 
vde_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o 
vde_user.o vector_user.o
 CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
 
 include arch/um/scripts/Makefile.rules
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 1669240c7a25..03cc5857a3b6 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(unsigned long _conn)
 #endif
 }
 
-static void setup_etheraddr(struct net_device *dev, char *str)
+void uml_net_setup_etheraddr(struct net_device *dev, char *str)
 {
unsigned char *addr = dev->dev_addr;
char *end;
@@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac,
 */
snprintf(dev->name, sizeof(dev->name), "eth%d", n);
 
-   setup_etheraddr(dev, mac);
+   uml_net_setup_etheraddr(dev, mac);
 
printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
 
diff --git a/arch/um/drivers/vector_kern.c 

[uml-devel] [PATCH v5-redo 3/3] TSO/GSO Support for Vector Network drivers

2017-10-06 Thread anton . ivanov
From: Anton Ivanov 

1. TSO/GSO support where applicable or available
RX - raw and tapraw
TX - tap only (raw appears to be hitting a bug in the
af_packet family in the kernel resulting in it being
stuck in a -ENOBUFS loop).

This results in TX/RX TCP performance ~ 2-3 times higher
than qemu on same hardware (measured with iperf).

2. Cleanup and unification of the RX/TX code to use the
same skb and msg prep routines.

Adds two new transport arguments applicable to all transports

gro - enable/disable GRO in driver
vec - enable/disable multi-message vector IO

3. Adds change/set device features support. GRO, GSO, TSO, SG, etc.
can now be adjusted via ethtool.

Signed-off-by: Anton Ivanov 
---
 arch/um/drivers/vector_kern.c   | 167 ++--
 arch/um/drivers/vector_kern.h   |   1 +
 arch/um/drivers/vector_transports.c |  15 ++--
 arch/um/drivers/vector_user.c   |   5 +-
 4 files changed, 135 insertions(+), 53 deletions(-)

diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index f0ea7f98b86c..268862d8f915 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -75,7 +75,7 @@ static void vector_eth_configure(int n, struct arglist *def);
 #define SAFETY_MARGIN 32
 #define DEFAULT_VECTOR_SIZE 64
 #define TX_SMALL_PACKET 128
-#define MAX_IOV_SIZE 8
+#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
 
 static const struct {
const char string[ETH_GSTRING_LEN];
@@ -162,15 +162,45 @@ static int get_headroom(struct arglist *def)
return DEFAULT_HEADROOM;
 }
 
+static int get_req_size(struct arglist *def)
+{
+   char *gro = uml_vector_fetch_arg(def, "gro");
+   long result;
+
+   if (gro != NULL) {
+   if (kstrtoul(gro, 10, &result) == 0) {
+   if (result > 0)
+   return 65536;
+   }
+   }
+   return get_mtu(def) + ETH_HEADER_OTHER + get_headroom(def) + 
SAFETY_MARGIN;
+}
+
+
 static int get_transport_options(struct arglist *def)
 {
char *transport = uml_vector_fetch_arg(def, "transport");
+   char *vector = uml_vector_fetch_arg(def, "vec");
+
+   int vec_rx = VECTOR_RX;
+   int vec_tx = VECTOR_TX;
+   long parsed;
+
+   if (vector != NULL) {
+   if (kstrtoul(vector, 10, &parsed) == 0) {
+   if (parsed == 0) {
+   vec_rx = 0;
+   vec_tx = 0;
+   }
+   }
+   }
+
 
if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
-   return (VECTOR_RX | VECTOR_BPF);
+   return (vec_rx | VECTOR_BPF);
if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
-   return (VECTOR_TX | VECTOR_RX | VECTOR_BPF);
-   return (VECTOR_TX | VECTOR_RX);
+   return (vec_rx | vec_tx | VECTOR_BPF);
+   return (vec_rx | vec_tx);
 }
 
 
@@ -547,13 +577,59 @@ static struct vector_queue *create_queue(
  * just read into a prepared queue filled with skbuffs.
  */
 
+static struct sk_buff *prep_skb(struct vector_private *vp, struct user_msghdr 
*msg)
+{
+   int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+   struct sk_buff *result;
+   int iov_index = 0, len;
+   struct iovec *iov = msg->msg_iov;
+   int err, nr_frags, frag;
+   skb_frag_t *skb_frag;
+
+   if (vp->req_size <= linear)
+   len = linear;
+   else
+   len = vp->req_size;
+   result = alloc_skb_with_frags(linear, len - vp->max_packet, 3, &err, 
GFP_ATOMIC);
+   if (vp->header_size > 0)
+   iov_index++;
+   if (result == NULL) {
+   iov[iov_index].iov_base = NULL;
+   iov[iov_index].iov_len = 0;
+   goto done;
+   }
+   skb_reserve(result, vp->headroom);
+   result->dev = vp->dev;
+   skb_put(result, vp->max_packet);
+   result->data_len = len - vp->max_packet;
+   result->len += len - vp->max_packet;
+   skb_reset_mac_header(result);
+   result->ip_summed = CHECKSUM_NONE;
+   iov[iov_index].iov_base = result->data;
+   iov[iov_index].iov_len = vp->max_packet;
+   iov_index++;
+
+   nr_frags = skb_shinfo(result)->nr_frags;
+   for (frag = 0; frag < nr_frags; frag++) {
+   skb_frag = &skb_shinfo(result)->frags[frag];
+   iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+   if (iov[iov_index].iov_base != NULL)
+   iov[iov_index].iov_len = skb_frag_size(skb_frag);
+   else
+   iov[iov_index].iov_len = 0;
+   iov_index++;
+   }
+done:
+   msg->msg_iovlen = iov_index;
+   return result;
+}
+
+
 /* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
 
 static void prep_queue_for_rx(struct vector_queue *qi)
 {
struct vector_private *vp =