Re: [uml-devel] [PATCH 10/10] High Resolution Timer subsystem for UML
The end result of applying all of 1-10 is: 1. UML now can push more than KVM for network related work (especially if QoS is in play) for a single virtual CPU use cases (both per vCPU and per Server). 2. As a side effect userspace has improved significantly as well. While it still looks sluggish compared to kvm due to the high cost of exec() on UML it is now perfectly usable for a lot of use cases which are happy to work with a single virtual CPU. Cheers A. On 29/08/14 08:05, anton.iva...@kot-begemot.co.uk wrote: > From: Anton Ivanov > > This patch adds an extra timer source which has correct timing > and uses an up-to-date OS API and. > > Results - correct kernel behaviour on timer related tasks. > > 1. Improvement in network performance (TCP state machines > are now fed correct time). > 2. Correct QoS and traffic shaping. > > This improvement does not (and cannot) fix UML userspace. Its > timer/time related behaviour is heavily dependent on getting > VTALRM pacing which is instantiated on a per userspace thread > basis. This patch does not fix this!!! It sorts out only the > kernel side - forwarding, qos, tcp, etc. > > Signed-off-by: Anton Ivanov > --- > arch/um/Makefile |2 +- > arch/um/include/asm/irq.h |3 +- > arch/um/include/shared/kern_util.h |1 + > arch/um/include/shared/os.h|5 + > arch/um/kernel/irq.c | 12 +++ > arch/um/kernel/process.c |7 +- > arch/um/kernel/time.c | 44 ++--- > arch/um/os-Linux/signal.c | 47 +- > arch/um/os-Linux/skas/process.c| 24 ++--- > arch/um/os-Linux/time.c| 178 > > 10 files changed, 250 insertions(+), 73 deletions(-) > > diff --git a/arch/um/Makefile b/arch/um/Makefile > index 133f7de..9864fb7 100644 > --- a/arch/um/Makefile > +++ b/arch/um/Makefile > @@ -121,7 +121,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT) > # The wrappers will select whether using "malloc" or the kernel allocator. 
> LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc > > -LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) > +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt > > # Used by link-vmlinux.sh which has special support for um link > export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) > diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h > index be9128b..4dd2f07 100644 > --- a/arch/um/include/asm/irq.h > +++ b/arch/um/include/asm/irq.h > @@ -22,8 +22,9 @@ > #define TELNETD_IRQ UM_END_ETH_IRQ + 7 > #define XTERM_IRQ UM_END_ETH_IRQ + 8 > #define RANDOM_IRQ UM_END_ETH_IRQ + 9 > +#define HRTIMER_IRQ UM_END_ETH_IRQ + 10 > > -#define LAST_IRQ RANDOM_IRQ > +#define LAST_IRQ HRTIMER_IRQ > #define NR_IRQS (LAST_IRQ + 1) > > #endif > diff --git a/arch/um/include/shared/kern_util.h > b/arch/um/include/shared/kern_util.h > index 83a91f9..0282b36 100644 > --- a/arch/um/include/shared/kern_util.h > +++ b/arch/um/include/shared/kern_util.h > @@ -37,6 +37,7 @@ extern void initial_thread_cb(void (*proc)(void *), void > *arg); > extern int is_syscall(unsigned long addr); > > extern void timer_handler(int sig, struct siginfo *unused_si, struct > uml_pt_regs *regs); > +extern void hrtimer_handler(int sig, struct siginfo *unused_si, struct > uml_pt_regs *regs); > > extern int start_uml(void); > extern void paging_init(void); > diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h > index 7f544f4..d4fefb9 100644 > --- a/arch/um/include/shared/os.h > +++ b/arch/um/include/shared/os.h > @@ -222,6 +222,7 @@ extern char *get_umid(void); > > /* signal.c */ > extern void timer_init(void); > +extern void uml_hrtimer_init(void); > extern void set_sigstack(void *sig_stack, int size); > extern void remove_sigstack(void); > extern void set_handler(int sig); > @@ -245,8 +246,12 @@ extern void idle_sleep(unsigned long long nsecs); > extern int set_interval(void); > extern int timer_one_shot(int ticks); > extern long long 
disable_timer(void); > +extern long long timer_remain(void); > extern void uml_idle_timer(void); > +extern long long persistent_clock_emulation(void); > extern long long os_nsecs(void); > +extern long long os_vnsecs(void); > +extern int itimer_init(void); > > /* skas/mem.c */ > extern long run_syscall_stub(struct mm_id * mm_idp, > diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c > index f4c6fb1..d70c487 100644 > --- a/arch/um/kernel/irq.c > +++ b/arch/um/kernel/irq.c > @@ -529,11 +529,23 @@ static struct irq_chip SIGVTALRM_irq_type = { > .irq_unmask = dummy, > }; > > +static struct irq_chip SIGUSR2_irq_type = { > + .name = "SIGUSR2", > + .irq_disable = dummy, > + .irq_enable = dummy, > + .irq_ack = dummy, > + .irq_mask = dummy, > + .irq_unmask = dummy, >
Re: [uml-devel] [PATCH 10/10] High Resolution Timer subsystem for UML
I just noticed that I missed an include in this submission; I will resubmit v2 of patch 10 shortly. A. On 29/08/14 08:05, anton.iva...@kot-begemot.co.uk wrote: > From: Anton Ivanov > > This patch adds an extra timer source which has correct timing > and uses an up-to-date OS API and. > > Results - correct kernel behaviour on timer related tasks. > > 1. Improvement in network performance (TCP state machines > are now fed correct time). > 2. Correct QoS and traffic shaping. > > This improvement does not (and cannot) fix UML userspace. Its > timer/time related behaviour is heavily dependent on getting > VTALRM pacing which is instantiated on a per userspace thread > basis. This patch does not fix this!!! It sorts out only the > kernel side - forwarding, qos, tcp, etc. > > Signed-off-by: Anton Ivanov > --- > arch/um/Makefile |2 +- > arch/um/include/asm/irq.h |3 +- > arch/um/include/shared/kern_util.h |1 + > arch/um/include/shared/os.h|5 + > arch/um/kernel/irq.c | 12 +++ > arch/um/kernel/process.c |7 +- > arch/um/kernel/time.c | 44 ++--- > arch/um/os-Linux/signal.c | 47 +- > arch/um/os-Linux/skas/process.c| 24 ++--- > arch/um/os-Linux/time.c| 178 > > 10 files changed, 250 insertions(+), 73 deletions(-) > > diff --git a/arch/um/Makefile b/arch/um/Makefile > index 133f7de..9864fb7 100644 > --- a/arch/um/Makefile > +++ b/arch/um/Makefile > @@ -121,7 +121,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT) > # The wrappers will select whether using "malloc" or the kernel allocator. 
> LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc > > -LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) > +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt > > # Used by link-vmlinux.sh which has special support for um link > export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) > diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h > index be9128b..4dd2f07 100644 > --- a/arch/um/include/asm/irq.h > +++ b/arch/um/include/asm/irq.h > @@ -22,8 +22,9 @@ > #define TELNETD_IRQ UM_END_ETH_IRQ + 7 > #define XTERM_IRQ UM_END_ETH_IRQ + 8 > #define RANDOM_IRQ UM_END_ETH_IRQ + 9 > +#define HRTIMER_IRQ UM_END_ETH_IRQ + 10 > > -#define LAST_IRQ RANDOM_IRQ > +#define LAST_IRQ HRTIMER_IRQ > #define NR_IRQS (LAST_IRQ + 1) > > #endif > diff --git a/arch/um/include/shared/kern_util.h > b/arch/um/include/shared/kern_util.h > index 83a91f9..0282b36 100644 > --- a/arch/um/include/shared/kern_util.h > +++ b/arch/um/include/shared/kern_util.h > @@ -37,6 +37,7 @@ extern void initial_thread_cb(void (*proc)(void *), void > *arg); > extern int is_syscall(unsigned long addr); > > extern void timer_handler(int sig, struct siginfo *unused_si, struct > uml_pt_regs *regs); > +extern void hrtimer_handler(int sig, struct siginfo *unused_si, struct > uml_pt_regs *regs); > > extern int start_uml(void); > extern void paging_init(void); > diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h > index 7f544f4..d4fefb9 100644 > --- a/arch/um/include/shared/os.h > +++ b/arch/um/include/shared/os.h > @@ -222,6 +222,7 @@ extern char *get_umid(void); > > /* signal.c */ > extern void timer_init(void); > +extern void uml_hrtimer_init(void); > extern void set_sigstack(void *sig_stack, int size); > extern void remove_sigstack(void); > extern void set_handler(int sig); > @@ -245,8 +246,12 @@ extern void idle_sleep(unsigned long long nsecs); > extern int set_interval(void); > extern int timer_one_shot(int ticks); > extern long long 
disable_timer(void); > +extern long long timer_remain(void); > extern void uml_idle_timer(void); > +extern long long persistent_clock_emulation(void); > extern long long os_nsecs(void); > +extern long long os_vnsecs(void); > +extern int itimer_init(void); > > /* skas/mem.c */ > extern long run_syscall_stub(struct mm_id * mm_idp, > diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c > index f4c6fb1..d70c487 100644 > --- a/arch/um/kernel/irq.c > +++ b/arch/um/kernel/irq.c > @@ -529,11 +529,23 @@ static struct irq_chip SIGVTALRM_irq_type = { > .irq_unmask = dummy, > }; > > +static struct irq_chip SIGUSR2_irq_type = { > + .name = "SIGUSR2", > + .irq_disable = dummy, > + .irq_enable = dummy, > + .irq_ack = dummy, > + .irq_mask = dummy, > + .irq_unmask = dummy, > +}; > + > + > void __init init_IRQ(void) > { > int i; > > irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, > handle_edge_irq); > + irq_set_chip_and_handler(HRTIMER_IRQ, &SIGUSR2_irq_type, > handle_edge_irq); > + > for (i = 1; i < NR_IRQS - 1 ; i++) > irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_
[uml-devel] [PATCH 08/10] Minor performance optimization for ubd
From: Anton Ivanov Obvious performance optimization - it is not necessary to read the requests one at a time in the IRQ handler Signed-off-by: Anton Ivanov --- arch/um/drivers/ubd_kern.c | 29 ++--- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 35ba00b..66d424a 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -443,6 +443,8 @@ static void do_ubd_request(struct request_queue * q); static int thread_fd = -1; static LIST_HEAD(restart); +static struct io_thread_req * ubd_request_list[MAX_SG]; + /* XXX - move this inside ubd_intr. */ /* Called without dev->lock held, and only in interrupt context. */ static void ubd_handler(void) @@ -451,21 +453,34 @@ static void ubd_handler(void) struct ubd *ubd; struct list_head *list, *next_ele; unsigned long flags; - int n; + int n, i; + + /* +* obvious optimization - we do not need to read the reqs one at a time +* we can read all pending reqs in one interrupt and handle them in bulk +*/ while(1){ - n = os_read_file(thread_fd, &req, -sizeof(struct io_thread_req *)); - if(n != sizeof(req)){ +do { + n = os_read_file(thread_fd, &ubd_request_list, +sizeof(struct io_thread_req *) * MAX_SG); +} while (n == -EINTR); + if(n < 0){ if(n == -EAGAIN) break; printk(KERN_ERR "spurious interrupt in ubd_handler, " "err = %d\n", -n); return; + } else if (n % sizeof(struct io_thread_req *) != 0) { + printk(KERN_ERR "spurious interrupt in ubd_handler, " + "err = %d\n", -n); + return; + } + for (i = 0; i < n / sizeof(struct io_thread_req *); i++) { + req = ubd_request_list[i]; + blk_end_request(req->req, 0, req->length); + kfree(req); } - - blk_end_request(req->req, 0, req->length); - kfree(req); } list_for_each_safe(list, next_ele, &restart){ -- 1.7.10.4 -- Slashdot TV. Video for Nerds. Stuff that matters. 
http://tv.slashdot.org/ ___ User-mode-linux-devel mailing list User-mode-linux-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel
[uml-devel] [PATCH 02/10] Remove unnecessary 'reactivate' statements
From: Anton Ivanov The epoll based controller has real (not emulated) edge and level semantics and the edge/level is handled by epoll. There is no toggling of the poll set any more, thus it is removed throughout Signed-off-by: Anton Ivanov --- arch/um/drivers/chan_kern.c |2 -- arch/um/drivers/line.c |2 -- arch/um/drivers/mconsole_kern.c |2 -- arch/um/drivers/net_kern.c |2 -- arch/um/drivers/port_kern.c |1 - arch/um/drivers/random.c|1 - arch/um/drivers/ubd_kern.c |1 - 7 files changed, 11 deletions(-) diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index acbe6c6..db0ff51 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -564,8 +564,6 @@ void chan_interrupt(struct line *line, int irq) tty_insert_flip_char(port, c, TTY_NORMAL); } while (err > 0); - if (err == 0) - reactivate_fd(chan->fd, irq); if (err == -EIO) { if (chan->primary) { tty_port_tty_hangup(&line->port, false); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 6c4511f..1e8df84 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -663,8 +663,6 @@ static irqreturn_t winch_interrupt(int irq, void *data) tty_kref_put(tty); } out: - if (winch->fd != -1) - reactivate_fd(winch->fd, WINCH_IRQ); return IRQ_HANDLED; } diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 3df3bd5..2b9bfa7 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -95,7 +95,6 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id) } if (!list_empty(&mc_requests)) schedule_work(&mconsole_work); - reactivate_fd(fd, MCONSOLE_IRQ); return IRQ_HANDLED; } @@ -243,7 +242,6 @@ void mconsole_stop(struct mc_request *req) (*req->cmd->handler)(req); } os_set_fd_block(req->originating_fd, 0); - reactivate_fd(req->originating_fd, MCONSOLE_IRQ); mconsole_reply(req, "", 0, 0); } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 39f1862..64d8426 100644 --- 
a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -137,8 +137,6 @@ static irqreturn_t uml_net_interrupt(int irq, void *dev_id) schedule_work(&lp->work); goto out; } - reactivate_fd(lp->fd, UM_ETH_IRQ); - out: spin_unlock(&lp->lock); return IRQ_HANDLED; diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index 40ca5cc..b0e9ff3 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -137,7 +137,6 @@ static void port_work_proc(struct work_struct *unused) if (!port->has_connection) continue; - reactivate_fd(port->fd, ACCEPT_IRQ); while (port_accept(port)) ; port->has_connection = 0; diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 9e3a722..ec3d788 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -72,7 +72,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, return ret ? : -EAGAIN; atomic_inc(&host_sleep_count); - reactivate_fd(random_fd, RANDOM_IRQ); add_sigio_fd(random_fd); add_wait_queue(&host_read_wait, &wait); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 3716e69..1cc72ae5 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -466,7 +466,6 @@ static void ubd_handler(void) blk_end_request(req->req, 0, req->length); kfree(req); } - reactivate_fd(thread_fd, UBD_IRQ); list_for_each_safe(list, next_ele, &restart){ ubd = container_of(list, struct ubd, restart); -- 1.7.10.4 -- Slashdot TV. Video for Nerds. Stuff that matters. http://tv.slashdot.org/ ___ User-mode-linux-devel mailing list User-mode-linux-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel
[uml-devel] [PATCH 06/10] RAW Ethernet transport for UML
From: Anton Ivanov This is an alternative to the well known pcap transport. In the absence of special hardware support, pcap is slow, guaranteed to be slow, and incurs significant penalties on NUMA/SMP systems due to the timestamping of every packet. This transport does not incur any of these timestamping penalties. It reads and writes packets directly using recvmmsg and sendmmsg calls. Signed-off-by: Anton Ivanov --- arch/um/Kconfig.net| 12 ++ arch/um/drivers/Makefile |2 + arch/um/drivers/uml_raw.h | 57 + arch/um/drivers/uml_raw_kern.c | 259 arch/um/drivers/uml_raw_user.c | 166 + 5 files changed, 496 insertions(+) create mode 100644 arch/um/drivers/uml_raw.h create mode 100644 arch/um/drivers/uml_raw_kern.c create mode 100644 arch/um/drivers/uml_raw_user.c diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net index 7c8ba68..e38f839 100644 --- a/arch/um/Kconfig.net +++ b/arch/um/Kconfig.net @@ -105,6 +105,18 @@ config UML_NET_GRE the applicable RFCs. The driver supports Soft GRE (wait for connect) as used in Cable systems, etc. +config UML_NET_RAW + bool "RAW transport" + depends on UML_NET + help +This User-Mode Linux network transport allows UML to bind a raw +Ethernet interface using a high-performance non-capture oriented +method to read and write traffic. The difference between this driver +and any form of PCAP is that this driver does not incur the cost +of getting the timestamp for every packet read. This allows it to +reach higher performance levels (in the Gigabit range). 
+ + config UML_NET_DAEMON bool "Daemon transport" depends on UML_NET diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 66127ee..9c9d821 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -11,6 +11,7 @@ slirp-objs := slirp_kern.o slirp_user.o daemon-objs := daemon_kern.o daemon_user.o uml_l2tpv3-objs := uml_l2tpv3_kern.o uml_l2tpv3_user.o uml_gre-objs := uml_gre_kern.o uml_gre_user.o +uml_raw-objs := uml_raw_kern.o uml_raw_user.o umcast-objs := umcast_kern.o umcast_user.o net-objs := net_kern.o net_user.o net_extra_user.o net_extra_kern.o mconsole-objs := mconsole_kern.o mconsole_user.o @@ -47,6 +48,7 @@ obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o obj-$(CONFIG_UML_NET_DAEMON) += daemon.o obj-$(CONFIG_UML_NET_L2TPV3) += uml_l2tpv3.o obj-$(CONFIG_UML_NET_GRE) += uml_gre.o +obj-$(CONFIG_UML_NET_RAW) += uml_raw.o obj-$(CONFIG_UML_NET_VDE) += vde.o obj-$(CONFIG_UML_NET_MCAST) += umcast.o obj-$(CONFIG_UML_NET_PCAP) += pcap.o diff --git a/arch/um/drivers/uml_raw.h b/arch/um/drivers/uml_raw.h new file mode 100644 index 000..224205e --- /dev/null +++ b/arch/um/drivers/uml_raw.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UML_RAW_H__ +#define __UML_RAW_H__ + +#include "net_user.h" + +struct uml_raw_data { + char *host_iface; + int fd; + void *dev; +uint32_t uml_raw_flags; + + /* packet mmap read */ + + uint8_t *scratch_buffer; /* for dummy reads*/ + uint8_t *multiread_buffer; + int ring_index; + + /* multi-rx read */ + + void ** skb_recv_vector; + void * mmsg_recv_vector; + void ** skb_send_vector; + void * mmsg_send_vector; + + uint32_t vector_len; + uint32_t recv_index; + uint32_t recv_enqueued; + + void * send_queue_info; + +}; + +extern const struct net_user_info uml_raw_user_info; + +extern int uml_raw_user_write(int fd, void *buf, int len, +struct uml_raw_data *pri); + +extern void 
raw_complete_init(void * dev_id, int max_depth); +extern void raw_kern_destroy(struct uml_raw_data *pri); + +#define UML_RAW_FLAG_TX_CHECKSUMS0x0001 +#define UML_RAW_FLAG_RX_CHECKSUMS0x0002 + + +#define UML_RAW_TP_BLOCK_SIZE 4096 +#define UML_RAW_TP_FRAME_SIZE 2048 +#define UML_RAW_TP_BLOCK_NR 32 +#define UML_RAW_TP_FRAME_NR 64 + + +#endif diff --git a/arch/um/drivers/uml_raw_kern.c b/arch/um/drivers/uml_raw_kern.c new file mode 100644 index 000..ea6dbdf --- /dev/null +++ b/arch/um/drivers/uml_raw_kern.c @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2001 Lennert Buytenhek (buyt...@gnu.org) and + * James Leu (j...@mindspring.net). + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include "linux/init.h" +#include +#include +#include +#include "net_kern.h" +#include "uml_raw.h" +#include "linux/mutex.h" + +#define DRIVER_NAME "uml-raw" + + +struct uml_raw_init { + char
[uml-devel] [PATCH 07/10] Performance and NUMA improvements for ubd
From: Anton Ivanov The use of seek()/read() and seek()/write() is a terminal disease on NUMA. Intense use of this on shared files (e.g. the master for a COW image) can cause anything up to and including killing CPUs on unhandled NMIs. This patch deals with this major UML issue (and one of UML's biggest performance pitfalls). As a result you can now run (subject to correct pinning) 2000+ UMLs on a NUMA box without crashing it. Signed-off-by: Anton Ivanov --- arch/um/drivers/ubd_kern.c | 29 - arch/um/include/shared/os.h |2 ++ arch/um/os-Linux/file.c | 18 ++ 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 1cc72ae5..35ba00b 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2012-2014 Cisco Systems * Copyright (C) 2000 Jeff Dike (jd...@karaya.com) * Licensed under the GPL */ @@ -534,11 +535,7 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len) { int err; - err = os_seek_file(fd, offset); - if (err < 0) - return err; - - err = os_read_file(fd, buf, len); + err = os_pread_file(fd, buf, len, offset); if (err < 0) return err; @@ -1375,14 +1372,10 @@ static int update_bitmap(struct io_thread_req *req) if(req->cow_offset == -1) return 0; - n = os_seek_file(req->fds[1], req->cow_offset); - if(n < 0){ - printk("do_io - bitmap lseek failed : err = %d\n", -n); - return 1; - } - - n = os_write_file(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words)); + n = os_pwrite_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words), + req->cow_offset + ); if(n != sizeof(req->bitmap_words)){ printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, req->fds[1]); @@ -1426,18 +1419,12 @@ static void do_io(struct io_thread_req *req) len = (end - start) * req->sectorsize; buf = &req->buffer[start * req->sectorsize]; - err = os_seek_file(req->fds[bit], off); - if(err < 0){ - printk("do_io - lseek failed : err = %d\n", 
-err); - req->error = 1; - return; - } if(req->op == UBD_READ){ n = 0; do { buf = &buf[n]; len -= n; - n = os_read_file(req->fds[bit], buf, len); + n = os_pread_file(req->fds[bit], buf, len, off); if (n < 0) { printk("do_io - read failed, err = %d " "fd = %d\n", -n, req->fds[bit]); @@ -1447,7 +1434,7 @@ static void do_io(struct io_thread_req *req) } while((n < len) && (n != 0)); if (n < len) memset(&buf[n], 0, len - n); } else { - n = os_write_file(req->fds[bit], buf, len); + n = os_pwrite_file(req->fds[bit], buf, len, off); if(n != len){ printk("do_io - write failed err = %d " "fd = %d\n", -n, req->fds[bit]); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 17b4e9f..7f544f4 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -142,6 +142,8 @@ extern int os_seek_file(int fd, unsigned long long offset); extern int os_open_file(const char *file, struct openflags flags, int mode); extern int os_read_file(int fd, void *buf, int len); extern int os_write_file(int fd, const void *buf, int count); +extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset); +extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset); extern int os_sync_file(int fd); extern int os_file_size(const char *file, unsigned long long *size_out); extern int os_file_modtime(const char *file, unsigned long *modtime); diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 07a7501..64951fd 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -266,6 +266,24 @@ int os_write_file(int fd, const void *buf, int len) return n; } +int os_pread_file(int fd, void *buf, int len, unsigned long long offset) +{ + int n = pread(fd, buf, len, offset); + + if (n < 0) + return -errno; + return n; +} + +int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset) +{ + int n = pwrite(fd,
[uml-devel] [PATCH 01/10] Epoll based interrupt controller
From: Anton Ivanov 1. Minimum kernel 2.5.99 2. No "walk the list" lookups for received IRQs - immediate identification of the correct handler to invoke 3. Full set of IRQ semantics - edge, level, read, write 3.1. Write is now a *REAL* write - so if you (ab)use the write to signify NONE (as in line.c) you will hang!!! 3.2. Read is fully backward compatible 4. Otherwise mostly compatible with original poll() based controller 5. Provides significant performance improvement (up to 10x for large device numbers) and lays the groundwork for the network and timer improvements to follow Signed-off-by: Anton Ivanov --- arch/um/drivers/line.c|3 +- arch/um/include/shared/irq_user.h | 19 +- arch/um/include/shared/os.h | 13 +- arch/um/kernel/irq.c | 456 + arch/um/os-Linux/irq.c| 145 +--- 5 files changed, 392 insertions(+), 244 deletions(-) diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 8035145..6c4511f 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2012 - 2014 Cisco Systems * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -283,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data) if (err) return err; if (output) - err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, + err = um_request_irq(driver->write_irq, fd, IRQ_NONE, line_write_interrupt, IRQF_SHARED, driver->write_irq_name, data); return err; diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h index df56330..8d6eaff 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2012 - 2014 Cisco Systems * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -9,16 +10,18 @@ #include struct irq_fd { - struct irq_fd *next; - void *id; - int fd; - int type; - int irq; - int events; - int current_events; +struct 
irq_fd *next; +struct irq_fd *leaf; +void *id; +int fd; +int type; +int irq; +int events; }; -enum { IRQ_READ, IRQ_WRITE }; +#define IRQ_NONE 0 +#define IRQ_READ 1 +#define IRQ_WRITE 2 struct siginfo; extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 021104d..17b4e9f 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2012 - 2014 Cisco Systems * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -276,15 +277,17 @@ extern void halt_skas(void); extern void reboot_skas(void); /* irq.c */ -extern int os_waiting_for_events(struct irq_fd *active_fds); -extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds); + +extern int os_setup_epoll(int maxevents); +extern int os_waiting_for_events_epoll(void *kernel_events, int maxevents); +extern int os_add_epoll_fd (int events, int fd, void * data); +extern int os_mod_epoll_fd (int events, int fd, void * data); +extern int os_del_epoll_fd (int fd); + extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2); extern void os_free_irq_later(struct irq_fd *active_fds, int irq, void *dev_id); -extern int os_get_pollfd(int i); -extern void os_set_pollfd(int i, int fd); -extern void os_set_ioignore(void); /* sigio.c */ extern int add_sigio_fd(int fd); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 1d8505b..5d7ee49e 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2012 - 2014 Cisco Systems * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL * Derived (i.e. 
mostly copied) from arch/i386/kernel/irq.c: @@ -18,6 +19,61 @@ #include /* +* We are on the "kernel side" so we cannot pick up the sys/epoll.h +* So we lift out of it the applicable key definitions. +*/ + + +enum EPOLL_EVENTS + { + EPOLLIN = 0x001, +#define EPOLLIN EPOLLIN + EPOLLPRI = 0x002, +#define EPOLLPRI EPOLLPRI + EPOLLOUT = 0x004, +#define EPOLLOUT EPOLLOUT + EPOLLRDNORM = 0x040, +#define EPOLLRDNORM EPOLLRDNORM + EPOLLRDBAND = 0x080, +#define EPOLLRDBAND EPOLLRDBAND + EPOLLWRNORM = 0x100, +#define EPOLLWRNORM EPOLLWRNORM + EPOLLWRBAND = 0x200, +#define EPOLLWRBAND EPOLLWRBAND + EPOLLMSG = 0x400, +#define EPOLLMSG EPOLLMSG +
[uml-devel] [PATCH 05/10] GRE transport for UML
From: Anton Ivanov This transport allows a UML to connect to another UML local or remote, the Linux host or any other network device running the industry standard Ethernet over GRE protocol. The transport supports all features of RFC 2784. The transport supports a common set of features with the kernel implementation. Checksum offload is supported on RX, TODO on TX. Additionally, the transport supports the so called "soft" termination where it can listen for an incoming connection which does not require the remote endpoint to be specified at configuration time. Signed-off-by: Anton Ivanov --- arch/um/Kconfig.net| 11 + arch/um/drivers/Makefile |2 + arch/um/drivers/uml_gre.h | 85 arch/um/drivers/uml_gre_kern.c | 446 arch/um/drivers/uml_gre_user.c | 355 5 files changed, 899 insertions(+) create mode 100644 arch/um/drivers/uml_gre.h create mode 100644 arch/um/drivers/uml_gre_kern.c create mode 100644 arch/um/drivers/uml_gre_user.c diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net index 9a98aa5..7c8ba68 100644 --- a/arch/um/Kconfig.net +++ b/arch/um/Kconfig.net @@ -94,6 +94,17 @@ config UML_NET_L2TPV3 the industry standard Ethernet over L2TPv3 protocol as described in the applicable RFCs +config UML_NET_GRE + bool "GRE transport" + depends on UML_NET + help +This User-Mode Linux network transport allows one or more running +UMLs on single or multiple hosts to communicate with each other, +the host as well as other remote or local network devices supporting +the industry standard Ethernet over GRE protocol as described in +the applicable RFCs. The driver supports Soft GRE (wait for connect) +as used in Cable systems, etc. 
+ config UML_NET_DAEMON bool "Daemon transport" depends on UML_NET diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index f54c279..66127ee 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -10,6 +10,7 @@ slip-objs := slip_kern.o slip_user.o slirp-objs := slirp_kern.o slirp_user.o daemon-objs := daemon_kern.o daemon_user.o uml_l2tpv3-objs := uml_l2tpv3_kern.o uml_l2tpv3_user.o +uml_gre-objs := uml_gre_kern.o uml_gre_user.o umcast-objs := umcast_kern.o umcast_user.o net-objs := net_kern.o net_user.o net_extra_user.o net_extra_kern.o mconsole-objs := mconsole_kern.o mconsole_user.o @@ -45,6 +46,7 @@ obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o obj-$(CONFIG_UML_NET_DAEMON) += daemon.o obj-$(CONFIG_UML_NET_L2TPV3) += uml_l2tpv3.o +obj-$(CONFIG_UML_NET_GRE) += uml_gre.o obj-$(CONFIG_UML_NET_VDE) += vde.o obj-$(CONFIG_UML_NET_MCAST) += umcast.o obj-$(CONFIG_UML_NET_PCAP) += pcap.o diff --git a/arch/um/drivers/uml_gre.h b/arch/um/drivers/uml_gre.h new file mode 100644 index 000..353306a --- /dev/null +++ b/arch/um/drivers/uml_gre.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UML_GRE_H__ +#define __UML_GRE_H__ + +#include "net_user.h" + + +#define GRE_MODE_CHECKSUM 8 /* checksum - todo*/ +#define GRE_MODE_RESERVED 4 /* unused */ +#define GRE_MODE_KEY 2 /* KEY present */ +#define GRE_MODE_SEQUENCE 1 /* no sequence */ + +#define GRE_MODE_IP_VERSION 16 /* on for v6, off for v4 */ + + +/* legacy modes */ + + +#define MAX_GRE_HEADER 16 + + +struct uml_gre_data { +void *remote_addr; +int remote_addr_size; +char *remote_addr_string; +char *local_addr_string; +char *rx_key_string; +char *tx_key_string; +uint32_t rx_key; +uint32_t tx_key; +uint8_t *network_buffer; + int fd; + void *dev; +uint32_t mode; /* listening, sending, etc */ +uint32_t 
sequence; + + /* Precomputed offsets */ + +uint32_t offset; /* main offset == header offset */ +uint32_t protocol_offset; +uint32_t checksum_offset; +uint32_t key_offset; +uint32_t sequence_offset; + + void ** skb_recv_vector; + void * mmsg_recv_vector; + + void ** skb_send_vector; + void * mmsg_send_vector; + + uint32_t vector_len; + uint32_t recv_index; + uint32_t recv_enqueued; + /* normally same as offset, add size of struct ipv4 header in ipv4 raw - API stupiditities */ + uint32_t header_size; + + void * send_queue_info; + +}; + +struct gre_minimal_header { + uint16_t header; + uint16_t arptype; +}; + + +extern const struct net_user_info uml_gre_user_info; + + +extern int uml_gre_user_sendmsg(int fd, void *header, int headerlen, void *data, int datalen, struct uml_gre_data *pri)
[uml-devel] [PATCH 09/10] Better IPC for UBD
From: Anton Ivanov socketpair() is a better IPC choice for lots of small requests as it allows deeper (and configurable) queues than pipe(). As a result, UBD will process nearly all of the requests submitted to it instead of bouncing a significant percentage under load. Signed-off-by: Anton Ivanov --- arch/um/drivers/ubd_kern.c |2 +- arch/um/drivers/ubd_user.c |2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 66d424a..ae78211 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1268,7 +1268,7 @@ static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) int n = os_write_file(thread_fd, &io_req, sizeof(io_req)); if (n != sizeof(io_req)) { - if (n != -EAGAIN) + if ((n != -EAGAIN) && (n != -ENOBUFS)) printk("write to io thread failed, " "errno = %d\n", -n); else if (list_empty(&dev->restart)) diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index e376f9b..117ff13 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -25,7 +25,7 @@ int start_io_thread(unsigned long sp, int *fd_out) { int pid, fds[2], err; - err = os_pipe(fds, 1, 1); + err = socketpair(AF_UNIX, SOCK_STREAM, 0, (int *) &fds); if(err < 0){ printk("start_io_thread - os_pipe failed, err = %d\n", -err); goto out; -- 1.7.10.4 -- Slashdot TV. Video for Nerds. Stuff that matters. http://tv.slashdot.org/ ___ User-mode-linux-devel mailing list User-mode-linux-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel
[uml-devel] [PATCH 03/10] High performance networking subsystem
From: Anton Ivanov Support for multi-packet vector IO - multiple packets read in one syscall and written in one syscall. Should work with legacy UML; thoroughly tested only for the epoll based IRQ controller. Minimal host kernel version for RX - 2.6.32. Minimal host kernel version for TX - 3.0. Tested on Debian 7.0/Ubuntu 12.x LTS which have the relevant syscalls, but do not have the appropriate glibc routine for TX (this is why it is a direct syscall). Signed-off-by: Anton Ivanov --- arch/um/drivers/Makefile |2 +- arch/um/drivers/net_kern.c| 63 - arch/um/include/asm/irq.h | 26 +-- arch/um/include/shared/net_kern.h | 24 ++ arch/um/include/shared/net_user.h | 24 ++ arch/um/kernel/irq.c |3 ++ 6 files changed, 109 insertions(+), 33 deletions(-) diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index e7582e1..836baaf 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -10,7 +10,7 @@ slip-objs := slip_kern.o slip_user.o slirp-objs := slirp_kern.o slirp_user.o daemon-objs := daemon_kern.o daemon_user.o umcast-objs := umcast_kern.o umcast_user.o -net-objs := net_kern.o net_user.o +net-objs := net_kern.o net_user.o net_extra_user.o net_extra_kern.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o ubd-objs := ubd_kern.o ubd_user.o diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 64d8426..1d253fa 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2012 - 2014 Cisco Systems * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Copyright (C) 2001 Lennert Buytenhek (buyt...@gnu.org) and * James Leu (j...@mindspring.net). @@ -29,6 +30,7 @@ static DEFINE_SPINLOCK(opened_lock); static LIST_HEAD(opened); +static int rr_counter = 0; /* * The drop_skb is used when we can't allocate an skb.
The @@ -42,6 +44,7 @@ static DEFINE_SPINLOCK(drop_lock); static struct sk_buff *drop_skb; static int drop_max; + static int update_drop_skb(int max) { struct sk_buff *new; @@ -77,24 +80,38 @@ static int uml_net_rx(struct net_device *dev) struct sk_buff *skb; /* If we can't allocate memory, try again next round. */ - skb = dev_alloc_skb(lp->max_packet); - if (skb == NULL) { - drop_skb->dev = dev; - /* Read a packet into drop_skb and don't do anything with it. */ - (*lp->read)(lp->fd, drop_skb, lp); - dev->stats.rx_dropped++; + if (lp->options & UML_NET_USE_SKB_READ) { + /* we expect a full formed, well behaved skb from zero copy drivers here */ + skb = (*lp->skb_read)(lp); + if (skb == NULL) { return 0; - } - - skb->dev = dev; - skb_put(skb, lp->max_packet); - skb_reset_mac_header(skb); - pkt_len = (*lp->read)(lp->fd, skb, lp); - - if (pkt_len > 0) { + } + pkt_len = skb->len; + } else { + skb = dev_alloc_skb(lp->max_packet + 32); + if (skb == NULL) { + drop_skb->dev = dev; + /* Read a packet into drop_skb and don't do anything with it. */ + (*lp->read)(lp->fd, drop_skb, lp); + dev->stats.rx_dropped++; + return 0; + } + + skb_reserve(skb,32); + skb->dev = dev; + skb_put(skb, lp->max_packet); + skb_reset_mac_header(skb); + + // Mark that virtual devices cannot provide required checksum. 
+ skb->ip_summed = CHECKSUM_NONE; + pkt_len = (*lp->read)(lp->fd, skb, lp); + if (pkt_len > 0) { skb_trim(skb, pkt_len); skb->protocol = (*lp->protocol)(skb); + } + } + if (pkt_len > 0) { dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; netif_rx(skb); @@ -192,8 +209,9 @@ static int uml_net_close(struct net_device *dev) struct uml_net_private *lp = netdev_priv(dev); netif_stop_queue(dev); + deactivate_fd(lp->fd, dev->irq); - um_free_irq(dev->irq, dev); + free_irq(dev->irq, dev); if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user); lp->fd = -1; @@ -216,7 +234,6 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&lp->lock, flags); len = (*lp->write)(lp->fd, skb, lp); - skb_tx_timestamp(skb); if (len == skb->len) { dev->stats.tx_packets++; @@ -273,14 +290,13 @@ static void uml_net_poll_controller(struct net_device *dev) static void uml_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlc
[uml-devel] [PATCH 10/10] High Resolution Timer subsystem for UML
From: Anton Ivanov This patch adds an extra timer source which has correct timing and uses an up-to-date OS API. Results - correct kernel behaviour on timer related tasks. 1. Improvement in network performance (TCP state machines are now fed correct time). 2. Correct QoS and traffic shaping. This improvement does not (and cannot) fix UML userspace. Its timer/time related behaviour is heavily dependent on getting VTALRM pacing which is instantiated on a per userspace thread basis. This patch does not fix this!!! It sorts out only the kernel side - forwarding, qos, tcp, etc. Signed-off-by: Anton Ivanov --- arch/um/Makefile |2 +- arch/um/include/asm/irq.h |3 +- arch/um/include/shared/kern_util.h |1 + arch/um/include/shared/os.h|5 + arch/um/kernel/irq.c | 12 +++ arch/um/kernel/process.c |7 +- arch/um/kernel/time.c | 44 ++--- arch/um/os-Linux/signal.c | 47 +- arch/um/os-Linux/skas/process.c| 24 ++--- arch/um/os-Linux/time.c| 178 10 files changed, 250 insertions(+), 73 deletions(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 133f7de..9864fb7 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -121,7 +121,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT) # The wrappers will select whether using "malloc" or the kernel allocator.
LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc -LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt # Used by link-vmlinux.sh which has special support for um link export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h index be9128b..4dd2f07 100644 --- a/arch/um/include/asm/irq.h +++ b/arch/um/include/asm/irq.h @@ -22,8 +22,9 @@ #define TELNETD_IRQUM_END_ETH_IRQ + 7 #define XTERM_IRQ UM_END_ETH_IRQ + 8 #define RANDOM_IRQ UM_END_ETH_IRQ + 9 +#define HRTIMER_IRQ UM_END_ETH_IRQ + 10 -#define LAST_IRQ RANDOM_IRQ +#define LAST_IRQ HRTIMER_IRQ #define NR_IRQS (LAST_IRQ + 1) #endif diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 83a91f9..0282b36 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -37,6 +37,7 @@ extern void initial_thread_cb(void (*proc)(void *), void *arg); extern int is_syscall(unsigned long addr); extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +extern void hrtimer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); extern int start_uml(void); extern void paging_init(void); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 7f544f4..d4fefb9 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -222,6 +222,7 @@ extern char *get_umid(void); /* signal.c */ extern void timer_init(void); +extern void uml_hrtimer_init(void); extern void set_sigstack(void *sig_stack, int size); extern void remove_sigstack(void); extern void set_handler(int sig); @@ -245,8 +246,12 @@ extern void idle_sleep(unsigned long long nsecs); extern int set_interval(void); extern int timer_one_shot(int ticks); extern long long disable_timer(void); +extern long long timer_remain(void); extern void uml_idle_timer(void); +extern long long 
persistent_clock_emulation(void); extern long long os_nsecs(void); +extern long long os_vnsecs(void); +extern int itimer_init(void); /* skas/mem.c */ extern long run_syscall_stub(struct mm_id * mm_idp, diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index f4c6fb1..d70c487 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -529,11 +529,23 @@ static struct irq_chip SIGVTALRM_irq_type = { .irq_unmask = dummy, }; +static struct irq_chip SIGUSR2_irq_type = { + .name = "SIGUSR2", + .irq_disable = dummy, + .irq_enable = dummy, + .irq_ack = dummy, + .irq_mask = dummy, + .irq_unmask = dummy, +}; + + void __init init_IRQ(void) { int i; irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); + irq_set_chip_and_handler(HRTIMER_IRQ, &SIGUSR2_irq_type, handle_edge_irq); + for (i = 1; i < NR_IRQS - 1 ; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); os_setup_epoll(MAX_EPOLL_EVENTS); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index bbcef52..b7ebc00 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * This is a per-cpu array. A processor only modifies its entry and it only @@ -215,7 +216,11 @@ void arch_cpu_idle(void) unsigned long long nsecs;
[uml-devel] [PATCH 04/10] L2TPv3 Transport Driver for UML
From: Anton Ivanov This transport allows a UML to connect to another UML local or remote, the Linux host or any other network device running the industry standard Ethernet over L2TPv3 protocol as per RFC 3931 (and successors). The transport supports a common set of features with the kernel implementation as well as the Cisco contributed L2TPv3 transport for QEMU/KVM. In all cases this is static tunnels only, no L2TPv3 control plane. Additionally, the transport supports the so called "soft" termination where it can listen for an incoming connection which does not require the remote endpoint to be specified at configuration time. Signed-off-by: Anton Ivanov --- arch/um/Kconfig.net | 10 + arch/um/drivers/Makefile |2 + arch/um/drivers/uml_l2tpv3.h | 121 ++ arch/um/drivers/uml_l2tpv3_kern.c | 442 + arch/um/drivers/uml_l2tpv3_user.c | 420 +++ 5 files changed, 995 insertions(+) create mode 100644 arch/um/drivers/uml_l2tpv3.h create mode 100644 arch/um/drivers/uml_l2tpv3_kern.c create mode 100644 arch/um/drivers/uml_l2tpv3_user.c diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net index 820a56f..9a98aa5 100644 --- a/arch/um/Kconfig.net +++ b/arch/um/Kconfig.net @@ -84,6 +84,16 @@ config UML_NET_SLIP UMLs on a single host). You may choose more than one without conflict. If you don't need UML networking, say N. 
+config UML_NET_L2TPV3 + bool "L2TPV3 transport" + depends on UML_NET + help +This User-Mode Linux network transport allows one or more running +UMLs on single or multiple hosts to communicate with each other, +the host as well as other remote or local network devices supporting +the industry standard Ethernet over L2TPv3 protocol as described in +the applicable RFCs + config UML_NET_DAEMON bool "Daemon transport" depends on UML_NET diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 836baaf..f54c279 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -9,6 +9,7 @@ slip-objs := slip_kern.o slip_user.o slirp-objs := slirp_kern.o slirp_user.o daemon-objs := daemon_kern.o daemon_user.o +uml_l2tpv3-objs := uml_l2tpv3_kern.o uml_l2tpv3_user.o umcast-objs := umcast_kern.o umcast_user.o net-objs := net_kern.o net_user.o net_extra_user.o net_extra_kern.o mconsole-objs := mconsole_kern.o mconsole_user.o @@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o obj-$(CONFIG_UML_NET_DAEMON) += daemon.o +obj-$(CONFIG_UML_NET_L2TPV3) += uml_l2tpv3.o obj-$(CONFIG_UML_NET_VDE) += vde.o obj-$(CONFIG_UML_NET_MCAST) += umcast.o obj-$(CONFIG_UML_NET_PCAP) += pcap.o diff --git a/arch/um/drivers/uml_l2tpv3.h b/arch/um/drivers/uml_l2tpv3.h new file mode 100644 index 000..5137bc7 --- /dev/null +++ b/arch/um/drivers/uml_l2tpv3.h @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UML_L2TPV3_H__ +#define __UML_L2TPV3_H__ + +#include "net_user.h" + + +#define NEW_MODE_IP_VERSION 1 /* on for v6, off for v4 */ +#define NEW_MODE_UDP 2 /* on for udp, off for raw ip */ +#define NEW_MODE_COOKIE 4 /* cookie present */ +#define NEW_MODE_COOKIE_SIZE 8 /* on for 64 bit */ +#define NEW_MODE_NO_COUNTER 16 /* DT - no counter 
*/ + +/* legacy modes */ + +/* mode 0 */ + +#define LEGACY_UDP6_64_NO_COUNTER (NEW_MODE_IP_VERSION + NEW_MODE_UDP + NEW_MODE_COOKIE + NEW_MODE_COOKIE_SIZE + NEW_MODE_NO_COUNTER) + +#define LEGACY_MODE0 LEGACY_UDP6_64_NO_COUNTER + +/* mode 1 */ + +#define LEGACY_IP6_64_NO_COUNTER (NEW_MODE_IP_VERSION + NEW_MODE_COOKIE + NEW_MODE_COOKIE_SIZE + NEW_MODE_NO_COUNTER) + +#define LEGACY_MODE1 LEGACY_IP6_64_NO_COUNTER + +/* mode 2 */ + +#define LEGACY_UDP4_64_COUNTER (NEW_MODE_COOKIE + NEW_MODE_UDP + NEW_MODE_COOKIE_SIZE ) + +#define LEGACY_MODE2 LEGACY_UDP4_64_COUNTER + +/* mode 3 */ + +#define LEGACY_IP4_64_COUNTER (NEW_MODE_COOKIE + NEW_MODE_COOKIE_SIZE) + +#define LEGACY_MODE3 LEGACY_IP4_64_COUNTER + + +#define L2TPV3_HEADER 16 + + +struct temphtonl { + uint32_t low; + uint32_t high; +}; + + +struct uml_l2tpv3_data { + void *remote_addr; + int remote_addr_size; + char *remote_addr_string; + char *local_addr_string; + char *local_service; + char *remote_service; + char *local_session_string; + char *remote_session_string; + uint32_t local_session; + uint32_t remote_session; + char *rx_cookie_string; + char *tx_cookie_string; + uint64_t rx_cookie; + uint64_t tx_cookie; + + /* this should be ifdef-ed to be used only in single pac
[uml-devel] [PATCHv2 3/10] High performance networking subsystem
From: Anton Ivanov Support for multi-packet vector IO - multiple packets read in one syscall and written in one syscall. Should work with legacy UML; thoroughly tested only for the epoll based IRQ controller. Minimal host kernel version for RX - 2.6.32. Minimal host kernel version for TX - 3.0. Tested on Debian 7.0/Ubuntu 12.x LTS which have the relevant syscalls, but do not have the appropriate glibc routine for TX (this is why it is a direct syscall). Signed-off-by: Anton Ivanov --- I missed net_extra_* in the original submission; this is a resubmit. Apologies. arch/um/drivers/Makefile |2 +- arch/um/drivers/net_extra_kern.c | 218 + arch/um/drivers/net_extra_user.c | 319 + arch/um/drivers/net_kern.c| 63 +--- arch/um/include/asm/irq.h | 26 +-- arch/um/include/shared/net_kern.h | 24 +++ arch/um/include/shared/net_user.h | 24 +++ arch/um/kernel/irq.c |3 + 8 files changed, 646 insertions(+), 33 deletions(-) create mode 100644 arch/um/drivers/net_extra_kern.c create mode 100644 arch/um/drivers/net_extra_user.c diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index e7582e1..836baaf 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -10,7 +10,7 @@ slip-objs := slip_kern.o slip_user.o slirp-objs := slirp_kern.o slirp_user.o daemon-objs := daemon_kern.o daemon_user.o umcast-objs := umcast_kern.o umcast_user.o -net-objs := net_kern.o net_user.o +net-objs := net_kern.o net_user.o net_extra_user.o net_extra_kern.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o ubd-objs := ubd_kern.o ubd_user.o diff --git a/arch/um/drivers/net_extra_kern.c b/arch/um/drivers/net_extra_kern.c new file mode 100644 index 000..b1d36d8 --- /dev/null +++ b/arch/um/drivers/net_extra_kern.c @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buyt...@gnu.org) and + * James Leu (j...@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "init.h" +#include "irq_kern.h" +#include "irq_user.h" +#include "mconsole_kern.h" +#include "net_kern.h" +#include "net_user.h" + +#define DRIVER_NAME "uml-netdev" + +/* + These are wrappers around key kernel side functions so we can + invoke them from the user side of our Schizofreniac self + +*/ + +extern spinlock_t uml_sigio_lock; +extern int in_epoll_loop; + +static DEFINE_SPINLOCK(net_queue_list); + +static struct mmsg_queue_info * pending_queue = NULL; + +void uml_net_destroy_skb(void * skb) +{ + if (skb) { + kfree_skb((struct sk_buff *) skb); + } +} + +void * uml_net_build_skb (void * dev) +{ + struct uml_net_private *lp = netdev_priv((struct net_device *) dev); + struct sk_buff * skb; + + skb = dev_alloc_skb(lp->max_packet + 32); + if (skb) { + /* add some tunneling space just in case, we usually do not need it as we use vector IO */ + skb_reserve(skb,32); + skb->dev = dev; + skb_put(skb, lp->max_packet); + skb_reset_mac_header(skb); + skb->ip_summed = CHECKSUM_NONE; + } else { + printk("Failed Atomic SKB Allocation, will drop\n"); + } + return skb; +} + +void * uml_net_skb_data (void * skb) { + if (skb) { + return ((struct sk_buff *) skb)->data; + } else { + printk("hole in vector!!!\n"); + return NULL; + } +} + + +int uml_net_advance_head( struct mmsg_queue_info * queue_info, int advance) +{ + int queue_depth; + queue_info->head = + (queue_info->head + advance) + % queue_info->max_depth; + + /* caller is already holding the head_lock */ + + spin_lock(&queue_info->tail_lock); + queue_info->queue_depth -= advance; + queue_depth = queue_info->queue_depth; + spin_unlock(&queue_info->tail_lock); + return queue_depth; +} + +/* + This is called by enqueuers which should hold the + head lock already +*/ + +int 
uml_net_advance_tail( struct mmsg_queue_info * queue_info, int advance) +{ + int queue_depth; + queue_info->tail = + (queue_info->tail + advance) + % queue_info->max_depth; + spin_lock(&queue_info->head_lock); + queue_info->queue_depth += advance; + queue_depth = queue_info->queue_depth; + spin_unlock(&queue_info->head_lock); + return queue_depth; +} + + +static int flush_mmsg_queue(struct mmsg_que
[uml-devel] [PATCHv2 10/10] High Resolution Timer subsystem for UML
From: Anton Ivanov This patch adds an extra timer source which has correct timing and uses an up-to-date OS API. Results - correct kernel behaviour on timer related tasks. 1. Improvement in network performance (TCP state machines are now fed correct time). 2. Correct QoS and traffic shaping. This improvement does not (and cannot) fix UML userspace. Its timer/time related behaviour is heavily dependent on getting VTALRM pacing which is instantiated on a per userspace thread basis. This patch does not fix this!!! It sorts out only the kernel side - forwarding, qos, tcp, etc. Signed-off-by: Anton Ivanov --- I missed timer-internal.h in the original submission. Apologies. arch/um/Makefile|2 +- arch/um/include/asm/irq.h |3 +- arch/um/include/shared/kern_util.h |1 + arch/um/include/shared/os.h |5 + arch/um/include/shared/timer-internal.h | 20 arch/um/kernel/irq.c| 12 +++ arch/um/kernel/process.c|7 +- arch/um/kernel/time.c | 44 +--- arch/um/os-Linux/signal.c | 47 +++- arch/um/os-Linux/skas/process.c | 24 ++--- arch/um/os-Linux/time.c | 178 --- 11 files changed, 270 insertions(+), 73 deletions(-) create mode 100644 arch/um/include/shared/timer-internal.h diff --git a/arch/um/Makefile b/arch/um/Makefile index 133f7de..9864fb7 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -121,7 +121,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT) # The wrappers will select whether using "malloc" or the kernel allocator.
LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc -LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt # Used by link-vmlinux.sh which has special support for um link export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h index be9128b..4dd2f07 100644 --- a/arch/um/include/asm/irq.h +++ b/arch/um/include/asm/irq.h @@ -22,8 +22,9 @@ #define TELNETD_IRQUM_END_ETH_IRQ + 7 #define XTERM_IRQ UM_END_ETH_IRQ + 8 #define RANDOM_IRQ UM_END_ETH_IRQ + 9 +#define HRTIMER_IRQ UM_END_ETH_IRQ + 10 -#define LAST_IRQ RANDOM_IRQ +#define LAST_IRQ HRTIMER_IRQ #define NR_IRQS (LAST_IRQ + 1) #endif diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 83a91f9..0282b36 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -37,6 +37,7 @@ extern void initial_thread_cb(void (*proc)(void *), void *arg); extern int is_syscall(unsigned long addr); extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +extern void hrtimer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); extern int start_uml(void); extern void paging_init(void); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 7f544f4..d4fefb9 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -222,6 +222,7 @@ extern char *get_umid(void); /* signal.c */ extern void timer_init(void); +extern void uml_hrtimer_init(void); extern void set_sigstack(void *sig_stack, int size); extern void remove_sigstack(void); extern void set_handler(int sig); @@ -245,8 +246,12 @@ extern void idle_sleep(unsigned long long nsecs); extern int set_interval(void); extern int timer_one_shot(int ticks); extern long long disable_timer(void); +extern long long timer_remain(void); extern void uml_idle_timer(void); +extern long long 
persistent_clock_emulation(void); extern long long os_nsecs(void); +extern long long os_vnsecs(void); +extern int itimer_init(void); /* skas/mem.c */ extern long run_syscall_stub(struct mm_id * mm_idp, diff --git a/arch/um/include/shared/timer-internal.h b/arch/um/include/shared/timer-internal.h new file mode 100644 index 000..70f1ee1 --- /dev/null +++ b/arch/um/include/shared/timer-internal.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __TIMER_INTERNAL_H__ +#define __TIMER_INTERNAL_H__ + +#define TIMER_MULTIPLIER 256 +#define TIMER_MIN_DELTA 500 + +extern void timer_lock(void); +extern void timer_unlock(void); + +extern long long hrtimer_disable(void); +extern long long tracingtimer_disable(void); + +#endif + diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index f4c6fb1..d70c487 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -529,11 +529,23 @@ static struct irq_chip SIGVTALRM_irq_type = { .irq_unmask = dummy, }; +static struct irq_chip SIGUSR2_irq_type = { + .name = "SIGUSR2", + .irq_disable = dummy, + .irq_enab