On 15/08/15 09:15, Richard Weinberger wrote: > Am 09.08.2015 um 19:53 schrieb Thomas Meyer: >> Switch the UML clocksource from interval timers to POSIX interval timers and >> move to a monotonic timer. >> >> This fixes suspend/resume-related timer issues and improves network >> performance >> as TCP state machines are now fed with the correct time; this also gives correct QoS and >> traffic shaping. > The patch is rather big. Please describe in your commit message how exactly > it works and why. > It changes many internals.
Tom, I will be happy to assist with the drafting, feel free to take the draft conversation off-list until we are ready. A. > >> Signed-off-by: Thomas Meyer <tho...@m3y3r.de> > Please honor also the original author of the patch. > >> --- >> arch/um/Makefile | 2 +- >> arch/um/include/shared/os.h | 15 +- >> arch/um/include/shared/skas/stub-data.h | 5 +- >> arch/um/include/shared/timer-internal.h | 13 ++ >> arch/um/kernel/process.c | 6 +- >> arch/um/kernel/skas/clone.c | 5 - >> arch/um/kernel/skas/mmu.c | 2 + >> arch/um/kernel/time.c | 80 +++++++---- >> arch/um/os-Linux/internal.h | 1 - >> arch/um/os-Linux/main.c | 6 +- >> arch/um/os-Linux/process.c | 5 + >> arch/um/os-Linux/signal.c | 35 +++-- >> arch/um/os-Linux/skas/process.c | 44 ++---- >> arch/um/os-Linux/time.c | 248 >> ++++++++++++++++---------------- >> 14 files changed, 234 insertions(+), 233 deletions(-) >> create mode 100644 arch/um/include/shared/timer-internal.h >> delete mode 100644 arch/um/os-Linux/internal.h >> >> diff --git a/arch/um/Makefile b/arch/um/Makefile >> index 098ab33..eb79b4b 100644 >> --- a/arch/um/Makefile >> +++ b/arch/um/Makefile >> @@ -131,7 +131,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT) >> # The wrappers will select whether using "malloc" or the kernel allocator. 
>> LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc >> >> -LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) >> +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt >> >> # Used by link-vmlinux.sh which has special support for um link >> export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) >> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h >> index ad3fa3a..7519c98 100644 >> --- a/arch/um/include/shared/os.h >> +++ b/arch/um/include/shared/os.h >> @@ -183,6 +183,7 @@ extern int create_mem_file(unsigned long long len); >> /* process.c */ >> extern unsigned long os_process_pc(int pid); >> extern int os_process_parent(int pid); >> +extern void os_alarm_process(int pid); >> extern void os_stop_process(int pid); >> extern void os_kill_process(int pid, int reap_child); >> extern void os_kill_ptraced_process(int pid, int reap_child); >> @@ -217,7 +218,7 @@ extern int set_umid(char *name); >> extern char *get_umid(void); >> >> /* signal.c */ >> -extern void timer_init(void); >> +extern void timer_set_signal_handler(void); >> extern void set_sigstack(void *sig_stack, int size); >> extern void remove_sigstack(void); >> extern void set_handler(int sig); >> @@ -238,12 +239,16 @@ extern void um_early_printk(const char *s, unsigned >> int n); >> extern void os_fix_helper_signals(void); >> >> /* time.c */ >> -extern void idle_sleep(unsigned long long nsecs); >> -extern int set_interval(void); >> -extern int timer_one_shot(int ticks); >> -extern long long disable_timer(void); >> +extern void os_idle_sleep(unsigned long long nsecs); >> +extern int os_timer_create(void* timer); >> +extern int os_timer_set_interval(void* timer, void* its); >> +extern int os_timer_one_shot(int ticks); >> +extern long long os_timer_disable(void); >> +extern long os_timer_remain(void* timer); >> extern void uml_idle_timer(void); >> +extern long long os_persistent_clock_emulation(void); >> extern long long os_nsecs(void); >> +extern 
long long os_vnsecs(void); >> >> /* skas/mem.c */ >> extern long run_syscall_stub(struct mm_id * mm_idp, >> diff --git a/arch/um/include/shared/skas/stub-data.h >> b/arch/um/include/shared/skas/stub-data.h >> index f6ed92c..e09d8fd 100644 >> --- a/arch/um/include/shared/skas/stub-data.h >> +++ b/arch/um/include/shared/skas/stub-data.h >> @@ -6,12 +6,11 @@ >> #ifndef __STUB_DATA_H >> #define __STUB_DATA_H >> >> -#include <sys/time.h> >> +#include <time.h> >> >> struct stub_data { >> - long offset; >> + unsigned long offset; >> int fd; >> - struct itimerval timer; >> long err; >> }; >> >> diff --git a/arch/um/include/shared/timer-internal.h >> b/arch/um/include/shared/timer-internal.h >> new file mode 100644 >> index 0000000..03e6f21 >> --- /dev/null >> +++ b/arch/um/include/shared/timer-internal.h >> @@ -0,0 +1,13 @@ >> +/* >> + * Copyright (C) 2012 - 2014 Cisco Systems >> + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) >> + * Licensed under the GPL >> + */ >> + >> +#ifndef __TIMER_INTERNAL_H__ >> +#define __TIMER_INTERNAL_H__ >> + >> +#define TIMER_MULTIPLIER 256 >> +#define TIMER_MIN_DELTA 500 >> + >> +#endif >> diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c >> index 68b9119..2ce38c1 100644 >> --- a/arch/um/kernel/process.c >> +++ b/arch/um/kernel/process.c >> @@ -27,6 +27,7 @@ >> #include <kern_util.h> >> #include <os.h> >> #include <skas.h> >> +#include <timer-internal.h> >> >> /* >> * This is a per-cpu array. 
A processor only modifies its entry and it >> only >> @@ -201,11 +202,8 @@ void initial_thread_cb(void (*proc)(void *), void *arg) >> >> void arch_cpu_idle(void) >> { >> - unsigned long long nsecs; >> - >> cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); >> - nsecs = disable_timer(); >> - idle_sleep(nsecs); >> + os_idle_sleep(UM_NSEC_PER_SEC); >> local_irq_enable(); >> } >> >> diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c >> index 289771d..498148b 100644 >> --- a/arch/um/kernel/skas/clone.c >> +++ b/arch/um/kernel/skas/clone.c >> @@ -35,11 +35,6 @@ stub_clone_handler(void) >> if (err) >> goto out; >> >> - err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, >> - (long) &data->timer, 0); >> - if (err) >> - goto out; >> - > By removing this call from our clone stub, you change the way how SKAS0 > works. Please explain why this is needed. > >> remap_stack(data->fd, data->offset); >> goto done; >> >> diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c >> index fda1deb..42e2988 100644 >> --- a/arch/um/kernel/skas/mmu.c >> +++ b/arch/um/kernel/skas/mmu.c >> @@ -61,10 +61,12 @@ int init_new_context(struct task_struct *task, struct >> mm_struct *mm) >> if (current->mm != NULL && current->mm != &init_mm) >> from_mm = ¤t->mm->context; >> >> + block_signals(); >> if (from_mm) >> to_mm->id.u.pid = copy_context_skas0(stack, >> from_mm->id.u.pid); >> else to_mm->id.u.pid = start_userspace(stack); >> + unblock_signals(); > Why do we have to block signals here? 
> >> if (to_mm->id.u.pid < 0) { >> ret = to_mm->id.u.pid; >> diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c >> index 117568d..29f1125 100644 >> --- a/arch/um/kernel/time.c >> +++ b/arch/um/kernel/time.c >> @@ -1,4 +1,5 @@ >> /* >> + * Copyright (C) 2012-2014 Cisco Systems >> * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) >> * Licensed under the GPL >> */ >> @@ -7,11 +8,15 @@ >> #include <linux/init.h> >> #include <linux/interrupt.h> >> #include <linux/jiffies.h> >> +#include <linux/mm.h> >> +#include <linux/sched.h> >> +#include <linux/spinlock.h> >> #include <linux/threads.h> >> #include <asm/irq.h> >> #include <asm/param.h> >> #include <kern_util.h> >> #include <os.h> >> +#include <timer-internal.h> >> >> void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs >> *regs) >> { >> @@ -22,18 +27,20 @@ void timer_handler(int sig, struct siginfo *unused_si, >> struct uml_pt_regs *regs) >> local_irq_restore(flags); >> } >> >> -static void itimer_set_mode(enum clock_event_mode mode, >> +static void timer_set_mode(enum clock_event_mode mode, >> struct clock_event_device *evt) >> { >> switch (mode) { >> case CLOCK_EVT_MODE_PERIODIC: >> - set_interval(); >> + os_timer_set_interval(NULL, NULL); >> break; >> >> + case CLOCK_EVT_MODE_ONESHOT: >> + os_timer_one_shot(1); >> + >> case CLOCK_EVT_MODE_SHUTDOWN: >> case CLOCK_EVT_MODE_UNUSED: >> - case CLOCK_EVT_MODE_ONESHOT: >> - disable_timer(); >> + os_timer_disable(); >> break; >> >> case CLOCK_EVT_MODE_RESUME: >> @@ -41,68 +48,79 @@ static void itimer_set_mode(enum clock_event_mode mode, >> } >> } >> >> -static int itimer_next_event(unsigned long delta, >> +static int timer_next_event(unsigned long delta, >> struct clock_event_device *evt) >> { >> - return timer_one_shot(delta + 1); >> + return os_timer_one_shot(delta); > Why did you replace "delta + 1" by "delta"? 
> > >> } >> >> -static struct clock_event_device itimer_clockevent = { >> - .name = "itimer", >> +static struct clock_event_device timer_clockevent = { >> + .name = "posix-timer", >> .rating = 250, >> .cpumask = cpu_all_mask, >> .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, >> - .set_mode = itimer_set_mode, >> - .set_next_event = itimer_next_event, >> - .shift = 32, >> + .set_mode = timer_set_mode, >> + .set_next_event = timer_next_event, >> + .shift = 0, >> + .max_delta_ns = 0xffffffff, >> + .min_delta_ns = TIMER_MIN_DELTA, //microsecond resolution should be >> enough for anyone, same as 640K RAM >> .irq = 0, >> + .mult = 1, >> }; >> >> -static irqreturn_t um_timer(int irq, void *dev) >> +static irqreturn_t um_timer_irq(int irq, void *dev) >> { >> - (*itimer_clockevent.event_handler)(&itimer_clockevent); >> + if (get_current()->mm != NULL) >> + { >> + os_alarm_process(get_current()->mm->context.id.u.pid); >> + } >> + >> + (*timer_clockevent.event_handler)(&timer_clockevent); >> >> return IRQ_HANDLED; >> } >> >> -static cycle_t itimer_read(struct clocksource *cs) >> +static cycle_t timer_read(struct clocksource *cs) >> { >> - return os_nsecs() / 1000; >> + return os_nsecs() / TIMER_MULTIPLIER; >> } >> >> -static struct clocksource itimer_clocksource = { >> - .name = "itimer", >> +static struct clocksource timer_clocksource = { >> + .name = "timer", >> .rating = 300, >> - .read = itimer_read, >> + .read = timer_read, >> .mask = CLOCKSOURCE_MASK(64), >> .flags = CLOCK_SOURCE_IS_CONTINUOUS, >> }; >> >> -static void __init setup_itimer(void) >> +static void __init timer_setup(void) >> { >> int err; >> >> - err = request_irq(TIMER_IRQ, um_timer, 0, "timer", NULL); >> - if (err != 0) >> + err = request_irq(TIMER_IRQ, um_timer_irq, IRQF_TIMER, "hr timer", >> NULL); >> + if (err != 0) { >> printk(KERN_ERR "register_timer : request_irq failed - " >> "errno = %d\n", -err); >> + return; >> + } >> + >> + err = os_timer_create(NULL); >> + if (err != 0) { >> + 
printk(KERN_ERR "creation of timer failed - errno = %d\n", -err); >> + return; >> + } >> >> - itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32); >> - itimer_clockevent.max_delta_ns = >> - clockevent_delta2ns(60 * HZ, &itimer_clockevent); >> - itimer_clockevent.min_delta_ns = >> - clockevent_delta2ns(1, &itimer_clockevent); >> - err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC); >> + err = clocksource_register_hz(&timer_clocksource, >> NSEC_PER_SEC/TIMER_MULTIPLIER); >> if (err) { >> printk(KERN_ERR "clocksource_register_hz returned %d\n", err); >> return; >> } >> - clockevents_register_device(&itimer_clockevent); >> + clockevents_register_device(&timer_clockevent); >> } >> >> void read_persistent_clock(struct timespec *ts) >> { >> - long long nsecs = os_nsecs(); >> + long long nsecs = os_persistent_clock_emulation(); >> >> set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, >> nsecs % NSEC_PER_SEC); >> @@ -110,6 +128,6 @@ void read_persistent_clock(struct timespec *ts) >> >> void __init time_init(void) >> { >> - timer_init(); >> - late_time_init = setup_itimer; >> + timer_set_signal_handler(); >> + late_time_init = timer_setup; >> } >> diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h >> deleted file mode 100644 >> index 0dc2c9f..0000000 >> --- a/arch/um/os-Linux/internal.h >> +++ /dev/null >> @@ -1 +0,0 @@ >> -void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc); >> diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c >> index df9191a..6e36f0f 100644 >> --- a/arch/um/os-Linux/main.c >> +++ b/arch/um/os-Linux/main.c >> @@ -163,13 +163,13 @@ int __init main(int argc, char **argv, char **envp) >> >> /* >> * This signal stuff used to be in the reboot case. 
However, >> - * sometimes a SIGVTALRM can come in when we're halting (reproducably >> + * sometimes a timer signal can come in when we're halting (reproducably >> * when writing out gcov information, presumably because that takes >> * some time) and cause a segfault. >> */ >> >> - /* stop timers and set SIGVTALRM to be ignored */ >> - disable_timer(); >> + /* stop timers and set timer signal to be ignored */ >> + os_timer_disable(); >> >> /* disable SIGIO for the fds and set SIGIO to be ignored */ >> err = deactivate_all_fds(); >> diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c >> index 8408aba..f3bd983 100644 >> --- a/arch/um/os-Linux/process.c >> +++ b/arch/um/os-Linux/process.c >> @@ -89,6 +89,11 @@ int os_process_parent(int pid) >> return parent; >> } >> >> +void os_alarm_process(int pid) >> +{ >> + kill(pid, SIGALRM); >> +} >> + >> void os_stop_process(int pid) >> { >> kill(pid, SIGSTOP); >> diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c >> index 036d0db..e04a4cd 100644 >> --- a/arch/um/os-Linux/signal.c >> +++ b/arch/um/os-Linux/signal.c >> @@ -13,7 +13,6 @@ >> #include <kern_util.h> >> #include <os.h> >> #include <sysdep/mcontext.h> >> -#include "internal.h" >> >> void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { >> [SIGTRAP] = relay_signal, >> @@ -23,7 +22,8 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct >> uml_pt_regs *) = { >> [SIGBUS] = bus_handler, >> [SIGSEGV] = segv_handler, >> [SIGIO] = sigio_handler, >> - [SIGVTALRM] = timer_handler }; >> + [SIGALRM] = timer_handler >> +}; >> >> static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) >> { >> @@ -38,7 +38,7 @@ static void sig_handler_common(int sig, struct siginfo >> *si, mcontext_t *mc) >> } >> >> /* enable signals if sig isn't IRQ signal */ >> - if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) >> + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM)) >> unblock_signals(); >> >> 
(*sig_info[sig])(sig, si, &r); >> @@ -55,8 +55,8 @@ static void sig_handler_common(int sig, struct siginfo >> *si, mcontext_t *mc) >> #define SIGIO_BIT 0 >> #define SIGIO_MASK (1 << SIGIO_BIT) >> >> -#define SIGVTALRM_BIT 1 >> -#define SIGVTALRM_MASK (1 << SIGVTALRM_BIT) >> +#define SIGALRM_BIT 1 >> +#define SIGALRM_MASK (1 << SIGALRM_BIT) >> >> static int signals_enabled; >> static unsigned int signals_pending; >> @@ -78,36 +78,34 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t >> *mc) >> set_signals(enabled); >> } >> >> -static void real_alarm_handler(mcontext_t *mc) >> +static void timer_real_alarm_handler(mcontext_t *mc) >> { >> struct uml_pt_regs regs; >> >> if (mc != NULL) >> get_regs_from_mc(®s, mc); >> - regs.is_user = 0; >> - unblock_signals(); >> - timer_handler(SIGVTALRM, NULL, ®s); >> + timer_handler(SIGALRM, NULL, ®s); >> } >> >> -void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) >> +void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) >> { >> int enabled; >> >> enabled = signals_enabled; >> if (!signals_enabled) { >> - signals_pending |= SIGVTALRM_MASK; >> + signals_pending |= SIGALRM_MASK; >> return; >> } >> >> block_signals(); >> >> - real_alarm_handler(mc); >> + timer_real_alarm_handler(mc); >> set_signals(enabled); >> } >> >> -void timer_init(void) >> +void timer_set_signal_handler(void) >> { >> - set_handler(SIGVTALRM); >> + set_handler(SIGALRM); >> } >> >> void set_sigstack(void *sig_stack, int size) >> @@ -131,10 +129,9 @@ static void (*handlers[_NSIG])(int sig, struct siginfo >> *si, mcontext_t *mc) = { >> >> [SIGIO] = sig_handler, >> [SIGWINCH] = sig_handler, >> - [SIGVTALRM] = alarm_handler >> + [SIGALRM] = timer_alarm_handler >> }; >> >> - >> static void hard_handler(int sig, siginfo_t *si, void *p) >> { >> struct ucontext *uc = p; >> @@ -188,9 +185,9 @@ void set_handler(int sig) >> >> /* block irq ones */ >> sigemptyset(&action.sa_mask); >> - sigaddset(&action.sa_mask, SIGVTALRM); >> 
sigaddset(&action.sa_mask, SIGIO); >> sigaddset(&action.sa_mask, SIGWINCH); >> + sigaddset(&action.sa_mask, SIGALRM); >> >> if (sig == SIGSEGV) >> flags |= SA_NODEFER; >> @@ -283,8 +280,8 @@ void unblock_signals(void) >> if (save_pending & SIGIO_MASK) >> sig_handler_common(SIGIO, NULL, NULL); >> >> - if (save_pending & SIGVTALRM_MASK) >> - real_alarm_handler(NULL); >> + if (save_pending & SIGALRM_MASK) >> + timer_real_alarm_handler(NULL); >> } >> } >> >> diff --git a/arch/um/os-Linux/skas/process.c >> b/arch/um/os-Linux/skas/process.c >> index 3dddedb..5ae4752 100644 >> --- a/arch/um/os-Linux/skas/process.c >> +++ b/arch/um/os-Linux/skas/process.c >> @@ -45,7 +45,7 @@ static int ptrace_dump_regs(int pid) >> * Signals that are OK to receive in the stub - we'll just continue it. >> * SIGWINCH will happen when UML is inside a detached screen. >> */ >> -#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) >> +#define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH)) >> >> /* Signals that the stub will finish with - anything else is an error */ >> #define STUB_DONE_MASK (1 << SIGTRAP) >> @@ -179,19 +179,13 @@ extern char __syscall_stub_start[]; >> static int userspace_tramp(void *stack) >> { >> void *addr; >> - int err, fd; >> + int fd; >> unsigned long long offset; >> >> ptrace(PTRACE_TRACEME, 0, 0, 0); >> >> signal(SIGTERM, SIG_DFL); >> signal(SIGWINCH, SIG_IGN); >> - err = set_interval(); >> - if (err) { >> - printk(UM_KERN_ERR "userspace_tramp - setting timer failed, " >> - "errno = %d\n", err); >> - exit(1); >> - } >> >> /* >> * This has a pte, but it can't be mapped in with the usual >> @@ -282,7 +276,7 @@ int start_userspace(unsigned long stub_stack) >> "errno = %d\n", errno); >> goto out_kill; >> } >> - } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); >> + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); >> >> if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { >> err = -EINVAL; >> @@ -315,8 +309,6 @@ int 
start_userspace(unsigned long stub_stack) >> >> void userspace(struct uml_pt_regs *regs) >> { >> - struct itimerval timer; >> - unsigned long long nsecs, now; >> int err, status, op, pid = userspace_pid[0]; >> /* To prevent races if using_sysemu changes under us.*/ >> int local_using_sysemu; >> @@ -325,13 +317,8 @@ void userspace(struct uml_pt_regs *regs) >> /* Handle any immediate reschedules or signals */ >> interrupt_end(); >> >> - if (getitimer(ITIMER_VIRTUAL, &timer)) >> - printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno); >> - nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + >> - timer.it_value.tv_usec * UM_NSEC_PER_USEC; >> - nsecs += os_nsecs(); >> - >> while (1) { >> + >> /* >> * This can legitimately fail if the process loads a >> * bogus value into a segment register. It will >> @@ -401,18 +388,7 @@ void userspace(struct uml_pt_regs *regs) >> case SIGTRAP: >> relay_signal(SIGTRAP, (struct siginfo *)&si, >> regs); >> break; >> - case SIGVTALRM: >> - now = os_nsecs(); >> - if (now < nsecs) >> - break; >> - block_signals(); >> - (*sig_info[sig])(sig, (struct siginfo *)&si, >> regs); >> - unblock_signals(); >> - nsecs = timer.it_value.tv_sec * >> - UM_NSEC_PER_SEC + >> - timer.it_value.tv_usec * >> - UM_NSEC_PER_USEC; >> - nsecs += os_nsecs(); >> + case SIGALRM: >> break; >> case SIGIO: >> case SIGILL: >> @@ -460,7 +436,6 @@ __initcall(init_thread_regs); >> >> int copy_context_skas0(unsigned long new_stack, int pid) >> { >> - struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ }; >> int err; >> unsigned long current_stack = current_stub_stack(); >> struct stub_data *data = (struct stub_data *) current_stack; >> @@ -472,11 +447,10 @@ int copy_context_skas0(unsigned long new_stack, int >> pid) >> * prepare offset and fd of child's stack as argument for parent's >> * and child's mmap2 calls >> */ >> - *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), >> - .fd = new_fd, >> - .timer = ((struct itimerval) >> - { 
.it_value = tv, >> - .it_interval = tv }) }); >> + *data = ((struct stub_data) { >> + .offset = MMAP_OFFSET(new_offset), >> + .fd = new_fd >> + }); > As written above, you change the way how SKAS0 works, this needs > much more explaination. > >> err = ptrace_setregs(pid, thread_regs); >> if (err < 0) { >> diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c >> index e9824d5..0e2bb7d 100644 >> --- a/arch/um/os-Linux/time.c >> +++ b/arch/um/os-Linux/time.c >> @@ -1,4 +1,5 @@ >> /* >> + * Copyright (C) 2012-2014 Cisco Systems >> * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com) >> * Licensed under the GPL >> */ >> @@ -10,177 +11,172 @@ >> #include <sys/time.h> >> #include <kern_util.h> >> #include <os.h> >> -#include "internal.h" >> +#include <string.h> >> +#include <timer-internal.h> >> >> -int set_interval(void) >> -{ >> - int usec = UM_USEC_PER_SEC / UM_HZ; >> - struct itimerval interval = ((struct itimerval) { { 0, usec }, >> - { 0, usec } }); >> - >> - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) >> - return -errno; >> +static timer_t event_high_res_timer = 0; >> >> - return 0; >> +static inline long long timeval_to_ns(const struct timeval *tv) >> +{ >> + return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + >> + tv->tv_usec * UM_NSEC_PER_USEC; >> } >> >> -int timer_one_shot(int ticks) >> +static inline long long timespec_to_ns(const struct timespec *ts) >> { >> - unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ; >> - unsigned long sec = usec / UM_USEC_PER_SEC; >> - struct itimerval interval; >> - >> - usec %= UM_USEC_PER_SEC; >> - interval = ((struct itimerval) { { 0, 0 }, { sec, usec } }); >> + return ((long long) ts->tv_sec * UM_NSEC_PER_SEC) + >> + ts->tv_nsec; >> +} >> >> - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) >> - return -errno; >> +long long os_persistent_clock_emulation (void) { >> + struct timespec realtime_tp; >> >> - return 0; >> + clock_gettime(CLOCK_REALTIME, &realtime_tp); >> + return 
timespec_to_ns(&realtime_tp); >> } >> >> /** >> - * timeval_to_ns - Convert timeval to nanoseconds >> - * @ts: pointer to the timeval variable to be converted >> - * >> - * Returns the scalar nanosecond representation of the timeval >> - * parameter. >> - * >> - * Ripped from linux/time.h because it's a kernel header, and thus >> - * unusable from here. >> + * os_timer_create() - create an new posix (interval) timer >> */ >> -static inline long long timeval_to_ns(const struct timeval *tv) >> -{ >> - return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + >> - tv->tv_usec * UM_NSEC_PER_USEC; >> +int os_timer_create(void* timer) { >> + >> + timer_t* t = timer; >> + >> + if(t == NULL) { >> + t = &event_high_res_timer; >> + } >> + >> + if (timer_create( >> + CLOCK_MONOTONIC, >> + NULL, >> + t) == -1) { >> + return -1; >> + } >> + return 0; >> } >> >> -long long disable_timer(void) >> +int os_timer_set_interval(void* timer, void* i) >> { >> - struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } }); >> - long long remain, max = UM_NSEC_PER_SEC / UM_HZ; >> + struct itimerspec its; >> + unsigned long long nsec; >> + timer_t* t = timer; >> + struct itimerspec* its_in = i; >> >> - if (setitimer(ITIMER_VIRTUAL, &time, &time) < 0) >> - printk(UM_KERN_ERR "disable_timer - setitimer failed, " >> - "errno = %d\n", errno); >> + if(t == NULL) { >> + t = &event_high_res_timer; >> + } >> >> - remain = timeval_to_ns(&time.it_value); >> - if (remain > max) >> - remain = max; >> + nsec = UM_NSEC_PER_SEC / UM_HZ; >> >> - return remain; >> -} >> + if(its_in != NULL) { >> + its.it_value.tv_sec = its_in->it_value.tv_sec; >> + its.it_value.tv_nsec = its_in->it_value.tv_nsec; >> + } else { >> + its.it_value.tv_sec = 0; >> + its.it_value.tv_nsec = nsec; >> + } >> >> -long long os_nsecs(void) >> -{ >> - struct timeval tv; >> + its.it_interval.tv_sec = 0; >> + its.it_interval.tv_nsec = nsec; >> >> - gettimeofday(&tv, NULL); >> - return timeval_to_ns(&tv); >> -} >> + if(timer_settime(*t, 
0, &its, NULL) == -1) { >> + return -errno; >> + } >> >> -#ifdef UML_CONFIG_NO_HZ_COMMON >> -static int after_sleep_interval(struct timespec *ts) >> -{ >> return 0; >> } >> >> -static void deliver_alarm(void) >> +/** >> + * os_timer_remain() - returns the remaining nano seconds of the given >> interval >> + * timer >> + * Because this is the remaining time of an interval timer, which >> correspondends >> + * to HZ, this value can never be bigger than one second. Just >> + * the nanosecond part of the timer is returned. >> + * The returned time is relative to the start time of the interval timer. >> + * Return an negative value in an error case. >> + */ >> +long os_timer_remain(void* timer) >> { >> - alarm_handler(SIGVTALRM, NULL, NULL); >> -} >> + struct itimerspec its; >> + timer_t* t = timer; >> >> -static unsigned long long sleep_time(unsigned long long nsecs) >> -{ >> - return nsecs; >> -} >> + if(t == NULL) { >> + t = &event_high_res_timer; >> + } >> >> -#else >> -unsigned long long last_tick; >> -unsigned long long skew; >> + if(timer_gettime(t, &its) == -1) { >> + return -errno; >> + } >> + >> + return its.it_value.tv_nsec; >> +} >> >> -static void deliver_alarm(void) >> +int os_timer_one_shot(int ticks) >> { >> - unsigned long long this_tick = os_nsecs(); >> - int one_tick = UM_NSEC_PER_SEC / UM_HZ; >> + struct itimerspec its; >> + unsigned long long nsec; >> + unsigned long sec; >> >> - /* Protection against the host's time going backwards */ >> - if ((last_tick != 0) && (this_tick < last_tick)) >> - this_tick = last_tick; >> + nsec = (ticks + 1); >> + sec = nsec / UM_NSEC_PER_SEC; >> + nsec = nsec % UM_NSEC_PER_SEC; >> >> - if (last_tick == 0) >> - last_tick = this_tick - one_tick; >> + its.it_value.tv_sec = nsec / UM_NSEC_PER_SEC; >> + its.it_value.tv_nsec = nsec; >> >> - skew += this_tick - last_tick; >> + its.it_interval.tv_sec = 0; >> + its.it_interval.tv_nsec = 0; // we cheat here >> >> - while (skew >= one_tick) { >> - alarm_handler(SIGVTALRM, NULL, 
NULL); >> - skew -= one_tick; >> - } >> - >> - last_tick = this_tick; >> + timer_settime(event_high_res_timer, 0, &its, NULL); >> + return 0; >> } >> >> -static unsigned long long sleep_time(unsigned long long nsecs) >> +/** >> + * os_timer_disable() - disable the posix (interval) timer >> + * Returns the remaining interval timer time in nanoseconds >> + */ >> +long long os_timer_disable(void) >> { >> - return nsecs > skew ? nsecs - skew : 0; >> + struct itimerspec its; >> + >> + memset(&its, 0, sizeof(struct itimerspec)); >> + timer_settime(event_high_res_timer, 0, &its, &its); >> + >> + return its.it_value.tv_sec * UM_NSEC_PER_SEC + its.it_value.tv_nsec; >> } >> >> -static inline long long timespec_to_us(const struct timespec *ts) >> +long long os_vnsecs(void) >> { >> - return ((long long) ts->tv_sec * UM_USEC_PER_SEC) + >> - ts->tv_nsec / UM_NSEC_PER_USEC; >> + struct timespec ts; >> + >> + clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&ts); >> + return timespec_to_ns(&ts); >> } >> >> -static int after_sleep_interval(struct timespec *ts) >> +long long os_nsecs(void) >> { >> - int usec = UM_USEC_PER_SEC / UM_HZ; >> - long long start_usecs = timespec_to_us(ts); >> - struct timeval tv; >> - struct itimerval interval; >> - >> - /* >> - * It seems that rounding can increase the value returned from >> - * setitimer to larger than the one passed in. Over time, >> - * this will cause the remaining time to be greater than the >> - * tick interval. If this happens, then just reduce the first >> - * tick to the interval value. 
>> - */ >> - if (start_usecs > usec) >> - start_usecs = usec; >> - >> - start_usecs -= skew / UM_NSEC_PER_USEC; >> - if (start_usecs < 0) >> - start_usecs = 0; >> - >> - tv = ((struct timeval) { .tv_sec = start_usecs / UM_USEC_PER_SEC, >> - .tv_usec = start_usecs % UM_USEC_PER_SEC }); >> - interval = ((struct itimerval) { { 0, usec }, tv }); >> - >> - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) >> - return -errno; >> + struct timespec ts; >> >> - return 0; >> + clock_gettime(CLOCK_MONOTONIC,&ts); >> + return timespec_to_ns(&ts); >> } >> -#endif >> >> -void idle_sleep(unsigned long long nsecs) >> +/** >> + * os_idle_sleep() - sleep for a given time of nsecs >> + * @nsecs: nanoseconds to sleep >> + */ >> +void os_idle_sleep(unsigned long long nsecs) >> { >> struct timespec ts; >> >> - /* >> - * nsecs can come in as zero, in which case, this starts a >> - * busy loop. To prevent this, reset nsecs to the tick >> - * interval if it is zero. >> - */ >> - if (nsecs == 0) >> - nsecs = UM_NSEC_PER_SEC / UM_HZ; >> - >> - nsecs = sleep_time(nsecs); >> - ts = ((struct timespec) { .tv_sec = nsecs / UM_NSEC_PER_SEC, >> - .tv_nsec = nsecs % UM_NSEC_PER_SEC }); >> - >> - if (nanosleep(&ts, &ts) == 0) >> - deliver_alarm(); >> - after_sleep_interval(&ts); >> + if (nsecs <= 0) { >> + return; >> + } >> + >> + ts = ((struct timespec) { >> + .tv_sec = nsecs / UM_NSEC_PER_SEC, >> + .tv_nsec = nsecs % UM_NSEC_PER_SEC >> + }); >> + >> + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL); >> } >> > Thanks, > //richard > > ------------------------------------------------------------------------------ > _______________________________________________ > User-mode-linux-devel mailing list > User-mode-linux-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel > ------------------------------------------------------------------------------ _______________________________________________ User-mode-linux-devel mailing list 
User-mode-linux-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel