Re: [uml-devel] [PATCH] um: Switch clocksource to hrtimers

Thomas Meyer Tue, 18 Aug 2015 09:05:56 -0700

Am 15.08.2015 10:15 vorm. schrieb Richard Weinberger <[email protected]>:
>
> Am 09.08.2015 um 19:53 schrieb Thomas Meyer: 
> > Switch the UML clocksource from interval timers to posix interval timers 
> > and 
> > move to a monotonic timer. 
> > 
> > This fixes suspend&resume related timer issues and improves network 
> > performance 
> > as TCP state machines are now fed with the correct time; also correct QoS 
> > and 
> > traffic shaping. 
>
> The patch is rather big. Please describe in your commit message how exactly 
> it works and why. 
> It changes many internals.


Will do so!

>
> > Signed-off-by: Thomas Meyer <[email protected]> 
>
> Please honor also the original author of the patch. 

Sure! Give credit where credit is due!

>
> > --- 
> >  arch/um/Makefile                        |   2 +- 
> >  arch/um/include/shared/os.h             |  15 +- 
> >  arch/um/include/shared/skas/stub-data.h |   5 +- 
> >  arch/um/include/shared/timer-internal.h |  13 ++ 
> >  arch/um/kernel/process.c                |   6 +- 
> >  arch/um/kernel/skas/clone.c             |   5 - 
> >  arch/um/kernel/skas/mmu.c               |   2 + 
> >  arch/um/kernel/time.c                   |  80 +++++++---- 
> >  arch/um/os-Linux/internal.h             |   1 - 
> >  arch/um/os-Linux/main.c                 |   6 +- 
> >  arch/um/os-Linux/process.c              |   5 + 
> >  arch/um/os-Linux/signal.c               |  35 +++-- 
> >  arch/um/os-Linux/skas/process.c         |  44 ++---- 
> >  arch/um/os-Linux/time.c                 | 248 
> >++++++++++++++++---------------- 
> >  14 files changed, 234 insertions(+), 233 deletions(-) 
> >  create mode 100644 arch/um/include/shared/timer-internal.h 
> >  delete mode 100644 arch/um/os-Linux/internal.h 
> > 
> > diff --git a/arch/um/Makefile b/arch/um/Makefile 
> > index 098ab33..eb79b4b 100644 
> > --- a/arch/um/Makefile 
> > +++ b/arch/um/Makefile 
> > @@ -131,7 +131,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT) 
> >  # The wrappers will select whether using "malloc" or the kernel allocator. 
> >  LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc 
> >  
> > -LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) 
> > +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt 
> >  
> >  # Used by link-vmlinux.sh which has special support for um link 
> >  export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) 
> > diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h 
> > index ad3fa3a..7519c98 100644 
> > --- a/arch/um/include/shared/os.h 
> > +++ b/arch/um/include/shared/os.h 
> > @@ -183,6 +183,7 @@ extern int create_mem_file(unsigned long long len); 
> >  /* process.c */ 
> >  extern unsigned long os_process_pc(int pid); 
> >  extern int os_process_parent(int pid); 
> > +extern void os_alarm_process(int pid); 
> >  extern void os_stop_process(int pid); 
> >  extern void os_kill_process(int pid, int reap_child); 
> >  extern void os_kill_ptraced_process(int pid, int reap_child); 
> > @@ -217,7 +218,7 @@ extern int set_umid(char *name); 
> >  extern char *get_umid(void); 
> >  
> >  /* signal.c */ 
> > -extern void timer_init(void); 
> > +extern void timer_set_signal_handler(void); 
> >  extern void set_sigstack(void *sig_stack, int size); 
> >  extern void remove_sigstack(void); 
> >  extern void set_handler(int sig); 
> > @@ -238,12 +239,16 @@ extern void um_early_printk(const char *s, unsigned 
> > int n); 
> >  extern void os_fix_helper_signals(void); 
> >  
> >  /* time.c */ 
> > -extern void idle_sleep(unsigned long long nsecs); 
> > -extern int set_interval(void); 
> > -extern int timer_one_shot(int ticks); 
> > -extern long long disable_timer(void); 
> > +extern void os_idle_sleep(unsigned long long nsecs); 
> > +extern int os_timer_create(void* timer); 
> > +extern int os_timer_set_interval(void* timer, void* its); 
> > +extern int os_timer_one_shot(int ticks); 
> > +extern long long os_timer_disable(void); 
> > +extern long os_timer_remain(void* timer); 
> >  extern void uml_idle_timer(void); 
> > +extern long long os_persistent_clock_emulation(void); 
> >  extern long long os_nsecs(void); 
> > +extern long long os_vnsecs(void); 
> >  
> >  /* skas/mem.c */ 
> >  extern long run_syscall_stub(struct mm_id * mm_idp, 
> > diff --git a/arch/um/include/shared/skas/stub-data.h 
> > b/arch/um/include/shared/skas/stub-data.h 
> > index f6ed92c..e09d8fd 100644 
> > --- a/arch/um/include/shared/skas/stub-data.h 
> > +++ b/arch/um/include/shared/skas/stub-data.h 
> > @@ -6,12 +6,11 @@ 
> >  #ifndef __STUB_DATA_H 
> >  #define __STUB_DATA_H 
> >  
> > -#include <sys/time.h> 
> > +#include <time.h> 
> >  
> >  struct stub_data { 
> > - long offset; 
> > + unsigned long offset; 
> >  int fd; 
> > - struct itimerval timer; 
> >  long err; 
> >  }; 
> >  
> > diff --git a/arch/um/include/shared/timer-internal.h 
> > b/arch/um/include/shared/timer-internal.h 
> > new file mode 100644 
> > index 0000000..03e6f21 
> > --- /dev/null 
> > +++ b/arch/um/include/shared/timer-internal.h 
> > @@ -0,0 +1,13 @@ 
> > +/* 
> > + * Copyright (C) 2012 - 2014 Cisco Systems 
> > + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 
> > + * Licensed under the GPL 
> > + */ 
> > + 
> > +#ifndef __TIMER_INTERNAL_H__ 
> > +#define __TIMER_INTERNAL_H__ 
> > + 
> > +#define TIMER_MULTIPLIER 256 
> > +#define TIMER_MIN_DELTA  500 
> > + 
> > +#endif 
> > diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c 
> > index 68b9119..2ce38c1 100644 
> > --- a/arch/um/kernel/process.c 
> > +++ b/arch/um/kernel/process.c 
> > @@ -27,6 +27,7 @@ 
> >  #include <kern_util.h> 
> >  #include <os.h> 
> >  #include <skas.h> 
> > +#include <timer-internal.h> 
> >  
> >  /* 
> >   * This is a per-cpu array.  A processor only modifies its entry and it 
> >only 
> > @@ -201,11 +202,8 @@ void initial_thread_cb(void (*proc)(void *), void 
> > *arg) 
> >  
> >  void arch_cpu_idle(void) 
> >  { 
> > - unsigned long long nsecs; 
> > - 
> >  cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); 
> > - nsecs = disable_timer(); 
> > - idle_sleep(nsecs); 
> > + os_idle_sleep(UM_NSEC_PER_SEC); 
> >  local_irq_enable(); 
> >  } 
> >  
> > diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c 
> > index 289771d..498148b 100644 
> > --- a/arch/um/kernel/skas/clone.c 
> > +++ b/arch/um/kernel/skas/clone.c 
> > @@ -35,11 +35,6 @@ stub_clone_handler(void) 
> >  if (err) 
> >  goto out; 
> >  
> > - err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, 
> > -     (long) &data->timer, 0); 
> > - if (err) 
> > - goto out; 
> > - 
>
> By removing this call from our clone stub, you change the way how SKAS0 
> works. Please explain why this is needed. 

Yes. Before this patch, each userspace process had its own itimer. After this
patch, only the UML process that runs the kernel gets a timer tick. The
kernel then signals the currently active userspace task about the timer
event.

>
> >  remap_stack(data->fd, data->offset); 
> >  goto done; 
> >  
> > diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c 
> > index fda1deb..42e2988 100644 
> > --- a/arch/um/kernel/skas/mmu.c 
> > +++ b/arch/um/kernel/skas/mmu.c 
> > @@ -61,10 +61,12 @@ int init_new_context(struct task_struct *task, struct 
> > mm_struct *mm) 
> >  if (current->mm != NULL && current->mm != &init_mm) 
> >  from_mm = &current->mm->context; 
> >  
> > + block_signals(); 
> >  if (from_mm) 
> >  to_mm->id.u.pid = copy_context_skas0(stack, 
> >       from_mm->id.u.pid); 
> >  else to_mm->id.u.pid = start_userspace(stack); 
> > + unblock_signals(); 
>
> Why do we have to block signals here? 

There is a small time window in which a userspace process that is forking is
running in the stub code. If the process receives a timer signal during that
window, it gets confused and ends up in a loop. So the quick fix was to disable
the timer interrupts whenever the userspace process may be entering this stub
code.
>
> >  if (to_mm->id.u.pid < 0) { 
> >  ret = to_mm->id.u.pid; 
> > diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c 
> > index 117568d..29f1125 100644 
> > --- a/arch/um/kernel/time.c 
> > +++ b/arch/um/kernel/time.c 
> > @@ -1,4 +1,5 @@ 
> >  /* 
> > + * Copyright (C) 2012-2014 Cisco Systems 
> >   * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 
> >   * Licensed under the GPL 
> >   */ 
> > @@ -7,11 +8,15 @@ 
> >  #include <linux/init.h> 
> >  #include <linux/interrupt.h> 
> >  #include <linux/jiffies.h> 
> > +#include <linux/mm.h> 
> > +#include <linux/sched.h> 
> > +#include <linux/spinlock.h> 
> >  #include <linux/threads.h> 
> >  #include <asm/irq.h> 
> >  #include <asm/param.h> 
> >  #include <kern_util.h> 
> >  #include <os.h> 
> > +#include <timer-internal.h> 
> >  
> >  void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs 
> >*regs) 
> >  { 
> > @@ -22,18 +27,20 @@ void timer_handler(int sig, struct siginfo *unused_si, 
> > struct uml_pt_regs *regs) 
> >  local_irq_restore(flags); 
> >  } 
> >  
> > -static void itimer_set_mode(enum clock_event_mode mode, 
> > +static void timer_set_mode(enum clock_event_mode mode, 
> >      struct clock_event_device *evt) 
> >  { 
> >  switch (mode) { 
> >  case CLOCK_EVT_MODE_PERIODIC: 
> > - set_interval(); 
> > + os_timer_set_interval(NULL, NULL); 
> >  break; 
> >  
> > + case CLOCK_EVT_MODE_ONESHOT: 
> > + os_timer_one_shot(1); 
> > + 
> >  case CLOCK_EVT_MODE_SHUTDOWN: 
> >  case CLOCK_EVT_MODE_UNUSED: 
> > - case CLOCK_EVT_MODE_ONESHOT: 
> > - disable_timer(); 
> > + os_timer_disable(); 
> >  break; 
> >  
> >  case CLOCK_EVT_MODE_RESUME: 
> > @@ -41,68 +48,79 @@ static void itimer_set_mode(enum clock_event_mode mode, 
> >  } 
> >  } 
> >  
> > -static int itimer_next_event(unsigned long delta, 
> > +static int timer_next_event(unsigned long delta, 
> >       struct clock_event_device *evt) 
> >  { 
> > - return timer_one_shot(delta + 1); 
> > + return os_timer_one_shot(delta); 
>
> Why did you replace "delta + 1" by "delta"? 

I think this comes from Anton's original patch and, AFAIU, it was a hack to
guarantee progress with the itimer-based solution.
This hack is no longer needed, as the new POSIX interval timers are monotonic
and always progress correctly!

>
>
> >  } 
> >  
> > -static struct clock_event_device itimer_clockevent = { 
> > - .name = "itimer", 
> > +static struct clock_event_device timer_clockevent = { 
> > + .name = "posix-timer", 
> >  .rating = 250, 
> >  .cpumask = cpu_all_mask, 
> >  .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, 
> > - .set_mode = itimer_set_mode, 
> > - .set_next_event = itimer_next_event, 
> > - .shift = 32, 
> > + .set_mode = timer_set_mode, 
> > + .set_next_event = timer_next_event, 
> > + .shift = 0, 
> > + .max_delta_ns = 0xffffffff, 
> > + .min_delta_ns = TIMER_MIN_DELTA, //microsecond resolution should be 
> > enough for anyone, same as 640K RAM 
> >  .irq = 0, 
> > + .mult = 1, 
> >  }; 
> >  
> > -static irqreturn_t um_timer(int irq, void *dev) 
> > +static irqreturn_t um_timer_irq(int irq, void *dev) 
> >  { 
> > - (*itimer_clockevent.event_handler)(&itimer_clockevent); 
> > + if (get_current()->mm != NULL) 
> > + { 
> > + os_alarm_process(get_current()->mm->context.id.u.pid); 
> > + } 
> > + 
> > + (*timer_clockevent.event_handler)(&timer_clockevent); 
> >  
> >  return IRQ_HANDLED; 
> >  } 
> >  
> > -static cycle_t itimer_read(struct clocksource *cs) 
> > +static cycle_t timer_read(struct clocksource *cs) 
> >  { 
> > - return os_nsecs() / 1000; 
> > + return os_nsecs() / TIMER_MULTIPLIER; 
> >  } 
> >  
> > -static struct clocksource itimer_clocksource = { 
> > - .name = "itimer", 
> > +static struct clocksource timer_clocksource = { 
> > + .name = "timer", 
> >  .rating = 300, 
> > - .read = itimer_read, 
> > + .read = timer_read, 
> >  .mask = CLOCKSOURCE_MASK(64), 
> >  .flags = CLOCK_SOURCE_IS_CONTINUOUS, 
> >  }; 
> >  
> > -static void __init setup_itimer(void) 
> > +static void __init timer_setup(void) 
> >  { 
> >  int err; 
> >  
> > - err = request_irq(TIMER_IRQ, um_timer, 0, "timer", NULL); 
> > - if (err != 0) 
> > + err = request_irq(TIMER_IRQ, um_timer_irq, IRQF_TIMER, "hr timer", NULL); 
> > + if (err != 0) { 
> >  printk(KERN_ERR "register_timer : request_irq failed - " 
> >         "errno = %d\n", -err); 
> > + return; 
> > +    } 
> > + 
> > +    err = os_timer_create(NULL); 
> > +    if (err != 0) { 
> > +        printk(KERN_ERR "creation of timer failed - errno = %d\n", -err); 
> > +        return; 
> > +    } 
> >  
> > - itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32); 
> > - itimer_clockevent.max_delta_ns = 
> > - clockevent_delta2ns(60 * HZ, &itimer_clockevent); 
> > - itimer_clockevent.min_delta_ns = 
> > - clockevent_delta2ns(1, &itimer_clockevent); 
> > - err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC); 
> > + err = clocksource_register_hz(&timer_clocksource, 
> > NSEC_PER_SEC/TIMER_MULTIPLIER); 
> >  if (err) { 
> >  printk(KERN_ERR "clocksource_register_hz returned %d\n", err); 
> >  return; 
> >  } 
> > - clockevents_register_device(&itimer_clockevent); 
> > + clockevents_register_device(&timer_clockevent); 
> >  } 
> >  
> >  void read_persistent_clock(struct timespec *ts) 
> >  { 
> > - long long nsecs = os_nsecs(); 
> > + long long nsecs = os_persistent_clock_emulation(); 
> >  
> >  set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, 
> >  nsecs % NSEC_PER_SEC); 
> > @@ -110,6 +128,6 @@ void read_persistent_clock(struct timespec *ts) 
> >  
> >  void __init time_init(void) 
> >  { 
> > - timer_init(); 
> > - late_time_init = setup_itimer; 
> > + timer_set_signal_handler(); 
> > + late_time_init = timer_setup; 
> >  } 
> > diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h 
> > deleted file mode 100644 
> > index 0dc2c9f..0000000 
> > --- a/arch/um/os-Linux/internal.h 
> > +++ /dev/null 
> > @@ -1 +0,0 @@ 
> > -void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc); 
> > diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c 
> > index df9191a..6e36f0f 100644 
> > --- a/arch/um/os-Linux/main.c 
> > +++ b/arch/um/os-Linux/main.c 
> > @@ -163,13 +163,13 @@ int __init main(int argc, char **argv, char **envp) 
> >  
> >  /* 
> >  * This signal stuff used to be in the reboot case.  However, 
> > - * sometimes a SIGVTALRM can come in when we're halting (reproducably 
> > + * sometimes a timer signal can come in when we're halting (reproducably 
> >  * when writing out gcov information, presumably because that takes 
> >  * some time) and cause a segfault. 
> >  */ 
> >  
> > - /* stop timers and set SIGVTALRM to be ignored */ 
> > - disable_timer(); 
> > + /* stop timers and set timer signal to be ignored */ 
> > + os_timer_disable(); 
> >  
> >  /* disable SIGIO for the fds and set SIGIO to be ignored */ 
> >  err = deactivate_all_fds(); 
> > diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c 
> > index 8408aba..f3bd983 100644 
> > --- a/arch/um/os-Linux/process.c 
> > +++ b/arch/um/os-Linux/process.c 
> > @@ -89,6 +89,11 @@ int os_process_parent(int pid) 
> >  return parent; 
> >  } 
> >  
> > +void os_alarm_process(int pid) 
> > +{ 
> > + kill(pid, SIGALRM); 
> > +} 
> > + 
> >  void os_stop_process(int pid) 
> >  { 
> >  kill(pid, SIGSTOP); 
> > diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c 
> > index 036d0db..e04a4cd 100644 
> > --- a/arch/um/os-Linux/signal.c 
> > +++ b/arch/um/os-Linux/signal.c 
> > @@ -13,7 +13,6 @@ 
> >  #include <kern_util.h> 
> >  #include <os.h> 
> >  #include <sysdep/mcontext.h> 
> > -#include "internal.h" 
> >  
> >  void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { 
> >  [SIGTRAP] = relay_signal, 
> > @@ -23,7 +22,8 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct 
> > uml_pt_regs *) = { 
> >  [SIGBUS] = bus_handler, 
> >  [SIGSEGV] = segv_handler, 
> >  [SIGIO] = sigio_handler, 
> > - [SIGVTALRM] = timer_handler }; 
> > + [SIGALRM] = timer_handler 
> > +}; 
> >  
> >  static void sig_handler_common(int sig, struct siginfo *si, mcontext_t 
> >*mc) 
> >  { 
> > @@ -38,7 +38,7 @@ static void sig_handler_common(int sig, struct siginfo 
> > *si, mcontext_t *mc) 
> >  } 
> >  
> >  /* enable signals if sig isn't IRQ signal */ 
> > - if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) 
> > + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM)) 
> >  unblock_signals(); 
> >  
> >  (*sig_info[sig])(sig, si, &r); 
> > @@ -55,8 +55,8 @@ static void sig_handler_common(int sig, struct siginfo 
> > *si, mcontext_t *mc) 
> >  #define SIGIO_BIT 0 
> >  #define SIGIO_MASK (1 << SIGIO_BIT) 
> >  
> > -#define SIGVTALRM_BIT 1 
> > -#define SIGVTALRM_MASK (1 << SIGVTALRM_BIT) 
> > +#define SIGALRM_BIT 1 
> > +#define SIGALRM_MASK (1 << SIGALRM_BIT) 
> >  
> >  static int signals_enabled; 
> >  static unsigned int signals_pending; 
> > @@ -78,36 +78,34 @@ void sig_handler(int sig, struct siginfo *si, 
> > mcontext_t *mc) 
> >  set_signals(enabled); 
> >  } 
> >  
> > -static void real_alarm_handler(mcontext_t *mc) 
> > +static void timer_real_alarm_handler(mcontext_t *mc) 
> >  { 
> >  struct uml_pt_regs regs; 
> >  
> >  if (mc != NULL) 
> >  get_regs_from_mc(&regs, mc); 
> > - regs.is_user = 0; 
> > - unblock_signals(); 
> > - timer_handler(SIGVTALRM, NULL, &regs); 
> > + timer_handler(SIGALRM, NULL, &regs); 
> >  } 
> >  
> > -void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) 
> > +void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t 
> > *mc) 
> >  { 
> >  int enabled; 
> >  
> >  enabled = signals_enabled; 
> >  if (!signals_enabled) { 
> > - signals_pending |= SIGVTALRM_MASK; 
> > + signals_pending |= SIGALRM_MASK; 
> >  return; 
> >  } 
> >  
> >  block_signals(); 
> >  
> > - real_alarm_handler(mc); 
> > + timer_real_alarm_handler(mc); 
> >  set_signals(enabled); 
> >  } 
> >  
> > -void timer_init(void) 
> > +void timer_set_signal_handler(void) 
> >  { 
> > - set_handler(SIGVTALRM); 
> > + set_handler(SIGALRM); 
> >  } 
> >  
> >  void set_sigstack(void *sig_stack, int size) 
> > @@ -131,10 +129,9 @@ static void (*handlers[_NSIG])(int sig, struct siginfo 
> > *si, mcontext_t *mc) = { 
> >  
> >  [SIGIO] = sig_handler, 
> >  [SIGWINCH] = sig_handler, 
> > - [SIGVTALRM] = alarm_handler 
> > + [SIGALRM] = timer_alarm_handler 
> >  }; 
> >  
> > - 
> >  static void hard_handler(int sig, siginfo_t *si, void *p) 
> >  { 
> >  struct ucontext *uc = p; 
> > @@ -188,9 +185,9 @@ void set_handler(int sig) 
> >  
> >  /* block irq ones */ 
> >  sigemptyset(&action.sa_mask); 
> > - sigaddset(&action.sa_mask, SIGVTALRM); 
> >  sigaddset(&action.sa_mask, SIGIO); 
> >  sigaddset(&action.sa_mask, SIGWINCH); 
> > + sigaddset(&action.sa_mask, SIGALRM); 
> >  
> >  if (sig == SIGSEGV) 
> >  flags |= SA_NODEFER; 
> > @@ -283,8 +280,8 @@ void unblock_signals(void) 
> >  if (save_pending & SIGIO_MASK) 
> >  sig_handler_common(SIGIO, NULL, NULL); 
> >  
> > - if (save_pending & SIGVTALRM_MASK) 
> > - real_alarm_handler(NULL); 
> > + if (save_pending & SIGALRM_MASK) 
> > + timer_real_alarm_handler(NULL); 
> >  } 
> >  } 
> >  
> > diff --git a/arch/um/os-Linux/skas/process.c 
> > b/arch/um/os-Linux/skas/process.c 
> > index 3dddedb..5ae4752 100644 
> > --- a/arch/um/os-Linux/skas/process.c 
> > +++ b/arch/um/os-Linux/skas/process.c 
> > @@ -45,7 +45,7 @@ static int ptrace_dump_regs(int pid) 
> >   * Signals that are OK to receive in the stub - we'll just continue it. 
> >   * SIGWINCH will happen when UML is inside a detached screen. 
> >   */ 
> > -#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) 
> > +#define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH)) 
> >  
> >  /* Signals that the stub will finish with - anything else is an error */ 
> >  #define STUB_DONE_MASK (1 << SIGTRAP) 
> > @@ -179,19 +179,13 @@ extern char __syscall_stub_start[]; 
> >  static int userspace_tramp(void *stack) 
> >  { 
> >  void *addr; 
> > - int err, fd; 
> > + int fd; 
> >  unsigned long long offset; 
> >  
> >  ptrace(PTRACE_TRACEME, 0, 0, 0); 
> >  
> >  signal(SIGTERM, SIG_DFL); 
> >  signal(SIGWINCH, SIG_IGN); 
> > - err = set_interval(); 
> > - if (err) { 
> > - printk(UM_KERN_ERR "userspace_tramp - setting timer failed, " 
> > -        "errno = %d\n", err); 
> > - exit(1); 
> > - } 
> >  
> >  /* 
> >  * This has a pte, but it can't be mapped in with the usual 
> > @@ -282,7 +276,7 @@ int start_userspace(unsigned long stub_stack) 
> >         "errno = %d\n", errno); 
> >  goto out_kill; 
> >  } 
> > - } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); 
> > + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); 
> >  
> >  if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { 
> >  err = -EINVAL; 
> > @@ -315,8 +309,6 @@ int start_userspace(unsigned long stub_stack) 
> >  
> >  void userspace(struct uml_pt_regs *regs) 
> >  { 
> > - struct itimerval timer; 
> > - unsigned long long nsecs, now; 
> >  int err, status, op, pid = userspace_pid[0]; 
> >  /* To prevent races if using_sysemu changes under us.*/ 
> >  int local_using_sysemu; 
> > @@ -325,13 +317,8 @@ void userspace(struct uml_pt_regs *regs) 
> >  /* Handle any immediate reschedules or signals */ 
> >  interrupt_end(); 
> >  
> > - if (getitimer(ITIMER_VIRTUAL, &timer)) 
> > - printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno); 
> > - nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + 
> > - timer.it_value.tv_usec * UM_NSEC_PER_USEC; 
> > - nsecs += os_nsecs(); 
> > - 
> >  while (1) { 
> > + 
> >  /* 
> >  * This can legitimately fail if the process loads a 
> >  * bogus value into a segment register.  It will 
> > @@ -401,18 +388,7 @@ void userspace(struct uml_pt_regs *regs) 
> >  case SIGTRAP: 
> >  relay_signal(SIGTRAP, (struct siginfo *)&si, regs); 
> >  break; 
> > - case SIGVTALRM: 
> > - now = os_nsecs(); 
> > - if (now < nsecs) 
> > - break; 
> > - block_signals(); 
> > - (*sig_info[sig])(sig, (struct siginfo *)&si, regs); 
> > - unblock_signals(); 
> > - nsecs = timer.it_value.tv_sec * 
> > - UM_NSEC_PER_SEC + 
> > - timer.it_value.tv_usec * 
> > - UM_NSEC_PER_USEC; 
> > - nsecs += os_nsecs(); 
> > + case SIGALRM: 
> >  break; 
> >  case SIGIO: 
> >  case SIGILL: 
> > @@ -460,7 +436,6 @@ __initcall(init_thread_regs); 
> >  
> >  int copy_context_skas0(unsigned long new_stack, int pid) 
> >  { 
> > - struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ }; 
> >  int err; 
> >  unsigned long current_stack = current_stub_stack(); 
> >  struct stub_data *data = (struct stub_data *) current_stack; 
> > @@ -472,11 +447,10 @@ int copy_context_skas0(unsigned long new_stack, int 
> > pid) 
> >  * prepare offset and fd of child's stack as argument for parent's 
> >  * and child's mmap2 calls 
> >  */ 
> > - *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), 
> > -       .fd = new_fd, 
> > -       .timer    = ((struct itimerval) 
> > -            { .it_value = tv, 
> > -      .it_interval = tv }) }); 
> > + *data = ((struct stub_data) { 
> > + .offset = MMAP_OFFSET(new_offset), 
> > + .fd     = new_fd 
> > + }); 
>
> As written above, you change the way how SKAS0 works, this needs 
> much more explanation. 

Yes, okay. See above. The userspace stub code no longer generates an itimer, so 
no timer information needs to be passed here.

>
> >  err = ptrace_setregs(pid, thread_regs); 
> >  if (err < 0) { 
> > diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c 
> > index e9824d5..0e2bb7d 100644 
> > --- a/arch/um/os-Linux/time.c 
> > +++ b/arch/um/os-Linux/time.c 
> > @@ -1,4 +1,5 @@ 
> >  /* 
> > + * Copyright (C) 2012-2014 Cisco Systems 
> >   * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com) 
> >   * Licensed under the GPL 
> >   */ 
> > @@ -10,177 +11,172 @@ 
> >  #include <sys/time.h> 
> >  #include <kern_util.h> 
> >  #include <os.h> 
> > -#include "internal.h" 
> > +#include <string.h> 
> > +#include <timer-internal.h> 
> >  
> > -int set_interval(void) 
> > -{ 
> > - int usec = UM_USEC_PER_SEC / UM_HZ; 
> > - struct itimerval interval = ((struct itimerval) { { 0, usec }, 
> > -   { 0, usec } }); 
> > - 
> > - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) 
> > - return -errno; 
> > +static timer_t event_high_res_timer = 0; 
> >  
> > - return 0; 
> > +static inline long long timeval_to_ns(const struct timeval *tv) 
> > +{ 
> > + return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + 
> > + tv->tv_usec * UM_NSEC_PER_USEC; 
> >  } 
> >  
> > -int timer_one_shot(int ticks) 
> > +static inline long long timespec_to_ns(const struct timespec *ts) 
> >  { 
> > - unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ; 
> > - unsigned long sec = usec / UM_USEC_PER_SEC; 
> > - struct itimerval interval; 
> > - 
> > - usec %= UM_USEC_PER_SEC; 
> > - interval = ((struct itimerval) { { 0, 0 }, { sec, usec } }); 
> > + return ((long long) ts->tv_sec * UM_NSEC_PER_SEC) + 
> > + ts->tv_nsec; 
> > +} 
> >  
> > - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) 
> > - return -errno; 
> > +long long os_persistent_clock_emulation (void) { 
> > + struct timespec realtime_tp; 
> >  
> > - return 0; 
> > + clock_gettime(CLOCK_REALTIME, &realtime_tp); 
> > + return timespec_to_ns(&realtime_tp); 
> >  } 
> >  
> >  /** 
> > - * timeval_to_ns - Convert timeval to nanoseconds 
> > - * @ts: pointer to the timeval variable to be converted 
> > - * 
> > - * Returns the scalar nanosecond representation of the timeval 
> > - * parameter. 
> > - * 
> > - * Ripped from linux/time.h because it's a kernel header, and thus 
> > - * unusable from here. 
> > + * os_timer_create() - create an new posix (interval) timer 
> >   */ 
> > -static inline long long timeval_to_ns(const struct timeval *tv) 
> > -{ 
> > - return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + 
> > - tv->tv_usec * UM_NSEC_PER_USEC; 
> > +int os_timer_create(void* timer) { 
> > + 
> > + timer_t* t = timer; 
> > + 
> > + if(t == NULL) { 
> > + t = &event_high_res_timer; 
> > + } 
> > + 
> > + if (timer_create( 
> > + CLOCK_MONOTONIC, 
> > + NULL, 
> > + t) == -1) { 
> > + return -1; 
> > + } 
> > + return 0; 
> >  } 
> >  
> > -long long disable_timer(void) 
> > +int os_timer_set_interval(void* timer, void* i) 
> >  { 
> > - struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } }); 
> > - long long remain, max = UM_NSEC_PER_SEC / UM_HZ; 
> > + struct itimerspec its; 
> > + unsigned long long nsec; 
> > + timer_t* t = timer; 
> > + struct itimerspec* its_in = i; 
> >  
> > - if (setitimer(ITIMER_VIRTUAL, &time, &time) < 0) 
> > - printk(UM_KERN_ERR "disable_timer - setitimer failed, " 
> > -        "errno = %d\n", errno); 
> > + if(t == NULL) { 
> > + t = &event_high_res_timer; 
> > + } 
> >  
> > - remain = timeval_to_ns(&time.it_value); 
> > - if (remain > max) 
> > - remain = max; 
> > + nsec = UM_NSEC_PER_SEC / UM_HZ; 
> >  
> > - return remain; 
> > -} 
> > + if(its_in != NULL) { 
> > + its.it_value.tv_sec = its_in->it_value.tv_sec; 
> > + its.it_value.tv_nsec = its_in->it_value.tv_nsec; 
> > + } else { 
> > + its.it_value.tv_sec = 0; 
> > + its.it_value.tv_nsec = nsec; 
> > + } 
> >  
> > -long long os_nsecs(void) 
> > -{ 
> > - struct timeval tv; 
> > + its.it_interval.tv_sec = 0; 
> > + its.it_interval.tv_nsec = nsec; 
> >  
> > - gettimeofday(&tv, NULL); 
> > - return timeval_to_ns(&tv); 
> > -} 
> > + if(timer_settime(*t, 0, &its, NULL) == -1) { 
> > + return -errno; 
> > + } 
> >  
> > -#ifdef UML_CONFIG_NO_HZ_COMMON 
> > -static int after_sleep_interval(struct timespec *ts) 
> > -{ 
> >  return 0; 
> >  } 
> >  
> > -static void deliver_alarm(void) 
> > +/** 
> > + * os_timer_remain() - returns the remaining nano seconds of the given 
> > interval 
> > + *                     timer 
> > + * Because this is the remaining time of an interval timer, which 
> > correspondends 
> > + * to HZ, this value can never be bigger than one second. Just 
> > + * the nanosecond part of the timer is returned. 
> > + * The returned time is relative to the start time of the interval timer. 
> > + * Return an negative value in an error case. 
> > + */ 
> > +long os_timer_remain(void* timer) 
> >  { 
> > - alarm_handler(SIGVTALRM, NULL, NULL); 
> > -} 
> > + struct itimerspec its; 
> > + timer_t* t = timer; 
> >  
> > -static unsigned long long sleep_time(unsigned long long nsecs) 
> > -{ 
> > - return nsecs; 
> > -} 
> > + if(t == NULL) { 
> > + t = &event_high_res_timer; 
> > + } 
> >  
> > -#else 
> > -unsigned long long last_tick; 
> > -unsigned long long skew; 
> > + if(timer_gettime(t, &its) == -1) { 
> > + return -errno; 
> > + } 
> > + 
> > + return its.it_value.tv_nsec; 
> > +} 
> >  
> > -static void deliver_alarm(void) 
> > +int os_timer_one_shot(int ticks) 
> >  { 
> > - unsigned long long this_tick = os_nsecs(); 
> > - int one_tick = UM_NSEC_PER_SEC / UM_HZ; 
> > + struct itimerspec its; 
> > + unsigned long long nsec; 
> > + unsigned long sec; 
> >  
> > - /* Protection against the host's time going backwards */ 
> > - if ((last_tick != 0) && (this_tick < last_tick)) 
> > - this_tick = last_tick; 
> > +    nsec = (ticks + 1); 
> > +    sec = nsec / UM_NSEC_PER_SEC; 
> > + nsec = nsec % UM_NSEC_PER_SEC; 
> >  
> > - if (last_tick == 0) 
> > - last_tick = this_tick - one_tick; 
> > + its.it_value.tv_sec = nsec / UM_NSEC_PER_SEC; 
> > + its.it_value.tv_nsec = nsec; 
> >  
> > - skew += this_tick - last_tick; 
> > + its.it_interval.tv_sec = 0; 
> > + its.it_interval.tv_nsec = 0; // we cheat here 
> >  
> > - while (skew >= one_tick) { 
> > - alarm_handler(SIGVTALRM, NULL, NULL); 
> > - skew -= one_tick; 
> > - } 
> > - 
> > - last_tick = this_tick; 
> > + timer_settime(event_high_res_timer, 0, &its, NULL); 
> > + return 0; 
> >  } 
> >  
> > -static unsigned long long sleep_time(unsigned long long nsecs) 
> > +/** 
> > + * os_timer_disable() - disable the posix (interval) timer 
> > + * Returns the remaining interval timer time in nanoseconds 
> > + */ 
> > +long long os_timer_disable(void) 
> >  { 
> > - return nsecs > skew ? nsecs - skew : 0; 
> > + struct itimerspec its; 
> > + 
> > + memset(&its, 0, sizeof(struct itimerspec)); 
> > + timer_settime(event_high_res_timer, 0, &its, &its); 
> > + 
> > + return its.it_value.tv_sec * UM_NSEC_PER_SEC + its.it_value.tv_nsec; 
> >  } 
> >  
> > -static inline long long timespec_to_us(const struct timespec *ts) 
> > +long long os_vnsecs(void) 
> >  { 
> > - return ((long long) ts->tv_sec * UM_USEC_PER_SEC) + 
> > - ts->tv_nsec / UM_NSEC_PER_USEC; 
> > + struct timespec ts; 
> > + 
> > + clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&ts); 
> > + return timespec_to_ns(&ts); 
> >  } 
> >  
> > -static int after_sleep_interval(struct timespec *ts) 
> > +long long os_nsecs(void) 
> >  { 
> > - int usec = UM_USEC_PER_SEC / UM_HZ; 
> > - long long start_usecs = timespec_to_us(ts); 
> > - struct timeval tv; 
> > - struct itimerval interval; 
> > - 
> > - /* 
> > - * It seems that rounding can increase the value returned from 
> > - * setitimer to larger than the one passed in.  Over time, 
> > - * this will cause the remaining time to be greater than the 
> > - * tick interval.  If this happens, then just reduce the first 
> > - * tick to the interval value. 
> > - */ 
> > - if (start_usecs > usec) 
> > - start_usecs = usec; 
> > - 
> > - start_usecs -= skew / UM_NSEC_PER_USEC; 
> > - if (start_usecs < 0) 
> > - start_usecs = 0; 
> > - 
> > - tv = ((struct timeval) { .tv_sec  = start_usecs / UM_USEC_PER_SEC, 
> > - .tv_usec = start_usecs % UM_USEC_PER_SEC }); 
> > - interval = ((struct itimerval) { { 0, usec }, tv }); 
> > - 
> > - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) 
> > - return -errno; 
> > + struct timespec ts; 
> >  
> > - return 0; 
> > + clock_gettime(CLOCK_MONOTONIC,&ts); 
> > + return timespec_to_ns(&ts); 
> >  } 
> > -#endif 
> >  
> > -void idle_sleep(unsigned long long nsecs) 
> > +/** 
> > + * os_idle_sleep() - sleep for a given time of nsecs 
> > + * @nsecs: nanoseconds to sleep 
> > + */ 
> > +void os_idle_sleep(unsigned long long nsecs) 
> >  { 
> >  struct timespec ts; 
> >  
> > - /* 
> > - * nsecs can come in as zero, in which case, this starts a 
> > - * busy loop.  To prevent this, reset nsecs to the tick 
> > - * interval if it is zero. 
> > - */ 
> > - if (nsecs == 0) 
> > - nsecs = UM_NSEC_PER_SEC / UM_HZ; 
> > - 
> > - nsecs = sleep_time(nsecs); 
> > - ts = ((struct timespec) { .tv_sec = nsecs / UM_NSEC_PER_SEC, 
> > -   .tv_nsec = nsecs % UM_NSEC_PER_SEC }); 
> > - 
> > - if (nanosleep(&ts, &ts) == 0) 
> > - deliver_alarm(); 
> > - after_sleep_interval(&ts); 
> > + if (nsecs <= 0) { 
> > + return; 
> > + } 
> > + 
> > + ts = ((struct timespec) { 
> > + .tv_sec  = nsecs / UM_NSEC_PER_SEC, 
> > + .tv_nsec = nsecs % UM_NSEC_PER_SEC 
> > + }); 
> > + 
> > + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL); 
> >  } 
> > 
>
> Thanks, 
> //richard 
------------------------------------------------------------------------------
_______________________________________________
User-mode-linux-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel

Re: [uml-devel] [PATCH] um: Switch clocksource to hrtimers

Reply via email to