hr_sleep is a new system call engineered for nanosecond time scale granularities. Unlike nanosleep, it uses a single-value representation of the sleep period. hr_sleep achieves a 15x improvement for microsecond-scale timers with respect to nanosleep: the reasons are the use of a CPU register for passing the sleep period (avoiding a cross-ring data move) and the use of the thread's kernel stack area (avoiding in-kernel memory allocations). Further details about hr_sleep and its evaluation against nanosleep can be found in Section 3 of our paper "Metronome: adaptive and precise intermittent packet retrieval in DPDK". In this patch hr_sleep has syscall number 442, so you can try it by calling syscall(442, sleep_period).
Signed-off-by: Marco Faltelli <marco.falte...@uniroma2.it>
---
 arch/x86/entry/syscalls/syscall_64.tbl |  1 +
 kernel/time/hrtimer.c                  | 71 ++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 78672124d28b..27343c016e42 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -363,6 +363,7 @@
 439	common	faccessat2		sys_faccessat2
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
+442	common	hr_sleep		sys_hr_sleep
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 743c852e10f2..422410c60a9f 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1988,6 +1988,77 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
 }
 #endif
 
+#ifdef CONFIG_64BIT
+
+/*
+ * Per-call bookkeeping for hr_sleep(). It lives in the stack frame of the
+ * sleeping thread, so no allocation is needed and it goes away when the
+ * syscall returns.
+ */
+struct hr_sleep_control {
+	struct task_struct *task;	/* thread to wake when the timer fires */
+	int awake;			/* set to 1 by the timer callback */
+	struct hrtimer hr_timer;
+};
+
+/* Timer expiry handler: flag the sleeper as done and wake it up. */
+static enum hrtimer_restart hr_sleep_callback(struct hrtimer *timer)
+{
+	struct hr_sleep_control *control =
+		container_of(timer, struct hr_sleep_control, hr_timer);
+
+	control->awake = 1;
+	wake_up_process(control->task);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * hr_sleep - a high-resolution sleep service for fine-grained timeouts
+ * @nanoseconds: the requested sleep period in nanoseconds
+ *
+ * Returns:
+ * 0 when the sleep request successfully terminated
+ * -EINVAL if a sleep period < 0 is requested
+ * -EINTR if a signal interrupted the sleep before the period elapsed
+ */
+SYSCALL_DEFINE1(hr_sleep, long, nanoseconds)
+{
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(the_queue); /* private queue */
+	struct hr_sleep_control control;
+	int ret;
+
+	if (nanoseconds < 0)
+		return -EINVAL;
+
+	if (nanoseconds == 0)
+		return 0;
+
+	control.task = current;
+	control.awake = 0;
+
+	/* The timer is a stack object: use the _on_stack init/destroy pair. */
+	hrtimer_init_on_stack(&control.hr_timer, CLOCK_MONOTONIC,
+			      HRTIMER_MODE_REL);
+	control.hr_timer.function = hr_sleep_callback;
+	hrtimer_start(&control.hr_timer, ktime_set(0, nanoseconds),
+		      HRTIMER_MODE_REL);
+
+	ret = wait_event_interruptible(the_queue, control.awake == 1);
+
+	/*
+	 * Make sure the timer and its callback are finished before this
+	 * stack frame is released.
+	 */
+	hrtimer_cancel(&control.hr_timer);
+	destroy_hrtimer_on_stack(&control.hr_timer);
+
+	/* A non-zero wait result means a signal cut the sleep short. */
+	return ret ? -EINTR : 0;
+}
+
+#endif
+
 /*
  * Functions related to boot-time initialization:
  */
-- 
2.25.1