hr_sleep is a new system call engineered for sleep periods at
nanosecond-scale granularity.
Unlike nanosleep, it represents the sleep period as a single value
(a count of nanoseconds).
hr_sleep achieves a 15x improvement over nanosleep for microsecond-scale
timers. The reasons are that the sleep period is passed in a CPU
register (avoiding a cross-ring data move) and that the timer state is
kept in the thread's kernel stack area (avoiding in-kernel memory
allocations).
Further details about hr_sleep and its evaluation against
nanosleep can be found in Section 3 of our paper "Metronome:
adaptive and precise intermittent packet retrieval in DPDK".
In this patch hr_sleep has syscall number 442, so you can try it
by calling syscall(442, sleep_period).

Signed-off-by: Marco Faltelli <marco.falte...@uniroma2.it>
---
 arch/x86/entry/syscalls/syscall_64.tbl |  1 +
 kernel/time/hrtimer.c                  | 61 ++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 78672124d28b..27343c016e42 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -363,6 +363,7 @@
 439    common  faccessat2              sys_faccessat2
 440    common  process_madvise         sys_process_madvise
 441    common  epoll_pwait2            sys_epoll_pwait2
+442    common  hr_sleep                sys_hr_sleep
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 743c852e10f2..422410c60a9f 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1988,6 +1988,67 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
 }
 #endif
 
+#ifdef CONFIG_64BIT
+
+
+typedef struct _control_record {   /* per-sleep state shared by hr_sleep and its timer callback */
+       struct task_struct *task;   /* task that the timer callback wakes up */
+       int pid;                    /* pid of @task; stored by hr_sleep, not read in this patch */
+       int awake;                  /* 0 while sleeping; set to 1 by the timer callback */
+       struct hrtimer hr_timer;    /* the timer; container_of() on it recovers this record */
+} control_record;
+
+
+/* Timer expiry handler: mark the sleeper's record as awake and wake its task. */
+static enum hrtimer_restart hr_sleep_callback(struct hrtimer *timer)
+{
+       /* @timer is embedded in a control_record; recover the enclosing record */
+       control_record *control = container_of(timer, control_record, hr_timer);
+
+       /* set the flag before the wake-up so the sleeper's wait condition holds */
+       control->awake = 1;
+       wake_up_process(control->task);
+
+       return HRTIMER_NORESTART; /* one-shot: the timer is not re-armed */
+}
+
+/**
+ * hr_sleep - a high-resolution sleep service for fine-grained timeouts
+ * @nanoseconds:       the requested sleep period in nanoseconds
+ *
+ * The timer and its wake-up flag live in a record in this function's own
+ * stack frame, so no allocation is needed and no other stack data is touched.
+ *
+ * Returns:
+ * 0 when the sleep request successfully terminated
+ * -EINVAL if a sleep period < 0 is requested
+ * -EINTR if a signal interrupted the sleep before the period elapsed
+ */
+SYSCALL_DEFINE1(hr_sleep, long, nanoseconds)
+{
+       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(the_queue); /* private; woken via wake_up_process() */
+       control_record control;
+       int ret;
+
+       if (nanoseconds < 0)
+               return -EINVAL;
+       if (nanoseconds == 0)
+               return 0;
+
+       hrtimer_init_on_stack(&control.hr_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       control.hr_timer.function = &hr_sleep_callback;
+       control.task = current;
+       control.pid = current->pid;
+       control.awake = 0;
+       hrtimer_start(&control.hr_timer, ktime_set(0, nanoseconds), HRTIMER_MODE_REL);
+       ret = wait_event_interruptible(the_queue, control.awake == 1);
+       hrtimer_cancel(&control.hr_timer); /* also waits for a running callback */
+       destroy_hrtimer_on_stack(&control.hr_timer);
+       return ret ? -EINTR : 0; /* no restart: the relative period would start over */
+}
+
+#endif
+
 /*
  * Functions related to boot-time initialization:
  */
-- 
2.25.1

Reply via email to