Make it possible to read a virtualized container's CLOCK_MONOTONIC time
via __vdso_clock_gettime(). Record the container's start time in the per-ve
vdso and subtract it from the host's time on clock read.

https://jira.sw.ru/browse/PSBM-121668
Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 arch/x86/entry/vdso/vclock_gettime.c | 27 +++++++++++++++++++++++----
 arch/x86/entry/vdso/vdso2c.c         |  1 +
 arch/x86/include/asm/vdso.h          |  1 +
 kernel/ve/ve.c                       | 14 ++++++++++++++
 4 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index e48ca3afa091..be1de6c4cafa 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -24,6 +24,8 @@
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
+u64 ve_start_time;
+
 extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
 extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
 extern time_t __vdso_time(time_t *t);
@@ -227,6 +229,21 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
        return mode;
 }
 
+/* ts = ts->tv_sec seconds minus signed @ns; incoming tv_nsec is ignored. */
+notrace static inline void timespec_sub_ns(struct timespec *ts, u64 ns)
+{
+       if ((s64)ns <= 0) {     /* zero or negative offset: add -ns instead */
+               ts->tv_sec += __iter_div_u64_rem(-ns, NSEC_PER_SEC, &ns);
+       } else {
+               ts->tv_sec -= __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+               if (ns) {       /* borrow a second for the leftover part */
+                       ts->tv_sec--;
+                       ns = NSEC_PER_SEC - ns;
+               }
+       }
+       ts->tv_nsec = ns;       /* sub-second part computed above */
+}
+
 notrace static int __always_inline do_monotonic(struct timespec *ts)
 {
        unsigned long seq;
@@ -242,9 +259,7 @@ notrace static int __always_inline do_monotonic(struct timespec *ts)
                ns >>= gtod->shift;
        } while (unlikely(gtod_read_retry(gtod, seq)));
 
-       ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
-       ts->tv_nsec = ns;
-
+       timespec_sub_ns(ts, ve_start_time - ns);
        return mode;
 }
 
@@ -260,12 +275,16 @@ notrace static void do_realtime_coarse(struct timespec *ts)
 
 notrace static void do_monotonic_coarse(struct timespec *ts)
 {
+       u64 ns;
        unsigned long seq;
+
        do {
                seq = gtod_read_begin(gtod);
                ts->tv_sec = gtod->monotonic_time_coarse_sec;
-               ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
+               ns = gtod->monotonic_time_coarse_nsec;
        } while (unlikely(gtod_read_retry(gtod, seq)));
+
+       timespec_sub_ns(ts, ve_start_time - ns);
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 7fab0bd96ac1..c76141e9ca16 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -110,6 +110,7 @@ struct vdso_sym required_syms[] = {
        {"__kernel_rt_sigreturn", true},
        {"int80_landing_pad", true},
        {"linux_version_code", true},
+       {"ve_start_time", true},
 };
 
 __attribute__((format(printf, 1, 2))) __attribute__((noreturn))
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 92c7ac06828e..9c265f79a126 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -28,6 +28,7 @@ struct vdso_image {
        long sym___kernel_vsyscall;
        long sym_int80_landing_pad;
        long sym_linux_version_code;
+       long sym_ve_start_time;
 };
 
 #ifdef CONFIG_X86_64
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 98c2e7e3d2c6..ac3dda55e9ae 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -374,6 +374,17 @@ static int ve_start_kthreadd(struct ve_struct *ve)
        return err;
 }
 
+/*
+ * Store @time at the sym_ve_start_time offset inside the data of both
+ * per-VE vDSO images: ve->vdso_64 first, then ve->vdso_32.
+ */
+static void ve_set_vdso_time(struct ve_struct *ve, u64 time)
+{
+       *(u64 *)(ve->vdso_64->data + ve->vdso_64->sym_ve_start_time) = time;
+
+       *(u64 *)(ve->vdso_32->data + ve->vdso_32->sym_ve_start_time) = time;
+}
+
 /* under ve->op_sem write-lock */
 static int ve_start_container(struct ve_struct *ve)
 {
@@ -408,6 +419,8 @@ static int ve_start_container(struct ve_struct *ve)
        if (ve->start_time == 0) {
                ve->start_time = tsk->start_time;
                ve->real_start_time = tsk->real_start_time;
+
+               ve_set_vdso_time(ve, ve->start_time);
        }
        /* The value is wrong, but it is never compared to process
         * start times */
@@ -1028,6 +1041,7 @@ static ssize_t ve_ts_write(struct kernfs_open_file *of, char *buf,
                case VE_CF_CLOCK_MONOTONIC:
                        now = ktime_get_ns();
                        target = &ve->start_time;
+                       ve_set_vdso_time(ve, now - delta_ns);
                        break;
                case VE_CF_CLOCK_BOOTBASED:
                        now = ktime_get_boot_ns();
-- 
2.26.2

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to