We already have infrastructure for a virtualized vdso; however, we use it
only to change LINUX_VERSION_NAME in a container. Simply store the
container's start time (ve->start_timespec) in the vdso variable
VDSO64_ve_start_timespec, and use it in __vdso_clock_gettime() to calculate
the container's monotonic time.

Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() always
set up a new vdso, since the previous policy of setting up a vdso only if the
version in uts_ns->name.release differs from the host's wouldn't work for the
virtualized __vdso_clock_gettime().

https://jira.sw.ru/browse/PSBM-66451
Signed-off-by: Andrey Ryabinin <[email protected]>
---
 arch/x86/vdso/vclock_gettime.c | 45 +++++++++++++++++++++++++++++++++++++++++-
 arch/x86/vdso/vdso32-setup.c   | 10 ++++------
 arch/x86/vdso/vma.c            |  7 +++----
 3 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index f079e1fd5633..3a5b319984c7 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -27,6 +27,10 @@
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
+struct timespec VDSO64_ve_start_timespec;
+extern struct timespec VDSO32_ve_start_timespec
+       __attribute__((weak, alias("VDSO64_ve_start_timespec")));
+
 notrace static cycle_t vread_tsc(void)
 {
        cycle_t ret = (cycle_t)rdtsc_ordered();
@@ -175,6 +179,43 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
        return mode;
 }
 
+notrace static struct timespec *get_ve_timespec(void)
+{
+       struct timespec *ret;
+       asm volatile ("lea VDSO64_ve_start_timespec(%%rip),%0\n": "=r"(ret));
+       return ret;
+}
+
+notrace static void vdso_set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
+{
+       while (nsec >= NSEC_PER_SEC) {
+               /*
+                * The following asm() prevents the compiler from
+                * optimising this loop into a modulo operation. See
+                * also __iter_div_u64_rem() in include/linux/time.h
+                */
+               asm("" : "+rm"(nsec));
+               nsec -= NSEC_PER_SEC;
+               ++sec;
+       }
+       while (nsec < 0) {
+               asm("" : "+rm"(nsec));
+               nsec += NSEC_PER_SEC;
+               --sec;
+       }
+       ts->tv_sec = sec;
+       ts->tv_nsec = nsec;
+}
+
+static void monotonic_time_to_ve(struct timespec *ts)
+{
+       struct timespec *ve_timespec = get_ve_timespec();
+
+       vdso_set_normalized_timespec(ts,
+               ts->tv_sec - ve_timespec->tv_sec,
+               ts->tv_nsec - ve_timespec->tv_nsec);
+}
+
 notrace static int do_monotonic(struct timespec *ts)
 {
        unsigned long seq;
@@ -190,8 +231,9 @@ notrace static int do_monotonic(struct timespec *ts)
                ns += vgetsns(&mode);
                ns >>= gtod->clock.shift;
        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-       timespec_add_ns(ts, ns);
 
+       timespec_add_ns(ts, ns);
+       monotonic_time_to_ve(ts);
        return mode;
 }
 
@@ -215,6 +257,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts)
                ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 
+       monotonic_time_to_ve(ts);
        return 0;
 }
 
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 5056d0ec9ab7..a082f1541f3c 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -344,10 +344,8 @@ static struct page **uts_prep_vdso_pages_locked(int map)
                 * preallocated one.
                 */
                new_version = KERNEL_VERSION(n1, n2, n3);
-               if (new_version == LINUX_VERSION_CODE)
-                       goto out;
 #ifdef CONFIG_X86_32
-               else {
+               {
                        /*
                         * Native x86-32 mode requires vDSO runtime
                         * relocations applied which is not supported
@@ -370,8 +368,8 @@ static struct page **uts_prep_vdso_pages_locked(int map)
                 * better than walk out with error.
                 */
                pr_warn_once("Wrong release uts name format detected."
-                            " Ignoring vDSO virtualization.\n");
-               goto out;
+                            " Using host's uts name.\n");
+               new_version = LINUX_VERSION_CODE;
        }
 
        mutex_lock(&vdso32_mutex);
@@ -402,6 +400,7 @@ static struct page **uts_prep_vdso_pages_locked(int map)
 
        addr = page_address(new_pages[0]);
        *((int *)(addr + uts_ns->vdso32.version_off)) = new_version;
+       *((struct timespec*)(VDSO32_SYMBOL(uts_ns->vdso.addr, ve_start_timespec))) = ve->start_timespec;
        smp_wmb();
 
        pages = uts_ns->vdso32.pages = new_pages;
@@ -411,7 +410,6 @@ static struct page **uts_prep_vdso_pages_locked(int map)
 
 out_unlock:
        mutex_unlock(&vdso32_mutex);
-out:
        down_write(&mm->mmap_sem);
        return pages;
 }
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index a862c79e2e91..ad0e0ac14f83 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -244,8 +244,6 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
                 * preallocated one.
                 */
                new_version = KERNEL_VERSION(n1, n2, n3);
-               if (new_version == LINUX_VERSION_CODE)
-                       goto map_init_uts;
        } else {
                /*
                 * If admin is passed malformed string here
@@ -254,8 +252,8 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
                 * better than walk out with error.
                 */
                pr_warn_once("Wrong release uts name format detected."
-                            " Ignoring vDSO virtualization.\n");
-               goto map_init_uts;
+                            " Using host's uts name.\n");
+               new_version = LINUX_VERSION_CODE;
        }
 
        mutex_lock(&vdso_mutex);
@@ -296,6 +294,7 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
        }
 
        *((int *)(uts_ns->vdso.addr + uts_ns->vdso.version_off)) = new_version;
+       *((struct timespec*)(VDSO64_SYMBOL(uts_ns->vdso.addr, ve_start_timespec))) = ve->start_timespec;
        smp_wmb();
        uts_ns->vdso.pages = new_pages;
        mutex_unlock(&vdso_mutex);
-- 
2.13.0

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to