We already have infrastructure for a virtualized vdso; however, we use it only to change LINUX_VERSION_NAME in a container. Simply store the container's start time (ve->start_timespec) in the vdso variable VDSO64_ve_start_timespec, and use it in __vdso_clock_gettime() to calculate the container's monotonic time.
Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() always set up a new vdso, since the previous policy of setting up a vdso only if the version in uts_ns->name.release differs from the host's wouldn't work for the virtualized __vdso_clock_gettime(). https://jira.sw.ru/browse/PSBM-66451 Signed-off-by: Andrey Ryabinin <[email protected]> --- arch/x86/vdso/vclock_gettime.c | 45 +++++++++++++++++++++++++++++++++++++++++- arch/x86/vdso/vdso32-setup.c | 10 ++++------ arch/x86/vdso/vma.c | 7 +++---- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index f079e1fd5633..3a5b319984c7 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -27,6 +27,10 @@ #define gtod (&VVAR(vsyscall_gtod_data)) +struct timespec VDSO64_ve_start_timespec; +extern struct timespec VDSO32_ve_start_timespec + __attribute__((weak, alias("VDSO64_ve_start_timespec"))); + notrace static cycle_t vread_tsc(void) { cycle_t ret = (cycle_t)rdtsc_ordered(); @@ -175,6 +179,43 @@ notrace static int __always_inline do_realtime(struct timespec *ts) return mode; } +notrace static struct timespec *get_ve_timespec(void) +{ + struct timespec *ret; + asm volatile ("lea VDSO64_ve_start_timespec(%%rip),%0\n": "=r"(ret)); + return ret; +} + +notrace static void vdso_set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec) +{ + while (nsec >= NSEC_PER_SEC) { + /* + * The following asm() prevents the compiler from + * optimising this loop into a modulo operation. 
See + * also __iter_div_u64_rem() in include/linux/time.h + */ + asm("" : "+rm"(nsec)); + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + asm("" : "+rm"(nsec)); + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static void monotonic_time_to_ve(struct timespec *ts) +{ + struct timespec *ve_timespec = get_ve_timespec(); + + vdso_set_normalized_timespec(ts, + ts->tv_sec - ve_timespec->tv_sec, + ts->tv_nsec - ve_timespec->tv_nsec); +} + notrace static int do_monotonic(struct timespec *ts) { unsigned long seq; @@ -190,8 +231,9 @@ notrace static int do_monotonic(struct timespec *ts) ns += vgetsns(&mode); ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(>od->seq, seq))); - timespec_add_ns(ts, ns); + timespec_add_ns(ts, ns); + monotonic_time_to_ve(ts); return mode; } @@ -215,6 +257,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts) ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); + monotonic_time_to_ve(ts); return 0; } diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 5056d0ec9ab7..a082f1541f3c 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -344,10 +344,8 @@ static struct page **uts_prep_vdso_pages_locked(int map) * preallocated one. */ new_version = KERNEL_VERSION(n1, n2, n3); - if (new_version == LINUX_VERSION_CODE) - goto out; #ifdef CONFIG_X86_32 - else { + { /* * Native x86-32 mode requires vDSO runtime * relocations applied which is not supported @@ -370,8 +368,8 @@ static struct page **uts_prep_vdso_pages_locked(int map) * better than walk out with error. */ pr_warn_once("Wrong release uts name format detected." 
- " Ignoring vDSO virtualization.\n"); - goto out; + " Using host's uts name.\n"); + new_version = LINUX_VERSION_CODE; } mutex_lock(&vdso32_mutex); @@ -402,6 +400,7 @@ static struct page **uts_prep_vdso_pages_locked(int map) addr = page_address(new_pages[0]); *((int *)(addr + uts_ns->vdso32.version_off)) = new_version; + *((struct timespec*)(VDSO32_SYMBOL(uts_ns->vdso.addr, ve_start_timespec))) = ve->start_timespec; smp_wmb(); pages = uts_ns->vdso32.pages = new_pages; @@ -411,7 +410,6 @@ static struct page **uts_prep_vdso_pages_locked(int map) out_unlock: mutex_unlock(&vdso32_mutex); -out: down_write(&mm->mmap_sem); return pages; } diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index a862c79e2e91..ad0e0ac14f83 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -244,8 +244,6 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i * preallocated one. */ new_version = KERNEL_VERSION(n1, n2, n3); - if (new_version == LINUX_VERSION_CODE) - goto map_init_uts; } else { /* * If admin is passed malformed string here @@ -254,8 +252,8 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i * better than walk out with error. */ pr_warn_once("Wrong release uts name format detected." - " Ignoring vDSO virtualization.\n"); - goto map_init_uts; + " Using host's uts name.\n"); + new_version = LINUX_VERSION_CODE; } mutex_lock(&vdso_mutex); @@ -296,6 +294,7 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i } *((int *)(uts_ns->vdso.addr + uts_ns->vdso.version_off)) = new_version; + *((struct timespec*)(VDSO64_SYMBOL(uts_ns->vdso.addr, ve_start_timespec))) = ve->start_timespec; smp_wmb(); uts_ns->vdso.pages = new_pages; mutex_unlock(&vdso_mutex); -- 2.13.0 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
