On Wed, Oct 31, 2012 at 08:47:06PM -0200, Marcelo Tosatti wrote:
> Improve performance of time system calls when using Linux pvclock,
> by reading time info from fixmap visible copy of pvclock data.
>
> Originally from Jeremy Fitzhardinge.
>
> Signed-off-by: Marcelo Tosatti <[email protected]>
>
> Index: vsyscall/arch/x86/vdso/vclock_gettime.c
> ===================================================================
> --- vsyscall.orig/arch/x86/vdso/vclock_gettime.c
> +++ vsyscall/arch/x86/vdso/vclock_gettime.c
> @@ -22,6 +22,7 @@
> #include <asm/hpet.h>
> #include <asm/unistd.h>
> #include <asm/io.h>
> +#include <asm/pvclock.h>
>
> #define gtod (&VVAR(vsyscall_gtod_data))
>
> @@ -62,6 +63,70 @@ static notrace cycle_t vread_hpet(void)
> return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
> }
>
> +#ifdef CONFIG_PARAVIRT_CLOCK
> +
> +static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
> +{
> + const aligned_pvti_t *pvti_base;
> + int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
> + int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
> +
> + BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
> +
> + pvti_base = (aligned_pvti_t *)__fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
> +
> + return &pvti_base[offset].info;
> +}
> +
> +static notrace cycle_t vread_pvclock(int *mode)
> +{
> + const struct pvclock_vsyscall_time_info *pvti;
> + cycle_t ret;
> + u64 last;
> + u32 version;
> + u32 migrate_count;
> + u8 flags;
> + unsigned cpu, cpu1;
> +
> +
> + /*
> + * When looping to get a consistent (time-info, tsc) pair, we
> + * also need to deal with the possibility we can switch vcpus,
> + * so make sure we always re-fetch time-info for the current vcpu.
> + */
> + do {
> + cpu = __getcpu() & VGETCPU_CPU_MASK;
> + pvti = get_pvti(cpu);
> +
> + migrate_count = pvti->migrate_count;
> +
> + version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
> +
> + /*
> + * Test we're still on the cpu as well as the version.
> + * We could have been migrated just after the first
> + * vgetcpu but before fetching the version, so we
> + * wouldn't notice a version change.
> + */
> + cpu1 = __getcpu() & VGETCPU_CPU_MASK;
> + } while (unlikely(cpu != cpu1 ||
> + (pvti->pvti.version & 1) ||
> + pvti->pvti.version != version ||
> + pvti->migrate_count != migrate_count));
> +
We can put the vcpu id into the higher bits of pvti.version. This will
save a couple of cycles by getting rid of the __getcpu() calls.
> + if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
> + *mode = VCLOCK_NONE;
> +
> + /* refer to tsc.c read_tsc() comment for rationale */
> + last = VVAR(vsyscall_gtod_data).clock.cycle_last;
> +
> + if (likely(ret >= last))
> + return ret;
> +
> + return last;
> +}
> +#endif
> +
> notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
> {
> long ret;
> @@ -80,7 +145,7 @@ notrace static long vdso_fallback_gtod(s
> }
>
>
> -notrace static inline u64 vgetsns(void)
> +notrace static inline u64 vgetsns(int *mode)
> {
> long v;
> cycles_t cycles;
> @@ -88,6 +153,8 @@ notrace static inline u64 vgetsns(void)
> cycles = vread_tsc();
> else if (gtod->clock.vclock_mode == VCLOCK_HPET)
> cycles = vread_hpet();
> + else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
> + cycles = vread_pvclock(mode);
> else
> return 0;
> v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
> @@ -107,7 +174,7 @@ notrace static int __always_inline do_re
> mode = gtod->clock.vclock_mode;
> ts->tv_sec = gtod->wall_time_sec;
> ns = gtod->wall_time_snsec;
> - ns += vgetsns();
> + ns += vgetsns(&mode);
> ns >>= gtod->clock.shift;
> } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
>
> @@ -127,7 +194,7 @@ notrace static int do_monotonic(struct t
> mode = gtod->clock.vclock_mode;
> ts->tv_sec = gtod->monotonic_time_sec;
> ns = gtod->monotonic_time_snsec;
> - ns += vgetsns();
> + ns += vgetsns(&mode);
> ns >>= gtod->clock.shift;
> } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
> timespec_add_ns(ts, ns);
> Index: vsyscall/arch/x86/include/asm/vsyscall.h
> ===================================================================
> --- vsyscall.orig/arch/x86/include/asm/vsyscall.h
> +++ vsyscall/arch/x86/include/asm/vsyscall.h
> @@ -33,6 +33,23 @@ extern void map_vsyscall(void);
> */
> extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
>
> +#define VGETCPU_CPU_MASK 0xfff
> +
> +static inline unsigned int __getcpu(void)
> +{
> + unsigned int p;
> +
> + if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
> + /* Load per CPU data from RDTSCP */
> + native_read_tscp(&p);
> + } else {
> + /* Load per CPU data from GDT */
> + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
> + }
> +
> + return p;
> +}
> +
> #endif /* __KERNEL__ */
>
> #endif /* _ASM_X86_VSYSCALL_H */
> Index: vsyscall/arch/x86/vdso/vgetcpu.c
> ===================================================================
> --- vsyscall.orig/arch/x86/vdso/vgetcpu.c
> +++ vsyscall/arch/x86/vdso/vgetcpu.c
> @@ -17,15 +17,10 @@ __vdso_getcpu(unsigned *cpu, unsigned *n
> {
> unsigned int p;
>
> - if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
> - /* Load per CPU data from RDTSCP */
> - native_read_tscp(&p);
> - } else {
> - /* Load per CPU data from GDT */
> - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
> - }
> + p = __getcpu();
> +
> if (cpu)
> - *cpu = p & 0xfff;
> + *cpu = p & VGETCPU_CPU_MASK;
> if (node)
> *node = p >> 12;
> return 0;
>
--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html