From: Stefani Seibold <[email protected]>

This patch adds VDSO time support for the IA32 emulation layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
sizes of a long and a pointer differ from those of a 32-bit compiler,
some type hacking is necessary.

The vsyscall_gtod_data structure must be rearranged a little bit to
serve both 32- and 64-bit code access:

- The seqcount_t was replaced by an unsigned; this makes
  vsyscall_gtod_data independent of the kernel configuration and internal functions.
- The structure is now packed, so it can be accessed from 32- and 64-bit
  code at the same time.
- The inner struct clock was removed, to make packing of the whole
  struct easier.

The "unsigned seq" is handled by functions derived from the seqcount_t ones.

Signed-off-by: Stefani Seibold <[email protected]>
---
 arch/x86/include/asm/vgtod.h          |  20 +++---
 arch/x86/kernel/vsyscall_gtod.c       |  26 +++++--
 arch/x86/vdso/vclock_gettime.c        | 129 ++++++++++++++++++++++++----------
 arch/x86/vdso/vdso32/vclock_gettime.c |  11 +++
 include/uapi/linux/time.h             |   2 +-
 5 files changed, 132 insertions(+), 56 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..2567b02 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -4,16 +4,18 @@
 #include <asm/vsyscall.h>
 #include <linux/clocksource.h>
 
-struct vsyscall_gtod_data {
-       seqcount_t      seq;
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so the structure must be packed
+ */
+struct __attribute__((packed)) vsyscall_gtod_data {
+       unsigned seq;
 
-       struct { /* extract of a clocksource struct */
-               int vclock_mode;
-               cycle_t cycle_last;
-               cycle_t mask;
-               u32     mult;
-               u32     shift;
-       } clock;
+       int vclock_mode;
+       cycle_t cycle_last;
+       cycle_t mask;
+       u32     mult;
+       u32     shift;
 
        /* open coded 'struct timespec' */
        time_t          wall_time_sec;
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index 91862a4..ca48248 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -16,6 +16,18 @@
 
 DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
 
+static inline void gtod_write_begin(unsigned *s)
+{
+       ++*s;
+       smp_wmb();
+}
+
+static inline void gtod_write_end(unsigned *s)
+{
+       smp_wmb();
+       ++*s;
+}
+
 void update_vsyscall_tz(void)
 {
        vsyscall_gtod_data.sys_tz = sys_tz;
@@ -25,14 +37,14 @@ void update_vsyscall(struct timekeeper *tk)
 {
        struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
 
-       write_seqcount_begin(&vdata->seq);
+       gtod_write_begin(&vdata->seq);
 
        /* copy vsyscall data */
-       vdata->clock.vclock_mode        = tk->clock->archdata.vclock_mode;
-       vdata->clock.cycle_last         = tk->clock->cycle_last;
-       vdata->clock.mask               = tk->clock->mask;
-       vdata->clock.mult               = tk->mult;
-       vdata->clock.shift              = tk->shift;
+       vdata->vclock_mode      = tk->clock->archdata.vclock_mode;
+       vdata->cycle_last       = tk->clock->cycle_last;
+       vdata->mask             = tk->clock->mask;
+       vdata->mult             = tk->mult;
+       vdata->shift            = tk->shift;
 
        vdata->wall_time_sec            = tk->xtime_sec;
        vdata->wall_time_snsec          = tk->xtime_nsec;
@@ -55,6 +67,6 @@ void update_vsyscall(struct timekeeper *tk)
        vdata->monotonic_time_coarse    = timespec_add(vdata->wall_time_coarse,
                                                        tk->wall_to_monotonic);
 
-       write_seqcount_end(&vdata->seq);
+       gtod_write_end(&vdata->seq);
 }
 
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 469e57b..b045aaa 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -31,12 +31,24 @@
 
 #ifndef BUILD_VDSO32
 
+struct api_timeval {
+       long    tv_sec;         /* seconds */
+       long    tv_usec;        /* microseconds */
+};
+
+struct api_timespec {
+       long    tv_sec;         /* seconds */
+       long    tv_nsec;        /* nanoseconds */
+};
+
+typedef long api_time_t;
+
 static notrace cycle_t vread_hpet(void)
 {
        return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
 }
 
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct api_timespec *ts)
 {
        long ret;
        asm("syscall" : "=a" (ret) :
@@ -44,7 +56,8 @@ notrace static long vdso_fallback_gettime(long clock, struct 
timespec *ts)
        return ret;
 }
 
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
+notrace static long vdso_fallback_gtod(struct api_timeval *tv,
+               struct timezone *tz)
 {
        long ret;
 
@@ -57,6 +70,18 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, 
struct timezone *tz)
 u8 hpet_page
        __attribute__((visibility("hidden")));
 
+struct api_timeval {
+       s32     tv_sec;         /* seconds */
+       s32     tv_usec;        /* microseconds */
+};
+
+struct api_timespec {
+       s32     tv_sec;         /* seconds */
+       s32     tv_nsec;        /* nanoseconds */
+};
+
+typedef s32 api_time_t;
+
 #ifdef CONFIG_HPET_TIMER
 static notrace cycle_t vread_hpet(void)
 {
@@ -64,7 +89,7 @@ static notrace cycle_t vread_hpet(void)
 }
 #endif
 
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct api_timespec *ts)
 {
        long ret;
 
@@ -74,12 +99,12 @@ notrace static long vdso_fallback_gettime(long clock, 
struct timespec *ts)
                "call VDSO32_vsyscall \n"
                "pop %%ebx \n"
                : "=a" (ret)
-               : "0" (__NR_clock_gettime), "d" (clock), "c" (ts)
+               : "0" (__NR_ia32_clock_gettime), "d" (clock), "c" (ts)
                : "memory");
        return ret;
 }
 
-notrace static long vdso_fallback_gtod(struct timeval *tv,
+notrace static long vdso_fallback_gtod(struct api_timeval *tv,
                struct timezone *tz)
 {
        long ret;
@@ -90,7 +115,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv,
                "call VDSO32_vsyscall \n"
                "pop %%ebx \n"
                : "=a" (ret)
-               : "0" (__NR_gettimeofday), "d" (tv), "c" (tz)
+               : "0" (__NR_ia32_gettimeofday), "d" (tv), "c" (tz)
                : "memory");
        return ret;
 }
@@ -157,7 +182,7 @@ static notrace cycle_t vread_pvclock(int *mode)
                *mode = VCLOCK_NONE;
 
        /* refer to tsc.c read_tsc() comment for rationale */
-       last = gtod->clock.cycle_last;
+       last = gtod->cycle_last;
 
        if (likely(ret >= last))
                return ret;
@@ -181,7 +206,7 @@ notrace static cycle_t vread_tsc(void)
        rdtsc_barrier();
        ret = (cycle_t)vget_cycles();
 
-       last = gtod->clock.cycle_last;
+       last = gtod->cycle_last;
 
        if (likely(ret >= last))
                return ret;
@@ -202,20 +227,40 @@ notrace static inline u64 vgetsns(int *mode)
 {
        u64 v;
        cycles_t cycles;
-       if (gtod->clock.vclock_mode == VCLOCK_TSC)
+       if (gtod->vclock_mode == VCLOCK_TSC)
                cycles = vread_tsc();
 #ifdef CONFIG_HPET_TIMER
-       else if (gtod->clock.vclock_mode == VCLOCK_HPET)
+       else if (gtod->vclock_mode == VCLOCK_HPET)
                cycles = vread_hpet();
 #endif
 #ifdef CONFIG_PARAVIRT_CLOCK
-       else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+       else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
                cycles = vread_pvclock(mode);
 #endif
        else
                return 0;
-       v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
-       return v * gtod->clock.mult;
+       v = (cycles - gtod->cycle_last) & gtod->mask;
+       return v * gtod->mult;
+}
+
+notrace static unsigned gtod_read_begin(const unsigned *s)
+{
+       unsigned ret;
+
+repeat:
+       ret = ACCESS_ONCE(*s);
+       if (unlikely(ret & 1)) {
+               cpu_relax();
+               goto repeat;
+       }
+       smp_rmb();
+       return ret;
+}
+
+notrace static int gtod_read_retry(const unsigned *s, unsigned start)
+{
+       smp_rmb();
+       return unlikely(*s != start);
 }
 
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
@@ -227,13 +272,13 @@ notrace static int __always_inline do_realtime(struct 
timespec *ts)
 
        ts->tv_nsec = 0;
        do {
-               seq = raw_read_seqcount_begin(&gtod->seq);
-               mode = gtod->clock.vclock_mode;
+               seq = gtod_read_begin(&gtod->seq);
+               mode = gtod->vclock_mode;
                ts->tv_sec = gtod->wall_time_sec;
                ns = gtod->wall_time_snsec;
                ns += vgetsns(&mode);
-               ns >>= gtod->clock.shift;
-       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+               ns >>= gtod->shift;
+       } while (unlikely(gtod_read_retry(&gtod->seq, seq)));
 
        timespec_add_ns(ts, ns);
        return mode;
@@ -247,13 +292,13 @@ notrace static int do_monotonic(struct timespec *ts)
 
        ts->tv_nsec = 0;
        do {
-               seq = raw_read_seqcount_begin(&gtod->seq);
-               mode = gtod->clock.vclock_mode;
+               seq = gtod_read_begin(&gtod->seq);
+               mode = gtod->vclock_mode;
                ts->tv_sec = gtod->monotonic_time_sec;
                ns = gtod->monotonic_time_snsec;
                ns += vgetsns(&mode);
-               ns >>= gtod->clock.shift;
-       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+               ns >>= gtod->shift;
+       } while (unlikely(gtod_read_retry(&gtod->seq, seq)));
        timespec_add_ns(ts, ns);
 
        return mode;
@@ -263,58 +308,64 @@ notrace static void do_realtime_coarse(struct timespec 
*ts)
 {
        unsigned long seq;
        do {
-               seq = raw_read_seqcount_begin(&gtod->seq);
+               seq = gtod_read_begin(&gtod->seq);
                ts->tv_sec = gtod->wall_time_coarse.tv_sec;
                ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
-       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+       } while (unlikely(gtod_read_retry(&gtod->seq, seq)));
 }
 
 notrace static void do_monotonic_coarse(struct timespec *ts)
 {
        unsigned long seq;
        do {
-               seq = raw_read_seqcount_begin(&gtod->seq);
+               seq = gtod_read_begin(&gtod->seq);
                ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
                ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
-       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+       } while (unlikely(gtod_read_retry(&gtod->seq, seq)));
 }
 
-notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct api_timespec *ts)
 {
+       struct timespec tmp;
+
        switch (clock) {
        case CLOCK_REALTIME:
-               if (do_realtime(ts) == VCLOCK_NONE)
+               if (do_realtime(&tmp) == VCLOCK_NONE)
                        goto fallback;
                break;
        case CLOCK_MONOTONIC:
-               if (do_monotonic(ts) == VCLOCK_NONE)
+               if (do_monotonic(&tmp) == VCLOCK_NONE)
                        goto fallback;
                break;
        case CLOCK_REALTIME_COARSE:
-               do_realtime_coarse(ts);
+               do_realtime_coarse(&tmp);
                break;
        case CLOCK_MONOTONIC_COARSE:
-               do_monotonic_coarse(ts);
+               do_monotonic_coarse(&tmp);
                break;
        default:
                goto fallback;
        }
 
+       ts->tv_sec = tmp.tv_sec;
+       ts->tv_nsec = tmp.tv_nsec;
+
        return 0;
 fallback:
        return vdso_fallback_gettime(clock, ts);
 }
-int clock_gettime(clockid_t, struct timespec *)
+int clock_gettime(clockid_t, struct api_timespec *)
        __attribute__((weak, alias("__vdso_clock_gettime")));
 
-notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace int __vdso_gettimeofday(struct api_timeval *tv, struct timezone *tz)
 {
+       struct timespec tmp;
+
        if (likely(tv != NULL)) {
-               BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
-                            offsetof(struct timespec, tv_nsec) ||
-                            sizeof(*tv) != sizeof(struct timespec));
                if (unlikely(do_realtime(&tmp) == VCLOCK_NONE))
                        return vdso_fallback_gtod(tv, tz);
+               tv->tv_sec = tmp.tv_sec;
+               tv->tv_usec = tmp.tv_nsec;
                tv->tv_usec /= 1000;
        }
        if (unlikely(tz != NULL)) {
@@ -325,21 +376,21 @@ notrace int __vdso_gettimeofday(struct timeval *tv, 
struct timezone *tz)
 
        return 0;
 }
-int gettimeofday(struct timeval *, struct timezone *)
+int gettimeofday(struct api_timeval *, struct timezone *)
        __attribute__((weak, alias("__vdso_gettimeofday")));
 
 /*
  * This will break when the xtime seconds get inaccurate, but that is
  * unlikely
  */
-notrace time_t __vdso_time(time_t *t)
+notrace api_time_t __vdso_time(api_time_t *t)
 {
        /* This is atomic on x86 so we don't need any locks. */
-       time_t result = ACCESS_ONCE(gtod->wall_time_sec);
+       api_time_t result = ACCESS_ONCE(gtod->wall_time_sec);
 
        if (t)
                *t = result;
        return result;
 }
-int time(time_t *t)
+int time(api_time_t *t)
        __attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c 
b/arch/x86/vdso/vdso32/vclock_gettime.c
index fab4ec6..b8a3b22 100644
--- a/arch/x86/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/vdso/vdso32/vclock_gettime.c
@@ -2,6 +2,12 @@
 
 #ifdef CONFIG_X86_64
 
+typedef signed long long       __kernel_long_t;
+typedef unsigned long long     __kernel_ulong_t;
+#define __kernel_long_t __kernel_long_t
+
+#include <generated/asm/unistd_32_ia32.h>
+
 #define _ASM_X86_PAGE_H
 
 #define __pa(x)                0
@@ -10,6 +16,11 @@
 #undef CONFIG_ILLEGAL_POINTER_VALUE
 #define CONFIG_ILLEGAL_POINTER_VALUE   0
 
+#else
+
+#define __NR_ia32_clock_gettime        __NR_clock_gettime
+#define __NR_ia32_gettimeofday __NR_gettimeofday
+
 #endif
 
 #include "../vclock_gettime.c"
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index e75e1b6..ebf3734 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -8,7 +8,7 @@
 #define _STRUCT_TIMESPEC
 struct timespec {
        __kernel_time_t tv_sec;                 /* seconds */
-       long            tv_nsec;                /* nanoseconds */
+       __kernel_long_t tv_nsec;                /* nanoseconds */
 };
 #endif
 
-- 
1.8.5.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to