Using __iter_div_u64_rem() is suboptimal on 32 bit architectures: the
nanosecond value fits in 32 bits, and VDSO data is updated every 10ms,
so nsec will never overflow 32 bits.
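As a rough illustrative bound, spelling out the arithmetic:

    base nanoseconds         <  ~NSEC_PER_SEC  =  1e9
    delta per 10ms update    <= ~1e7 ns
    total nsec               <  ~1.01e9  <  2^32  ~=  4.29e9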
Add an equivalent of __iter_div_u64_rem() based on unsigned long, to
better fit 32 bit arches.

Before:
gettimeofday: vdso: 1078 nsec/call
clock-gettime-monotonic-raw: vdso: 1317 nsec/call
clock-gettime-monotonic: vdso: 1255 nsec/call

After:
gettimeofday: vdso: 1032 nsec/call
clock-gettime-monotonic-raw: vdso: 1312 nsec/call
clock-gettime-monotonic: vdso: 1243 nsec/call

Signed-off-by: Christophe Leroy <christophe.le...@c-s.fr>
---
 lib/vdso/gettimeofday.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index decd3f2b37af..da15a8842825 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -38,12 +38,32 @@ u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
 }
 #endif
 
+static __always_inline u32
+__iter_div_ulong_rem(unsigned long dividend, u32 divisor, unsigned long *remainder)
+{
+	u32 ret = 0;
+
+	while (dividend >= divisor) {
+		/* The following asm() prevents the compiler from
+		   optimising this loop into a modulo operation. */
+		asm("" : "+rm"(dividend));
+
+		dividend -= divisor;
+		ret++;
+	}
+
+	*remainder = dividend;
+
+	return ret;
+}
+
 static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
 				   struct __kernel_timespec *ts)
 {
 	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
 	u64 cycles, last, sec, ns;
 	u32 seq;
+	unsigned long nsec;
 
 	do {
 		seq = vdso_read_begin(vd);
@@ -54,7 +74,7 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
 			return -1;
 
 		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
-		ns >>= vd->shift;
+		nsec = ns >> vd->shift;
 		sec = vdso_ts->sec;
 	} while (unlikely(vdso_read_retry(vd, seq)));
 
@@ -62,8 +82,8 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
 	 * Do this outside the loop: a race inside the loop could result
 	 * in __iter_div_u64_rem() being extremely slow.
 	 */
-	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
-	ts->tv_nsec = ns;
+	ts->tv_sec = sec + __iter_div_ulong_rem(nsec, NSEC_PER_SEC, &nsec);
+	ts->tv_nsec = nsec;
 
 	return 0;
 }
-- 
2.13.3
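For anyone wanting to exercise the helper outside the kernel, here is a
minimal standalone sketch (not part of the patch; it assumes GCC/Clang
with GNU inline asm, and defines NSEC_PER_SEC and a u32 typedef locally
for the test):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000UL

typedef uint32_t u32;

static inline u32
__iter_div_ulong_rem(unsigned long dividend, u32 divisor, unsigned long *remainder)
{
	u32 ret = 0;

	while (dividend >= divisor) {
		/* Keep the compiler from collapsing the loop into a modulo. */
		asm("" : "+rm"(dividend));

		dividend -= divisor;
		ret++;
	}

	*remainder = dividend;

	return ret;
}

int main(void)
{
	unsigned long rem;

	/* Just under two seconds worth of nanoseconds: one iteration. */
	assert(__iter_div_ulong_rem(1999999999UL, NSEC_PER_SEC, &rem) == 1);
	assert(rem == 999999999UL);

	/* Dividend below the divisor: zero iterations, value untouched. */
	assert(__iter_div_ulong_rem(42UL, NSEC_PER_SEC, &rem) == 0);
	assert(rem == 42UL);

	printf("__iter_div_ulong_rem: ok\n");
	return 0;
}

The empty asm() with the "+rm" constraint makes the compiler treat
dividend as modified, so the subtraction loop survives optimisation
instead of being turned into a division; since nsec is bounded to about
two seconds worth of nanoseconds, the loop body runs at most a couple
of times, which is cheaper than a division on many 32 bit cores.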