From: Peter Zijlstra <pet...@infradead.org>

As reported by Leo; the existing implementation is broken when the
clock and counter don't intersect at 0.

Use the sched_clock's struct clock_read_data information to correctly
implement cap_user_time and cap_user_time_zero.

Note that the ARM64 counter is architecturally only guaranteed to be
56bit wide (implementations are allowed to be wider) and the existing
perf ABI cannot deal with wrap-around.

This implementation should also be faster than the old; seeing how we
don't need to recompute mult and shift all the time.

[leoyan: Use mul_u64_u32_shr() to convert cyc to ns to avoid overflow]

Reported-by: Leo Yan <leo....@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Signed-off-by: Leo Yan <leo....@linaro.org>
---
 arch/arm64/kernel/perf_event.c | 38 ++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 4d7879484cec..47db6c7cae6a 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -19,6 +19,7 @@
 #include <linux/of.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
+#include <linux/sched_clock.h>
 #include <linux/smp.h>
 
 /* ARMv8 Cortex-A53 specific event types. */
@@ -1165,28 +1166,47 @@ device_initcall(armv8_pmu_driver_init)
 void arch_perf_update_userpage(struct perf_event *event,
                               struct perf_event_mmap_page *userpg, u64 now)
 {
-       u32 freq;
-       u32 shift;
+       struct clock_read_data *rd;
+       unsigned int seq;
+       u64 ns;
 
        /*
         * Internal timekeeping for enabled/running/stopped times
         * is always computed with the sched_clock.
         */
-       freq = arch_timer_get_rate();
        userpg->cap_user_time = 1;
+       userpg->cap_user_time_zero = 1;
+
+       do {
+               rd = sched_clock_read_begin(&seq);
+
+               userpg->time_mult = rd->mult;
+               userpg->time_shift = rd->shift;
+               userpg->time_zero = rd->epoch_ns;
+
+               /*
+                * This isn't strictly correct, the ARM64 counter can be
+                * 'short' and then we get funnies when it wraps. The correct
+                * thing would be to extend the perf ABI with a cycle and mask
+                * value, but because wrapping on ARM64 is very rare in
+                * practise this 'works'.
+                */
+               ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+               userpg->time_zero -= ns;
+
+       } while (sched_clock_read_retry(seq));
+
+       userpg->time_offset = userpg->time_zero - now;
 
-       clocks_calc_mult_shift(&userpg->time_mult, &shift, freq,
-                       NSEC_PER_SEC, 0);
        /*
         * time_shift is not expected to be greater than 31 due to
         * the original published conversion algorithm shifting a
         * 32-bit value (now specifies a 64-bit value) - refer
         * perf_event_mmap_page documentation in perf_event.h.
         */
-       if (shift == 32) {
-               shift = 31;
+       if (userpg->time_shift == 32) {
+               userpg->time_shift = 31;
                userpg->time_mult >>= 1;
        }
-       userpg->time_shift = (u16)shift;
-       userpg->time_offset = -now;
+
 }
-- 
2.17.1

Reply via email to