Setting the realtime clock triggers an IPI to all CPUs to reprogram
the clock event device.

However, only realtime and TAI clocks have their offsets updated
(and therefore potentially require a reprogram).

Instead of sending an IPI unconditionally, check each per CPU hrtimer base
whether it has active timers in the CLOCK_REALTIME and CLOCK_TAI bases. If
that's not the case, update the realtime and TAI base offsets remotely and
skip the IPI. This ensures that any subsequently armed timers on
CLOCK_REALTIME and CLOCK_TAI are evaluated with the correct offsets.

Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com>

---

v6:
  - Do not take softirq_raised into account (Peter Xu).
  - Include BOOTTIME as base that requires IPI (Thomas).
  - Unconditional reprogram on resume path, since there is
    nothing to gain in such path anyway.

v5:
  - Add missing hrtimer_update_base (Peter Xu).

v4:
   - Drop unused code (Thomas).

v3:
   - Nicer changelog  (Thomas).
   - Code style fixes (Thomas).
   - Compilation warning with CONFIG_HIGH_RES_TIMERS=n (Thomas).
   - Shrink preemption disabled section (Thomas).

v2:
   - Only REALTIME and TAI bases are affected by offset-to-monotonic changes 
(Thomas).
   - Don't special case nohz_full CPUs (Thomas).


diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bb5e7b0a4274..14a6e449b221 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -318,7 +318,7 @@ struct clock_event_device;
 
 extern void hrtimer_interrupt(struct clock_event_device *dev);
 
-extern void clock_was_set_delayed(void);
+extern void clock_was_set_delayed(bool force_reprogram);
 
 extern unsigned int hrtimer_resolution;
 
@@ -326,7 +326,7 @@ extern unsigned int hrtimer_resolution;
 
 #define hrtimer_resolution     (unsigned int)LOW_RES_NSEC
 
-static inline void clock_was_set_delayed(void) { }
+static inline void clock_was_set_delayed(bool force_reprogram) { }
 
 #endif
 
@@ -351,7 +351,7 @@ hrtimer_expires_remaining_adjusted(const struct hrtimer 
*timer)
                                                    timer->base->get_time());
 }
 
-extern void clock_was_set(void);
+extern void clock_was_set(bool);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
 #else
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5c9d968187ae..2258782fd714 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -758,9 +758,17 @@ static void hrtimer_switch_to_hres(void)
        retrigger_next_event(NULL);
 }
 
+static void clock_was_set_force_reprogram_work(struct work_struct *work)
+{
+       clock_was_set(true);
+}
+
+static DECLARE_WORK(hrtimer_force_reprogram_work, 
clock_was_set_force_reprogram_work);
+
+
 static void clock_was_set_work(struct work_struct *work)
 {
-       clock_was_set();
+       clock_was_set(false);
 }
 
 static DECLARE_WORK(hrtimer_work, clock_was_set_work);
@@ -769,9 +777,12 @@ static DECLARE_WORK(hrtimer_work, clock_was_set_work);
  * Called from timekeeping and resume code to reprogram the hrtimer
  * interrupt device on all cpus.
  */
-void clock_was_set_delayed(void)
+void clock_was_set_delayed(bool force_reprogram)
 {
-       schedule_work(&hrtimer_work);
+       if (force_reprogram)
+               schedule_work(&hrtimer_force_reprogram_work);
+       else
+               schedule_work(&hrtimer_work);
 }
 
 #else
@@ -871,6 +882,18 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool 
reprogram)
        tick_program_event(expires, 1);
 }
 
+#define CLOCK_SET_BASES ((1U << HRTIMER_BASE_REALTIME) |       \
+                        (1U << HRTIMER_BASE_REALTIME_SOFT) |   \
+                        (1U << HRTIMER_BASE_TAI) |             \
+                        (1U << HRTIMER_BASE_TAI_SOFT) |        \
+                        (1U << HRTIMER_BASE_BOOTTIME) |        \
+                        (1U << HRTIMER_BASE_BOOTTIME_SOFT))
+
+static bool need_reprogram_timer(struct hrtimer_cpu_base *cpu_base)
+{
+       return (cpu_base->active_bases & CLOCK_SET_BASES) != 0;
+}
+
 /*
  * Clock realtime was set
  *
@@ -882,11 +905,42 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool 
reprogram)
  * resolution timer interrupts. On UP we just disable interrupts and
  * call the high resolution interrupt code.
  */
-void clock_was_set(void)
+void clock_was_set(bool force_reprogram)
 {
 #ifdef CONFIG_HIGH_RES_TIMERS
-       /* Retrigger the CPU local events everywhere */
-       on_each_cpu(retrigger_next_event, NULL, 1);
+       cpumask_var_t mask;
+       int cpu;
+
+       if (force_reprogram == true) {
+               on_each_cpu(retrigger_next_event, NULL, 1);
+               goto set_timerfd;
+       }
+
+       if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+               on_each_cpu(retrigger_next_event, NULL, 1);
+               goto set_timerfd;
+       }
+
+       /* Avoid interrupting CPUs if possible */
+       cpus_read_lock();
+       for_each_online_cpu(cpu) {
+               unsigned long flags;
+               struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, 
cpu);
+
+               raw_spin_lock_irqsave(&cpu_base->lock, flags);
+               if (need_reprogram_timer(cpu_base))
+                       cpumask_set_cpu(cpu, mask);
+               else
+                       hrtimer_update_base(cpu_base);
+               raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+       }
+
+       preempt_disable();
+       smp_call_function_many(mask, retrigger_next_event, NULL, 1);
+       preempt_enable();
+       cpus_read_unlock();
+       free_cpumask_var(mask);
+set_timerfd:
 #endif
        timerfd_clock_was_set();
 }
@@ -903,7 +957,7 @@ void hrtimers_resume(void)
        /* Retrigger on the local CPU */
        retrigger_next_event(NULL);
        /* And schedule a retrigger for all others */
-       clock_was_set_delayed();
+       clock_was_set_delayed(true);
 }
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6aee5768c86f..3fef237267bd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1323,7 +1323,7 @@ int do_settimeofday64(const struct timespec64 *ts)
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        /* signal hrtimers about time change */
-       clock_was_set();
+       clock_was_set(false);
 
        if (!ret)
                audit_tk_injoffset(ts_delta);
@@ -1371,7 +1371,7 @@ static int timekeeping_inject_offset(const struct 
timespec64 *ts)
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        /* signal hrtimers about time change */
-       clock_was_set();
+       clock_was_set(false);
 
        return ret;
 }
@@ -1736,7 +1736,7 @@ void timekeeping_inject_sleeptime64(const struct 
timespec64 *delta)
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        /* signal hrtimers about time change */
-       clock_was_set();
+       clock_was_set(true);
 }
 #endif
 
@@ -2187,7 +2187,7 @@ static void timekeeping_advance(enum timekeeping_adv_mode 
mode)
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
        if (clock_set)
                /* Have to call _delayed version, since in irq context*/
-               clock_was_set_delayed();
+               clock_was_set_delayed(false);
 }
 
 /**
@@ -2425,7 +2425,7 @@ int do_adjtimex(struct __kernel_timex *txc)
                timekeeping_advance(TK_ADV_FREQ);
 
        if (tai != orig_tai)
-               clock_was_set();
+               clock_was_set(false);
 
        ntp_notify_cmos_timer();
 

Reply via email to