The hrtimer_interrupt hang logic adjusts min_delta_ns based on the
execution time of the hrtimer callbacks.

This is error-prone for virtual machines, where a guest vcpu can be
scheduled out during the execution of the callbacks (and the callbacks
themselves can do operations that translate to blocking operations in
the hypervisor), which in turn can lead to a large min_delta_ns,
rendering the system unusable.

Change the logic to simply schedule the next interrupt using the 1/4 
ratio, while keeping min_delta_ns intact.

Reported-by: Michael Tokarev <m...@tls.msk.ru>
Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com>

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index ff037f0..88a8ca5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -245,6 +245,8 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
 #ifdef CONFIG_HIGH_RES_TIMERS
 struct clock_event_device;
 
+extern int hrtimer_interrupt_hang;
+
 extern void clock_was_set(void);
 extern void hres_timers_resume(void);
 extern void hrtimer_interrupt(struct clock_event_device *dev);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6d70204..6b81888 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1219,29 +1219,17 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
-static int force_clock_reprogram;
-
 /*
  * After 5 iteration's attempts, we consider that hrtimer_interrupt()
  * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
+ * such as the tracing. 
  * The next tick event will be scheduled to 3 times we currently spend on
  * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
  * 1/4 of their time to process the hrtimer interrupts. This is enough to
  * let it running without serious starvation.
  */
+int __read_mostly hrtimer_interrupt_hang;
 
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
-                       ktime_t try_time)
-{
-       force_clock_reprogram = 1;
-       dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
-       printk(KERN_WARNING "hrtimer: interrupt too slow, "
-               "forcing clock min delta to %lu ns\n", dev->min_delta_ns);
-}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1260,8 +1248,27 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
  retry:
        /* 5 retries is enough to notice a hang */
-       if (!(++nr_retries % 5))
-               hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
+       if (!(++nr_retries % 5)) {
+               int tries = 0;
+               ktime_t try_time = ktime_sub(ktime_get(), now);
+
+               if (ktime_to_ns(try_time) < dev->min_delta_ns)
+                       try_time = ns_to_ktime(dev->min_delta_ns);
+
+               do {
+                       tries++;
+                       for (i = 0; i < 3*tries; i++)
+                               expires_next = ktime_add(expires_next,try_time);
+
+                       printk_once(KERN_WARNING "hrtimer: interrupt too slow, "
+                                       "scheduling tick %lld ns ahead\n",
+                                       ktime_to_ns(ktime_sub(expires_next,
+                                                   ktime_get())));
+               } while (tick_program_event(expires_next, 1));
+
+               hrtimer_interrupt_hang++;
+               return;
+       }
 
        now = ktime_get();
 
@@ -1327,7 +1334,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
        /* Reprogramming necessary ? */
        if (expires_next.tv64 != KTIME_MAX) {
-               if (tick_program_event(expires_next, force_clock_reprogram))
+               if (tick_program_event(expires_next, 0))
                        goto retry;
        }
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0d949c5..7223853 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1032,6 +1032,17 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dointvec,
        },
 #endif
+#ifdef CONFIG_HIGH_RES_TIMERS
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "hrtimer_interrupt_hang",
+               .data           = &hrtimer_interrupt_hang,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+       
+       },
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to