Before converting the given delta from ns to cycles by means of the
mult/shift pair, clockevents_program_event() enforces it to be less or
equal than ->max_delta_ns. Simplified, this reads as

  delta = min(delta, dev->max_delta_ns);
  clc = (delta * dev->mult) >> dev->shift;

A device's ->max_delta_ns is chosen such that
1.) The multiplication does not overflow 64 bits.
2.) clc fits into an unsigned long.
3.) clc <= the ->max_delta_ticks allowed by hardware.

Item 3.) is responsible for making ->max_delta_ns depend tightly on
->max_delta_ticks and the device's frequency, i.e. its mult/shift pair.
As it stands, any update to ->mult would require a corresponding change of
->max_delta_ns as well and these two had to get consumed in the clockevent
programming path atomically. This would have a negative performance impact
as soon as we start adjusting ->mult from a different CPU with upcoming
changes making the clockevents core NTP correction aware: some kind of
locking would have been needed in the event programming path.

In order to circumvent this necessity, ->max_delta_ns could be made small
enough to cover the whole range of possible adjustments to ->mult,
eliminating the need to update it at all.

The timekeeping core never adjusts its mono clock's inverse frequency by
more than 11% (c.f. timekeeping_adjust()). This means that a clockevent
device's adjusted mult will never grow beyond 1 / (1-11%) = 112.4% of its
initial value. Thus, reducing ->max_delta_ns unconditionally to
1/112.4% = 89% would do the trick. (Actually, setting it to 8/9 = 88.89%
thereof allows for mult increases of up to 12.5% = 1/8 which is good for
binary arithmetic.)

However, such a decrease of ->max_delta_ns could become problematic for
devices whose ->max_delta_ticks is small, i.e. those for whom item 3.)
prevails.

In order to work around this, I chose to make ->max_delta_ns completely
independent of ->max_delta_ticks. It is set to 8/9 of the maximum value
satisfying conditions 1.) and 2.) for the given ->mult. Item 3.) is made
explicit by introducing an extra min(clc, ->max_delta_ticks) after the ns
to cycles conversion in clockevents_program_event(). This way, the maximum
programmable delta is reduced only for those devices which have got a large
->max_delta_ticks anyway. This comes at the cost of an extra min() in the
event programming path though.

So,
- In the case of CLOCK_EVT_FEAT_NO_ADJUST not being enabled, set
  ->max_delta_ns to 8/9 of the maximum possible value such that items 1.)
  and 2.) hold for the given ->mult. OTOH, if CLOCK_EVT_FEAT_NO_ADJUST
  is not set, set ->max_delta_ns to the full such value.

- Add a

    clc = min(clc, dev->max_delta_ticks)

  after the ns to cycle conversion in clockevents_program_event().

- Move the ->max_delta_ticks member into struct clock_event_device's
  first 64 bytes in order to optimize for cacheline usage.

- Finally, preserve the semantics of /proc/timer_list by letting it display
  the minimum of ->max_delta_ns and ->max_delta_ticks converted to ns at
  the 'max_delta_ns' field.

Signed-off-by: Nicolai Stange <nicsta...@gmail.com>
---
 include/linux/clockchips.h |  4 ++--
 kernel/time/clockevents.c  | 50 +++++++++++++++++++++++++++++++++++++++++++++-
 kernel/time/timer_list.c   |  6 +++++-
 3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 9a25185..be5f222 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -81,6 +81,7 @@ enum clock_event_state {
  * @next_event:                local storage for the next event in oneshot mode
  * @max_delta_ns:      maximum delta value in ns
  * @min_delta_ns:      minimum delta value in ns
+ * @max_delta_ticks:   maximum delta value in ticks
  * @mult:              nanosecond to cycles multiplier
  * @shift:             nanoseconds to cycles divisor (power of two)
  * @state_use_accessors:current state of the device, assigned by the core code
@@ -93,7 +94,6 @@ enum clock_event_state {
  * @tick_resume:       resume clkevt device
  * @broadcast:         function to broadcast events
  * @min_delta_ticks:   minimum delta value in ticks stored for reconfiguration
- * @max_delta_ticks:   maximum delta value in ticks stored for reconfiguration
  * @name:              ptr to clock event name
  * @rating:            variable to rate clock event devices
  * @irq:               IRQ number (only for non CPU local devices)
@@ -109,6 +109,7 @@ struct clock_event_device {
        ktime_t                 next_event;
        u64                     max_delta_ns;
        u64                     min_delta_ns;
+       unsigned long           max_delta_ticks;
        u32                     mult;
        u32                     shift;
        enum clock_event_state  state_use_accessors;
@@ -125,7 +126,6 @@ struct clock_event_device {
        void                    (*suspend)(struct clock_event_device *);
        void                    (*resume)(struct clock_event_device *);
        unsigned long           min_delta_ticks;
-       unsigned long           max_delta_ticks;
 
        const char              *name;
        int                     rating;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index f352f54..472fcc2 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/smp.h>
 #include <linux/device.h>
+#include <asm/bitsperlong.h>
 
 #include "tick-internal.h"
 
@@ -336,6 +337,9 @@ int clockevents_program_event(struct clock_event_device 
*dev, ktime_t expires,
        delta = max(delta, (int64_t) dev->min_delta_ns);
 
        clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
+
+       clc = min_t(unsigned long, clc, dev->max_delta_ticks);
+
        rc = dev->set_next_event((unsigned long) clc, dev);
 
        return (rc && force) ? clockevents_program_min_delta(dev) : rc;
@@ -444,15 +448,59 @@ EXPORT_SYMBOL_GPL(clockevents_unbind_device);
 
 static void __clockevents_update_bounds(struct clock_event_device *dev)
 {
+       u64 max_delta_ns;
+
        if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
                return;
 
        /*
+        * max_delta_ns is <= the maximum value such that the ns to
+        * cycles conversion in clockevents_program_event() doesn't
+        * overflow. It follows that is has to fulfill the following
+        * constraints:
+        * 1.) mult * max_delta_ns <= ULLONG_MAX
+        * 2.) (mult * max_delta_ns) >> shift <= ULONG_MAX
+        * On 32 bit archs, 2.) is stricter because shift <= 32 always
+        * holds, otherwise 1.) is.
+        *
+        * If CLOCK_EVT_FEAT_NO_ADJUST is not set, the actual
+        * ->max_delta_ns is set to a value equal to 8/9 of the
+        * maximum one possible. This gives some room for increasing
+        * mult by up to 12.5% which is just enough to compensate for
+        * the up to 11% mono clock frequency adjustments made by the
+        * timekeeping core, c.f. timekeeping_adjust().
+        *
+        */
+#if BITS_PER_LONG == 32
+       /*
+        * Set the lower BITS_PER_LONG + dev->shift bits.
+        * Note: dev->shift can be 32, so avoid undefined behaviour.
+        * The ^= below is really a |= here. However the former is the
+        * common idiom and recognizable by gcc.
+        */
+       max_delta_ns = (u64)1 << (BITS_PER_LONG + dev->shift - 1);
+       max_delta_ns ^= (dev->max_delta_ns - 1);
+#else
+       max_delta_ns = U64_MAX;
+#endif
+       if (unlikely(!dev->mult)) {
+               dev->mult = 1;
+               WARN_ON(1);
+       }
+       do_div(max_delta_ns, dev->mult);
+       dev->max_delta_ns = max_delta_ns;
+       if (!(dev->features & CLOCK_EVT_FEAT_NO_ADJUST)) {
+               /* reduce by 1/9 */
+               do_div(max_delta_ns, 9);
+               ++max_delta_ns; /* round up */
+               dev->max_delta_ns -= max_delta_ns;
+       }
+
+       /*
         * cev_delta2ns() never returns values less than 1us and thus,
         * we'll never program any ced with anything less.
         */
        dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false);
-       dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true);
 }
 
 /**
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ba7d8b2..ac20d4c 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -206,6 +206,10 @@ static void
 print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 {
        struct clock_event_device *dev = td->evtdev;
+       u64 max_delta_ns;
+
+       max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev);
+       max_delta_ns = min(max_delta_ns, dev->max_delta_ns);
 
        SEQ_printf(m, "Tick Device: mode:     %d\n", td->mode);
        if (cpu < 0)
@@ -220,7 +224,7 @@ print_tickdevice(struct seq_file *m, struct tick_device 
*td, int cpu)
        }
        SEQ_printf(m, "%s\n", dev->name);
        SEQ_printf(m, " max_delta_ns:   %llu\n",
-                  (unsigned long long) dev->max_delta_ns);
+                  (unsigned long long) max_delta_ns);
        SEQ_printf(m, " min_delta_ns:   %llu\n",
                   (unsigned long long) dev->min_delta_ns);
        SEQ_printf(m, " mult:           %u\n", dev->mult);
-- 
2.10.0

Reply via email to