From: Wenbo Wang <[email protected]>

Signed-off-by: Wenbo Wang <[email protected]>
Reviewed-by: Chong Yuan <[email protected]>

Currently __run_timers() increases jiffie by one in every loop. This is
very slow when the jiffie gap is large.

The following test is done on idle cpu 15 (isolated by isolcpus= kernel
option). There is a clocksource_watchdog timer expires in every 20s.

run_timer_softirq() (invokes __run_timers) takes 120+us to catch up jiffies.

[root@localhost tracing]# cat trace_pipe
450.328663 |    15) ! 124.138 us  |  run_timer_softirq();
470.318675 |    15) ! 128.002 us  |  run_timer_softirq();
490.308875 |    15)               |  run_timer_softirq() {
490.309048 |    15) ! 164.647 us  |  }
510.299059 |    15)               |  run_timer_softirq() {
510.299230 |    15) ! 162.307 us  |  }

After applying this patch, the same softirq takes less than 20us to complete
the same work.

[root@localhost tracing]# cat trace_pipe
430.058585 |    15) + 11.228 us   |  run_timer_softirq();
450.048683 |    15)   7.936 us    |  run_timer_softirq();
470.038760 |    15) + 19.443 us   |  run_timer_softirq();
490.028856 |    15) + 11.361 us   |  run_timer_softirq();
510.018953 |    15) + 10.085 us   |  run_timer_softirq();
530.009038 |    15) + 10.075 us   |  run_timer_softirq();
550.000242 |    15) + 10.958 us   |  run_timer_softirq();
---
 kernel/time/timer.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2d3f5c5..15da6a4 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -88,6 +88,7 @@ struct tvec_base {
        struct tvec tv3;
        struct tvec tv4;
        struct tvec tv5;
+       int tv1_cnt;    /* number of newly inserted timers in tv1 */
 } ____cacheline_aligned;
 
 struct tvec_base boot_tvec_bases;
@@ -363,6 +364,7 @@ __internal_add_timer(struct tvec_base *base, struct 
timer_list *timer)
        if (idx < TVR_SIZE) {
                int i = expires & TVR_MASK;
                vec = base->tv1.vec + i;
+               base->tv1_cnt++;
        } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
                int i = (expires >> TVR_BITS) & TVN_MASK;
                vec = base->tv2.vec + i;
@@ -378,6 +380,7 @@ __internal_add_timer(struct tvec_base *base, struct 
timer_list *timer)
                 * or you set a timer to go off in the past
                 */
                vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
+               base->tv1_cnt++;
        } else {
                int i;
                /* If the timeout is larger than MAX_TVAL (on 64-bit
@@ -1174,6 +1177,7 @@ static inline void __run_timers(struct tvec_base *base)
                spin_unlock_irq(&base->lock);
                return;
        }
+
        while (time_after_eq(jiffies, base->timer_jiffies)) {
                struct list_head work_list;
                struct list_head *head = &work_list;
@@ -1182,11 +1186,29 @@ static inline void __run_timers(struct tvec_base *base)
                /*
                 * Cascade timers:
                 */
-               if (!index &&
-                       (!cascade(base, &base->tv2, INDEX(0))) &&
-                               (!cascade(base, &base->tv3, INDEX(1))) &&
-                                       !cascade(base, &base->tv4, INDEX(2)))
-                       cascade(base, &base->tv5, INDEX(3));
+               if (unlikely(!index)) {
+                       if (!cascade(base, &base->tv2, INDEX(0))) {
+                               if ((!cascade(base, &base->tv3, INDEX(1))) &&
+                                               !cascade(base, &base->tv4, 
INDEX(2)))
+                                       cascade(base, &base->tv5, INDEX(3));
+                       }
+
+                       /*
+                        * We are just crossing the boundary of tv1 (usually 
256 jiffies).
+                        * Since there was no new timers inserted to tv1 and 
all the old
+                        * timers have been processed, it is guaranteed that 
tv1 has no
+                        * pending timers, so jiffie can jump to the next 
boundary at
+                        * most.
+                        */
+                       if (base->tv1_cnt == 0) {
+                               base->timer_jiffies += min_t(unsigned long,
+                                               TVR_SIZE - index, jiffies - 
base->timer_jiffies + 1);
+                                       continue;
+                       }
+
+                       base->tv1_cnt = 0;
+               }
+
                ++base->timer_jiffies;
                list_replace_init(base->tv1.vec + index, head);
                while (!list_empty(head)) {
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to