Hi Dietmar,

Sorry for the late reply.

On 31 May 2018 at 18:54, Dietmar Eggemann <[email protected]> wrote:
> On 05/30/2018 08:45 PM, Vincent Guittot wrote:
>> Hi Dietmar,
>>
>> On 30 May 2018 at 17:55, Dietmar Eggemann <[email protected]> wrote:
>>> On 05/25/2018 03:12 PM, Vincent Guittot wrote:
>
> [...]
>
>>>> +        */
>>>> +       ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq,
>>>> +                               0,
>>>> +                               0,
>>>> +                               0);
>>>> +       ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq,
>>>> +                               1,
>>>> +                               1,
>>>> +                               1);
>
> Can you not change the function parameter list to the usual
> (u64 now, struct rq *rq, int running)?
>
> Something like this (only compile and boot tested):

To be honest, I prefer to keep the specific sequence above in a
dedicated function instead of open coding it in core code.
Furthermore, we would end up calling ___update_load_avg() twice instead
of only once. This sets an intermediate and incorrect value in
util_avg, and that value can be read in the meantime.
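
To illustrate the point, here is a minimal sketch of that dedicated
function, with the helper signatures taken from the quoted patch rather
than from any final code: both partial sum updates run back to back and
___update_load_avg() runs exactly once at the end, so a concurrent
reader of rq->avg_irq never observes the decayed-only intermediate
state.

	int update_irq_load_avg(struct rq *rq, u64 running)
	{
		int ret;

		/* Decay over the non-interrupt window: all weights 0. */
		ret = ___update_load_sum(rq->clock - running, rq->cpu,
					 &rq->avg_irq, 0, 0, 0);
		/* Accrue the interrupt window at full weight. */
		ret += ___update_load_sum(rq->clock, rq->cpu,
					  &rq->avg_irq, 1, 1, 1);

		/* Fold into util_avg only once, after both steps. */
		if (ret)
			___update_load_avg(&rq->avg_irq, 1, 1);

		return ret;
	}

With the two-call variant below, ___update_load_avg() also runs after
the decay-only step, so anything sampling rq->avg_irq.util_avg between
the two update_irq_load_avg() calls sees a transiently deflated value.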

Vincent

>
> -- >8 --
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 9894bc7af37e..26ffd585cab8 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -177,8 +177,22 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
>         rq->clock_task += delta;
>
>  #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
> -       if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
> -               update_irq_load_avg(rq, irq_delta + steal);
> +       if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) {
> +               /*
> +                * We know the time that has been used by interrupt since last
> +                * update but we don't know when. Let's be pessimistic and
> +                * assume that the interrupt has happened just before the
> +                * update. This is not so far from reality because the
> +                * interrupt will most probably wake up a task and trigger an
> +                * update of rq clock during which the metric is updated.
> +                * We start to decay with normal context time and then we add
> +                * the interrupt context time.
> +                * We can safely remove running from rq->clock because
> +                * rq->clock += delta with delta >= running
> +                */
> +               update_irq_load_avg(rq_clock(rq) - (irq_delta + steal), rq, 0);
> +               update_irq_load_avg(rq_clock(rq), rq, 1);
> +       }
>  #endif
>  }
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 1bb3379c4b71..a245f853c271 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7363,7 +7363,7 @@ static void update_blocked_averages(int cpu)
>         }
>         update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
>         update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
> -       update_irq_load_avg(rq, 0);
> +       update_irq_load_avg(rq_clock(rq), rq, 0);
>         /* Don't need periodic decay once load/util_avg are null */
>         if (others_rqs_have_blocked(rq))
>                 done = false;
> @@ -7434,7 +7434,7 @@ static inline void update_blocked_averages(int cpu)
>         update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
>         update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
>         update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
> -       update_irq_load_avg(rq, 0);
> +       update_irq_load_avg(rq_clock(rq), rq, 0);
>  #ifdef CONFIG_NO_HZ_COMMON
>         rq->last_blocked_load_update_tick = jiffies;
>         if (!cfs_rq_has_blocked(cfs_rq) && !others_rqs_have_blocked(rq))
> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
> index d2e4f2186b13..ae01bb18e28c 100644
> --- a/kernel/sched/pelt.c
> +++ b/kernel/sched/pelt.c
> @@ -365,31 +365,15 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
>   *
>   */
>
> -int update_irq_load_avg(struct rq *rq, u64 running)
> +int update_irq_load_avg(u64 now, struct rq *rq, int running)
>  {
> -       int ret = 0;
> -       /*
> -        * We know the time that has been used by interrupt since last update
> -        * but we don't when. Let be pessimistic and assume that interrupt has
> -        * happened just before the update. This is not so far from reality
> -        * because interrupt will most probably wake up task and trig an update
> -        * of rq clock during which the metric si updated.
> -        * We start to decay with normal context time and then we add the
> -        * interrupt context time.
> -        * We can safely remove running from rq->clock because
> -        * rq->clock += delta with delta >= running
> -        */
> -       ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq,
> -                               0,
> -                               0,
> -                               0);
> -       ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq,
> -                               1,
> -                               1,
> -                               1);
> -
> -       if (ret)
> +       if (___update_load_sum(now, rq->cpu, &rq->avg_irq,
> +                               running,
> +                               running,
> +                               running)) {
>                 ___update_load_avg(&rq->avg_irq, 1, 1);
> +               return 1;
> +       }
>
> -       return ret;
> +       return 0;
>  }
> diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
> index 0ce9a5a5877a..ebc57301a9a8 100644
> --- a/kernel/sched/pelt.h
> +++ b/kernel/sched/pelt.h
> @@ -5,7 +5,7 @@ int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_e
>  int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq);
>  int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);
>  int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);
> -int update_irq_load_avg(struct rq *rq, u64 running);
> +int update_irq_load_avg(u64 now, struct rq *rq, int running);
>
>  /*
>   * When a task is dequeued, its estimated utilization should not be update if
>
>
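
To make the window concrete, consider a hypothetical reader (the helper
name and the READ_ONCE() usage below are my own illustration, not part
of the patch; any code sampling rq->avg_irq.util_avg concurrently is
exposed the same way):

	/* Hypothetical reader of the irq PELT signal. */
	static inline unsigned long irq_util(struct rq *rq)
	{
		return READ_ONCE(rq->avg_irq.util_avg);
	}

If this runs between the update_irq_load_avg(..., 0) and
update_irq_load_avg(..., 1) calls in update_rq_clock_task() above, it
reads util_avg after the decay-only step, i.e. a value lower than both
the previous and the final one.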
