On 09/12/2020 11:44, Xuewen Yan wrote:
> When a task is dequeued, its util is updated, and cfs_rq_util_change()
> then looks at the rq's util. If cfs_rq->avg.util_est.enqueued is bigger
> than cfs_rq->avg.util_avg, but cfs_rq->avg.util_est.enqueued has not
> been decreased yet, the cfs_rq util is overestimated, and as a result
> cfs_rq_util_change() may change the frequency unreasonably.
> 
> Separate util_est_dequeue() into util_est_dequeue() and
> util_est_update(), and subtract _task_util_est(p) before updating util.

I assume this patch header needs a little more substance so that less
involved folks understand the issue as well. Describing the testcase
which reveals the problem would help here too. 
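For the less involved folks, the relevant consumer here is schedutil: it
sizes the frequency request from cpu_util_cfs(), which (roughly, as of
the kernels this patch targets) takes the max of the root cfs_rq's
util_avg and util_est.enqueued:

static inline unsigned long cpu_util_cfs(struct rq *rq)
{
	unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);

	if (sched_feat(UTIL_EST)) {
		util = max_t(unsigned long, util,
			     READ_ONCE(rq->cfs.avg.util_est.enqueued));
	}

	return util;
}

So if cfs_rq_util_change() fires from the util_avg update while the
dequeued task's contribution is still included in util_est.enqueued, the
max() picks up a stale, inflated estimate and schedutil may request a
higher frequency than needed.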

> Signed-off-by: Xuewen Yan <[email protected]>
> ---
>  kernel/sched/fair.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index ae7ceba..20ecfd5 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3946,11 +3946,9 @@ static inline bool within_margin(int value, int margin)
>  }
>  
>  static void
> -util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
> +util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p)

Not sure why util_est_enqueue() is inline but util_est_dequeue() and
util_est_update() aren't?

>  {
> -     long last_ewma_diff;
>       struct util_est ue;

You would just need an 'unsigned int enqueued' here, like in util_est_enqueue().

> -     int cpu;
>  
>       if (!sched_feat(UTIL_EST))
>               return;
> @@ -3961,6 +3959,17 @@ static inline bool within_margin(int value, int margin)
>       WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
>  
>       trace_sched_util_est_cfs_tp(cfs_rq);
> +}
> +
> +static void
> +util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
> +{
> +     long last_ewma_diff;
> +     struct util_est ue;
> +     int cpu;

Nitpick: 'int cpu' not needed
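Maybe also worth spelling out (at least in the header) how the two
halves end up ordered in dequeue_task_fair(); that hunk isn't quoted
above, but presumably the intent is something like this sketch (not the
actual patch):

static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
	int task_sleep = flags & DEQUEUE_SLEEP;
	...
	/* Remove p's contribution from util_est before util_avg is updated */
	util_est_dequeue(&rq->cfs, p);

	for_each_sched_entity(se) {
		...
		/* dequeue_entity() ends up calling cfs_rq_util_change() */
		dequeue_entity(cfs_rq, se, flags);
		...
	}
	...
	/* EWMA bookkeeping only once the dequeue has completed */
	util_est_update(&rq->cfs, p, task_sleep);
}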

---8<---

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c3685a743a76..53dfb20d101e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3956,28 +3956,28 @@ static inline bool within_margin(int value, int margin)
        return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
 }
 
-static void
-util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p)
+static inline void util_est_dequeue(struct cfs_rq *cfs_rq,
+                                   struct task_struct *p)
 {
-       struct util_est ue;
+       unsigned int enqueued;
 
        if (!sched_feat(UTIL_EST))
                return;
 
        /* Update root cfs_rq's estimated utilization */
-       ue.enqueued  = cfs_rq->avg.util_est.enqueued;
-       ue.enqueued -= min_t(unsigned int, ue.enqueued, _task_util_est(p));
-       WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
+       enqueued  = cfs_rq->avg.util_est.enqueued;
+       enqueued -= min_t(unsigned int, enqueued, _task_util_est(p));
+       WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
 
        trace_sched_util_est_cfs_tp(cfs_rq);
 }
 
-static void
-util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
+static inline void util_est_update(struct cfs_rq *cfs_rq,
+                                  struct task_struct *p,
+                                  bool task_sleep)
 {
        long last_ewma_diff;
        struct util_est ue;
-       int cpu;
 
        if (!sched_feat(UTIL_EST))
                return;
@@ -4021,8 +4021,7 @@ util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
         * To avoid overestimation of actual task utilization, skip updates if
         * we cannot grant there is idle time in this CPU.
         */
-       cpu = cpu_of(rq_of(cfs_rq));
-       if (task_util(p) > capacity_orig_of(cpu))
+       if (task_util(p) > capacity_orig_of(cpu_of(rq_of(cfs_rq))))
                return;
 
        /*
-- 
2.17.1
