Re: [PATCH v2] sched/pelt: Fix task util_est update filtering

2021-03-03 Thread Vincent Donnefort
On Mon, Mar 01, 2021 at 06:21:23PM +0100, Peter Zijlstra wrote:
> On Mon, Mar 01, 2021 at 05:34:09PM +0100, Dietmar Eggemann wrote:
> > On 26/02/2021 09:41, Peter Zijlstra wrote:
> > > On Thu, Feb 25, 2021 at 04:58:20PM +0000, Vincent Donnefort wrote:
> > >> +#define UTIL_EST_MARGIN (SCHED_CAPACITY_SCALE / 100)
> > >> +
> > >>  /*
> > >> - * Check if a (signed) value is within a specified (unsigned) margin,
> > >> + * Check if a (signed) value is within the (unsigned) util_est margin,
> > >>   * based on the observation that:
> > >>   *
> > >>   * abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1)
> > >>   *
> > >> - * NOTE: this only works when value + maring < INT_MAX.
> > >> + * NOTE: this only works when value + UTIL_EST_MARGIN < INT_MAX.
> > >>   */
> > >> -static inline bool within_margin(int value, int margin)
> > >> +static inline bool util_est_within_margin(int value)
> > >>  {
> > >> -return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
> > >> +return ((unsigned int)(value + UTIL_EST_MARGIN - 1) <
> > >> +(2 * UTIL_EST_MARGIN - 1));
> > >>  }
> > > 
> > >> -if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100)))
> > >> +if (util_est_within_margin(last_ewma_diff)) {
> > > 
> > > What was the purpose of this change? What was a generic helper is now
> > > super specific.
> > 
> > I guess because it was only ever used in util_est for last_ewma_diff.
> > 
> > It's now used for last_ewma_diff and last_enqueued_diff, still only for
> > util_est though and both times with the same margin
> > (SCHED_CAPACITY_SCALE / 100).
> > 
> > Vincent D. should be back on Wed from hols.
> 
> Fair enough; I've un-done it but kept the rest of the patch.

Indeed, I was off for a couple of days. Apologies for the delay, and thanks for
applying the patch.

-- 
Vincent


Re: [PATCH v2] sched/pelt: Fix task util_est update filtering

2021-03-01 Thread Peter Zijlstra
On Mon, Mar 01, 2021 at 05:34:09PM +0100, Dietmar Eggemann wrote:
> On 26/02/2021 09:41, Peter Zijlstra wrote:
> > On Thu, Feb 25, 2021 at 04:58:20PM +0000, Vincent Donnefort wrote:
> >> +#define UTIL_EST_MARGIN (SCHED_CAPACITY_SCALE / 100)
> >> +
> >>  /*
> >> - * Check if a (signed) value is within a specified (unsigned) margin,
> >> + * Check if a (signed) value is within the (unsigned) util_est margin,
> >>   * based on the observation that:
> >>   *
> >>   * abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1)
> >>   *
> >> - * NOTE: this only works when value + maring < INT_MAX.
> >> + * NOTE: this only works when value + UTIL_EST_MARGIN < INT_MAX.
> >>   */
> >> -static inline bool within_margin(int value, int margin)
> >> +static inline bool util_est_within_margin(int value)
> >>  {
> >> -  return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
> >> +  return ((unsigned int)(value + UTIL_EST_MARGIN - 1) <
> >> +  (2 * UTIL_EST_MARGIN - 1));
> >>  }
> > 
> >> -  if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100)))
> >> +  if (util_est_within_margin(last_ewma_diff)) {
> > 
> > What was the purpose of this change? What was a generic helper is now
> > super specific.
> 
> I guess because it was only ever used in util_est for last_ewma_diff.
> 
> It's now used for last_ewma_diff and last_enqueued_diff, still only for
> util_est though and both times with the same margin
> (SCHED_CAPACITY_SCALE / 100).
> 
> Vincent D. should be back on Wed from hols.

Fair enough; I've un-done it but kept the rest of the patch.


Re: [PATCH v2] sched/pelt: Fix task util_est update filtering

2021-03-01 Thread Dietmar Eggemann
On 26/02/2021 09:41, Peter Zijlstra wrote:
> On Thu, Feb 25, 2021 at 04:58:20PM +0000, Vincent Donnefort wrote:
>> +#define UTIL_EST_MARGIN (SCHED_CAPACITY_SCALE / 100)
>> +
>>  /*
>> - * Check if a (signed) value is within a specified (unsigned) margin,
>> + * Check if a (signed) value is within the (unsigned) util_est margin,
>>   * based on the observation that:
>>   *
>>   * abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1)
>>   *
>> - * NOTE: this only works when value + maring < INT_MAX.
>> + * NOTE: this only works when value + UTIL_EST_MARGIN < INT_MAX.
>>   */
>> -static inline bool within_margin(int value, int margin)
>> +static inline bool util_est_within_margin(int value)
>>  {
>> -return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
>> +return ((unsigned int)(value + UTIL_EST_MARGIN - 1) <
>> +(2 * UTIL_EST_MARGIN - 1));
>>  }
> 
>> -if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100)))
>> +if (util_est_within_margin(last_ewma_diff)) {
> 
> What was the purpose of this change? What was a generic helper is now
> super specific.

I guess because it was only ever used in util_est for last_ewma_diff.

It's now used for last_ewma_diff and last_enqueued_diff, still only for
util_est though and both times with the same margin
(SCHED_CAPACITY_SCALE / 100).

Vincent D. should be back on Wed from hols.


Re: [PATCH v2] sched/pelt: Fix task util_est update filtering

2021-02-26 Thread Peter Zijlstra
On Thu, Feb 25, 2021 at 04:58:20PM +0000, Vincent Donnefort wrote:
> +#define UTIL_EST_MARGIN (SCHED_CAPACITY_SCALE / 100)
> +
>  /*
> - * Check if a (signed) value is within a specified (unsigned) margin,
> + * Check if a (signed) value is within the (unsigned) util_est margin,
>   * based on the observation that:
>   *
>   * abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1)
>   *
> - * NOTE: this only works when value + maring < INT_MAX.
> + * NOTE: this only works when value + UTIL_EST_MARGIN < INT_MAX.
>   */
> -static inline bool within_margin(int value, int margin)
> +static inline bool util_est_within_margin(int value)
>  {
> - return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
> + return ((unsigned int)(value + UTIL_EST_MARGIN - 1) <
> + (2 * UTIL_EST_MARGIN - 1));
>  }

> - if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100)))
> + if (util_est_within_margin(last_ewma_diff)) {

What was the purpose of this change? What was a generic helper is now
super specific.


Re: [PATCH v2] sched/pelt: Fix task util_est update filtering

2021-02-25 Thread Vincent Guittot
On Thu, 25 Feb 2021 at 17:58, Vincent Donnefort wrote:
>
> Being called for each dequeue, util_est reduces the number of its updates
> by filtering out when the EWMA signal is different from the task util_avg
> by less than 1%. It is a problem for a sudden util_avg ramp-up. Due to the
> decay from a previous high util_avg, EWMA might now be close enough to
> the new util_avg. No update would then happen while it would leave
> ue.enqueued with an out-of-date value.
>
> Taking into consideration the two util_est members, EWMA and enqueued for
> the filtering, ensures, for both, an up-to-date value.
>
> This is for now an issue only for the trace probe that might return the
> stale value. Functional-wise, it isn't a problem, as the value is always
> accessed through max(enqueued, ewma).
>
> This problem has been observed using LISA's UtilConvergence:test_means on
> the sd845c board.
>
> No regression observed with Hackbench on sd845c and Perf-bench sched pipe
> on hikey/hikey960.
>
> Signed-off-by: Vincent Donnefort 
> Reviewed-by: Dietmar Eggemann 

Reviewed-by: Vincent Guittot 

>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 9e4104ae39ae..214e02862994 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3966,24 +3966,27 @@ static inline void util_est_dequeue(struct cfs_rq 
> *cfs_rq,
> trace_sched_util_est_cfs_tp(cfs_rq);
>  }
>
> +#define UTIL_EST_MARGIN (SCHED_CAPACITY_SCALE / 100)
> +
>  /*
> - * Check if a (signed) value is within a specified (unsigned) margin,
> + * Check if a (signed) value is within the (unsigned) util_est margin,
>   * based on the observation that:
>   *
>   * abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1)
>   *
> - * NOTE: this only works when value + maring < INT_MAX.
> + * NOTE: this only works when value + UTIL_EST_MARGIN < INT_MAX.
>   */
> -static inline bool within_margin(int value, int margin)
> +static inline bool util_est_within_margin(int value)
>  {
> -   return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
> +   return ((unsigned int)(value + UTIL_EST_MARGIN - 1) <
> +   (2 * UTIL_EST_MARGIN - 1));
>  }
>
>  static inline void util_est_update(struct cfs_rq *cfs_rq,
>struct task_struct *p,
>bool task_sleep)
>  {
> -   long last_ewma_diff;
> +   long last_ewma_diff, last_enqueued_diff;
> struct util_est ue;
>
> if (!sched_feat(UTIL_EST))
> @@ -4004,6 +4007,8 @@ static inline void util_est_update(struct cfs_rq 
> *cfs_rq,
> if (ue.enqueued & UTIL_AVG_UNCHANGED)
> return;
>
> +   last_enqueued_diff = ue.enqueued;
> +
> /*
>  * Reset EWMA on utilization increases, the moving average is used 
> only
>  * to smooth utilization decreases.
> @@ -4017,12 +4022,17 @@ static inline void util_est_update(struct cfs_rq 
> *cfs_rq,
> }
>
> /*
> -* Skip update of task's estimated utilization when its EWMA is
> +* Skip update of task's estimated utilization when its members are
>  * already ~1% close to its last activation value.
>  */
> last_ewma_diff = ue.enqueued - ue.ewma;
> -   if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100)))
> +   last_enqueued_diff -= ue.enqueued;
> +   if (util_est_within_margin(last_ewma_diff)) {
> +   if (!util_est_within_margin(last_enqueued_diff))
> +   goto done;
> +
> return;
> +   }
>
> /*
>  * To avoid overestimation of actual task utilization, skip updates if
> --
> 2.25.1
>


[PATCH v2] sched/pelt: Fix task util_est update filtering

2021-02-25 Thread Vincent Donnefort
Being called for each dequeue, util_est reduces the number of its updates
by skipping them when the EWMA signal differs from the task util_avg
by less than 1%. This is a problem for a sudden util_avg ramp-up: due to the
decay from a previous high util_avg, EWMA might already be close enough to
the new util_avg. No update would then happen, leaving ue.enqueued with an
out-of-date value.

Taking both util_est members, EWMA and enqueued, into consideration for
the filtering ensures an up-to-date value for both.

For now, this is an issue only for the trace probe, which might return the
stale value. Functionally, it isn't a problem, as the value is always
accessed through max(enqueued, ewma).

This problem has been observed using LISA's UtilConvergence:test_means on
the sd845c board.

No regression observed with Hackbench on sd845c and Perf-bench sched pipe
on hikey/hikey960.

Signed-off-by: Vincent Donnefort 
Reviewed-by: Dietmar Eggemann 

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9e4104ae39ae..214e02862994 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3966,24 +3966,27 @@ static inline void util_est_dequeue(struct cfs_rq *cfs_rq,
trace_sched_util_est_cfs_tp(cfs_rq);
 }
 
+#define UTIL_EST_MARGIN (SCHED_CAPACITY_SCALE / 100)
+
 /*
- * Check if a (signed) value is within a specified (unsigned) margin,
+ * Check if a (signed) value is within the (unsigned) util_est margin,
  * based on the observation that:
  *
  * abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1)
  *
- * NOTE: this only works when value + maring < INT_MAX.
+ * NOTE: this only works when value + UTIL_EST_MARGIN < INT_MAX.
  */
-static inline bool within_margin(int value, int margin)
+static inline bool util_est_within_margin(int value)
 {
-   return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
+   return ((unsigned int)(value + UTIL_EST_MARGIN - 1) <
+   (2 * UTIL_EST_MARGIN - 1));
 }
 
 static inline void util_est_update(struct cfs_rq *cfs_rq,
   struct task_struct *p,
   bool task_sleep)
 {
-   long last_ewma_diff;
+   long last_ewma_diff, last_enqueued_diff;
struct util_est ue;
 
if (!sched_feat(UTIL_EST))
@@ -4004,6 +4007,8 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
if (ue.enqueued & UTIL_AVG_UNCHANGED)
return;
 
+   last_enqueued_diff = ue.enqueued;
+
/*
 * Reset EWMA on utilization increases, the moving average is used only
 * to smooth utilization decreases.
@@ -4017,12 +4022,17 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
}
 
/*
-* Skip update of task's estimated utilization when its EWMA is
+* Skip update of task's estimated utilization when its members are
 * already ~1% close to its last activation value.
 */
last_ewma_diff = ue.enqueued - ue.ewma;
-   if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100)))
+   last_enqueued_diff -= ue.enqueued;
+   if (util_est_within_margin(last_ewma_diff)) {
+   if (!util_est_within_margin(last_enqueued_diff))
+   goto done;
+
return;
+   }
 
/*
 * To avoid overestimation of actual task utilization, skip updates if
-- 
2.25.1