Re: [RFC PATCH 1/3] sched/fair: Aggregate task utilization only on root cfs_rq
On 02/06/16 16:53, Dietmar Eggemann wrote:
> On 02/06/16 10:23, Juri Lelli wrote:
>
> >> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> >> index 218f8e83db73..212becd3708f 100644
> >> --- a/kernel/sched/fair.c
> >> +++ b/kernel/sched/fair.c
> >> @@ -2705,6 +2705,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
> >>  	u32 contrib;
> >>  	unsigned int delta_w, scaled_delta_w, decayed = 0;
> >>  	unsigned long scale_freq, scale_cpu;
> >> +	int update_util = 0;
> >>
> >>  	delta = now - sa->last_update_time;
> >>  	/*
> >> @@ -2725,6 +2726,12 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
> >>  		return 0;
> >>  	sa->last_update_time = now;
> >>
> >> +	if (cfs_rq) {
> >> +		if (&rq_of(cfs_rq)->cfs == cfs_rq)
> >
> > Maybe we can wrap this sort of checks in a static inline improving
> > readability?
>
> Something like this?
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 218f8e83db73..01b0fa689ef9 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -251,6 +251,18 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
>  	return cfs_rq->rq;
>  }
>
> +/* cfs_rq "owned" by the root task group */
> +static inline struct cfs_rq *root_rq_of(struct cfs_rq *cfs_rq)
> +{
> +	return &rq_of(cfs_rq)->cfs;
> +}
> +
> +/* Is this cfs_rq "owned" by the root task group ? */
> +static inline bool rq_is_root(struct cfs_rq *cfs_rq)
> +{
> +	return root_rq_of(cfs_rq) == cfs_rq;
> +}
> +
>  /* An entity is a task if it doesn't "own" a runqueue */
>  #define entity_is_task(se)	(!se->my_q)
>
> @@ -376,6 +388,16 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
>  	return container_of(cfs_rq, struct rq, cfs);
>  }
>
> +static inline struct cfs_rq *root_rq_of(struct cfs_rq *cfs_rq)
> +{
> +	return cfs_rq;
> +}
> +
> +static inline bool rq_is_root(struct cfs_rq *cfs_rq)
> +{
> +	return true;
> +}
> +
>  #define entity_is_task(se)	1
>

Looks good to me.

Thanks,

- Juri
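For illustration only, this is how the call site in __update_load_avg() might read once the rq_is_root() helper proposed above replaces the open-coded comparison. The snippet is a sketch assembled from the hunks quoted in this thread, not part of the posted series:

/*
 * Sketch (not from the posted patches): the @update_util selection in
 * __update_load_avg(), rewritten to use the proposed rq_is_root()
 * helper instead of the open-coded "&rq_of(cfs_rq)->cfs == cfs_rq"
 * comparison.
 */
	if (cfs_rq) {
		if (rq_is_root(cfs_rq))
			update_util = 1;
	} else if (entity_is_task(container_of(sa, struct sched_entity, avg)))
		update_util = 1;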
Re: [RFC PATCH 1/3] sched/fair: Aggregate task utilization only on root cfs_rq
On 02/06/16 10:23, Juri Lelli wrote:

>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 218f8e83db73..212becd3708f 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -2705,6 +2705,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
>>  	u32 contrib;
>>  	unsigned int delta_w, scaled_delta_w, decayed = 0;
>>  	unsigned long scale_freq, scale_cpu;
>> +	int update_util = 0;
>>
>>  	delta = now - sa->last_update_time;
>>  	/*
>> @@ -2725,6 +2726,12 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
>>  		return 0;
>>  	sa->last_update_time = now;
>>
>> +	if (cfs_rq) {
>> +		if (&rq_of(cfs_rq)->cfs == cfs_rq)
>
> Maybe we can wrap this sort of checks in a static inline improving
> readability?

Something like this?

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 218f8e83db73..01b0fa689ef9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -251,6 +251,18 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 	return cfs_rq->rq;
 }
 
+/* cfs_rq "owned" by the root task group */
+static inline struct cfs_rq *root_rq_of(struct cfs_rq *cfs_rq)
+{
+	return &rq_of(cfs_rq)->cfs;
+}
+
+/* Is this cfs_rq "owned" by the root task group ? */
+static inline bool rq_is_root(struct cfs_rq *cfs_rq)
+{
+	return root_rq_of(cfs_rq) == cfs_rq;
+}
+
 /* An entity is a task if it doesn't "own" a runqueue */
 #define entity_is_task(se)	(!se->my_q)
 
@@ -376,6 +388,16 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 	return container_of(cfs_rq, struct rq, cfs);
 }
 
+static inline struct cfs_rq *root_rq_of(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq;
+}
+
+static inline bool rq_is_root(struct cfs_rq *cfs_rq)
+{
+	return true;
+}
+
 #define entity_is_task(se)	1
Re: [RFC PATCH 1/3] sched/fair: Aggregate task utilization only on root cfs_rq
Hi,

minor comment below.

On 01/06/16 20:39, Dietmar Eggemann wrote:
> cpu utilization (cpu_util()) is defined as the cpu (original) capacity
> capped cfs_rq->avg->util_avg signal of the root cfs_rq.
>
> With the current pelt version, the utilization of a task [en|de]queued
> on/from a cfs_rq, representing a task group other than the root task group
> on a cpu, is not immediately propagated down to the root cfs_rq.
>
> This makes decisions based on cpu_util() for scheduling or cpu frequency
> settings less accurate in case tasks are running in task groups.
>
> This patch aggregates the task utilization only on the root cfs_rq,
> essentially avoiding maintaining utilization for a se/cfs_rq representing
> task groups other than the root task group (!entity_is_task(se) and
> &rq_of(cfs_rq)->cfs != cfs_rq).
>
> The additional if/else condition to set @update_util in
> __update_load_avg() is replaced in 'sched/fair: Change @running of
> __update_load_avg() to @update_util' by providing the information whether
> utilization has to be maintained via an argument to this function.
>
> The additional requirements for the alignment of the last_update_time of a
> se and the root cfs_rq are handled by the patch 'sched/fair: Sync se with
> root cfs_rq'.
>
> Signed-off-by: Dietmar Eggemann
> ---
>  kernel/sched/fair.c | 48 ++++++++++++++++++++++++++++++++++------------
>  1 file changed, 36 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 218f8e83db73..212becd3708f 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -2705,6 +2705,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
>  	u32 contrib;
>  	unsigned int delta_w, scaled_delta_w, decayed = 0;
>  	unsigned long scale_freq, scale_cpu;
> +	int update_util = 0;
>
>  	delta = now - sa->last_update_time;
>  	/*
> @@ -2725,6 +2726,12 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
>  		return 0;
>  	sa->last_update_time = now;
>
> +	if (cfs_rq) {
> +		if (&rq_of(cfs_rq)->cfs == cfs_rq)

Maybe we can wrap this sort of checks in a static inline improving
readability?

Best,

- Juri
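For context on the cpu_util() signal referenced in the changelog quoted above: it is derived from the root cfs_rq's util_avg, capped by the cpu's original capacity. A rough sketch of that helper from the kernel generation this series targets (reproduced from memory, so details may differ from the exact base tree):

/*
 * Rough sketch of cpu_util() (not part of this series): cpu utilization
 * is the root cfs_rq's util_avg, capped by the cpu's original capacity.
 */
static int cpu_util(int cpu)
{
	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
	unsigned long capacity = capacity_orig_of(cpu);

	return (util >= capacity) ? capacity : util;
}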
[RFC PATCH 1/3] sched/fair: Aggregate task utilization only on root cfs_rq
cpu utilization (cpu_util()) is defined as the cpu (original) capacity
capped cfs_rq->avg->util_avg signal of the root cfs_rq.

With the current pelt version, the utilization of a task [en|de]queued
on/from a cfs_rq, representing a task group other than the root task group
on a cpu, is not immediately propagated down to the root cfs_rq.

This makes decisions based on cpu_util() for scheduling or cpu frequency
settings less accurate in case tasks are running in task groups.

This patch aggregates the task utilization only on the root cfs_rq,
essentially avoiding maintaining utilization for a se/cfs_rq representing
task groups other than the root task group (!entity_is_task(se) and
&rq_of(cfs_rq)->cfs != cfs_rq).

The additional if/else condition to set @update_util in
__update_load_avg() is replaced in 'sched/fair: Change @running of
__update_load_avg() to @update_util' by providing the information whether
utilization has to be maintained via an argument to this function.

The additional requirements for the alignment of the last_update_time of a
se and the root cfs_rq are handled by the patch 'sched/fair: Sync se with
root cfs_rq'.

Signed-off-by: Dietmar Eggemann
---
 kernel/sched/fair.c | 48 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 218f8e83db73..212becd3708f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2705,6 +2705,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 	u32 contrib;
 	unsigned int delta_w, scaled_delta_w, decayed = 0;
 	unsigned long scale_freq, scale_cpu;
+	int update_util = 0;
 
 	delta = now - sa->last_update_time;
 	/*
@@ -2725,6 +2726,12 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 		return 0;
 	sa->last_update_time = now;
 
+	if (cfs_rq) {
+		if (&rq_of(cfs_rq)->cfs == cfs_rq)
+			update_util = 1;
+	} else if (entity_is_task(container_of(sa, struct sched_entity, avg)))
+		update_util = 1;
+
 	scale_freq = arch_scale_freq_capacity(NULL, cpu);
 	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
 
@@ -2750,7 +2757,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 						weight * scaled_delta_w;
 			}
 		}
-		if (running)
+		if (update_util && running)
 			sa->util_sum += scaled_delta_w * scale_cpu;
 
 		delta -= delta_w;
@@ -2774,7 +2781,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 			if (cfs_rq)
 				cfs_rq->runnable_load_sum += weight * contrib;
 		}
-		if (running)
+		if (update_util && running)
 			sa->util_sum += contrib * scale_cpu;
 	}
 
@@ -2785,7 +2792,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 		if (cfs_rq)
 			cfs_rq->runnable_load_sum += weight * scaled_delta;
 	}
-	if (running)
+	if (update_util && running)
 		sa->util_sum += scaled_delta * scale_cpu;
 
 	sa->period_contrib += delta;
@@ -2796,7 +2803,8 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 			cfs_rq->runnable_load_avg =
 				div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
 		}
-		sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
+		if (update_util)
+			sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
 	}
 
 	return decayed;
@@ -2918,7 +2926,8 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 		removed_load = 1;
 	}
 
-	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
+	if ((&rq_of(cfs_rq)->cfs == cfs_rq) &&
+	    atomic_long_read(&cfs_rq->removed_util_avg)) {
 		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
 		sa->util_avg = max_t(long, sa->util_avg - r, 0);
 		sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
@@ -2982,8 +2991,12 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	se->avg.last_update_time = cfs_rq->avg.last_update_time;
 	cfs_rq->avg.load_avg += se->avg.load_avg;
 	cfs_rq->avg.load_sum += se->avg.load_sum;
-	cfs_rq->avg.util_avg += se->avg.util_avg;
-	cfs_rq->avg.util_sum += se->avg.util_sum;
+
+	if (!entity_is_task(se))
+		return;
+
+	rq_of(cfs_rq)->cfs.avg.util_avg += se->avg.util_avg;
+	rq_of(cfs_rq)->cfs.avg.util_sum += se->avg.util_sum;
 
 	cfs_rq_util_change(cfs_rq);
 }
@@ -2996,8 +3009,14 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	cfs_r