Re: [PATCH 1/2] sched/fair: pelt: use u32 for util_avg

2018-06-04 Thread kbuild test robot
Hi Patrick,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on v4.17 next-20180604]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:
https://github.com/0day-ci/linux/commits/Patrick-Bellasi/sched-fair-pelt-use-u32-for-util_avg/20180605-082640
config: i386-randconfig-s0-201822 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   kernel/sched/fair.o: In function `post_init_entity_util_avg':
>> kernel/sched/fair.c:761: undefined reference to `__udivdi3'

vim +761 kernel/sched/fair.c

   724  
   725  /*
   726   * With new tasks being created, their initial util_avgs are extrapolated
   727   * based on the cfs_rq's current util_avg:
   728   *
   729   *   util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
   730   *
   731   * However, in many cases, the above util_avg does not give a desired
   732   * value. Moreover, the sum of the util_avgs may be divergent, such
   733   * as when the series is a harmonic series.
   734   *
   735   * To solve this problem, we also cap the util_avg of successive tasks to
   736   * only 1/2 of the left utilization budget:
   737   *
   738   *   util_avg_cap = (1024 - cfs_rq->avg.util_avg) / 2^n
   739   *
   740   * where n denotes the nth task.
   741   *
   742   * For example, a simplest series from the beginning would be like:
   743   *
   744   *  task  util_avg: 512, 256, 128,  64,  32,   16,    8, ...
   745   * cfs_rq util_avg: 512, 768, 896, 960, 992, 1008, 1016, ...
   746   *
   747   * Finally, that extrapolated util_avg is clamped to the cap (util_avg_cap)
   748   * if util_avg > util_avg_cap.
   749   */
   750  void post_init_entity_util_avg(struct sched_entity *se)
   751  {
   752  	struct cfs_rq *cfs_rq = cfs_rq_of(se);
   753  	long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
   754  
   755  	if (cap > 0) {
   756  		struct sched_avg *sa = &se->avg;
   757  		u64 util_avg = READ_ONCE(sa->util_avg);
   758  
   759  		if (cfs_rq->avg.util_avg != 0) {
   760  			util_avg  =  cfs_rq->avg.util_avg * se->load.weight;
 > 761  			util_avg /= (cfs_rq->avg.load_avg + 1);
   762  			if (util_avg > cap)
   763  				util_avg = cap;
   764  		} else {
   765  			util_avg = cap;
   766  		}
   767  
   768  		WRITE_ONCE(sa->util_avg, util_avg);
   769  	}
   770  
   771  	if (entity_is_task(se)) {
   772  		struct task_struct *p = task_of(se);
   773  		if (p->sched_class != &fair_sched_class) {
   774  			/*
   775  			 * For !fair tasks do:
   776  			 *
   777  			update_cfs_rq_load_avg(now, cfs_rq);
   778  			attach_entity_load_avg(cfs_rq, se, 0);
   779  			switched_from_fair(rq, p);
   780  			 *
   781  			 * such that the next switched_to_fair() has the
   782  			 * expected state.
   783  			 */
   784  			se->avg.last_update_time = cfs_rq_clock_task(cfs_rq);
   785  			return;
   786  		}
   787  	}
   788  
   789  	attach_entity_cfs_rq(se);
   790  }
   791  
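
The undefined `__udivdi3' is the libgcc helper that gcc emits for a 64-bit unsigned division on 32-bit targets; the kernel is not linked against libgcc, so the reference stays unresolved. With util_avg now held in a u64 local, the division at line 761 by (cfs_rq->avg.load_avg + 1) becomes exactly such a 64-bit division on i386. The usual in-kernel idiom is div_u64() from <linux/math64.h>, which routes through do_div() on 32-bit architectures and therefore needs no libgcc symbol. A minimal sketch of how line 761 could be reworked, assuming the divisor always fits in 32 bits (which holds for a single cfs_rq's load_avg in practice); this is only an illustration, not a fix taken from this thread:

	-			util_avg /= (cfs_rq->avg.load_avg + 1);
	+			util_avg  = div_u64(util_avg, cfs_rq->avg.load_avg + 1);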

---
0-DAY kernel test infrastructure            Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH 1/2] sched/fair: pelt: use u32 for util_avg

2018-06-04 Thread kbuild test robot
Hi Patrick,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on v4.17 next-20180604]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:
https://github.com/0day-ci/linux/commits/Patrick-Bellasi/sched-fair-pelt-use-u32-for-util_avg/20180605-082640
config: i386-defconfig (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   kernel/sched/fair.o: In function `post_init_entity_util_avg':
>> fair.c:(.text+0xa057): undefined reference to `__udivdi3'

---
0-DAY kernel test infrastructure            Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


[PATCH 1/2] sched/fair: pelt: use u32 for util_avg

2018-06-04 Thread Patrick Bellasi
The util_avg signal is used to track the utilization (i.e. RUNNING time)
of SEs and RQs. Its values are computed according to the PELT algorithm
and thus, for an SE, they are bounded by an (internal) representation
which uses 20 bits. For an RQ they are instead technically unbounded,
since when tasks are migrated across RQs we add their utilization to the
destination RQ.

We currently use an unsigned long to track util_avg, which maps to 64-bit
storage on 64-bit systems. However, 32 bits should be good enough for all
practical usages. Indeed, even for RQs, the remaining 12 bits allow us to
track up to 4K tasks, each 100% utilized, concurrently RUNNABLE on a
single CPU.
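
Spelling that headroom out: with at most 20 bits needed by a single SE, a u32 leaves

	32 - 20 = 12 spare bits, i.e. 2^12 = 4096 (~4K)

fully utilized entities whose contributions can be summed into one RQ before the u32 could overflow.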

Since the sched_avg data structure already exactly fits a 64B cache
line, let's get back 4B by using a u32 to track util_avg. The recovered
space could conveniently be used to fit other load-tracking related
metrics into the same cache line.
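
For reference, the 64B figure works out as follows on a 64-bit build, taking the field sizes of struct sched_avg as of v4.17 (the fields above the include/linux/sched.h hunk below are assumed from that release): last_update_time (8) + load_sum (8) + runnable_load_sum (8) + util_sum (4) + period_contrib (4) + load_avg (8) + runnable_load_avg (8) + util_avg (8) + util_est (8) = 64 bytes; with util_avg shrunk to a u32 the total drops to 60 bytes, which is the 4B recovered above.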

Signed-off-by: Patrick Bellasi 
Cc: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Vincent Guittot 
Cc: Juri Lelli 
Cc: Todd Kjos 
Cc: Joel Fernandes 
Cc: Steve Muckle 
Cc: Dietmar Eggemann 
Cc: Morten Rasmussen 
Cc: linux-kernel@vger.kernel.org
Cc: linux...@vger.kernel.org
---
 include/linux/sched.h |  2 +-
 kernel/sched/debug.c  |  2 +-
 kernel/sched/fair.c   | 17 ++---
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 28ff3ca9f752..9d8732dab264 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -402,7 +402,7 @@ struct sched_avg {
 	u32				period_contrib;
 	unsigned long			load_avg;
 	unsigned long			runnable_load_avg;
-	unsigned long			util_avg;
+	u32				util_avg;
 	struct util_est			util_est;
 } ____cacheline_aligned;
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 15b10e210a6b..a985789eeb9c 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -541,7 +541,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			cfs_rq->avg.load_avg);
 	SEQ_printf(m, "  .%-30s: %lu\n", "runnable_load_avg",
 			cfs_rq->avg.runnable_load_avg);
-	SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
+	SEQ_printf(m, "  .%-30s: %u\n", "util_avg",
 			cfs_rq->avg.util_avg);
 	SEQ_printf(m, "  .%-30s: %u\n", "util_est_enqueued",
 			cfs_rq->avg.util_est.enqueued);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e497c05aab7f..f74441be3f44 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -750,19 +750,22 @@ static void attach_entity_cfs_rq(struct sched_entity *se);
 void post_init_entity_util_avg(struct sched_entity *se)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	struct sched_avg *sa = &se->avg;
 	long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
 
 	if (cap > 0) {
-		if (cfs_rq->avg.util_avg != 0) {
-			sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
-			sa->util_avg /= (cfs_rq->avg.load_avg + 1);
+		struct sched_avg *sa = &se->avg;
+		u64 util_avg = READ_ONCE(sa->util_avg);
 
-			if (sa->util_avg > cap)
-				sa->util_avg = cap;
+		if (cfs_rq->avg.util_avg != 0) {
+			util_avg  =  cfs_rq->avg.util_avg * se->load.weight;
+			util_avg /= (cfs_rq->avg.load_avg + 1);
+			if (util_avg > cap)
+				util_avg = cap;
 		} else {
-			sa->util_avg = cap;
+			util_avg = cap;
 		}
+
+		WRITE_ONCE(sa->util_avg, util_avg);
 	}
 
 	if (entity_is_task(se)) {
-- 
2.15.1


