Re: [PATCH 2/4] sched: Correctly handle nohz ticks cpu load accounting

2016-04-02 Thread Frederic Weisbecker
On Sat, Apr 02, 2016 at 09:15:20AM +0200, Peter Zijlstra wrote:
> On Fri, Apr 01, 2016 at 03:23:05PM +0200, Frederic Weisbecker wrote:
> > Ticks can happen in the middle of a nohz frame and
> 
> I'm still miffed with that.. And this changelog doesn't even explain why
> and how.

Indeed, it looks like I've cooked sloppy changelogs in this series, I'll
do another pass on all of them.

> 
> > cpu_load_update_active() doesn't handle these correctly. It forgets the
> > whole previous tickless load and just records the current tick, ignoring
> > potentially long idle periods.
> > 
> > In order to solve this, record the load on nohz frame entry so we know
> > what to record in case of nohz interruptions, then use this recorded load
> > to account the tickless load on nohz ticks and nohz frame end.
> 
> 
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index f33764d..394f008 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -4527,9 +4527,9 @@ decay_load_missed(unsigned long load, unsigned long 
> > missed_updates, int idx)
> >   * term. See the @active paramter.
> 
> ^^
> 
> What active parameter... you need to update that comment.

Yeah, forgot that.

Thanks.


Re: [PATCH 2/4] sched: Correctly handle nohz ticks cpu load accounting

2016-04-02 Thread Peter Zijlstra
On Fri, Apr 01, 2016 at 03:23:05PM +0200, Frederic Weisbecker wrote:
> Ticks can happen in the middle of a nohz frame and

I'm still miffed with that.. And this changelog doesn't even explain why
and how.

> cpu_load_update_active() doesn't handle these correctly. It forgets the
> whole previous tickless load and just records the current tick, ignoring
> potentially long idle periods.
> 
> In order to solve this, record the load on nohz frame entry so we know
> what to record in case of nohz interruptions, then use this recorded load
> to account the tickless load on nohz ticks and nohz frame end.


> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index f33764d..394f008 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4527,9 +4527,9 @@ decay_load_missed(unsigned long load, unsigned long 
> missed_updates, int idx)
>   * term. See the @active paramter.

^^

What active parameter... you need to update that comment.

>   */
>  static void __cpu_load_update(struct rq *this_rq, unsigned long this_load,
> -   unsigned long pending_updates, int active)
> +   unsigned long pending_updates)
>  {
> - unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0;
> + unsigned long tickless_load = this_rq->cpu_load[0];
>   int i, scale;
>  
>   this_rq->nr_load_updates++;


[PATCH 2/4] sched: Correctly handle nohz ticks cpu load accounting

2016-04-01 Thread Frederic Weisbecker
Ticks can happen in the middle of a nohz frame and
cpu_load_update_active() doesn't handle these correctly. It forgets the
whole previous tickless load and just records the current tick, ignoring
potentially long idle periods.

In order to solve this, record the load on nohz frame entry so we know
what to record in case of nohz interruptions, then use this recorded load
to account the tickless load on nohz ticks and nohz frame end.

Cc: Byungchul Park 
Cc: Chris Metcalf 
Cc: Christoph Lameter 
Cc: Ingo Molnar 
Cc: Luiz Capitulino 
Cc: Mike Galbraith 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Thomas Gleixner 
Signed-off-by: Frederic Weisbecker 
---
 include/linux/sched.h|  6 +++--
 kernel/sched/fair.c  | 63 +---
 kernel/time/tick-sched.c |  9 ---
 3 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 86adc0e..6f9415a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -178,9 +178,11 @@ extern void get_iowait_load(unsigned long *nr_waiters, 
unsigned long *load);
 extern void calc_global_load(unsigned long ticks);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void cpu_load_update_nohz(int active);
+extern void cpu_load_update_nohz_start(void);
+extern void cpu_load_update_nohz_stop(void);
 #else
-static inline void cpu_load_update_nohz(int active) { }
+static inline void cpu_load_update_nohz_start(void) { }
+static inline void cpu_load_update_nohz_stop(void) { }
 #endif
 
 extern void dump_cpu_task(int cpu);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f33764d..394f008 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4527,9 +4527,9 @@ decay_load_missed(unsigned long load, unsigned long 
missed_updates, int idx)
  * term. See the @active paramter.
  */
 static void __cpu_load_update(struct rq *this_rq, unsigned long this_load,
- unsigned long pending_updates, int active)
+ unsigned long pending_updates)
 {
-   unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0;
+   unsigned long tickless_load = this_rq->cpu_load[0];
int i, scale;
 
this_rq->nr_load_updates++;
@@ -4567,17 +4567,9 @@ static void __cpu_load_update(struct rq *this_rq, 
unsigned long this_load,
sched_avg_update(this_rq);
 }
 
-/* Used instead of source_load when we know the type == 0 */
-static unsigned long weighted_cpuload(const int cpu)
-{
-   return cfs_rq_runnable_load_avg(&cpu_rq(cpu)->cfs);
-}
-
-#ifdef CONFIG_NO_HZ_COMMON
-static void __cpu_load_update_nohz(struct rq *this_rq,
-  unsigned long curr_jiffies,
-  unsigned long load,
-  int active)
+static void cpu_load_update(struct rq *this_rq,
+   unsigned long curr_jiffies,
+   unsigned long load)
 {
unsigned long pending_updates;
 
@@ -4589,10 +4581,17 @@ static void __cpu_load_update_nohz(struct rq *this_rq,
 * In the NOHZ_FULL case, we were non-idle, we should consider
 * its weighted load.
 */
-   __cpu_load_update(this_rq, load, pending_updates, active);
+   __cpu_load_update(this_rq, load, pending_updates);
}
 }
 
+/* Used instead of source_load when we know the type == 0 */
+static unsigned long weighted_cpuload(const int cpu)
+{
+   return cfs_rq_runnable_load_avg(&cpu_rq(cpu)->cfs);
+}
+
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * There is no sane way to deal with nohz on smp when using jiffies because the
  * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
@@ -4618,26 +4617,43 @@ static void cpu_load_update_idle(struct rq *this_rq)
if (weighted_cpuload(cpu_of(this_rq)))
return;
 
-   __cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
+   cpu_load_update(this_rq, READ_ONCE(jiffies), 0);
 }
 
 /*
- * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ * Record CPU load on nohz entry so we know the tickless load to account
+ * on nohz exit.
  */
-void cpu_load_update_nohz(int active)
+void cpu_load_update_nohz_start(void)
 {
struct rq *this_rq = this_rq();
+
+   /*
+* This is all lockless but should be fine. If weighted_cpuload changes
+* concurrently we'll exit nohz. And cpu_load write can race with
+* cpu_load_update_idle() but both updater would be writing the same.
+*/
+   this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq));
+}
+
+/*
+ * Account the tickless load in the end of a nohz frame.
+ */
+void cpu_load_update_nohz_stop(void)
+{
unsigned long curr_jiffies = READ_ONCE(jiffies);
-   unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
+   struct rq *this_rq = this_rq();
+