Re: [PATCH] sched/fair: Restore env status before goto redo in load_balance()
On 27 March 2015 at 23:30, Peter Zijlstra wrote: > On Wed, Mar 18, 2015 at 02:31:02PM +0800, Xunlei Pang wrote: >> From: Xunlei Pang >> >> In load_balance(), some members of lb_env will be assigned with >> new values in LBF_DST_PINNED case. But lb_env::flags may still >> retain LBF_ALL_PINNED if no proper tasks were found afterwards >> due to another balance, task affinity changing, etc, which can >> really happen because busiest rq lock has already been released. > > Sure.. > >> This is wrong, for example with env.dst_cpu assigned new_dst_cpu >> when going back to "redo" label, it may cause should_we_balance() >> to return false which is unreasonable. > > Why? You've got a very unlikely, very hard case, its unlikely that > anything we do will substantially improve the situation, but you make > the code uglier for it. > >> This patch restores proper status of env before "goto redo", and >> improves "out_all_pinned" and "out_one_pinned" labels. > > That doesn't even begin to explain half of what the patch does. > >> @@ -6977,12 +6978,19 @@ more_balance: >> /* All tasks on this runqueue were pinned by CPU affinity */ >> if (unlikely(env.flags & LBF_ALL_PINNED)) { >> cpumask_clear_cpu(cpu_of(busiest), cpus); >> - if (!cpumask_empty(cpus)) { >> - env.loop = 0; >> - env.loop_break = sched_nr_migrate_break; >> - goto redo; >> + if (env.new_dst_cpu != -1) { > > I really don't get this, how can this not be? > >> + env.new_dst_cpu = -1; >> + cpumask_or(cpus, cpus, >> + sched_group_cpus(sd->groups)); >> + cpumask_and(cpus, cpus, cpu_active_mask); > > More unexplained magic, why is this right? When LBF_DST_PINNED was set, after going back to "more_balance", things may change as the changelog describes, so it can hit LBF_ALL_PINNED afterwards. Then env.cpus, env.dst_rq, env.dst_cpu held the values assigned in the LBF_DST_PINNED case which is unreasonable. When we want to redo, we must reset those values. > > The rest of the patch isn't much better. 
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sched/fair: Restore env status before goto redo in load_balance()
On 27 March 2015 at 23:30, Peter Zijlstra pet...@infradead.org wrote: On Wed, Mar 18, 2015 at 02:31:02PM +0800, Xunlei Pang wrote: From: Xunlei Pang pang.xun...@linaro.org In load_balance(), some members of lb_env will be assigned with new values in LBF_DST_PINNED case. But lb_env::flags may still retain LBF_ALL_PINNED if no proper tasks were found afterwards due to another balance, task affinity changing, etc, which can really happen because busiest rq lock has already been released. Sure.. This is wrong, for example with env.dst_cpu assigned new_dst_cpu when going back to "redo" label, it may cause should_we_balance() to return false which is unreasonable. Why? You've got a very unlikely, very hard case, its unlikely that anything we do will substantially improve the situation, but you make the code uglier for it. This patch restores proper status of env before "goto redo", and improves "out_all_pinned" and "out_one_pinned" labels. That doesn't even begin to explain half of what the patch does. @@ -6977,12 +6978,19 @@ more_balance: /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(env.flags & LBF_ALL_PINNED)) { cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) { - env.loop = 0; - env.loop_break = sched_nr_migrate_break; - goto redo; + if (env.new_dst_cpu != -1) { I really don't get this, how can this not be? + env.new_dst_cpu = -1; + cpumask_or(cpus, cpus, + sched_group_cpus(sd->groups)); + cpumask_and(cpus, cpus, cpu_active_mask); More unexplained magic, why is this right? When LBF_DST_PINNED was set, after going back to "more_balance", things may change as the changelog describes, so it can hit LBF_ALL_PINNED afterwards. Then env.cpus, env.dst_rq, env.dst_cpu held the values assigned in the LBF_DST_PINNED case which is unreasonable. When we want to redo, we must reset those values. The rest of the patch isn't much better.
-- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sched/fair: Restore env status before goto redo in load_balance()
On Wed, Mar 18, 2015 at 02:31:02PM +0800, Xunlei Pang wrote: > From: Xunlei Pang > > In load_balance(), some members of lb_env will be assigned with > new values in LBF_DST_PINNED case. But lb_env::flags may still > retain LBF_ALL_PINNED if no proper tasks were found afterwards > due to another balance, task affinity changing, etc, which can > really happen because busiest rq lock has already been released. Sure.. > This is wrong, for example with env.dst_cpu assigned new_dst_cpu > when going back to "redo" label, it may cause should_we_balance() > to return false which is unreasonable. Why? You've got a very unlikely, very hard case, its unlikely that anything we do will substantially improve the situation, but you make the code uglier for it. > This patch restores proper status of env before "goto redo", and > improves "out_all_pinned" and "out_one_pinned" labels. That doesn't even begin to explain half of what the patch does. > @@ -6977,12 +6978,19 @@ more_balance: > /* All tasks on this runqueue were pinned by CPU affinity */ > if (unlikely(env.flags & LBF_ALL_PINNED)) { > cpumask_clear_cpu(cpu_of(busiest), cpus); > - if (!cpumask_empty(cpus)) { > - env.loop = 0; > - env.loop_break = sched_nr_migrate_break; > - goto redo; > + if (env.new_dst_cpu != -1) { I really don't get this, how can this not be? > + env.new_dst_cpu = -1; > + cpumask_or(cpus, cpus, > + sched_group_cpus(sd->groups)); > + cpumask_and(cpus, cpus, cpu_active_mask); More unexplained magic, why is this right? The rest of the patch isn't much better. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sched/fair: Restore env status before goto redo in load_balance()
Ping Peter > From: Xunlei Pang > > In load_balance(), some members of lb_env will be assigned with > new values in LBF_DST_PINNED case. But lb_env::flags may still > retain LBF_ALL_PINNED if no proper tasks were found afterwards > due to another balance, task affinity changing, etc, which can > really happen because busiest rq lock has already been released. > > This is wrong, for example with env.dst_cpu assigned new_dst_cpu > when going back to "redo" label, it may cause should_we_balance() > to return false which is unreasonable. > > This patch restores proper status of env before "goto redo", and > improves "out_all_pinned" and "out_one_pinned" labels. > > Signed-off-by: Xunlei Pang > --- > kernel/sched/fair.c | 35 --- > 1 file changed, 20 insertions(+), 15 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index ee595ef..45bbda1 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -6843,6 +6843,7 @@ static int load_balance(int this_cpu, struct rq > *this_rq, > .dst_cpu= this_cpu, > .dst_rq = this_rq, > .dst_grpmask= sched_group_cpus(sd->groups), > + .new_dst_cpu= -1, > .idle = idle, > .loop_break = sched_nr_migrate_break, > .cpus = cpus, > @@ -6977,12 +6978,19 @@ more_balance: > /* All tasks on this runqueue were pinned by CPU affinity */ > if (unlikely(env.flags & LBF_ALL_PINNED)) { > cpumask_clear_cpu(cpu_of(busiest), cpus); > - if (!cpumask_empty(cpus)) { > - env.loop = 0; > - env.loop_break = sched_nr_migrate_break; > - goto redo; > + if (env.new_dst_cpu != -1) { > + env.new_dst_cpu = -1; > + cpumask_or(cpus, cpus, > + sched_group_cpus(sd->groups)); > + cpumask_and(cpus, cpus, cpu_active_mask); > + > + env.dst_cpu = this_cpu; > + env.dst_rq = this_rq; > } > - goto out_all_pinned; > + env.flags &= ~LBF_SOME_PINNED; > + env.loop = 0; > + env.loop_break = sched_nr_migrate_break; > + goto redo; > } > } > > @@ -7009,7 +7017,7 @@ more_balance: > raw_spin_unlock_irqrestore(&busiest->lock, > flags); > env.flags |= LBF_ALL_PINNED; > -
goto out_one_pinned; > + goto out_active_balanced; > } > > /* > @@ -7058,26 +7066,23 @@ more_balance: > out_balanced: > /* > * We reach balance although we may have faced some affinity > -* constraints. Clear the imbalance flag if it was set. > +* constraints. > +* > +* When LBF_ALL_PINNED was not set, clear the imbalance flag > +* if it was set. > */ > - if (sd_parent) { > + if (sd_parent && !(env.flags & LBF_ALL_PINNED)) { > int *group_imbalance = &sd_parent->groups->sgc->imbalance; > > if (*group_imbalance) > *group_imbalance = 0; > } > > -out_all_pinned: > - /* > -* We reach balance because all tasks are pinned at this level so > -* we can't migrate them. Let the imbalance flag set so parent level > -* can try to migrate them. > -*/ > schedstat_inc(sd, lb_balanced[idle]); > > sd->nr_balance_failed = 0; > > -out_one_pinned: > +out_active_balanced: > /* tune up the balancing interval */ > if (((env.flags & LBF_ALL_PINNED) && > sd->balance_interval < MAX_PINNED_INTERVAL) || > -- > 1.9.1 > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sched/fair: Restore env status before goto redo in load_balance()
Ping Peter From: Xunlei Pang pang.xun...@linaro.org In load_balance(), some members of lb_env will be assigned with new values in LBF_DST_PINNED case. But lb_env::flags may still retain LBF_ALL_PINNED if no proper tasks were found afterwards due to another balance, task affinity changing, etc, which can really happen because busiest rq lock has already been released. This is wrong, for example with env.dst_cpu assigned new_dst_cpu when going back to "redo" label, it may cause should_we_balance() to return false which is unreasonable. This patch restores proper status of env before "goto redo", and improves "out_all_pinned" and "out_one_pinned" labels. Signed-off-by: Xunlei Pang pang.xun...@linaro.org --- kernel/sched/fair.c | 35 --- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ee595ef..45bbda1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6843,6 +6843,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, .dst_cpu= this_cpu, .dst_rq = this_rq, .dst_grpmask= sched_group_cpus(sd->groups), + .new_dst_cpu= -1, .idle = idle, .loop_break = sched_nr_migrate_break, .cpus = cpus, @@ -6977,12 +6978,19 @@ more_balance: /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(env.flags & LBF_ALL_PINNED)) { cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) { - env.loop = 0; - env.loop_break = sched_nr_migrate_break; - goto redo; + if (env.new_dst_cpu != -1) { + env.new_dst_cpu = -1; + cpumask_or(cpus, cpus, + sched_group_cpus(sd->groups)); + cpumask_and(cpus, cpus, cpu_active_mask); + + env.dst_cpu = this_cpu; + env.dst_rq = this_rq; } - goto out_all_pinned; + env.flags &= ~LBF_SOME_PINNED; + env.loop = 0; + env.loop_break = sched_nr_migrate_break; + goto redo; } } @@ -7009,7 +7017,7 @@ more_balance: raw_spin_unlock_irqrestore(&busiest->lock, flags); env.flags |= LBF_ALL_PINNED; - goto out_one_pinned; + goto out_active_balanced; } /* @@ -7058,26 +7066,23 @@
more_balance: out_balanced: /* * We reach balance although we may have faced some affinity -* constraints. Clear the imbalance flag if it was set. +* constraints. +* +* When LBF_ALL_PINNED was not set, clear the imbalance flag +* if it was set. */ - if (sd_parent) { + if (sd_parent && !(env.flags & LBF_ALL_PINNED)) { int *group_imbalance = &sd_parent->groups->sgc->imbalance; if (*group_imbalance) *group_imbalance = 0; } -out_all_pinned: - /* -* We reach balance because all tasks are pinned at this level so -* we can't migrate them. Let the imbalance flag set so parent level -* can try to migrate them. -*/ schedstat_inc(sd, lb_balanced[idle]); sd->nr_balance_failed = 0; -out_one_pinned: +out_active_balanced: /* tune up the balancing interval */ if (((env.flags & LBF_ALL_PINNED) && sd->balance_interval < MAX_PINNED_INTERVAL) || -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sched/fair: Restore env status before goto redo in load_balance()
On Wed, Mar 18, 2015 at 02:31:02PM +0800, Xunlei Pang wrote: From: Xunlei Pang pang.xun...@linaro.org In load_balance(), some members of lb_env will be assigned with new values in LBF_DST_PINNED case. But lb_env::flags may still retain LBF_ALL_PINNED if no proper tasks were found afterwards due to another balance, task affinity changing, etc, which can really happen because busiest rq lock has already been released. Sure.. This is wrong, for example with env.dst_cpu assigned new_dst_cpu when going back to "redo" label, it may cause should_we_balance() to return false which is unreasonable. Why? You've got a very unlikely, very hard case, its unlikely that anything we do will substantially improve the situation, but you make the code uglier for it. This patch restores proper status of env before "goto redo", and improves "out_all_pinned" and "out_one_pinned" labels. That doesn't even begin to explain half of what the patch does. @@ -6977,12 +6978,19 @@ more_balance: /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(env.flags & LBF_ALL_PINNED)) { cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) { - env.loop = 0; - env.loop_break = sched_nr_migrate_break; - goto redo; + if (env.new_dst_cpu != -1) { I really don't get this, how can this not be? + env.new_dst_cpu = -1; + cpumask_or(cpus, cpus, + sched_group_cpus(sd->groups)); + cpumask_and(cpus, cpus, cpu_active_mask); More unexplained magic, why is this right? The rest of the patch isn't much better. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] sched/fair: Restore env status before goto redo in load_balance()
From: Xunlei Pang In load_balance(), some members of lb_env will be assigned with new values in LBF_DST_PINNED case. But lb_env::flags may still retain LBF_ALL_PINNED if no proper tasks were found afterwards due to another balance, task affinity changing, etc, which can really happen because busiest rq lock has already been released. This is wrong, for example with env.dst_cpu assigned new_dst_cpu when going back to "redo" label, it may cause should_we_balance() to return false which is unreasonable. This patch restores proper status of env before "goto redo", and improves "out_all_pinned" and "out_one_pinned" labels. Signed-off-by: Xunlei Pang --- kernel/sched/fair.c | 35 --- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ee595ef..45bbda1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6843,6 +6843,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, .dst_cpu= this_cpu, .dst_rq = this_rq, .dst_grpmask= sched_group_cpus(sd->groups), + .new_dst_cpu= -1, .idle = idle, .loop_break = sched_nr_migrate_break, .cpus = cpus, @@ -6977,12 +6978,19 @@ more_balance: /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(env.flags & LBF_ALL_PINNED)) { cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) { - env.loop = 0; - env.loop_break = sched_nr_migrate_break; - goto redo; + if (env.new_dst_cpu != -1) { + env.new_dst_cpu = -1; + cpumask_or(cpus, cpus, + sched_group_cpus(sd->groups)); + cpumask_and(cpus, cpus, cpu_active_mask); + + env.dst_cpu = this_cpu; + env.dst_rq = this_rq; } - goto out_all_pinned; + env.flags &= ~LBF_SOME_PINNED; + env.loop = 0; + env.loop_break = sched_nr_migrate_break; + goto redo; } } @@ -7009,7 +7017,7 @@ more_balance: raw_spin_unlock_irqrestore(&busiest->lock, flags); env.flags |= LBF_ALL_PINNED; - goto out_one_pinned; + goto out_active_balanced; } /* @@ -7058,26 +7066,23 @@ more_balance: out_balanced: /* * We reach balance although
we may have faced some affinity -* constraints. Clear the imbalance flag if it was set. +* constraints. +* +* When LBF_ALL_PINNED was not set, clear the imbalance flag +* if it was set. */ - if (sd_parent) { + if (sd_parent && !(env.flags & LBF_ALL_PINNED)) { int *group_imbalance = &sd_parent->groups->sgc->imbalance; if (*group_imbalance) *group_imbalance = 0; } -out_all_pinned: - /* -* We reach balance because all tasks are pinned at this level so -* we can't migrate them. Let the imbalance flag set so parent level -* can try to migrate them. -*/ schedstat_inc(sd, lb_balanced[idle]); sd->nr_balance_failed = 0; -out_one_pinned: +out_active_balanced: /* tune up the balancing interval */ if (((env.flags & LBF_ALL_PINNED) && sd->balance_interval < MAX_PINNED_INTERVAL) || -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] sched/fair: Restore env status before goto redo in load_balance()
From: Xunlei Pang pang.xun...@linaro.org In load_balance(), some members of lb_env will be assigned with new values in LBF_DST_PINNED case. But lb_env::flags may still retain LBF_ALL_PINNED if no proper tasks were found afterwards due to another balance, task affinity changing, etc, which can really happen because busiest rq lock has already been released. This is wrong, for example with env.dst_cpu assigned new_dst_cpu when going back to "redo" label, it may cause should_we_balance() to return false which is unreasonable. This patch restores proper status of env before "goto redo", and improves "out_all_pinned" and "out_one_pinned" labels. Signed-off-by: Xunlei Pang pang.xun...@linaro.org --- kernel/sched/fair.c | 35 --- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ee595ef..45bbda1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6843,6 +6843,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, .dst_cpu= this_cpu, .dst_rq = this_rq, .dst_grpmask= sched_group_cpus(sd->groups), + .new_dst_cpu= -1, .idle = idle, .loop_break = sched_nr_migrate_break, .cpus = cpus, @@ -6977,12 +6978,19 @@ more_balance: /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(env.flags & LBF_ALL_PINNED)) { cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) { - env.loop = 0; - env.loop_break = sched_nr_migrate_break; - goto redo; + if (env.new_dst_cpu != -1) { + env.new_dst_cpu = -1; + cpumask_or(cpus, cpus, + sched_group_cpus(sd->groups)); + cpumask_and(cpus, cpus, cpu_active_mask); + + env.dst_cpu = this_cpu; + env.dst_rq = this_rq; } - goto out_all_pinned; + env.flags &= ~LBF_SOME_PINNED; + env.loop = 0; + env.loop_break = sched_nr_migrate_break; + goto redo; } } @@ -7009,7 +7017,7 @@ more_balance: raw_spin_unlock_irqrestore(&busiest->lock, flags); env.flags |= LBF_ALL_PINNED; - goto out_one_pinned; + goto out_active_balanced; } /* @@ -7058,26 +7066,23 @@ more_balance:
out_balanced: /* * We reach balance although we may have faced some affinity -* constraints. Clear the imbalance flag if it was set. +* constraints. +* +* When LBF_ALL_PINNED was not set, clear the imbalance flag +* if it was set. */ - if (sd_parent) { + if (sd_parent && !(env.flags & LBF_ALL_PINNED)) { int *group_imbalance = &sd_parent->groups->sgc->imbalance; if (*group_imbalance) *group_imbalance = 0; } -out_all_pinned: - /* -* We reach balance because all tasks are pinned at this level so -* we can't migrate them. Let the imbalance flag set so parent level -* can try to migrate them. -*/ schedstat_inc(sd, lb_balanced[idle]); sd->nr_balance_failed = 0; -out_one_pinned: +out_active_balanced: /* tune up the balancing interval */ if (((env.flags & LBF_ALL_PINNED) && sd->balance_interval < MAX_PINNED_INTERVAL) || -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/