Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-14 Thread Oleg Nesterov
On 09/13, Peter Zijlstra wrote:
>
> On Tue, Sep 13, 2016 at 06:14:27PM +0200, Oleg Nesterov wrote:
>
> > Hmm. And  preempt_enable_no_resched_notrace() under TASK_DEAD in
> > __schedule() should be removed it seems, do_exit() can call __schedule()
> > directly.
>
> something like so?

Yes, exactly.

But I think that raw_spin_unlock_wait(&tsk->pi_lock) and its huge
comment should be moved into the new helper too, this connects to
set_current_state(TASK_DEAD).

Oleg.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-14 Thread Oleg Nesterov
On 09/13, Peter Zijlstra wrote:
>
> On Tue, Sep 13, 2016 at 06:14:27PM +0200, Oleg Nesterov wrote:
>
> > Hmm. And  preempt_enable_no_resched_notrace() under TASK_DEAD in
> > __schedule() should be removed it seems, do_exit() can call __schedule()
> > directly.
>
> something like so?

Yes, exactly.

But I think that raw_spin_unlock_wait(&tsk->pi_lock) and its huge
comment should be moved into the new helper too, this connects to
set_current_state(TASK_DEAD).

Oleg.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-14 Thread Peter Zijlstra
On Wed, Sep 14, 2016 at 10:07:14AM +0800, Cheng Chao wrote:
> 
> great, __schedule() doesn't need pay any attention to the TASK_DEAD now.

There's still __schedule()->context_switch()->finish_task_switch().


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-14 Thread Peter Zijlstra
On Wed, Sep 14, 2016 at 10:07:14AM +0800, Cheng Chao wrote:
> 
> great, __schedule() doesn't need pay any attention to the TASK_DEAD now.

There's still __schedule()->context_switch()->finish_task_switch().


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-13 Thread Cheng Chao

great, __schedule() doesn't need pay any attention to the TASK_DEAD now.

on 09/14/2016 12:37 AM, Peter Zijlstra wrote:
> On Tue, Sep 13, 2016 at 06:14:27PM +0200, Oleg Nesterov wrote:
> 
>> Me too, and I failed to find something which could be broken... So
>> perhaps should make it nop and investigate the new bug reports after
>> that.
> 
> Works for me :-)
> 
>>
>> Hmm. And  preempt_enable_no_resched_notrace() under TASK_DEAD in
>> __schedule() should be removed it seems, do_exit() can call __schedule()
>> directly.
> 
> 
> something like so?
> 
> ---
> 
>  include/linux/kernel.h |  2 +-
>  include/linux/sched.h  |  2 ++
>  kernel/exit.c  | 11 ++-
>  kernel/sched/core.c| 23 ---
>  4 files changed, 17 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
> index d96a6118d26a..e5bd9cdd2e24 100644
> --- a/include/linux/kernel.h
> +++ b/include/linux/kernel.h
> @@ -266,7 +266,7 @@ extern void oops_enter(void);
>  extern void oops_exit(void);
>  void print_oops_end_marker(void);
>  extern int oops_may_print(void);
> -void do_exit(long error_code)
> +void __noreturn do_exit(long error_code)
>   __noreturn;
>  void complete_and_exit(struct completion *, long)
>   __noreturn;
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index eb64fcd89e68..b0c818a05b2e 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -448,6 +448,8 @@ static inline void io_schedule(void)
>   io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
>  }
>  
> +void __noreturn do_task_dead(void);
> +
>  struct nsproxy;
>  struct user_namespace;
>  
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 091a78be3b09..d4c12692f766 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -725,7 +725,7 @@ static void check_stack_usage(void)
>  static inline void check_stack_usage(void) {}
>  #endif
>  
> -void do_exit(long code)
> +void __noreturn do_exit(long code)
>  {
>   struct task_struct *tsk = current;
>   int group_dead;
> @@ -897,14 +897,7 @@ void do_exit(long code)
>   smp_mb();
>   raw_spin_unlock_wait(&tsk->pi_lock);
>  
> - /* causes final put_task_struct in finish_task_switch(). */
> - tsk->state = TASK_DEAD;
> - tsk->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
> - schedule();
> - BUG();
> - /* Avoid "noreturn function does return".  */
> - for (;;)
> - cpu_relax();/* For when BUG is null */
> + do_task_dead();
>  }
>  EXPORT_SYMBOL_GPL(do_exit);
>  
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index a0086a5fc008..6034f269000f 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3327,17 +3327,6 @@ static void __sched notrace __schedule(bool preempt)
>   rq = cpu_rq(cpu);
>   prev = rq->curr;
>  
> - /*
> -  * do_exit() calls schedule() with preemption disabled as an exception;
> -  * however we must fix that up, otherwise the next task will see an
> -  * inconsistent (higher) preempt count.
> -  *
> -  * It also avoids the below schedule_debug() test from complaining
> -  * about this.
> -  */
> - if (unlikely(prev->state == TASK_DEAD))
> - preempt_enable_no_resched_notrace();
> -
>   schedule_debug(prev);
>  
>   if (sched_feat(HRTICK))
> @@ -3404,6 +3393,18 @@ static void __sched notrace __schedule(bool preempt)
>   balance_callback(rq);
>  }
>  
> +void __noreturn do_task_dead(void)
> +{
> + /* causes final put_task_struct in finish_task_switch(). */
> + __set_current_state(TASK_DEAD);
> + current->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
> + __schedule(false);
> + BUG();
> + /* Avoid "noreturn function does return".  */
> + for (;;)
> + cpu_relax();/* For when BUG is null */
> +}
> +
>  static inline void sched_submit_work(struct task_struct *tsk)
>  {
>   if (!tsk->state || tsk_is_pi_blocked(tsk))
> 


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-13 Thread Cheng Chao

great, __schedule() doesn't need pay any attention to the TASK_DEAD now.

on 09/14/2016 12:37 AM, Peter Zijlstra wrote:
> On Tue, Sep 13, 2016 at 06:14:27PM +0200, Oleg Nesterov wrote:
> 
>> Me too, and I failed to find something which could be broken... So
>> perhaps should make it nop and investigate the new bug reports after
>> that.
> 
> Works for me :-)
> 
>>
>> Hmm. And  preempt_enable_no_resched_notrace() under TASK_DEAD in
>> __schedule() should be removed it seems, do_exit() can call __schedule()
>> directly.
> 
> 
> something like so?
> 
> ---
> 
>  include/linux/kernel.h |  2 +-
>  include/linux/sched.h  |  2 ++
>  kernel/exit.c  | 11 ++-
>  kernel/sched/core.c| 23 ---
>  4 files changed, 17 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
> index d96a6118d26a..e5bd9cdd2e24 100644
> --- a/include/linux/kernel.h
> +++ b/include/linux/kernel.h
> @@ -266,7 +266,7 @@ extern void oops_enter(void);
>  extern void oops_exit(void);
>  void print_oops_end_marker(void);
>  extern int oops_may_print(void);
> -void do_exit(long error_code)
> +void __noreturn do_exit(long error_code)
>   __noreturn;
>  void complete_and_exit(struct completion *, long)
>   __noreturn;
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index eb64fcd89e68..b0c818a05b2e 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -448,6 +448,8 @@ static inline void io_schedule(void)
>   io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
>  }
>  
> +void __noreturn do_task_dead(void);
> +
>  struct nsproxy;
>  struct user_namespace;
>  
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 091a78be3b09..d4c12692f766 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -725,7 +725,7 @@ static void check_stack_usage(void)
>  static inline void check_stack_usage(void) {}
>  #endif
>  
> -void do_exit(long code)
> +void __noreturn do_exit(long code)
>  {
>   struct task_struct *tsk = current;
>   int group_dead;
> @@ -897,14 +897,7 @@ void do_exit(long code)
>   smp_mb();
>   raw_spin_unlock_wait(&tsk->pi_lock);
>  
> - /* causes final put_task_struct in finish_task_switch(). */
> - tsk->state = TASK_DEAD;
> - tsk->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
> - schedule();
> - BUG();
> - /* Avoid "noreturn function does return".  */
> - for (;;)
> - cpu_relax();/* For when BUG is null */
> + do_task_dead();
>  }
>  EXPORT_SYMBOL_GPL(do_exit);
>  
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index a0086a5fc008..6034f269000f 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3327,17 +3327,6 @@ static void __sched notrace __schedule(bool preempt)
>   rq = cpu_rq(cpu);
>   prev = rq->curr;
>  
> - /*
> -  * do_exit() calls schedule() with preemption disabled as an exception;
> -  * however we must fix that up, otherwise the next task will see an
> -  * inconsistent (higher) preempt count.
> -  *
> -  * It also avoids the below schedule_debug() test from complaining
> -  * about this.
> -  */
> - if (unlikely(prev->state == TASK_DEAD))
> - preempt_enable_no_resched_notrace();
> -
>   schedule_debug(prev);
>  
>   if (sched_feat(HRTICK))
> @@ -3404,6 +3393,18 @@ static void __sched notrace __schedule(bool preempt)
>   balance_callback(rq);
>  }
>  
> +void __noreturn do_task_dead(void)
> +{
> + /* causes final put_task_struct in finish_task_switch(). */
> + __set_current_state(TASK_DEAD);
> + current->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
> + __schedule(false);
> + BUG();
> + /* Avoid "noreturn function does return".  */
> + for (;;)
> + cpu_relax();/* For when BUG is null */
> +}
> +
>  static inline void sched_submit_work(struct task_struct *tsk)
>  {
>   if (!tsk->state || tsk_is_pi_blocked(tsk))
> 


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-13 Thread Peter Zijlstra
On Tue, Sep 13, 2016 at 06:14:27PM +0200, Oleg Nesterov wrote:

> Me too, and I failed to find something which could be broken... So
> perhaps should make it nop and investigate the new bug reports after
> that.

Works for me :-)

> 
> Hmm. And  preempt_enable_no_resched_notrace() under TASK_DEAD in
> __schedule() should be removed it seems, do_exit() can call __schedule()
> directly.


something like so?

---

 include/linux/kernel.h |  2 +-
 include/linux/sched.h  |  2 ++
 kernel/exit.c  | 11 ++-
 kernel/sched/core.c| 23 ---
 4 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a6118d26a..e5bd9cdd2e24 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -266,7 +266,7 @@ extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-void do_exit(long error_code)
+void __noreturn do_exit(long error_code)
__noreturn;
 void complete_and_exit(struct completion *, long)
__noreturn;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eb64fcd89e68..b0c818a05b2e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -448,6 +448,8 @@ static inline void io_schedule(void)
io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
 }
 
+void __noreturn do_task_dead(void);
+
 struct nsproxy;
 struct user_namespace;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 091a78be3b09..d4c12692f766 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -725,7 +725,7 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-void do_exit(long code)
+void __noreturn do_exit(long code)
 {
struct task_struct *tsk = current;
int group_dead;
@@ -897,14 +897,7 @@ void do_exit(long code)
smp_mb();
	raw_spin_unlock_wait(&tsk->pi_lock);
 
-   /* causes final put_task_struct in finish_task_switch(). */
-   tsk->state = TASK_DEAD;
-   tsk->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
-   schedule();
-   BUG();
-   /* Avoid "noreturn function does return".  */
-   for (;;)
-   cpu_relax();/* For when BUG is null */
+   do_task_dead();
 }
 EXPORT_SYMBOL_GPL(do_exit);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a0086a5fc008..6034f269000f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3327,17 +3327,6 @@ static void __sched notrace __schedule(bool preempt)
rq = cpu_rq(cpu);
prev = rq->curr;
 
-   /*
-* do_exit() calls schedule() with preemption disabled as an exception;
-* however we must fix that up, otherwise the next task will see an
-* inconsistent (higher) preempt count.
-*
-* It also avoids the below schedule_debug() test from complaining
-* about this.
-*/
-   if (unlikely(prev->state == TASK_DEAD))
-   preempt_enable_no_resched_notrace();
-
schedule_debug(prev);
 
if (sched_feat(HRTICK))
@@ -3404,6 +3393,18 @@ static void __sched notrace __schedule(bool preempt)
balance_callback(rq);
 }
 
+void __noreturn do_task_dead(void)
+{
+   /* causes final put_task_struct in finish_task_switch(). */
+   __set_current_state(TASK_DEAD);
+   current->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
+   __schedule(false);
+   BUG();
+   /* Avoid "noreturn function does return".  */
+   for (;;)
+   cpu_relax();/* For when BUG is null */
+}
+
 static inline void sched_submit_work(struct task_struct *tsk)
 {
if (!tsk->state || tsk_is_pi_blocked(tsk))


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-13 Thread Peter Zijlstra
On Tue, Sep 13, 2016 at 06:14:27PM +0200, Oleg Nesterov wrote:

> Me too, and I failed to find something which could be broken... So
> perhaps should make it nop and investigate the new bug reports after
> that.

Works for me :-)

> 
> Hmm. And  preempt_enable_no_resched_notrace() under TASK_DEAD in
> __schedule() should be removed it seems, do_exit() can call __schedule()
> directly.


something like so?

---

 include/linux/kernel.h |  2 +-
 include/linux/sched.h  |  2 ++
 kernel/exit.c  | 11 ++-
 kernel/sched/core.c| 23 ---
 4 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a6118d26a..e5bd9cdd2e24 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -266,7 +266,7 @@ extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-void do_exit(long error_code)
+void __noreturn do_exit(long error_code)
__noreturn;
 void complete_and_exit(struct completion *, long)
__noreturn;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eb64fcd89e68..b0c818a05b2e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -448,6 +448,8 @@ static inline void io_schedule(void)
io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
 }
 
+void __noreturn do_task_dead(void);
+
 struct nsproxy;
 struct user_namespace;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 091a78be3b09..d4c12692f766 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -725,7 +725,7 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-void do_exit(long code)
+void __noreturn do_exit(long code)
 {
struct task_struct *tsk = current;
int group_dead;
@@ -897,14 +897,7 @@ void do_exit(long code)
smp_mb();
	raw_spin_unlock_wait(&tsk->pi_lock);
 
-   /* causes final put_task_struct in finish_task_switch(). */
-   tsk->state = TASK_DEAD;
-   tsk->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
-   schedule();
-   BUG();
-   /* Avoid "noreturn function does return".  */
-   for (;;)
-   cpu_relax();/* For when BUG is null */
+   do_task_dead();
 }
 EXPORT_SYMBOL_GPL(do_exit);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a0086a5fc008..6034f269000f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3327,17 +3327,6 @@ static void __sched notrace __schedule(bool preempt)
rq = cpu_rq(cpu);
prev = rq->curr;
 
-   /*
-* do_exit() calls schedule() with preemption disabled as an exception;
-* however we must fix that up, otherwise the next task will see an
-* inconsistent (higher) preempt count.
-*
-* It also avoids the below schedule_debug() test from complaining
-* about this.
-*/
-   if (unlikely(prev->state == TASK_DEAD))
-   preempt_enable_no_resched_notrace();
-
schedule_debug(prev);
 
if (sched_feat(HRTICK))
@@ -3404,6 +3393,18 @@ static void __sched notrace __schedule(bool preempt)
balance_callback(rq);
 }
 
+void __noreturn do_task_dead(void)
+{
+   /* causes final put_task_struct in finish_task_switch(). */
+   __set_current_state(TASK_DEAD);
+   current->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
+   __schedule(false);
+   BUG();
+   /* Avoid "noreturn function does return".  */
+   for (;;)
+   cpu_relax();/* For when BUG is null */
+}
+
 static inline void sched_submit_work(struct task_struct *tsk)
 {
if (!tsk->state || tsk_is_pi_blocked(tsk))


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-13 Thread Oleg Nesterov
On 09/12, Peter Zijlstra wrote:
>
> On Mon, Sep 12, 2016 at 03:05:38PM +0200, Oleg Nesterov wrote:
>
> > But this leads to the question which I wanted to ask many times.
> >
> > Why cond_resched() is not NOP if CONFIG_PREEMPT=y ?
>
> Dunno, nobody bothered to do it? We should keep the might_sleep() of
> course, but the preemption check is pointless.

Yes, agreed, I actually meant _cond_resched().

> > Perhaps we have some users like, just for example,
> > 
> > preempt_enable_no_resched();
> > cond_resched();
> > 
> > which actually want the should_resched() check even if CONFIG_PREEMPT,
> > but most callers do not?
>
> I would hope not, the few preempt_enable_no_resched() users _should_
> have an actual schedule() call in the _very_ near future.

Me too, and I failed to find something which could be broken... So
perhaps should make it nop and investigate the new bug reports after
that.


Hmm. And  preempt_enable_no_resched_notrace() under TASK_DEAD in
__schedule() should be removed it seems, do_exit() can call __schedule()
directly.

Oleg.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-13 Thread Oleg Nesterov
On 09/12, Peter Zijlstra wrote:
>
> On Mon, Sep 12, 2016 at 03:05:38PM +0200, Oleg Nesterov wrote:
>
> > But this leads to the question which I wanted to ask many times.
> >
> > Why cond_resched() is not NOP if CONFIG_PREEMPT=y ?
>
> Dunno, nobody bothered to do it? We should keep the might_sleep() of
> course, but the preemption check is pointless.

Yes, agreed, I actually meant _cond_resched().

> > Perhaps we have some users like, just for example,
> > 
> > preempt_enable_no_resched();
> > cond_resched();
> > 
> > which actually want the should_resched() check even if CONFIG_PREEMPT,
> > but most callers do not?
>
> I would hope not, the few preempt_enable_no_resched() users _should_
> have an actual schedule() call in the _very_ near future.

Me too, and I failed to find something which could be broken... So
perhaps should make it nop and investigate the new bug reports after
that.


Hmm. And  preempt_enable_no_resched_notrace() under TASK_DEAD in
__schedule() should be removed it seems, do_exit() can call __schedule()
directly.

Oleg.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-13 Thread Peter Zijlstra
On Tue, Sep 13, 2016 at 12:03:05PM +0800, Cheng Chao wrote:
> 
> Peter, Is it as a new patch?

I wanted both changes in one patch, but I fudged my git-diff.

> > ---
> >  kernel/stop_machine.c | 5 +
> >  1 file changed, 5 insertions(+)
> > 
> > diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> > index ae6f41fb9cba..637798d6b554 100644
> > --- a/kernel/stop_machine.c
> > +++ b/kernel/stop_machine.c
> > @@ -121,6 +121,11 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> > void *arg)
> > 	cpu_stop_init_done(&done, 1);
> > 	if (!cpu_stop_queue_work(cpu, &work))
> > 		return -ENOENT;
> > +	/*
> > +	 * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
> > +	 * by doing a preemption.
> > +	 */
> > +	cond_resched();
> > 	wait_for_completion(&done.completion);
> > 	return done.ret;
> >  }
> > 
> 
> I agree to use cond_resched(). https://lkml.org/lkml/2016/9/12/1228
> 
> so we really don't need "if defined(CONFIG_PREEMPT_NONE)"?

> I also think without the "if defined(CONFIG_PREEMPT_NONE)", 
> the code is more clean,and the logic is still ok.

You really don't need the #ifdef, look at the actual code, not the
Kconfig language.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-13 Thread Peter Zijlstra
On Tue, Sep 13, 2016 at 12:03:05PM +0800, Cheng Chao wrote:
> 
> Peter, Is it as a new patch?

I wanted both changes in one patch, but I fudged my git-diff.

> > ---
> >  kernel/stop_machine.c | 5 +
> >  1 file changed, 5 insertions(+)
> > 
> > diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> > index ae6f41fb9cba..637798d6b554 100644
> > --- a/kernel/stop_machine.c
> > +++ b/kernel/stop_machine.c
> > @@ -121,6 +121,11 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> > void *arg)
> > 	cpu_stop_init_done(&done, 1);
> > 	if (!cpu_stop_queue_work(cpu, &work))
> > 		return -ENOENT;
> > +	/*
> > +	 * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
> > +	 * by doing a preemption.
> > +	 */
> > +	cond_resched();
> > 	wait_for_completion(&done.completion);
> > 	return done.ret;
> >  }
> > 
> 
> I agree to use cond_resched(). https://lkml.org/lkml/2016/9/12/1228
> 
> so we really don't need "if defined(CONFIG_PREEMPT_NONE)"?

> I also think without the "if defined(CONFIG_PREEMPT_NONE)", 
> the code is more clean,and the logic is still ok.

You really don't need the #ifdef, look at the actual code, not the
Kconfig language.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Cheng Chao
Peter, thank you.

on 09/12/2016 07:41 PM, Peter Zijlstra wrote:
> On Mon, Sep 12, 2016 at 01:37:27PM +0200, Peter Zijlstra wrote:
>> So what you're saying is that migration_stop_cpu() doesn't work because
>> wait_for_completion() dequeues the task.
>>
>> True I suppose. Not sure I like your solution, nor your implementation
>> of the solution much though.
>>
>> I would much prefer an unconditional cond_resched() there, but also, I
>> think we should do what __migrate_swap_task() does, and set wake_cpu.
>>
>> So something like so..
>>
>> ---
>>  kernel/sched/core.c | 8 ++--
>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index ddd5f48551f1..ade772aa9610 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -1063,8 +1063,12 @@ static int migration_cpu_stop(void *data)
>>   * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
>>   * we're holding p->pi_lock.
>>   */
>> -if (task_rq(p) == rq && task_on_rq_queued(p))
>> -rq = __migrate_task(rq, p, arg->dest_cpu);
>> +if (task_rq(p) == rq) {
>> +if (task_on_rq_queued(p))
>> +rq = __migrate_task(rq, p, arg->dest_cpu);
>> +else
>> +p->wake_cpu = arg->dest_cpu;
>> +}
>> 	raw_spin_unlock(&rq->lock);
>> 	raw_spin_unlock(&p->pi_lock);
>>  
> 
> And this, too narrow a constraint do git diff made it go away.
> 

yes, set wake_cpu is better when try_to_wake_up(). 
Peter, Is it as a new patch?

> ---
>  kernel/stop_machine.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> index ae6f41fb9cba..637798d6b554 100644
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -121,6 +121,11 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> void *arg)
> 	cpu_stop_init_done(&done, 1);
> 	if (!cpu_stop_queue_work(cpu, &work))
> 		return -ENOENT;
> +	/*
> +	 * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
> +	 * by doing a preemption.
> +	 */
> +	cond_resched();
> 	wait_for_completion(&done.completion);
> 	return done.ret;
>  }
> 

I agree to use cond_resched(). https://lkml.org/lkml/2016/9/12/1228

for CONFIG_PREEMPT=y and CONFIG_PREEMPT_VOLUNTARY=y, this patch seems 
unnecessary.
1. when CONFIG_PREEMPT=y
  stop_one_cpu()
  ->cpu_stop_queue_work()
->spin_unlock_irqrestore() (preempt_enable() calls __preempt_schedule())

2. when CONFIG_PREEMPT_VOLUNTARY=y
  stop_one_cpu()
  ->wait_for_completion()
->...
  ->might_sleep() (calls _cond_resched())


so we really don't need "if defined(CONFIG_PREEMPT_NONE)"?

I also think without the "if defined(CONFIG_PREEMPT_NONE)", 
the code is more clean,and the logic is still ok.



diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 4a1ca5f..87464a2 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -126,6 +126,15 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void 
*arg)
	cpu_stop_init_done(&done, 1);
	if (!cpu_stop_queue_work(cpu, &work))
		return -ENOENT;
+
+#if defined(CONFIG_PREEMPT_NONE)
+	/*
+	 * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
+	 * by doing a preemption.
+	 */
+	cond_resched();
+#endif
+
	wait_for_completion(&done.completion);
	return done.ret;
 }


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Cheng Chao
Peter, thank you.

on 09/12/2016 07:41 PM, Peter Zijlstra wrote:
> On Mon, Sep 12, 2016 at 01:37:27PM +0200, Peter Zijlstra wrote:
>> So what you're saying is that migration_stop_cpu() doesn't work because
>> wait_for_completion() dequeues the task.
>>
>> True I suppose. Not sure I like your solution, nor your implementation
>> of the solution much though.
>>
>> I would much prefer an unconditional cond_resched() there, but also, I
>> think we should do what __migrate_swap_task() does, and set wake_cpu.
>>
>> So something like so..
>>
>> ---
>>  kernel/sched/core.c | 8 ++--
>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index ddd5f48551f1..ade772aa9610 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -1063,8 +1063,12 @@ static int migration_cpu_stop(void *data)
>>   * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
>>   * we're holding p->pi_lock.
>>   */
>> -if (task_rq(p) == rq && task_on_rq_queued(p))
>> -rq = __migrate_task(rq, p, arg->dest_cpu);
>> +if (task_rq(p) == rq) {
>> +if (task_on_rq_queued(p))
>> +rq = __migrate_task(rq, p, arg->dest_cpu);
>> +else
>> +p->wake_cpu = arg->dest_cpu;
>> +}
>> 	raw_spin_unlock(&rq->lock);
>> 	raw_spin_unlock(&p->pi_lock);
>>  
> 
> And this, too narrow a constraint do git diff made it go away.
> 

yes, set wake_cpu is better when try_to_wake_up(). 
Peter, Is it as a new patch?

> ---
>  kernel/stop_machine.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> index ae6f41fb9cba..637798d6b554 100644
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -121,6 +121,11 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> void *arg)
> 	cpu_stop_init_done(&done, 1);
> 	if (!cpu_stop_queue_work(cpu, &work))
> 		return -ENOENT;
> +	/*
> +	 * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
> +	 * by doing a preemption.
> +	 */
> +	cond_resched();
> 	wait_for_completion(&done.completion);
> 	return done.ret;
>  }
> 

I agree to use cond_resched(). https://lkml.org/lkml/2016/9/12/1228

for CONFIG_PREEMPT=y and CONFIG_PREEMPT_VOLUNTARY=y, this patch seems 
unnecessary.
1. when CONFIG_PREEMPT=y
  stop_one_cpu()
  ->cpu_stop_queue_work()
->spin_unlock_irqrestore() (preempt_enable() calls __preempt_schedule())

2. when CONFIG_PREEMPT_VOLUNTARY=y
  stop_one_cpu()
  ->wait_for_completion()
->...
  ->might_sleep() (calls _cond_resched())


so we really don't need "if defined(CONFIG_PREEMPT_NONE)"?

I also think without the "if defined(CONFIG_PREEMPT_NONE)", 
the code is more clean,and the logic is still ok.



diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 4a1ca5f..87464a2 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -126,6 +126,15 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void 
*arg)
cpu_stop_init_done(&done, 1);
if (!cpu_stop_queue_work(cpu, &work))
return -ENOENT;
+
+#if defined(CONFIG_PREEMPT_NONE)
+   /*
+* In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup
+* by doing a preemption.
+*/
+   cond_resched();
+#endif
+
wait_for_completion(&done.completion);
return done.ret;
 }


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Cheng Chao


on 09/12/2016 07:03 PM, Oleg Nesterov wrote:
> On 09/10, Cheng Chao wrote:
>>
>> @@ -126,6 +126,17 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
>> void *arg)
>>  cpu_stop_init_done(&done, 1);
>>  if (!cpu_stop_queue_work(cpu, &work))
>>  return -ENOENT;
>> +
>> +#if defined(CONFIG_PREEMPT_NONE)
>> +/*
>> + * Makes the stopper thread run as soon as possible.
>> + * And if the caller is TASK_RUNNING, keeps the caller TASK_RUNNING.
>> + * It's special useful for some callers which are expected to be
>> + * TASK_ON_RQ_QUEUED.
>> + * sched_exec does benefit from this improvement.
>> + */
>> +schedule();
>> +#endif
>>  wait_for_completion(&done.completion);
>>  return done.ret;
>>  }
> 
> Cheng, I already tried twice to suggest to conditionalize this schedule,
> because it can only help if cpu == smp_processor_id, and you didn't reply.
> I still think _cond_resched() makes more sense.
> 
> I won't really argue if you prefer it this way. But did you see my emails?
>

I read them, thanks. because Peter didn't receive my mails before, it took me 
much time
to fix my mailbox, so I didn't reply on time.

Ok, even if cpu != smp_processor_id(), to call schedule() instead 
_cond_resched() can
give the caller a chance not to sleep. when the caller runs on the cpu again, 
it may 
likely find the completion is already done. 
then the stopper thread cpu_stop_signal_done() and the caller 
wait_for_completion() will
actually run very soon.

I think it is trivial improvement. using cond_resched()/_cond_resched() is 
better for 
readability, I choose the cond_resched().

thanks again.



 
> Oleg.
> 


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Cheng Chao


on 09/12/2016 07:03 PM, Oleg Nesterov wrote:
> On 09/10, Cheng Chao wrote:
>>
>> @@ -126,6 +126,17 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
>> void *arg)
>>  cpu_stop_init_done(, 1);
>>  if (!cpu_stop_queue_work(cpu, ))
>>  return -ENOENT;
>> +
>> +#if defined(CONFIG_PREEMPT_NONE)
>> +/*
>> + * Makes the stopper thread run as soon as possible.
>> + * And if the caller is TASK_RUNNING, keeps the caller TASK_RUNNING.
>> + * It's special useful for some callers which are expected to be
>> + * TASK_ON_RQ_QUEUED.
>> + * sched_exec does benefit from this improvement.
>> + */
>> +schedule();
>> +#endif
>>  wait_for_completion();
>>  return done.ret;
>>  }
> 
> Cheng, I already tried twice to suggest to conditionalize this schedule,
> because it can only help if cpu == smp_processor_id, and you didn't reply.
> I still think _cond_resched() makes more sense.
> 
> I won't really argue if you prefer it this way. But did you see my emails?
>

I read them, thanks. because Peter didn't receive my mails before, it took me 
much time
to fix my mailbox, so I didn't reply on time.

Ok, even if cpu != smp_processor_id(), to call schedule() instead 
_cond_resched() can
give the caller a chance not to sleep. when the caller runs on the cpu again, 
it may 
likely find the completion is already done. 
then the stopper thread cpu_stop_signal_done() and the caller 
wait_for_completion() will
actually run very soon.

I think it is trivial improvement. using cond_resched()/_cond_resched() is 
better for 
readability, I choose the cond_resched().

thanks again.



 
> Oleg.
> 


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Peter Zijlstra
On Mon, Sep 12, 2016 at 03:05:38PM +0200, Oleg Nesterov wrote:

> But this leads to the question which I wanted to ask many times.
> 
> Why cond_resched() is not NOP if CONFIG_PREEMPT=y ?

Dunno, nobody bothered to do it? We should keep the might_sleep() of
course, but the preemption check is pointless.

> Perhaps we have some users like, just for example,
> 
>   preempt_enable_no_resched();
>   cond_resched();
> 
> which actually want the should_resched() check even if CONFIG_PREEMPT,
> but most callers do not?

I would hope not, the few preempt_enable_no_resched() users _should_
have an actual schedule() call in the _very_ near future.


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Peter Zijlstra
On Mon, Sep 12, 2016 at 03:05:38PM +0200, Oleg Nesterov wrote:

> But this leads to the question which I wanted to ask many times.
> 
> Why cond_resched() is not NOP if CONFIG_PREEMPT=y ?

Dunno, nobody bothered to do it? We should keep the might_sleep() of
course, but the preemption check is pointless.

> Perhaps we have some users like, just for example,
> 
>   preempt_enable_no_resched();
>   cond_resched();
> 
> which actually want the should_resched() check even if CONFIG_PREEMPT,
> but most callers do not?

I would hope not, the few preempt_enable_no_resched() users _should_
have an actual schedule() call in the _very_ near future.


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Oleg Nesterov
On 09/12, Peter Zijlstra wrote:
>
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -121,6 +121,11 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> void *arg)
>   cpu_stop_init_done(&done, 1);
>   if (!cpu_stop_queue_work(cpu, &work))
>   return -ENOENT;
> + /*
> +  * In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup
> +  * by doing a preemption.
> +  */
> + cond_resched();

Yes, this is what I tried to suggest too.

But this leads to the question which I wanted to ask many times.

Why cond_resched() is not NOP if CONFIG_PREEMPT=y ?

Perhaps we have some users like, just for example,

preempt_enable_no_resched();
cond_resched();

which actually want the should_resched() check even if CONFIG_PREEMPT,
but most callers do not?

Oleg.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Oleg Nesterov
On 09/12, Peter Zijlstra wrote:
>
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -121,6 +121,11 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> void *arg)
>   cpu_stop_init_done(, 1);
>   if (!cpu_stop_queue_work(cpu, ))
>   return -ENOENT;
> + /*
> +  * In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup
> +  * by doing a preemption.
> +  */
> + cond_resched();

Yes, this is what I tried to suggest too.

But this leads to the question which I wanted to ask many times.

Why cond_resched() is not NOP if CONFIG_PREEMPT=y ?

Perhaps we have some users like, just for example,

preempt_enable_no_resched();
cond_resched();

which actually want the should_resched() check even if CONFIG_PREEMPT,
but most callers do not?

Oleg.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Peter Zijlstra
On Mon, Sep 12, 2016 at 01:37:27PM +0200, Peter Zijlstra wrote:
> So what you're saying is that migration_stop_cpu() doesn't work because
> wait_for_completion() dequeues the task.
> 
> True I suppose. Not sure I like your solution, nor your implementation
> of the solution much though.
> 
> I would much prefer an unconditional cond_resched() there, but also, I
> think we should do what __migrate_swap_task() does, and set wake_cpu.
> 
> So something like so..
> 
> ---
>  kernel/sched/core.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index ddd5f48551f1..ade772aa9610 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1063,8 +1063,12 @@ static int migration_cpu_stop(void *data)
>* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
>* we're holding p->pi_lock.
>*/
> - if (task_rq(p) == rq && task_on_rq_queued(p))
> - rq = __migrate_task(rq, p, arg->dest_cpu);
> + if (task_rq(p) == rq) {
> + if (task_on_rq_queued(p))
> + rq = __migrate_task(rq, p, arg->dest_cpu);
> + else
> + p->wake_cpu = arg->dest_cpu;
> + }
>   raw_spin_unlock(&rq->lock);
>   raw_spin_unlock(&p->pi_lock);
>  

And this, too narrow a constraint do git diff made it go away.

---
 kernel/stop_machine.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ae6f41fb9cba..637798d6b554 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -121,6 +121,11 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void 
*arg)
cpu_stop_init_done(&done, 1);
if (!cpu_stop_queue_work(cpu, &work))
return -ENOENT;
+   /*
+* In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup
+* by doing a preemption.
+*/
+   cond_resched();
wait_for_completion(&done.completion);
return done.ret;
 }


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Peter Zijlstra
On Mon, Sep 12, 2016 at 01:37:27PM +0200, Peter Zijlstra wrote:
> So what you're saying is that migration_stop_cpu() doesn't work because
> wait_for_completion() dequeues the task.
> 
> True I suppose. Not sure I like your solution, nor your implementation
> of the solution much though.
> 
> I would much prefer an unconditional cond_resched() there, but also, I
> think we should do what __migrate_swap_task() does, and set wake_cpu.
> 
> So something like so..
> 
> ---
>  kernel/sched/core.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index ddd5f48551f1..ade772aa9610 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1063,8 +1063,12 @@ static int migration_cpu_stop(void *data)
>* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
>* we're holding p->pi_lock.
>*/
> - if (task_rq(p) == rq && task_on_rq_queued(p))
> - rq = __migrate_task(rq, p, arg->dest_cpu);
> + if (task_rq(p) == rq) {
> + if (task_on_rq_queued(p))
> + rq = __migrate_task(rq, p, arg->dest_cpu);
> + else
> + p->wake_cpu = arg->dest_cpu;
> + }
>   raw_spin_unlock(>lock);
>   raw_spin_unlock(>pi_lock);
>  

And this, too narrow a constraint do git diff made it go away.

---
 kernel/stop_machine.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ae6f41fb9cba..637798d6b554 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -121,6 +121,11 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void 
*arg)
cpu_stop_init_done(, 1);
if (!cpu_stop_queue_work(cpu, ))
return -ENOENT;
+   /*
+* In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup
+* by doing a preemption.
+*/
+   cond_resched();
wait_for_completion();
return done.ret;
 }


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Peter Zijlstra
On Sat, Sep 10, 2016 at 04:52:12PM +0800, Cheng Chao wrote:
> For CONFIG_PREEMPT_NONE=y, when sched_exec() needs migration, sched_exec()
> calls stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg).
> 
> If the migration_cpu_stop() can not migrate,why do we call stop_one_cpu()?
> It just makes the task TASK_UNINTERRUPTIBLE, wakes up the stopper thread,
> executes migration_cpu_stop(), and the stopper thread wakes up the task.
> 
> But in fact, all above works are almost useless(wasteful),the reason is
> migration_cpu_stop() can not migrate. why? migration_cpu_stop() needs the
> task is TASK_ON_RQ_QUEUED before it calls __migrate_task().
> 
> This patch keeps the task TASK_RUNNING instead of TASK_UNINTERRUPTIBLE,
> so the migration_cpu_stop() can do useful works.

OK, completely confused. What!?

/me ponders

So what you're saying is that migration_stop_cpu() doesn't work because
wait_for_completion() dequeues the task.

True I suppose. Not sure I like your solution, nor your implementation
of the solution much though.

I would much prefer an unconditional cond_resched() there, but also, I
think we should do what __migrate_swap_task() does, and set wake_cpu.

So something like so..

---
 kernel/sched/core.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ddd5f48551f1..ade772aa9610 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1063,8 +1063,12 @@ static int migration_cpu_stop(void *data)
 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
 * we're holding p->pi_lock.
 */
-   if (task_rq(p) == rq && task_on_rq_queued(p))
-   rq = __migrate_task(rq, p, arg->dest_cpu);
+   if (task_rq(p) == rq) {
+   if (task_on_rq_queued(p))
+   rq = __migrate_task(rq, p, arg->dest_cpu);
+   else
+   p->wake_cpu = arg->dest_cpu;
+   }
raw_spin_unlock(&rq->lock);
raw_spin_unlock(&p->pi_lock);
 


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Peter Zijlstra
On Sat, Sep 10, 2016 at 04:52:12PM +0800, Cheng Chao wrote:
> For CONFIG_PREEMPT_NONE=y, when sched_exec() needs migration, sched_exec()
> calls stop_one_cpu(task_cpu(p), migration_cpu_stop, ).
> 
> If the migration_cpu_stop() can not migrate,why do we call stop_one_cpu()?
> It just makes the task TASK_UNINTERRUPTIBLE, wakes up the stopper thread,
> executes migration_cpu_stop(), and the stopper thread wakes up the task.
> 
> But in fact, all above works are almost useless(wasteful),the reason is
> migration_cpu_stop() can not migrate. why? migration_cpu_stop() needs the
> task is TASK_ON_RQ_QUEUED before it calls __migrate_task().
> 
> This patch keeps the task TASK_RUNNING instead of TASK_UNINTERRUPTIBLE,
> so the migration_cpu_stop() can do useful works.

OK, completely confused. What!?

/me ponders

So what you're saying is that migration_stop_cpu() doesn't work because
wait_for_completion() dequeues the task.

True I suppose. Not sure I like your solution, nor your implementation
of the solution much though.

I would much prefer an unconditional cond_resched() there, but also, I
think we should do what __migrate_swap_task() does, and set wake_cpu.

So something like so..

---
 kernel/sched/core.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ddd5f48551f1..ade772aa9610 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1063,8 +1063,12 @@ static int migration_cpu_stop(void *data)
 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
 * we're holding p->pi_lock.
 */
-   if (task_rq(p) == rq && task_on_rq_queued(p))
-   rq = __migrate_task(rq, p, arg->dest_cpu);
+   if (task_rq(p) == rq) {
+   if (task_on_rq_queued(p))
+   rq = __migrate_task(rq, p, arg->dest_cpu);
+   else
+   p->wake_cpu = arg->dest_cpu;
+   }
raw_spin_unlock(&rq->lock);
raw_spin_unlock(&p->pi_lock);
 


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Oleg Nesterov
On 09/10, Cheng Chao wrote:
>
> @@ -126,6 +126,17 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> void *arg)
>   cpu_stop_init_done(, 1);
>   if (!cpu_stop_queue_work(cpu, ))
>   return -ENOENT;
> +
> +#if defined(CONFIG_PREEMPT_NONE)
> + /*
> +  * Makes the stopper thread run as soon as possible.
> +  * And if the caller is TASK_RUNNING, keeps the caller TASK_RUNNING.
> +  * It's special useful for some callers which are expected to be
> +  * TASK_ON_RQ_QUEUED.
> +  * sched_exec does benefit from this improvement.
> +  */
> + schedule();
> +#endif
>   wait_for_completion();
>   return done.ret;
>  }

Cheng, I already tried twice to suggest to conditionalize this schedule,
because it can only help if cpu == smp_processor_id, and you didn't reply.
I still think _cond_resched() makes more sense.

I won't really argue if you prefer it this way. But did you see my emails?

Oleg.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-12 Thread Oleg Nesterov
On 09/10, Cheng Chao wrote:
>
> @@ -126,6 +126,17 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> void *arg)
>   cpu_stop_init_done(, 1);
>   if (!cpu_stop_queue_work(cpu, ))
>   return -ENOENT;
> +
> +#if defined(CONFIG_PREEMPT_NONE)
> + /*
> +  * Makes the stopper thread run as soon as possible.
> +  * And if the caller is TASK_RUNNING, keeps the caller TASK_RUNNING.
> +  * It's special useful for some callers which are expected to be
> +  * TASK_ON_RQ_QUEUED.
> +  * sched_exec does benefit from this improvement.
> +  */
> + schedule();
> +#endif
>   wait_for_completion();
>   return done.ret;
>  }

Cheng, I already tried twice to suggest to conditionalize this schedule,
because it can only help if cpu == smp_processor_id, and you didn't reply.
I still think _cond_resched() makes more sense.

I won't really argue if you prefer it this way. But did you see my emails?

Oleg.



Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-10 Thread Peter Zijlstra
On Sat, Sep 10, 2016 at 05:51:02PM +0800, Cheng Chao wrote:
> hi Peter, I guess you can receive the mail from me now,
> I have changed the mailbox to gmail.

Indeed, I'll have a look on Monday.


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-10 Thread Peter Zijlstra
On Sat, Sep 10, 2016 at 05:51:02PM +0800, Cheng Chao wrote:
> hi Peter, I guess you can receive the mail from me now,
> I have changed the mailbox to gmail.

Indeed, I'll have a look on Monday.


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-10 Thread Cheng Chao
hi Peter, I guess you can receive the mail from me now,
I have changed the mailbox to gmail.

Oleg has already done much work for this patch, I am really obliged.
please review this patch, thanks.

on 09/10/2016 04:52 PM, Cheng Chao wrote:
> For CONFIG_PREEMPT_NONE=y, when sched_exec() needs migration, sched_exec()
> calls stop_one_cpu(task_cpu(p), migration_cpu_stop, ).
> 
> If the migration_cpu_stop() can not migrate,why do we call stop_one_cpu()?
> It just makes the task TASK_UNINTERRUPTIBLE, wakes up the stopper thread,
> executes migration_cpu_stop(), and the stopper thread wakes up the task.
> 
> But in fact, all above works are almost useless(wasteful),the reason is
> migration_cpu_stop() can not migrate. why? migration_cpu_stop() needs the
> task is TASK_ON_RQ_QUEUED before it calls __migrate_task().
> 
> This patch keeps the task TASK_RUNNING instead of TASK_UNINTERRUPTIBLE,
> so the migration_cpu_stop() can do useful works.
> 
> Signed-off-by: Cheng Chao 
> ---
>  kernel/stop_machine.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> index 4a1ca5f..41aea5e 100644
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -126,6 +126,17 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> void *arg)
>   cpu_stop_init_done(, 1);
>   if (!cpu_stop_queue_work(cpu, ))
>   return -ENOENT;
> +
> +#if defined(CONFIG_PREEMPT_NONE)
> + /*
> +  * Makes the stopper thread run as soon as possible.
> +  * And if the caller is TASK_RUNNING, keeps the caller TASK_RUNNING.
> +  * It's special useful for some callers which are expected to be
> +  * TASK_ON_RQ_QUEUED.
> +  * sched_exec does benefit from this improvement.
> +  */
> + schedule();
> +#endif
>   wait_for_completion();
>   return done.ret;
>  }
> 


Re: [PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-10 Thread Cheng Chao
hi Peter, I guess you can receive the mail from me now,
I have changed the mailbox to gmail.

Oleg has already done much work for this patch, I am really obliged.
please review this patch, thanks.

on 09/10/2016 04:52 PM, Cheng Chao wrote:
> For CONFIG_PREEMPT_NONE=y, when sched_exec() needs migration, sched_exec()
> calls stop_one_cpu(task_cpu(p), migration_cpu_stop, ).
> 
> If the migration_cpu_stop() can not migrate,why do we call stop_one_cpu()?
> It just makes the task TASK_UNINTERRUPTIBLE, wakes up the stopper thread,
> executes migration_cpu_stop(), and the stopper thread wakes up the task.
> 
> But in fact, all above works are almost useless(wasteful),the reason is
> migration_cpu_stop() can not migrate. why? migration_cpu_stop() needs the
> task is TASK_ON_RQ_QUEUED before it calls __migrate_task().
> 
> This patch keeps the task TASK_RUNNING instead of TASK_UNINTERRUPTIBLE,
> so the migration_cpu_stop() can do useful works.
> 
> Signed-off-by: Cheng Chao 
> ---
>  kernel/stop_machine.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> index 4a1ca5f..41aea5e 100644
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -126,6 +126,17 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, 
> void *arg)
>   cpu_stop_init_done(, 1);
>   if (!cpu_stop_queue_work(cpu, ))
>   return -ENOENT;
> +
> +#if defined(CONFIG_PREEMPT_NONE)
> + /*
> +  * Makes the stopper thread run as soon as possible.
> +  * And if the caller is TASK_RUNNING, keeps the caller TASK_RUNNING.
> +  * It's special useful for some callers which are expected to be
> +  * TASK_ON_RQ_QUEUED.
> +  * sched_exec does benefit from this improvement.
> +  */
> + schedule();
> +#endif
>   wait_for_completion();
>   return done.ret;
>  }
> 


[PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-10 Thread Cheng Chao
For CONFIG_PREEMPT_NONE=y, when sched_exec() needs migration, sched_exec()
calls stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg).

If the migration_cpu_stop() can not migrate,why do we call stop_one_cpu()?
It just makes the task TASK_UNINTERRUPTIBLE, wakes up the stopper thread,
executes migration_cpu_stop(), and the stopper thread wakes up the task.

But in fact, all above works are almost useless(wasteful),the reason is
migration_cpu_stop() can not migrate. why? migration_cpu_stop() needs the
task is TASK_ON_RQ_QUEUED before it calls __migrate_task().

This patch keeps the task TASK_RUNNING instead of TASK_UNINTERRUPTIBLE,
so the migration_cpu_stop() can do useful works.

Signed-off-by: Cheng Chao 
---
 kernel/stop_machine.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 4a1ca5f..41aea5e 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -126,6 +126,17 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void 
*arg)
cpu_stop_init_done(, 1);
if (!cpu_stop_queue_work(cpu, ))
return -ENOENT;
+
+#if defined(CONFIG_PREEMPT_NONE)
+   /*
+* Makes the stopper thread run as soon as possible.
+* And if the caller is TASK_RUNNING, keeps the caller TASK_RUNNING.
+* It's special useful for some callers which are expected to be
+* TASK_ON_RQ_QUEUED.
+* sched_exec does benefit from this improvement.
+*/
+   schedule();
+#endif
wait_for_completion();
return done.ret;
 }
-- 
2.4.11



[PATCH v3] stop_machine: Make migration_cpu_stop() does useful works for CONFIG_PREEMPT_NONE

2016-09-10 Thread Cheng Chao
For CONFIG_PREEMPT_NONE=y, when sched_exec() needs migration, sched_exec()
calls stop_one_cpu(task_cpu(p), migration_cpu_stop, ).

If the migration_cpu_stop() can not migrate,why do we call stop_one_cpu()?
It just makes the task TASK_UNINTERRUPTIBLE, wakes up the stopper thread,
executes migration_cpu_stop(), and the stopper thread wakes up the task.

But in fact, all above works are almost useless(wasteful),the reason is
migration_cpu_stop() can not migrate. why? migration_cpu_stop() needs the
task is TASK_ON_RQ_QUEUED before it calls __migrate_task().

This patch keeps the task TASK_RUNNING instead of TASK_UNINTERRUPTIBLE,
so the migration_cpu_stop() can do useful works.

Signed-off-by: Cheng Chao 
---
 kernel/stop_machine.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 4a1ca5f..41aea5e 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -126,6 +126,17 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void 
*arg)
cpu_stop_init_done(&done, 1);
if (!cpu_stop_queue_work(cpu, &work))
return -ENOENT;
+
+#if defined(CONFIG_PREEMPT_NONE)
+   /*
+* Makes the stopper thread run as soon as possible.
+* And if the caller is TASK_RUNNING, keeps the caller TASK_RUNNING.
+* It's special useful for some callers which are expected to be
+* TASK_ON_RQ_QUEUED.
+* sched_exec does benefit from this improvement.
+*/
+   schedule();
+#endif
wait_for_completion(&done.completion);
return done.ret;
 }
-- 
2.4.11