Great, __schedule() doesn't need to pay any attention to TASK_DEAD now.

On 09/14/2016 12:37 AM, Peter Zijlstra wrote:
> On Tue, Sep 13, 2016 at 06:14:27PM +0200, Oleg Nesterov wrote:
> 
>> Me too, and I failed to find something which could be broken... So
>> perhaps we should make it a nop and investigate the new bug reports after
>> that.
> 
> Works for me :-)
> 
>>
>> Hmm. And preempt_enable_no_resched_notrace() under TASK_DEAD in
>> __schedule() should be removed it seems, do_exit() can call __schedule()
>> directly.
> 
> 
> something like so?
> 
> ---
> 
>  include/linux/kernel.h |  2 +-
>  include/linux/sched.h  |  2 ++
>  kernel/exit.c          | 11 ++---------
>  kernel/sched/core.c    | 23 ++++++++++++-----------
>  4 files changed, 17 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
> index d96a6118d26a..e5bd9cdd2e24 100644
> --- a/include/linux/kernel.h
> +++ b/include/linux/kernel.h
> @@ -266,7 +266,7 @@ extern void oops_enter(void);
>  extern void oops_exit(void);
>  void print_oops_end_marker(void);
>  extern int oops_may_print(void);
> -void do_exit(long error_code)
> +void __noreturn do_exit(long error_code)
>       __noreturn;
>  void complete_and_exit(struct completion *, long)
>       __noreturn;
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index eb64fcd89e68..b0c818a05b2e 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -448,6 +448,8 @@ static inline void io_schedule(void)
>       io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
>  }
>  
> +void __noreturn do_task_dead(void);
> +
>  struct nsproxy;
>  struct user_namespace;
>  
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 091a78be3b09..d4c12692f766 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -725,7 +725,7 @@ static void check_stack_usage(void)
>  static inline void check_stack_usage(void) {}
>  #endif
>  
> -void do_exit(long code)
> +void __noreturn do_exit(long code)
>  {
>       struct task_struct *tsk = current;
>       int group_dead;
> @@ -897,14 +897,7 @@ void do_exit(long code)
>       smp_mb();
>       raw_spin_unlock_wait(&tsk->pi_lock);
>  
> -     /* causes final put_task_struct in finish_task_switch(). */
> -     tsk->state = TASK_DEAD;
> -     tsk->flags |= PF_NOFREEZE;      /* tell freezer to ignore us */
> -     schedule();
> -     BUG();
> -     /* Avoid "noreturn function does return".  */
> -     for (;;)
> -             cpu_relax();    /* For when BUG is null */
> +     do_task_dead();
>  }
>  EXPORT_SYMBOL_GPL(do_exit);
>  
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index a0086a5fc008..6034f269000f 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3327,17 +3327,6 @@ static void __sched notrace __schedule(bool preempt)
>       rq = cpu_rq(cpu);
>       prev = rq->curr;
>  
> -     /*
> -      * do_exit() calls schedule() with preemption disabled as an exception;
> -      * however we must fix that up, otherwise the next task will see an
> -      * inconsistent (higher) preempt count.
> -      *
> -      * It also avoids the below schedule_debug() test from complaining
> -      * about this.
> -      */
> -     if (unlikely(prev->state == TASK_DEAD))
> -             preempt_enable_no_resched_notrace();
> -
>       schedule_debug(prev);
>  
>       if (sched_feat(HRTICK))
> @@ -3404,6 +3393,18 @@ static void __sched notrace __schedule(bool preempt)
>       balance_callback(rq);
>  }
>  
> +void __noreturn do_task_dead(void)
> +{
> +     /* causes final put_task_struct in finish_task_switch(). */
> +     __set_current_state(TASK_DEAD);
> +     current->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
> +     __schedule(false);
> +     BUG();
> +     /* Avoid "noreturn function does return".  */
> +     for (;;)
> +             cpu_relax();    /* For when BUG is null */
> +}
> +
>  static inline void sched_submit_work(struct task_struct *tsk)
>  {
>       if (!tsk->state || tsk_is_pi_blocked(tsk))
> 

Reply via email to