On 11.09.2015 12:54, Vladimir Davydov wrote:
> Currently, we won't select a new oom victim until the previous one has
> passed away. This might lead to a deadlock if an allocating task holds a
> lock needed by the victim to complete. To cope with this problem, this
> patch introduced oom timeout, after which a new task will be selected
> even if the previous victim hasn't died. The timeout is hard-coded,
> equals 5 seconds.
> 
> https://jira.sw.ru/browse/PSBM-38581
> 
> Signed-off-by: Vladimir Davydov <[email protected]>
> ---
>  include/linux/oom.h |  1 +
>  mm/oom_kill.c       | 48 +++++++++++++++++++++++++++++++++++++++---------
>  2 files changed, 40 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> index e19385dd29aa..caeda102e7ba 100644
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -34,6 +34,7 @@ enum oom_scan_t {
>  struct oom_context {
>       struct task_struct *owner;
>       struct task_struct *victim;
> +     unsigned long oom_start;
>       wait_queue_head_t waitq;
>  };
>  
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index c9265092825d..bf101f9242ff 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -45,6 +45,8 @@ int sysctl_oom_dump_tasks;
>  
>  static DEFINE_SPINLOCK(oom_context_lock);
>  
> +#define OOM_TIMEOUT  (5 * HZ)
> +
>  #ifndef CONFIG_MEMCG
>  struct oom_context oom_ctx = {
>       .waitq          = __WAIT_QUEUE_HEAD_INITIALIZER(oom_ctx.waitq),
> @@ -55,6 +57,7 @@ void init_oom_context(struct oom_context *ctx)
>  {
>       ctx->owner = NULL;
>       ctx->victim = NULL;
> +     ctx->oom_start = 0;
>       init_waitqueue_head(&ctx->waitq);
>  }
>  
> @@ -286,11 +289,14 @@ enum oom_scan_t oom_scan_process_thread(struct 
> task_struct *task,
>  
>       /*
>        * This task already has access to memory reserves and is being killed.
> -      * Don't allow any other task to have access to the reserves.
> +      * Try to select another one.
> +      *
> +      * This can only happen if oom_trylock timeout-ed, which most probably
> +      * means that the victim had dead-locked.
>        */
>       if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
>               if (!force_kill)
> -                     return OOM_SCAN_ABORT;
> +                     return OOM_SCAN_CONTINUE;
>       }
>       if (!task->mm)
>               return OOM_SCAN_CONTINUE;
> @@ -645,6 +651,7 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
>  
>  bool oom_trylock(struct mem_cgroup *memcg)
>  {
> +     unsigned long now = jiffies;
>       struct mem_cgroup *iter;
>       struct oom_context *ctx;
>       DEFINE_WAIT(wait);
> @@ -660,14 +667,29 @@ bool oom_trylock(struct mem_cgroup *memcg)
>       iter = mem_cgroup_iter(memcg, NULL, NULL);
>       do {
>               ctx = mem_cgroup_oom_context(iter);
> -             if (ctx->owner || ctx->victim) {
> +             if ((ctx->owner || ctx->victim) &&
> +                 time_before(now, ctx->oom_start + OOM_TIMEOUT)) {
>                       prepare_to_wait(&ctx->waitq, &wait,
>                                       TASK_KILLABLE);
>                       spin_unlock(&oom_context_lock);
> -                     schedule();
> +                     schedule_timeout(ctx->oom_start + OOM_TIMEOUT - now);
>                       finish_wait(&ctx->waitq, &wait);
>                       mem_cgroup_iter_break(memcg, iter);
>                       return false;
> +             } else if (ctx->owner || ctx->victim) {
> +                     /*
> +                      * Timeout. Release the context.
> +                      */
> +                     struct task_struct *p = ctx->victim ?: ctx->owner;
> +
> +                     task_lock(p);
> +                     pr_err("OOM kill timeout: %d (%s)\n",
> +                            task_pid_nr(p), p->comm);
> +                     task_unlock(p);
> +                     show_stack(p, NULL);
> +
> +                     ctx->owner = ctx->victim = NULL;
> +                     wake_up_all(&ctx->waitq);
>               }
>       } while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
>  
> @@ -680,6 +702,7 @@ bool oom_trylock(struct mem_cgroup *memcg)
>               BUG_ON(ctx->owner);
>               BUG_ON(ctx->victim);
>               ctx->owner = current;
> +             ctx->oom_start = now;
>       } while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
>  
>       spin_unlock(&oom_context_lock);
> @@ -689,9 +712,10 @@ bool oom_trylock(struct mem_cgroup *memcg)
>  
>  void oom_unlock(struct mem_cgroup *memcg)
>  {
> +     unsigned long now = jiffies;
> +     unsigned long timeout = 0;
>       struct mem_cgroup *iter, *victim_memcg = NULL;
>       struct oom_context *ctx;
> -     bool need_to_wait = false;
>       DEFINE_WAIT(wait);
>  
>       spin_lock(&oom_context_lock);
> @@ -714,8 +738,15 @@ void oom_unlock(struct mem_cgroup *memcg)
>               }
>  
>               /* Already waiting? */
> -             if (need_to_wait)
> +             if (timeout > 0)
>                       continue;
> +             /* Expired? */
> +             if (time_after(now, ctx->oom_start + OOM_TIMEOUT))
> +                     continue;
> +
> +             timeout = ctx->oom_start + OOM_TIMEOUT - now;
> +             BUG_ON(timeout == 0);

now may be less or more than (oom_start + OOM_TIMEOUT), so why can't
it be equal? Did you mean time_after_eq() above?

> +
>               /*
>                * Remember victim memcg so that we can wait for victim
>                * to exit below.
> @@ -724,13 +755,12 @@ void oom_unlock(struct mem_cgroup *memcg)
>               mem_cgroup_get(iter);
>  
>               prepare_to_wait(&ctx->waitq, &wait, TASK_KILLABLE);
> -             need_to_wait = true;
>       } while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
>  
>       spin_unlock(&oom_context_lock);
>  
> -     if (need_to_wait) {
> -             schedule();
> +     if (timeout > 0) {
> +             schedule_timeout(timeout);
>               ctx = mem_cgroup_oom_context(victim_memcg);
>               finish_wait(&ctx->waitq, &wait);
>               mem_cgroup_put(victim_memcg);
> 
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to