Currently, we won't select a new oom victim until the previous one has passed away. This might lead to a deadlock if an allocating task holds a lock needed by the victim to complete. To cope with this problem, this patch introduced oom timeout, after which a new task will be selected even if the previous victim hasn't died. The timeout is hard-coded, equals 5 seconds.
https://jira.sw.ru/browse/PSBM-38581 Signed-off-by: Vladimir Davydov <[email protected]> --- include/linux/oom.h | 1 + mm/oom_kill.c | 48 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index e19385dd29aa..caeda102e7ba 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -34,6 +34,7 @@ enum oom_scan_t { struct oom_context { struct task_struct *owner; struct task_struct *victim; + unsigned long oom_start; wait_queue_head_t waitq; }; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c9265092825d..130318ee3598 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -45,6 +45,8 @@ int sysctl_oom_dump_tasks; static DEFINE_SPINLOCK(oom_context_lock); +#define OOM_TIMEOUT (5 * HZ) + #ifndef CONFIG_MEMCG struct oom_context oom_ctx = { .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(oom_ctx.waitq), @@ -55,6 +57,7 @@ void init_oom_context(struct oom_context *ctx) { ctx->owner = NULL; ctx->victim = NULL; + ctx->oom_start = 0; init_waitqueue_head(&ctx->waitq); } @@ -286,11 +289,14 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task, /* * This task already has access to memory reserves and is being killed. - * Don't allow any other task to have access to the reserves. + * Try to select another one. + * + * This can only happen if oom_trylock timeout-ed, which most probably + * means that the victim had dead-locked. */ if (test_tsk_thread_flag(task, TIF_MEMDIE)) { if (!force_kill) - return OOM_SCAN_ABORT; + return OOM_SCAN_CONTINUE; } if (!task->mm) return OOM_SCAN_CONTINUE; @@ -645,6 +651,7 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier); bool oom_trylock(struct mem_cgroup *memcg) { + unsigned long now = jiffies; struct mem_cgroup *iter; struct oom_context *ctx; DEFINE_WAIT(wait); @@ -660,14 +667,29 @@ bool oom_trylock(struct mem_cgroup *memcg) iter = mem_cgroup_iter(memcg, NULL, NULL); do { ctx = mem_cgroup_oom_context(iter); - if (ctx->owner || ctx->victim) { + if ((ctx->owner || ctx->victim) && + time_before(now, ctx->oom_start + OOM_TIMEOUT)) { prepare_to_wait(&ctx->waitq, &wait, TASK_KILLABLE); spin_unlock(&oom_context_lock); - schedule(); + schedule_timeout(ctx->oom_start + OOM_TIMEOUT - now); finish_wait(&ctx->waitq, &wait); mem_cgroup_iter_break(memcg, iter); return false; + } else if (ctx->owner || ctx->victim) { + /* + * Timeout. Release the context. + */ + struct task_struct *p = ctx->victim ?: ctx->owner; + + task_lock(p); + pr_err("OOM kill timeout: %d (%s)\n", + task_pid_nr(p), p->comm); + task_unlock(p); + show_stack(p, NULL); + + ctx->owner = ctx->victim = NULL; + wake_up_all(&ctx->waitq); } } while ((iter = mem_cgroup_iter(memcg, iter, NULL))); @@ -680,6 +702,7 @@ bool oom_trylock(struct mem_cgroup *memcg) BUG_ON(ctx->owner); BUG_ON(ctx->victim); ctx->owner = current; + ctx->oom_start = now; } while ((iter = mem_cgroup_iter(memcg, iter, NULL))); spin_unlock(&oom_context_lock); @@ -689,9 +712,10 @@ bool oom_trylock(struct mem_cgroup *memcg) void oom_unlock(struct mem_cgroup *memcg) { + unsigned long now = jiffies; + unsigned long timeout = 0; struct mem_cgroup *iter, *victim_memcg = NULL; struct oom_context *ctx; - bool need_to_wait = false; DEFINE_WAIT(wait); spin_lock(&oom_context_lock); @@ -714,8 +738,15 @@ void oom_unlock(struct mem_cgroup *memcg) } /* Already waiting? */ - if (need_to_wait) + if (timeout > 0) continue; + /* Expired? */ + if (time_after_eq(now, ctx->oom_start + OOM_TIMEOUT)) + continue; + + timeout = ctx->oom_start + OOM_TIMEOUT - now; + BUG_ON(timeout == 0); + /* * Remember victim memcg so that we can wait for victim * to exit below. @@ -724,13 +755,12 @@ void oom_unlock(struct mem_cgroup *memcg) mem_cgroup_get(iter); prepare_to_wait(&ctx->waitq, &wait, TASK_KILLABLE); - need_to_wait = true; } while ((iter = mem_cgroup_iter(memcg, iter, NULL))); spin_unlock(&oom_context_lock); - if (need_to_wait) { - schedule(); + if (timeout > 0) { + schedule_timeout(timeout); ctx = mem_cgroup_oom_context(victim_memcg); finish_wait(&ctx->waitq, &wait); mem_cgroup_put(victim_memcg); -- 2.1.4 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
