The commit is pushed to "branch-rh7-3.10.0-327.3.1-vz7.10.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.3.1.vz7.10.7
------>
commit 522b3faa45d160bb1dc4903bdf524286d5a543d4
Author: Vladimir Davydov <[email protected]>
Date:   Thu Jan 28 20:21:28 2016 +0400

    oom: introduce oom timeout
    
    Rebase to RHEL 7.2 based kernel:
    https://jira.sw.ru/browse/PSBM-42320
    ===
    From: Vladimir Davydov <[email protected]>
    
    Patchset description: oom enhancements - part 2
    
     - Patches 1-2 prepare memcg for upcoming changes in oom design.
     - Patch 3 reworks the oom locking design so that the executioner waits
       for the victim to exit. This is necessary to increase the oom kill
       rate, which is essential for berserker mode.
     - Patch 4 drops unused OOM_SCAN_ABORT
     - Patch 5 introduces oom timeout.
       https://jira.sw.ru/browse/PSBM-38581
     - Patch 6 makes oom fairer when it comes to selecting a victim among
       different containers.
       https://jira.sw.ru/browse/PSBM-37915
     - Patch 7 prepares oom for introducing berserker mode
     - Patch 8 resurrects oom berserker mode, which is supposed to cope with
       actively forking processes.
       https://jira.sw.ru/browse/PSBM-17930
    
    https://jira.sw.ru/browse/PSBM-26973
    
    Changes in v3:
     - rework oom_trylock (patch 3)
     - select exiting process instead of aborting oom scan so as not to keep
       busy-waiting for an exiting process to exit (patches 3, 4)
     - cleanup oom timeout handling + fix stuck process trace dumped
       multiple times on timeout (patch 5)
     - set max_overdraft to ULONG_MAX on selected processes (patch 6)
     - rework oom berserker process selection logic (patches 7, 8)
    
    Changes in v2:
     - s/time_after/time_after_eq to avoid BUG_ON in oom_trylock (patch 4)
     - propagate victim to the context that initiated oom in oom_unlock
       (patch 6)
     - always set oom_end on releasing oom context (patch 6)
    
    Vladimir Davydov (8):
      memcg: add mem_cgroup_get/put helpers
      memcg: add lock for protecting memcg->oom_notify list
      oom: rework locking design
      oom: introduce oom timeout
      oom: drop OOM_SCAN_ABORT
      oom: rework logic behind memory.oom_guarantee
      oom: pass points and overdraft to oom_kill_process
      oom: resurrect berserker mode
    
    Reviewed-by: Kirill Tkhai <[email protected]>
    
    =========================================
    This patch description:
    
    Currently, we won't select a new oom victim until the previous one has
    passed away. This might lead to a deadlock if an allocating task holds a
    lock that the victim needs in order to exit. To cope with this problem,
    this patch introduces an oom timeout, after which a new task will be
    selected even if the previous victim hasn't died. The timeout is
    hard-coded to 5 seconds.
    
    https://jira.sw.ru/browse/PSBM-38581
    
    Signed-off-by: Vladimir Davydov <[email protected]>
---
 include/linux/oom.h |  2 ++
 mm/oom_kill.c       | 60 ++++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/include/linux/oom.h b/include/linux/oom.h
index e19385d..f804551 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -34,6 +34,8 @@ enum oom_scan_t {
 struct oom_context {
        struct task_struct *owner;
        struct task_struct *victim;
+       bool marked;
+       unsigned long oom_start;
        wait_queue_head_t waitq;
 };
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index e50621b..fd6defa7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -45,6 +45,8 @@ int sysctl_oom_dump_tasks;
 
 static DEFINE_SPINLOCK(oom_context_lock);
 
+#define OOM_TIMEOUT    (5 * HZ)
+
 #ifndef CONFIG_MEMCG
 struct oom_context oom_ctx = {
        .waitq          = __WAIT_QUEUE_HEAD_INITIALIZER(oom_ctx.waitq),
@@ -55,6 +57,8 @@ void init_oom_context(struct oom_context *ctx)
 {
        ctx->owner = NULL;
        ctx->victim = NULL;
+       ctx->marked = false;
+       ctx->oom_start = 0;
        init_waitqueue_head(&ctx->waitq);
 }
 
@@ -62,6 +66,7 @@ static void __release_oom_context(struct oom_context *ctx)
 {
        ctx->owner = NULL;
        ctx->victim = NULL;
+       ctx->marked = false;
        wake_up_all(&ctx->waitq);
 }
 
@@ -291,11 +296,14 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 
        /*
         * This task already has access to memory reserves and is being killed.
-        * Don't allow any other task to have access to the reserves.
+        * Try to select another one.
+        *
+        * This can only happen if oom_trylock timeout-ed, which most probably
+        * means that the victim had dead-locked.
         */
        if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
                if (!force_kill)
-                       return OOM_SCAN_ABORT;
+                       return OOM_SCAN_CONTINUE;
        }
        if (!task->mm)
                return OOM_SCAN_CONTINUE;
@@ -463,8 +471,10 @@ void mark_oom_victim(struct task_struct *tsk)
        memcg = try_get_mem_cgroup_from_mm(tsk->mm);
        ctx = mem_cgroup_oom_context(memcg);
        spin_lock(&oom_context_lock);
-       if (!ctx->victim)
+       if (!ctx->victim) {
                ctx->victim = tsk;
+               ctx->marked = true;
+       }
        spin_unlock(&oom_context_lock);
        mem_cgroup_put(memcg);
 }
@@ -499,21 +509,26 @@ void exit_oom_victim(void)
 
 static void __wait_oom_context(struct oom_context *ctx)
 {
+       unsigned long now = jiffies;
+       unsigned long timeout;
        DEFINE_WAIT(wait);
 
-       if (ctx->victim == current) {
+       if (ctx->victim == current ||
+           time_after_eq(now, ctx->oom_start + OOM_TIMEOUT)) {
                spin_unlock(&oom_context_lock);
                return;
        }
 
        prepare_to_wait(&ctx->waitq, &wait, TASK_KILLABLE);
+       timeout = ctx->oom_start + OOM_TIMEOUT - now;
        spin_unlock(&oom_context_lock);
-       schedule();
+       schedule_timeout(timeout);
        finish_wait(&ctx->waitq, &wait);
 }
 
 bool oom_trylock(struct mem_cgroup *memcg)
 {
+       unsigned long now = jiffies;
        struct mem_cgroup *iter;
        struct oom_context *ctx;
 
@@ -528,10 +543,32 @@ bool oom_trylock(struct mem_cgroup *memcg)
        iter = mem_cgroup_iter(memcg, NULL, NULL);
        do {
                ctx = mem_cgroup_oom_context(iter);
-               if (ctx->owner || ctx->victim) {
+               if ((ctx->owner || ctx->victim) &&
+                   time_before(now, ctx->oom_start + OOM_TIMEOUT)) {
                        __wait_oom_context(ctx);
                        mem_cgroup_iter_break(memcg, iter);
                        return false;
+               } else if (ctx->owner || ctx->victim) {
+                       /*
+                        * Timeout. Release the context and dump stack
+                        * trace of the stuck process.
+                        *
+                        * To avoid dumping stack trace of the same task
+                        * more than once, we mark the context that
+                        * contained the victim when it was killed (see
+                        * mark_oom_victim).
+                        */
+                       struct task_struct *p = ctx->victim;
+
+                       if (p && ctx->marked) {
+                               task_lock(p);
+                               pr_err("OOM kill timeout: %d (%s)\n",
+                                      task_pid_nr(p), p->comm);
+                               task_unlock(p);
+                               show_stack(p, NULL);
+                       }
+
+                       __release_oom_context(ctx);
                }
        } while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
 
@@ -544,6 +581,7 @@ bool oom_trylock(struct mem_cgroup *memcg)
                BUG_ON(ctx->owner);
                BUG_ON(ctx->victim);
                ctx->owner = current;
+               ctx->oom_start = now;
        } while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
 
        spin_unlock(&oom_context_lock);
@@ -565,7 +603,11 @@ void oom_unlock(struct mem_cgroup *memcg)
        iter = mem_cgroup_iter(memcg, NULL, NULL);
        do {
                ctx = mem_cgroup_oom_context(iter);
-               BUG_ON(ctx->owner != current);
+               if (ctx->owner != current) {
+                       /* Lost ownership on timeout */
+                       mem_cgroup_iter_break(memcg, iter);
+                       break;
+               }
                if (ctx->victim) {
                        victim = ctx->victim;
                        /*
@@ -598,7 +640,9 @@ void oom_unlock(struct mem_cgroup *memcg)
        iter = mem_cgroup_iter(memcg, NULL, NULL);
        do {
                ctx = mem_cgroup_oom_context(iter);
-               BUG_ON(ctx->owner != current);
+               if (ctx->owner != current)
+                       /* Lost ownership on timeout */
+                       continue;
                if (!ctx->victim)
                        /*
                         * Victim already exited or nobody was killed in
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to