First, separate the tsk_is_oom_victim() and TIF_MEMDIE flag checks:
let the former indicate that a task has been killed by the OOM killer,
and the latter indicate that a task has access to the memory
reserves (with the hope of eliminating it later).
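For reference, tsk_is_oom_victim() keys off the per-signal oom_mm
pointer rather than a per-thread flag; at the time of this patch the
helper in include/linux/oom.h reduces to roughly the following
(quoted here only for context):

	static inline bool tsk_is_oom_victim(struct task_struct *tsk)
	{
		return tsk->signal->oom_mm;
	}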

Second, set TIF_MEMDIE for all threads of an OOM victim process.
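This uses the existing for_each_thread() iterator under RCU, as in
the mark_oom_victim() hunk below (illustrative excerpt; "victim"
stands for the victim's task_struct here):

	struct task_struct *t;

	rcu_read_lock();
	for_each_thread(victim, t)
		set_tsk_thread_flag(t, TIF_MEMDIE);
	rcu_read_unlock();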

Third, to limit the number of processes which have access to memory
reserves, keep an atomic pointer to the task which grabbed it.
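The pointer works as a simple acquire/release gate, sketched below;
the actual hunks are in mark_oom_victim() and exit_oom_victim():

	/* acquire (mark_oom_victim()): only the first victim wins */
	if (!cmpxchg(&tif_memdie_owner, NULL, current)) {
		/* grant TIF_MEMDIE to all threads of this victim */
	}

	/* release (exit_oom_victim()): let the next victim take over */
	cmpxchg(&tif_memdie_owner, current, NULL);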

Signed-off-by: Roman Gushchin <g...@fb.com>
Cc: Michal Hocko <mho...@kernel.org>
Cc: Vladimir Davydov <vdavydov....@gmail.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Cc: Tetsuo Handa <penguin-ker...@i-love.sakura.ne.jp>
Cc: David Rientjes <rient...@google.com>
Cc: Tejun Heo <t...@kernel.org>
Cc: kernel-t...@fb.com
Cc: cgro...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux...@kvack.org
---
 kernel/exit.c   |  2 +-
 mm/memcontrol.c |  2 +-
 mm/oom_kill.c   | 30 +++++++++++++++++++++++++-----
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 8f40bee5ba9d..d5f372a2a363 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -542,7 +542,7 @@ static void exit_mm(void)
        task_unlock(current);
        mm_update_next_owner(mm);
        mmput(mm);
-       if (test_thread_flag(TIF_MEMDIE))
+       if (tsk_is_oom_victim(current))
                exit_oom_victim();
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d61133e6af99..9085e55eb69f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1896,7 +1896,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
         * bypass the last charges so that they can exit quickly and
         * free their memory.
         */
-       if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+       if (unlikely(tsk_is_oom_victim(current) ||
                     fatal_signal_pending(current) ||
                     current->flags & PF_EXITING))
                goto force;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 9e8b4f030c1c..72de01be4d33 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -435,6 +435,8 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
 static bool oom_killer_disabled __read_mostly;
 
+static struct task_struct *tif_memdie_owner;
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 
 /*
@@ -656,13 +658,24 @@ static void mark_oom_victim(struct task_struct *tsk)
        struct mm_struct *mm = tsk->mm;
 
        WARN_ON(oom_killer_disabled);
-       /* OOM killer might race with memcg OOM */
-       if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
+
+       if (!cmpxchg(&tif_memdie_owner, NULL, current)) {
+               struct task_struct *t;
+
+               rcu_read_lock();
+               for_each_thread(current, t)
+                       set_tsk_thread_flag(t, TIF_MEMDIE);
+               rcu_read_unlock();
+       }
+
+       /*
+        * OOM killer might race with memcg OOM.
+        * oom_mm is bound to the signal struct life time.
+        */
+       if (cmpxchg(&tsk->signal->oom_mm, NULL, mm))
                return;
 
-       /* oom_mm is bound to the signal struct life time. */
-       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
-               mmgrab(tsk->signal->oom_mm);
+       mmgrab(tsk->signal->oom_mm);
 
        /*
         * Make sure that the task is woken up from uninterruptible sleep
@@ -682,6 +695,13 @@ void exit_oom_victim(void)
 {
        clear_thread_flag(TIF_MEMDIE);
 
+       /*
+        * If the current task is the thread which initially
+        * received TIF_MEMDIE, clear tif_memdie_owner to
+        * give the next process a chance to capture it.
+        */
+       cmpxchg(&tif_memdie_owner, current, NULL);
+
        if (!atomic_dec_return(&oom_victims))
                wake_up_all(&oom_victims_wait);
 }
-- 
2.13.3
