Just to be explicit about what I have in mind. This hasn't even been compile
tested, but it should provide at least an idea of where I am trying to go.
---
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d5f3a62887cf..91fa05372114 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1528,26 +1528,36 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 
-static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
+static bool mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-       if (!current->memcg_may_oom)
-               return;
        /*
         * We are in the middle of the charge context here, so we
         * don't want to block when potentially sitting on a callstack
         * that holds all kinds of filesystem and mm locks.
         *
-        * Also, the caller may handle a failed allocation gracefully
-        * (like optional page cache readahead) and so an OOM killer
-        * invocation might not even be necessary.
+        * cgroup v1 allows sync userspace handling so we cannot afford
+        * to get stuck here for that configuration. That's why we don't do
+        * anything here except remember the OOM context and then deal with
+        * it at the end of the page fault when the stack is unwound, the
+        * locks are released, and when we know whether the fault was overall
+        * successful.
         *
-        * That's why we don't do anything here except remember the
-        * OOM context and then deal with it at the end of the page
-        * fault when the stack is unwound, the locks are released,
-        * and when we know whether the fault was overall successful.
+        * On the other hand, the in-kernel OOM killer allows for async
+        * victim memory reclaim (oom_reaper), which means we are not solely
+        * relying on the oom victim to make forward progress, so we can stay
+        * in the try_charge context and keep retrying as long as there are
+        * oom victims to select.
         */
-       css_get(&memcg->css);
-       current->memcg_in_oom = memcg;
-       current->memcg_oom_gfp_mask = mask;
-       current->memcg_oom_order = order;
+       if (memcg->oom_kill_disable) {
+               if (!current->memcg_may_oom)
+                       return false;
+               css_get(&memcg->css);
+               current->memcg_in_oom = memcg;
+               current->memcg_oom_gfp_mask = mask;
+               current->memcg_oom_order = order;
+
+               return false;
+       }
+
+       return mem_cgroup_out_of_memory(memcg, mask, order);
 }
 
 /**
@@ -2007,8 +2017,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
        mem_cgroup_event(mem_over_limit, MEMCG_OOM);
 
-       mem_cgroup_oom(mem_over_limit, gfp_mask,
-                      get_order(nr_pages * PAGE_SIZE));
+       if (mem_cgroup_oom(mem_over_limit, gfp_mask,
+                      get_order(nr_pages * PAGE_SIZE))) {
+               nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+               goto retry;
+       }
 nomem:
        if (!(gfp_mask & __GFP_NOFAIL))
                return -ENOMEM;
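
To make the intended flow easier to follow outside of the diff context, here
is a stand-alone toy model of it. Everything in it is made up for illustration
(the memcg_sketch type, the fake counters and helpers, the numbers in main())
and none of it is the real memcontrol.c interface; the point is only that a
true return from mem_cgroup_oom() restarts the charge retry loop, while the
v1 oom_kill_disable case still gives up and leaves the handling to the end of
the page fault:

/*
 * Stand-alone sketch of the control flow in the patch above. All types,
 * helpers and numbers are invented for illustration; this is not the
 * real memcontrol.c code.
 */
#include <stdbool.h>
#include <stdio.h>

#define MEM_CGROUP_RECLAIM_RETRIES 5

struct memcg_sketch {
	bool oom_kill_disable;	/* cgroup v1 userspace OOM handling */
	int pages_charged;	/* fake usage counter */
	int limit;		/* fake hard limit */
	int killable_tasks;	/* fake pool of OOM victim candidates */
};

/* Fake charge attempt: succeeds while usage is below the limit. */
static bool charge_attempt(struct memcg_sketch *memcg)
{
	if (memcg->pages_charged >= memcg->limit)
		return false;
	memcg->pages_charged++;
	return true;
}

/*
 * Fake OOM kill: selecting a victim frees its memory (this stands in
 * for the async oom_reaper), so further charges can make progress.
 */
static bool mem_cgroup_out_of_memory(struct memcg_sketch *memcg)
{
	if (!memcg->killable_tasks)
		return false;
	memcg->killable_tasks--;
	memcg->pages_charged -= 4;
	return true;
}

/* true means "a victim was selected, retrying the charge makes sense" */
static bool mem_cgroup_oom(struct memcg_sketch *memcg)
{
	if (memcg->oom_kill_disable)
		return false;	/* v1: defer to the end of the page fault */
	return mem_cgroup_out_of_memory(memcg);
}

/* The shape of the patched try_charge() retry loop. */
static int try_charge(struct memcg_sketch *memcg)
{
	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
retry:
	if (charge_attempt(memcg))
		return 0;
	if (nr_retries--)
		goto retry;	/* stands in for the reclaim retries */
	if (mem_cgroup_oom(memcg)) {
		nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
		goto retry;
	}
	return -1;	/* give up, as with -ENOMEM in the real code */
}

int main(void)
{
	struct memcg_sketch kernel_oom = { false, 10, 10, 2 };
	struct memcg_sketch user_oom = { true, 10, 10, 2 };

	printf("in-kernel OOM:    try_charge() -> %d\n", try_charge(&kernel_oom));
	printf("v1 userspace OOM: try_charge() -> %d\n", try_charge(&user_oom));
	return 0;
}

Compiled with a plain C compiler this prints 0 for the in-kernel OOM case and
-1 for the v1 one, which is the behavior the patch is after.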
-- 
Michal Hocko
SUSE Labs
