The commit is pushed to "vz7.96.12" and will appear at 
https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.12.2.vz7.96.12
------>
commit 222af0e107e39792184bb516e793a583ab386fe7
Author: Andrey Ryabinin <[email protected]>
Date:   Mon Jul 8 13:28:38 2019 +0300

    mm/memcontrol: fix memory.high
    
    Our commit b607d3e1a953 ("ms/mm: memcontrol: fold mem_cgroup_do_charge()")
    accidentally lost the piece of code that makes memory.high work. Bring it back.
    
    Signed-off-by: Andrey Ryabinin <[email protected]>
---
 include/linux/memcontrol.h |  2 ++
 include/linux/sched.h      |  3 +++
 include/linux/tracehook.h  |  3 +++
 mm/memcontrol.c            | 65 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index aa8cef097055..265b5e350779 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -138,6 +138,8 @@ extern void mem_cgroup_note_oom_kill(struct mem_cgroup *memcg,
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                                        struct task_struct *p);
 
+void mem_cgroup_handle_over_high(void);
+
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                                           int nid, unsigned int lru_mask);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea08dfd17448..74e34bcd1e2d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1824,6 +1824,9 @@ struct task_struct {
 #endif
 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
        unsigned int memcg_kmem_skip_account;
+
+       /* Number of pages to reclaim on returning to userland: */
+       unsigned int                    memcg_nr_pages_over_high;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        atomic_t ptrace_bp_refcnt;
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 1e98b5530425..f4e830954674 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -47,6 +47,7 @@
 #define _LINUX_TRACEHOOK_H     1
 
 #include <linux/sched.h>
+#include <linux/memcontrol.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
 #include <linux/task_work.h>
@@ -194,6 +195,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
        smp_mb__after_clear_bit();
        if (unlikely(current->task_works))
                task_work_run();
+
+       mem_cgroup_handle_over_high();
 }
 
 #endif /* <linux/tracehook.h> */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d11aa24ed207..0e6911b2dcc3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -55,6 +55,7 @@
 #include <linux/oom.h>
 #include <linux/virtinfo.h>
 #include <linux/migrate.h>
+#include <linux/tracehook.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -311,6 +312,7 @@ struct mem_cgroup {
 
        /* vmpressure notifications */
        struct vmpressure vmpressure;
+       struct work_struct high_work;
 
        /*
         * the counter to account for kernel memory usage.
@@ -2996,6 +2998,44 @@ static bool kmem_reclaim_is_low(struct mem_cgroup *memcg)
        return dcache_is_low(memcg);
 }
 
+static void reclaim_high(struct mem_cgroup *memcg,
+                        unsigned int nr_pages,
+                        gfp_t gfp_mask)
+{
+       do {
+               if (page_counter_read(&memcg->memory) <= memcg->high)
+                       continue;
+
+               try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, 0);
+       } while ((memcg = parent_mem_cgroup(memcg)));
+}
+
+static void high_work_func(struct work_struct *work)
+{
+       struct mem_cgroup *memcg;
+
+       memcg = container_of(work, struct mem_cgroup, high_work);
+       reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+       unsigned int nr_pages = current->memcg_nr_pages_over_high;
+       struct mem_cgroup *memcg;
+
+       if (likely(!nr_pages))
+               return;
+
+       memcg = get_mem_cgroup_from_mm(current->mm);
+       reclaim_high(memcg, nr_pages, GFP_KERNEL);
+       css_put(&memcg->css);
+       current->memcg_nr_pages_over_high = 0;
+}
+
 /**
  * mem_cgroup_try_charge - try charging a memcg
  * @memcg: memcg to charge
@@ -3195,6 +3235,28 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, bool kmem_charge
        if (batch > nr_pages)
                refill_stock(memcg, batch - nr_pages);
 done:
+       /*
+        * If the hierarchy is above the normal consumption range, schedule
+        * reclaim on returning to userland.  We can perform reclaim here
+        * if __GFP_RECLAIM but let's always punt for simplicity and so that
+        * GFP_KERNEL can consistently be used during reclaim.  @memcg is
+        * not recorded as it most likely matches current's and won't
+        * change in the meantime.  As high limit is checked again before
+        * reclaim, the cost of mismatch is negligible.
+        */
+       do {
+               if (page_counter_read(&memcg->memory) > memcg->high) {
+                       /* Don't bother a random interrupted task */
+                       if (in_interrupt()) {
+                               schedule_work(&memcg->high_work);
+                               break;
+                       }
+                       current->memcg_nr_pages_over_high += batch;
+                       set_notify_resume(current);
+                       break;
+               }
+       } while ((memcg = parent_mem_cgroup(memcg)));
+
        return 0;
 }
 
@@ -6435,6 +6497,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
        memcg->last_scanned_node = MAX_NUMNODES;
        INIT_LIST_HEAD(&memcg->oom_notify);
        memcg->move_charge_at_immigrate = 0;
+       INIT_WORK(&memcg->high_work, high_work_func);
        mutex_init(&memcg->thresholds_lock);
        spin_lock_init(&memcg->move_lock);
        vmpressure_init(&memcg->vmpressure);
@@ -6625,6 +6688,8 @@ static void mem_cgroup_css_free(struct cgroup *cont)
        mem_cgroup_reparent_charges(memcg);
 
        vmpressure_cleanup(&memcg->vmpressure);
+       cancel_work_sync(&memcg->high_work);
+
        memcg_destroy_kmem(memcg);
        memcg_free_shrinker_maps(memcg);
        __mem_cgroup_free(memcg);

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to