Our commit b607d3e1a953 ("ms/mm: memcontrol: fold mem_cgroup_do_charge()")
accidentally lost the piece of code that makes memory.high work. Bring it back. Signed-off-by: Andrey Ryabinin <[email protected]> --- include/linux/memcontrol.h | 2 ++ include/linux/sched.h | 3 ++ include/linux/tracehook.h | 3 ++ mm/memcontrol.c | 65 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index aa8cef097055..265b5e350779 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -138,6 +138,8 @@ extern void mem_cgroup_note_oom_kill(struct mem_cgroup *memcg, extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); +void mem_cgroup_handle_over_high(void); + unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, int nid, unsigned int lru_mask); diff --git a/include/linux/sched.h b/include/linux/sched.h index ea08dfd17448..74e34bcd1e2d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1824,6 +1824,9 @@ struct task_struct { #endif #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ unsigned int memcg_kmem_skip_account; + + /* Number of pages to reclaim on returning to userland: */ + unsigned int memcg_nr_pages_over_high; #endif #ifdef CONFIG_HAVE_HW_BREAKPOINT atomic_t ptrace_bp_refcnt; diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 1e98b5530425..f4e830954674 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -47,6 +47,7 @@ #define _LINUX_TRACEHOOK_H 1 #include <linux/sched.h> +#include <linux/memcontrol.h> #include <linux/ptrace.h> #include <linux/security.h> #include <linux/task_work.h> @@ -194,6 +195,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) smp_mb__after_clear_bit(); if (unlikely(current->task_works)) task_work_run(); + + mem_cgroup_handle_over_high(); } #endif /* <linux/tracehook.h> */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e123323f1044..cc8cf887c205 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c 
@@ -55,6 +55,7 @@ #include <linux/oom.h> #include <linux/virtinfo.h> #include <linux/migrate.h> +#include <linux/tracehook.h> #include "internal.h" #include <net/sock.h> #include <net/ip.h> @@ -311,6 +312,7 @@ struct mem_cgroup { /* vmpressure notifications */ struct vmpressure vmpressure; + struct work_struct high_work; /* * the counter to account for kernel memory usage. @@ -2996,6 +2998,44 @@ static bool kmem_reclaim_is_low(struct mem_cgroup *memcg) return dcache_is_low(memcg); } +static void reclaim_high(struct mem_cgroup *memcg, + unsigned int nr_pages, + gfp_t gfp_mask) +{ + do { + if (page_counter_read(&memcg->memory) <= memcg->high) + continue; + + try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, 0); + } while ((memcg = parent_mem_cgroup(memcg))); +} + +static void high_work_func(struct work_struct *work) +{ + struct mem_cgroup *memcg; + + memcg = container_of(work, struct mem_cgroup, high_work); + reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL); +} + +/* + * Scheduled by try_charge() to be executed from the userland return path + * and reclaims memory over the high limit. + */ +void mem_cgroup_handle_over_high(void) +{ + unsigned int nr_pages = current->memcg_nr_pages_over_high; + struct mem_cgroup *memcg; + + if (likely(!nr_pages)) + return; + + memcg = get_mem_cgroup_from_mm(current->mm); + reclaim_high(memcg, nr_pages, GFP_KERNEL); + css_put(&memcg->css); + current->memcg_nr_pages_over_high = 0; +} + /** * mem_cgroup_try_charge - try charging a memcg * @memcg: memcg to charge @@ -3195,6 +3235,28 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, bool kmem_charge if (batch > nr_pages) refill_stock(memcg, batch - nr_pages); done: + /* + * If the hierarchy is above the normal consumption range, schedule + * reclaim on returning to userland. We can perform reclaim here + * if __GFP_RECLAIM but let's always punt for simplicity and so that + * GFP_KERNEL can consistently be used during reclaim. 
@memcg is + * not recorded as it most likely matches current's and won't + * change in the meantime. As high limit is checked again before + * reclaim, the cost of mismatch is negligible. + */ + do { + if (page_counter_read(&memcg->memory) > memcg->high) { + /* Don't bother a random interrupted task */ + if (in_interrupt()) { + schedule_work(&memcg->high_work); + break; + } + current->memcg_nr_pages_over_high += batch; + set_notify_resume(current); + break; + } + } while ((memcg = parent_mem_cgroup(memcg))); + return 0; } @@ -6445,6 +6507,7 @@ mem_cgroup_css_alloc(struct cgroup *cont) memcg->last_scanned_node = MAX_NUMNODES; INIT_LIST_HEAD(&memcg->oom_notify); memcg->move_charge_at_immigrate = 0; + INIT_WORK(&memcg->high_work, high_work_func); mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); vmpressure_init(&memcg->vmpressure); @@ -6635,6 +6698,8 @@ static void mem_cgroup_css_free(struct cgroup *cont) mem_cgroup_reparent_charges(memcg); vmpressure_cleanup(&memcg->vmpressure); + cancel_work_sync(&memcg->high_work); + memcg_destroy_kmem(memcg); memcg_free_shrinker_maps(memcg); __mem_cgroup_free(memcg); -- 2.21.0 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
