The commit is pushed to "branch-rh7-3.10.0-514.vz7.27.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-514.vz7.27.10 ------> commit b47182055b23197e9037214bdd631bfb73bf251c Author: Johannes Weiner <han...@cmpxchg.org> Date: Mon Jan 16 20:27:16 2017 +0400
ms/mm: memcontrol: revert use of root_mem_cgroup res_counter Dave Hansen reports a massive scalability regression in an uncontained page fault benchmark with more than 30 concurrent threads, which he bisected down to 05b843012335 ("mm: memcontrol: use root_mem_cgroup res_counter") and pin-pointed on res_counter spinlock contention. That change relied on the per-cpu charge caches to mostly swallow the res_counter costs, but it's apparent that the caches don't scale yet. Revert memcg back to bypassing res_counters on the root level in order to restore performance for uncontained workloads. Reported-by: Dave Hansen <d...@sr71.net> Signed-off-by: Johannes Weiner <han...@cmpxchg.org> Tested-by: Dave Hansen <dave.han...@intel.com> Acked-by: Michal Hocko <mho...@suse.cz> Acked-by: Vladimir Davydov <vdavy...@parallels.com> Signed-off-by: Linus Torvalds <torva...@linux-foundation.org> https://jira.sw.ru/browse/PSBM-51558 (cherry picked from commit ce00a967377baadf2481521e131771adc7652856) Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> --- mm/memcontrol.c | 73 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 16bb6aa..6c11788 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4158,8 +4158,8 @@ out: } -static unsigned long tree_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) +static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) { struct mem_cgroup *iter; long val = 0; @@ -4173,6 +4173,30 @@ static unsigned long tree_stat(struct mem_cgroup *memcg, return val; } +static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) +{ + u64 val; + + if (!mem_cgroup_is_root(memcg)) { + if (!swap) + return page_counter_read(&memcg->memory); + else + return page_counter_read(&memcg->memsw); + } + + /* + * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS + * as well as in MEM_CGROUP_STAT_RSS_HUGE. + */ + val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); + val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS); + + if (swap) + val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP); + + return val << PAGE_SHIFT; +} + void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi) { int nid; @@ -4181,12 +4205,12 @@ void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi) for_each_online_node(nid) mem_cgroup_get_nr_pages(memcg, nid, mi->pages); - mi->slab_reclaimable = tree_stat(memcg, + mi->slab_reclaimable = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SLAB_RECLAIMABLE); - mi->slab_unreclaimable = tree_stat(memcg, + mi->slab_unreclaimable = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SLAB_UNRECLAIMABLE); - mi->cached = tree_stat(memcg, MEM_CGROUP_STAT_CACHE); - mi->shmem = tree_stat(memcg, MEM_CGROUP_STAT_SHMEM); + mi->cached = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); + mi->shmem = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SHMEM); } int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages) @@ -4200,33 +4224,15 @@ int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages) free += page_counter_read(&memcg->dcache); /* assume file cache is reclaimable */ - free += tree_stat(memcg, MEM_CGROUP_STAT_CACHE); + free += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); /* but do not count shmem pages as they can't be purged, * only swapped out */ - free -= tree_stat(memcg, MEM_CGROUP_STAT_SHMEM); + free -= mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SHMEM); return free < pages ? -ENOMEM : 0; } -static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) -{ - u64 val; - - if (mem_cgroup_is_root(memcg)) { - val = tree_stat(memcg, MEM_CGROUP_STAT_CACHE); - val += tree_stat(memcg, MEM_CGROUP_STAT_RSS); - if (swap) - val += tree_stat(memcg, MEM_CGROUP_STAT_SWAP); - } else { - if (!swap) - val = page_counter_read(&memcg->memory); - else - val = page_counter_read(&memcg->memsw); - } - return val << PAGE_SHIFT; -} - enum { RES_USAGE, RES_LIMIT, @@ -6760,7 +6766,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry) rcu_read_lock(); memcg = mem_cgroup_lookup(id); if (memcg) { - page_counter_uncharge(&memcg->memsw, 1); + if (!mem_cgroup_is_root(memcg)) + page_counter_uncharge(&memcg->memsw, 1); mem_cgroup_swap_statistics(memcg, false); css_put(&memcg->css); } @@ -6919,12 +6926,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, { unsigned long flags; - if (nr_mem) - page_counter_uncharge(&memcg->memory, nr_mem); - if (nr_memsw) - page_counter_uncharge(&memcg->memsw, nr_memsw); + if (!mem_cgroup_is_root(memcg)) { + if (nr_mem) + page_counter_uncharge(&memcg->memory, nr_mem); + if (nr_memsw) + page_counter_uncharge(&memcg->memsw, nr_memsw); - memcg_oom_recover(memcg); + memcg_oom_recover(memcg); + } local_irq_save(flags); __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel