The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after ark-5.14 ------> commit 6ddbff07208f2c135871e804b64b9b0664911f9d Author: Vladimir Davydov <vdavydov....@gmail.com> Date: Thu Sep 30 17:44:02 2021 +0300
ve/mm: introduce min threshold for dcache This patch adds a new sysctl vm.vfs_cache_min_ratio. If the ratio of reclaimable slabs (i.e. dcache and icache) to total memory usage of a cgroup is less than the value of this sysctl (2% by default), slabs won't be reclaimed from this cgroup on memory pressure. https://jira.sw.ru/browse/PSBM-34161 Signed-off-by: Vladimir Davydov <vdavy...@virtuozzo.com> Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> +++ ve/mm/dcache: Honor changing per-memcg s[un]reclaimable counters to bytes in dcache min threshold RHEL8.4 has the following ms commit backported: d42f3245c7e2 ("mm: memcg: convert vmstat slab counters to bytes") So, update places where we use per-memcg counters NR_SLAB_[UN]RECLAIMABLE_B accordingly. https://jira.sw.ru/browse/PSBM-132893 Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com> (cherry-picked from vz8 commit a3cff910211e ("ve/mm: introduce min threshold for dcache")) Signed-off-by: Nikita Yushchenko <nikita.yushche...@virtuozzo.com> --- fs/dcache.c | 2 ++ fs/super.c | 23 +++++++++++++++++++++++ include/linux/dcache.h | 1 + include/linux/memcontrol.h | 7 +++++++ include/linux/shrinker.h | 2 ++ kernel/sysctl.c | 9 +++++++++ mm/memcontrol.c | 16 ++++++++++++++++ mm/vmscan.c | 7 ++++--- 8 files changed, 64 insertions(+), 3 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index cf871a81f4fd..fa0a8fe12bfd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -74,6 +74,8 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); +int sysctl_vfs_cache_min_ratio __read_mostly = 2; + __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); diff --git a/fs/super.c b/fs/super.c index c72159ea66fa..f40b431420f7 100644 --- a/fs/super.c +++ b/fs/super.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/blkdev.h> +#include <linux/memcontrol.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/writeback.h> 
/* for the emergency remount stuff */ @@ -53,6 +54,25 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = { "sb_internal", }; +static bool dcache_is_low(struct mem_cgroup *memcg) +{ + unsigned long anon, file, dcache; + int vfs_cache_min_ratio = READ_ONCE(sysctl_vfs_cache_min_ratio); + + if (vfs_cache_min_ratio <= 0) + return false; + + if (memcg) + return mem_cgroup_dcache_is_low(memcg, vfs_cache_min_ratio); + + anon = global_node_page_state(NR_ANON_MAPPED); + file = global_node_page_state(NR_FILE_PAGES); + dcache = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B); + + return dcache / vfs_cache_min_ratio < + (anon + file + dcache) / 100; +} + /* * One thing we have to be careful of with a per-sb shrinker is that we don't * drop the last active reference to the superblock from within the shrinker. @@ -123,6 +143,9 @@ static unsigned long super_cache_count(struct shrinker *shrink, struct super_block *sb; long total_objects = 0; + if (!sc->for_drop_caches && dcache_is_low(sc->memcg)) + return 0; + sb = container_of(shrink, struct super_block, s_shrink); /* diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9e23d33bb6f1..b88f64c97558 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -508,6 +508,7 @@ static inline bool d_is_fallthru(const struct dentry *dentry) extern int sysctl_vfs_cache_pressure; +extern int sysctl_vfs_cache_min_ratio; static inline unsigned long vfs_pressure_ratio(unsigned long val) { diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b716a5bc806f..46b92cc0bdc5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -909,6 +909,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg) /* * For memory reclaim. 
*/ +bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio); bool mem_cgroup_cleancache_disabled(struct page *page); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); @@ -1384,6 +1385,12 @@ static inline bool mem_cgroup_cleancache_disabled(struct page *page) return false; } +static inline bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, + int vfs_cache_min_ratio) +{ + return false; +} + static inline unsigned long mm_overdraft(struct mm_struct *mm) { return 0; diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 9814fff58a69..3dbb5b0d1052 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -31,6 +31,8 @@ struct shrink_control { /* current memcg being shrunk (for memcg aware shrinkers) */ struct mem_cgroup *memcg; + + bool for_drop_caches; }; #define SHRINK_STOP (~0UL) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7244a1d1f2b8..5abb6df3b1d0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -3061,6 +3061,15 @@ static struct ctl_table vm_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, + { + .procname = "vfs_cache_min_ratio", + .data = &sysctl_vfs_cache_min_ratio, + .maxlen = sizeof(sysctl_vfs_cache_min_ratio), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = SYSCTL_ZERO, + .extra2 = &one_hundred, + }, #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c2b527cf73dc..47384b7fce0a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1335,6 +1335,22 @@ unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg) return usage > guarantee ? 
(usage - guarantee) : 0; } +bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio) +{ + unsigned long anon, file, dcache; + + anon = memcg_page_state(memcg, NR_ANON_MAPPED); + file = memcg_page_state(memcg, NR_FILE_PAGES); + /* + * After ms commit d42f3245c7e2 ("mm: memcg: convert vmstat slab + * counters to bytes") NR_SLAB_{,UN}RECLAIMABLE_B are in bytes. + */ + dcache = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT; + + return dcache / vfs_cache_min_ratio < + (anon + file + dcache) / 100; +} + /** * mem_cgroup_margin - calculate chargeable space of a memory cgroup * @memcg: the memory cgroup diff --git a/mm/vmscan.c b/mm/vmscan.c index f55e24e18874..dfc094cafb9b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -844,7 +844,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, */ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, - int priority) + int priority, bool for_drop_caches) { unsigned long ret, freed = 0; struct shrinker *shrinker; @@ -870,6 +870,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, .gfp_mask = gfp_mask, .nid = nid, .memcg = memcg, + .for_drop_caches = for_drop_caches, }; ret = do_shrink_slab(&sc, shrinker, priority); @@ -906,7 +907,7 @@ void drop_slab_node(int nid) freed = 0; memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - freed += shrink_slab(GFP_KERNEL, nid, memcg, 0); + freed += shrink_slab(GFP_KERNEL, nid, memcg, 0, true); } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); } while (freed > 10); } @@ -2880,7 +2881,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) shrink_lruvec(lruvec, sc); shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, - sc->priority); + sc->priority, false); /* Record the group's reclaim efficiency */ vmpressure(sc->gfp_mask, memcg, false, _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel