The commit is pushed to "branch-rh10-6.12.0-55.13.1.vz10.2.x-ovz" and will 
appear at g...@bitbucket.org:openvz/vzkernel.git
after rh10-6.12.0-55.13.1.el10
------>
commit 13d41871a85c23fe77618e1081610e018fd5aad1
Author: Konstantin Khorenko <khore...@virtuozzo.com>
Date:   Fri Jul 25 15:53:11 2025 +0200

    mm/memcontrol: fix cache counter imbalance in mem_cgroup_replace_folio()
    
    The function was checking the cache flag on the new folio instead of
    the old one, causing a cache counter imbalance when replacing cache
    pages.
    The new folio is freshly allocated and never has the cache flag set, so
    we need to copy the flag from the old folio and charge the cache counter
    accordingly.
    
    This fixes potential cache accounting issues during page cache replacement
    operations like page migration between NUMA nodes.
    
    Another possible cause of the imbalance is the commit_charge() function
    which unconditionally drops MEMCG_DATA_PGCACHE flag on
    folio->memcg_data, so enhance commit_charge() to preserve the flag.
    
    The way of adding the new arguments was chosen in order not to miss
    the appearance of new commit_charge() calls on rebases.
    
    Fixes: 6a2ca4d515c5 ("mm: Memory cgroup page cache limit")
    https://virtuozzo.atlassian.net/browse/VSTOR-111756
    
    Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com>
    Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
    
    Feature: mm: Memory cgroup page cache limit
---
 include/linux/memcontrol.h |  6 ++--
 mm/hugetlb.c               |  3 +-
 mm/memcontrol-v1.c         |  5 +++-
 mm/memcontrol.c            | 70 +++++++++++++++++++++++-----------------------
 4 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c97bd1a4b3c95..e0df2d395d03f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -696,7 +696,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup 
*target,
                page_counter_read(&memcg->memory);
 }
 
-void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
+void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg,
+                             bool cache_charge);
 
 int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
 
@@ -1256,7 +1257,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup 
*target,
 }
 
 static inline void mem_cgroup_commit_charge(struct folio *folio,
-               struct mem_cgroup *memcg)
+                                           struct mem_cgroup *memcg,
+                                           bool cache_charge)
 {
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bd46cf476a0e3..93886dba813e0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3106,7 +3106,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct 
*vma,
        }
 
        if (!memcg_charge_ret)
-               mem_cgroup_commit_charge(folio, memcg);
+               /* huge pages are not used for page cache */
+               mem_cgroup_commit_charge(folio, memcg, false);
        mem_cgroup_put(memcg);
 
        return folio;
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 756bc518cee3f..fd0a563a352b1 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -852,7 +852,10 @@ static int mem_cgroup_move_account(struct folio *folio,
 
        /* Warning should never happen, so don't worry about refcount non-0 */
        WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
-       folio->memcg_data = (unsigned long)to;
+       /* Preserve MEMCG_DATA_PGCACHE flag if it was set */
+       WRITE_ONCE(folio->memcg_data,
+                  (unsigned long)to |
+                  (READ_ONCE(folio->memcg_data) & MEMCG_DATA_PGCACHE));
 
        __folio_memcg_unlock(from);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d0fe610306a02..28b8f50504e06 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2524,7 +2524,8 @@ void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, 
unsigned int nr_pages)
                page_counter_uncharge(&memcg->memsw, nr_pages);
 }
 
-static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
+static void commit_charge(struct folio *folio, struct mem_cgroup *memcg,
+                         unsigned long cache_flag)
 {
        VM_BUG_ON_FOLIO(folio_memcg_charged(folio), folio);
        /*
@@ -2536,18 +2537,20 @@ static void commit_charge(struct folio *folio, struct 
mem_cgroup *memcg)
         * - exclusive reference
         * - mem_cgroup_trylock_pages()
         */
-       folio->memcg_data = (unsigned long)memcg;
+       WRITE_ONCE(folio->memcg_data, (unsigned long)memcg | cache_flag);
 }
 
 /**
  * mem_cgroup_commit_charge - commit a previously successful try_charge().
  * @folio: folio to commit the charge to.
  * @memcg: memcg previously charged.
+ * @cache_charge: if we charge for page cache or not.
  */
-void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
+void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg,
+                             bool cache_charge)
 {
        css_get(&memcg->css);
-       commit_charge(folio, memcg);
+       commit_charge(folio, memcg, cache_charge ? MEMCG_DATA_PGCACHE : 0);
        memcg1_commit_charge(folio, memcg);
 }
 
@@ -5267,18 +5270,13 @@ static int charge_memcg(struct folio *folio, struct 
mem_cgroup *memcg,
        ret = try_charge(memcg, gfp, folio_nr_pages(folio), cache_charge);
        if (ret)
                goto out;
-
-       mem_cgroup_commit_charge(folio, memcg);
-
        /*
         * We always cleanup this flag on uncharging, it means
         * that during charging we shouldn't have this flag set
         */
-
        VM_BUG_ON(folio_memcg_cache(folio));
-       if (cache_charge)
-               WRITE_ONCE(folio->memcg_data,
-                       READ_ONCE(folio->memcg_data) | MEMCG_DATA_PGCACHE);
+
+       mem_cgroup_commit_charge(folio, memcg, cache_charge);
 out:
        return ret;
 }
@@ -5551,6 +5549,7 @@ void mem_cgroup_replace_folio(struct folio *old, struct 
folio *new)
 {
        struct mem_cgroup *memcg;
        long nr_pages = folio_nr_pages(new);
+       unsigned long cache_flag;
 
        VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
        VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
@@ -5575,32 +5574,33 @@ void mem_cgroup_replace_folio(struct folio *old, struct 
folio *new)
                if (do_memsw_account())
                        page_counter_charge(&memcg->memsw, nr_pages);
        }
-
-       /*
-        * finist explained the idea behind adding a WARN_ON() here:
-        * - we do not want to check flags correctness on each flag change
-        *   because of performance
-        * - we do want to have a warning in case we somehow messed-up and
-        *   have got a folio with wrong bits set
-        * - we do not insist to catch every first buggy page with wrong
-        *   bits
-        *
-        * But we still want to get a warning about the problem sooner or
-        * later if the problem with flags exists.
-        *
-        * To achieve this check if a folio that is marked as cache does
-        * not have any other incompatible flags set.
-        */
-       WARN_ON(folio_memcg_cache(new) &&
-               (folio_test_slab(new)         || folio_test_anon(new)      ||
-                folio_test_swapbacked(new)   || folio_test_swapcache(new) ||
-                folio_test_mappedtodisk(new) || folio_test_ksm(new)));
-
-       if (folio_memcg_cache(new))
+       cache_flag = READ_ONCE(old->memcg_data) & MEMCG_DATA_PGCACHE;
+       if (cache_flag && !mem_cgroup_is_root(memcg)) {
                page_counter_charge(&memcg->cache, nr_pages);
 
+               /*
+                * finist explained the idea behind adding a WARN_ON() here:
+                * - we do not want to check flags correctness on each flag 
change
+                *   because of performance
+                * - we do want to have a warning in case we somehow messed-up 
and
+                *   have got a folio with wrong bits set
+                * - we do not insist to catch every first buggy page with wrong
+                *   bits
+                *
+                * But we still want to get a warning about the problem sooner 
or
+                * later if the problem with flags exists.
+                *
+                * To achieve this check if a folio that is marked as cache does
+                * not have any other incompatible flags set.
+                */
+               WARN_ON(folio_test_slab(new) || folio_test_swapcache(new)  ||
+                       folio_test_anon(new) || folio_test_swapbacked(new) ||
+                       folio_test_ksm(new)  || folio_test_mappedtodisk(new));
+       }
+
        css_get(&memcg->css);
-       commit_charge(new, memcg);
+       commit_charge(new, memcg, cache_flag);
+
        memcg1_commit_charge(new, memcg);
 }
 
@@ -5639,7 +5639,7 @@ void mem_cgroup_migrate(struct folio *old, struct folio 
*new)
                return;
 
        /* Transfer the charge and the css ref */
-       commit_charge(new, memcg);
+       commit_charge(new, memcg, READ_ONCE(old->memcg_data) & 
MEMCG_DATA_PGCACHE);
 
        /* Warning should never happen, so don't worry about refcount non-0 */
        WARN_ON_ONCE(folio_unqueue_deferred_split(old));
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to