It would be useful to see a breakdown of page allocations by the NUMA
policy that drove them. When investigating memory pressure, for
example, per-policy counts could show that allocations on the affected
node were bound there via MPOL_BIND.

Add per-policy page allocation counters as new node stat items. These
counters make it possible to correlate a mempolicy with pressure on a
given node.

Signed-off-by: JP Kobryn <[email protected]>
Suggested-by: Johannes Weiner <[email protected]>
---
 include/linux/mmzone.h |  9 +++++++++
 mm/mempolicy.c         | 30 ++++++++++++++++++++++++++++--
 mm/vmstat.c            |  9 +++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)
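
As a quick illustration of how the new counters could be exercised
(not part of the patch; assumes libnuma's numaif.h for the
set_mempolicy() wrapper, build with -lnuma): bind a task's allocations
to node 0, fault in a buffer, and compare pgalloc_mpol_bind in
/sys/devices/system/node/node0/vmstat before and after.

/* sketch: correlate MPOL_BIND allocations with pgalloc_mpol_bind */
#include <numaif.h>	/* set_mempolicy(), MPOL_BIND (libnuma) */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	unsigned long nodemask = 1UL;	/* node 0 only */
	size_t len = 64UL << 20;	/* 64M of anon memory */
	char *buf;

	if (set_mempolicy(MPOL_BIND, &nodemask, 8 * sizeof(nodemask))) {
		perror("set_mempolicy");
		return 1;
	}
	buf = malloc(len);
	if (!buf)
		return 1;
	memset(buf, 1, len);	/* fault pages in under MPOL_BIND */
	getchar();		/* pause here and inspect the node's vmstat */
	free(buf);
	return 0;
}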

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fc5d6c88d2f0..762609d5f0af 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -255,6 +255,15 @@ enum node_stat_item {
        PGDEMOTE_DIRECT,
        PGDEMOTE_KHUGEPAGED,
        PGDEMOTE_PROACTIVE,
+#ifdef CONFIG_NUMA
+       PGALLOC_MPOL_DEFAULT,
+       PGALLOC_MPOL_PREFERRED,
+       PGALLOC_MPOL_BIND,
+       PGALLOC_MPOL_INTERLEAVE,
+       PGALLOC_MPOL_LOCAL,
+       PGALLOC_MPOL_PREFERRED_MANY,
+       PGALLOC_MPOL_WEIGHTED_INTERLEAVE,
+#endif
 #ifdef CONFIG_HUGETLB_PAGE
        NR_HUGETLB,
 #endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 68a98ba57882..3c64784af761 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -217,6 +217,21 @@ static void reduce_interleave_weights(unsigned int *bw, u8 *new_iw)
                new_iw[nid] /= iw_gcd;
 }
 
+#define CHECK_MPOL_NODE_STAT_OFFSET(mpol) \
+       BUILD_BUG_ON(PGALLOC_##mpol - mpol != PGALLOC_MPOL_DEFAULT)
+
+static enum node_stat_item mpol_node_stat(unsigned short mode)
+{
+       CHECK_MPOL_NODE_STAT_OFFSET(MPOL_PREFERRED);
+       CHECK_MPOL_NODE_STAT_OFFSET(MPOL_BIND);
+       CHECK_MPOL_NODE_STAT_OFFSET(MPOL_INTERLEAVE);
+       CHECK_MPOL_NODE_STAT_OFFSET(MPOL_LOCAL);
+       CHECK_MPOL_NODE_STAT_OFFSET(MPOL_PREFERRED_MANY);
+       CHECK_MPOL_NODE_STAT_OFFSET(MPOL_WEIGHTED_INTERLEAVE);
+
+       return PGALLOC_MPOL_DEFAULT + mode;
+}
+
 int mempolicy_set_node_perf(unsigned int node, struct access_coordinate *coords)
 {
        struct weighted_interleave_state *new_wi_state, *old_wi_state = NULL;
@@ -2446,8 +2461,14 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
 
        nodemask = policy_nodemask(gfp, pol, ilx, &nid);
 
-       if (pol->mode == MPOL_PREFERRED_MANY)
-               return alloc_pages_preferred_many(gfp, order, nid, nodemask);
+       if (pol->mode == MPOL_PREFERRED_MANY) {
+               page = alloc_pages_preferred_many(gfp, order, nid, nodemask);
+               if (page)
+                       __mod_node_page_state(page_pgdat(page),
+                                       mpol_node_stat(MPOL_PREFERRED_MANY), 1 << order);
+
+               return page;
+       }
 
        if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
            /* filter "hugepage" allocation, unless from alloc_pages() */
@@ -2472,6 +2493,9 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
                        page = __alloc_frozen_pages_noprof(
                                gfp | __GFP_THISNODE | __GFP_NORETRY, order,
                                nid, NULL);
+                       if (page)
+                               __mod_node_page_state(page_pgdat(page),
+                                               mpol_node_stat(pol->mode), 1 << order);
                        if (page || !(gfp & __GFP_DIRECT_RECLAIM))
                                return page;
                        /*
@@ -2484,6 +2508,8 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
        }
 
        page = __alloc_frozen_pages_noprof(gfp, order, nid, nodemask);
+       if (page)
+               __mod_node_page_state(page_pgdat(page), mpol_node_stat(pol->mode), 1 << order);
 
        if (unlikely(pol->mode == MPOL_INTERLEAVE ||
                     pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 65de88cdf40e..74e0ddde1e93 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1291,6 +1291,15 @@ const char * const vmstat_text[] = {
        [I(PGDEMOTE_DIRECT)]                    = "pgdemote_direct",
        [I(PGDEMOTE_KHUGEPAGED)]                = "pgdemote_khugepaged",
        [I(PGDEMOTE_PROACTIVE)]                 = "pgdemote_proactive",
+#ifdef CONFIG_NUMA
+       [I(PGALLOC_MPOL_DEFAULT)]               = "pgalloc_mpol_default",
+       [I(PGALLOC_MPOL_PREFERRED)]             = "pgalloc_mpol_preferred",
+       [I(PGALLOC_MPOL_BIND)]                  = "pgalloc_mpol_bind",
+       [I(PGALLOC_MPOL_INTERLEAVE)]            = "pgalloc_mpol_interleave",
+       [I(PGALLOC_MPOL_LOCAL)]                 = "pgalloc_mpol_local",
+       [I(PGALLOC_MPOL_PREFERRED_MANY)]        = "pgalloc_mpol_preferred_many",
+       [I(PGALLOC_MPOL_WEIGHTED_INTERLEAVE)]   = "pgalloc_mpol_weighted_interleave",
+#endif
 #ifdef CONFIG_HUGETLB_PAGE
        [I(NR_HUGETLB)]                         = "nr_hugetlb",
 #endif
-- 
2.47.3

