Memory reclaim events are currently difficult to attribute to
specific cgroups, making debugging memory pressure issues
challenging.  Add the memory cgroup ID (memcg_id) to key vmscan
tracepoints to enable better correlation and analysis.

For operations not associated with a specific cgroup, the field
defaults to 0.

Signed-off-by: Thomas Ballasi <[email protected]>
---
 include/trace/events/vmscan.h | 83 ++++++++++++++++++++---------------
 mm/shrinker.c                 |  6 ++-
 mm/vmscan.c                   | 17 +++----
 3 files changed, 61 insertions(+), 45 deletions(-)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 490958fa10dee..20160e79eb0d7 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -114,85 +114,92 @@ TRACE_EVENT(mm_vmscan_wakeup_kswapd,
 
 DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template,
 
-       TP_PROTO(int order, gfp_t gfp_flags),
+       TP_PROTO(gfp_t gfp_flags, int order, u64 memcg_id),
 
-       TP_ARGS(order, gfp_flags),
+       TP_ARGS(gfp_flags, order, memcg_id),
 
        TP_STRUCT__entry(
-               __field(        int,    order           )
                __field(        unsigned long,  gfp_flags       )
+               __field(        u64,    memcg_id        )
+               __field(        int,    order           )
        ),
 
        TP_fast_assign(
-               __entry->order          = order;
                __entry->gfp_flags      = (__force unsigned long)gfp_flags;
+               __entry->order          = order;
+               __entry->memcg_id       = memcg_id;
        ),
 
-       TP_printk("order=%d gfp_flags=%s",
+       TP_printk("order=%d gfp_flags=%s memcg_id=%llu",
                __entry->order,
-               show_gfp_flags(__entry->gfp_flags))
+               show_gfp_flags(__entry->gfp_flags),
+               __entry->memcg_id)
 );
 
 DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin,
 
-       TP_PROTO(int order, gfp_t gfp_flags),
+       TP_PROTO(gfp_t gfp_flags, int order, u64 memcg_id),
 
-       TP_ARGS(order, gfp_flags)
+       TP_ARGS(gfp_flags, order, memcg_id)
 );
 
 #ifdef CONFIG_MEMCG
 DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
 
-       TP_PROTO(int order, gfp_t gfp_flags),
+       TP_PROTO(gfp_t gfp_flags, int order, u64 memcg_id),
 
-       TP_ARGS(order, gfp_flags)
+       TP_ARGS(gfp_flags, order, memcg_id)
 );
 
 DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
 
-       TP_PROTO(int order, gfp_t gfp_flags),
+       TP_PROTO(gfp_t gfp_flags, int order, u64 memcg_id),
 
-       TP_ARGS(order, gfp_flags)
+       TP_ARGS(gfp_flags, order, memcg_id)
 );
 #endif /* CONFIG_MEMCG */
 
 DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template,
 
-       TP_PROTO(unsigned long nr_reclaimed),
+       TP_PROTO(unsigned long nr_reclaimed, u64 memcg_id),
 
-       TP_ARGS(nr_reclaimed),
+       TP_ARGS(nr_reclaimed, memcg_id),
 
        TP_STRUCT__entry(
                __field(        unsigned long,  nr_reclaimed    )
+               __field(        u64,    memcg_id        )
        ),
 
        TP_fast_assign(
                __entry->nr_reclaimed   = nr_reclaimed;
+               __entry->memcg_id       = memcg_id;
        ),
 
-       TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed)
+       TP_printk("nr_reclaimed=%lu memcg_id=%llu",
+               __entry->nr_reclaimed,
+               __entry->memcg_id)
 );
 
 DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end,
 
-       TP_PROTO(unsigned long nr_reclaimed),
+       TP_PROTO(unsigned long nr_reclaimed, u64 memcg_id),
 
-       TP_ARGS(nr_reclaimed)
+       TP_ARGS(nr_reclaimed, memcg_id)
 );
 
 #ifdef CONFIG_MEMCG
 DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
 
-       TP_PROTO(unsigned long nr_reclaimed),
+       TP_PROTO(unsigned long nr_reclaimed, u64 memcg_id),
 
-       TP_ARGS(nr_reclaimed)
+       TP_ARGS(nr_reclaimed, memcg_id)
 );
 
 DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
 
-       TP_PROTO(unsigned long nr_reclaimed),
+       TP_PROTO(unsigned long nr_reclaimed, u64 memcg_id),
 
-       TP_ARGS(nr_reclaimed)
+       TP_ARGS(nr_reclaimed, memcg_id)
 );
 #endif /* CONFIG_MEMCG */
 
@@ -200,39 +207,42 @@ TRACE_EVENT(mm_shrink_slab_start,
        TP_PROTO(struct shrinker *shr, struct shrink_control *sc,
                long nr_objects_to_shrink, unsigned long cache_items,
                unsigned long long delta, unsigned long total_scan,
-               int priority),
+               int priority, u64 memcg_id),
 
        TP_ARGS(shr, sc, nr_objects_to_shrink, cache_items, delta, total_scan,
-               priority),
+               priority, memcg_id),
 
        TP_STRUCT__entry(
                __field(struct shrinker *, shr)
                __field(void *, shrink)
-               __field(int, nid)
                __field(long, nr_objects_to_shrink)
                __field(unsigned long, gfp_flags)
                __field(unsigned long, cache_items)
                __field(unsigned long long, delta)
                __field(unsigned long, total_scan)
                __field(int, priority)
+               __field(int, nid)
+               __field(u64, memcg_id)
        ),
 
        TP_fast_assign(
                __entry->shr = shr;
                __entry->shrink = shr->scan_objects;
-               __entry->nid = sc->nid;
                __entry->nr_objects_to_shrink = nr_objects_to_shrink;
                __entry->gfp_flags = (__force unsigned long)sc->gfp_mask;
                __entry->cache_items = cache_items;
                __entry->delta = delta;
                __entry->total_scan = total_scan;
                __entry->priority = priority;
+               __entry->nid = sc->nid;
+               __entry->memcg_id = memcg_id;
        ),
 
-       TP_printk("%pS %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d",
+       TP_printk("%pS %p: nid: %d memcg_id: %llu objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d",
                __entry->shrink,
                __entry->shr,
                __entry->nid,
+               __entry->memcg_id,
                __entry->nr_objects_to_shrink,
                show_gfp_flags(__entry->gfp_flags),
                __entry->cache_items,
@@ -243,35 +253,38 @@ TRACE_EVENT(mm_shrink_slab_start,
 
 TRACE_EVENT(mm_shrink_slab_end,
        TP_PROTO(struct shrinker *shr, int nid, int shrinker_retval,
-               long unused_scan_cnt, long new_scan_cnt, long total_scan),
+               long unused_scan_cnt, long new_scan_cnt, long total_scan, u64 memcg_id),
 
        TP_ARGS(shr, nid, shrinker_retval, unused_scan_cnt, new_scan_cnt,
-               total_scan),
+               total_scan, memcg_id),
 
        TP_STRUCT__entry(
                __field(struct shrinker *, shr)
-               __field(int, nid)
                __field(void *, shrink)
                __field(long, unused_scan)
                __field(long, new_scan)
-               __field(int, retval)
                __field(long, total_scan)
+               __field(int, nid)
+               __field(int, retval)
+               __field(u64, memcg_id)
        ),
 
        TP_fast_assign(
                __entry->shr = shr;
-               __entry->nid = nid;
                __entry->shrink = shr->scan_objects;
                __entry->unused_scan = unused_scan_cnt;
                __entry->new_scan = new_scan_cnt;
-               __entry->retval = shrinker_retval;
                __entry->total_scan = total_scan;
+               __entry->nid = nid;
+               __entry->retval = shrinker_retval;
+               __entry->memcg_id = memcg_id;
        ),
 
-       TP_printk("%pS %p: nid: %d unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d",
+       TP_printk("%pS %p: nid: %d memcg_id: %llu unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d",
                __entry->shrink,
                __entry->shr,
                __entry->nid,
+               __entry->memcg_id,
                __entry->unused_scan,
                __entry->new_scan,
                __entry->total_scan,
@@ -504,9 +517,9 @@ TRACE_EVENT(mm_vmscan_node_reclaim_begin,
 
 DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end,
 
-       TP_PROTO(unsigned long nr_reclaimed),
+       TP_PROTO(unsigned long nr_reclaimed, u64 memcg_id),
 
-       TP_ARGS(nr_reclaimed)
+       TP_ARGS(nr_reclaimed, memcg_id)
 );
 
 TRACE_EVENT(mm_vmscan_throttled,
diff --git a/mm/shrinker.c b/mm/shrinker.c
index 4a93fd433689a..ccfbe3fb3b378 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -410,7 +410,8 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
        total_scan = min(total_scan, (2 * freeable));
 
        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
-                                  freeable, delta, total_scan, priority);
+                                  freeable, delta, total_scan, priority,
+                                  mem_cgroup_id(shrinkctl->memcg));
 
        /*
         * Normally, we should not scan less than batch_size objects in one
@@ -461,7 +462,8 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
         */
        new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);
 
-       trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
+       trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan,
+                                mem_cgroup_id(shrinkctl->memcg));
        return freed;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b2fc8b626d3df..9a3cd975a9f30 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6642,11 +6642,11 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                return 1;
 
        set_task_reclaim_state(current, &sc.reclaim_state);
-       trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask);
+       trace_mm_vmscan_direct_reclaim_begin(sc.gfp_mask, order, 0);
 
        nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
-       trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
+       trace_mm_vmscan_direct_reclaim_end(nr_reclaimed, 0);
        set_task_reclaim_state(current, NULL);
 
        return nr_reclaimed;
@@ -6675,8 +6675,9 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 
-       trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
-                                                     sc.gfp_mask);
+       trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.gfp_mask,
+                                                     sc.order,
+                                                     mem_cgroup_id(memcg));
 
        /*
         * NOTE: Although we can get the priority field, using it
@@ -6687,7 +6688,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
         */
        shrink_lruvec(lruvec, &sc);
 
-       trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+       trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed, mem_cgroup_id(memcg));
 
        *nr_scanned = sc.nr_scanned;
 
@@ -6723,13 +6724,13 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
        struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
 
        set_task_reclaim_state(current, &sc.reclaim_state);
-       trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
+       trace_mm_vmscan_memcg_reclaim_begin(sc.gfp_mask, 0, mem_cgroup_id(memcg));
        noreclaim_flag = memalloc_noreclaim_save();
 
        nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        memalloc_noreclaim_restore(noreclaim_flag);
-       trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+       trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed, mem_cgroup_id(memcg));
        set_task_reclaim_state(current, NULL);
 
        return nr_reclaimed;
@@ -7675,7 +7676,7 @@ static unsigned long __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask,
        delayacct_freepages_end();
        psi_memstall_leave(&pflags);
 
-       trace_mm_vmscan_node_reclaim_end(sc->nr_reclaimed);
+       trace_mm_vmscan_node_reclaim_end(sc->nr_reclaimed, 0);
 
        return sc->nr_reclaimed;
 }
-- 
2.33.8


Reply via email to