This commit introduces the `mm_setup_per_zone_wmarks` trace event,
which provides detailed insights into the kernel's per-zone watermark
configuration, offering precise timing and the ability to correlate
watermark changes with specific kernel events.

While `/proc/zoneinfo` provides some information about zone watermarks,
this trace event offers:

1. The ability to link watermark changes to specific kernel events and
logic.

2. The ability to capture rapid or short-lived changes in watermarks
that may be missed by user-space polling.

3. The ability to diagnose unexpected kswapd activity or excessive
direct reclaim triggered by rapidly changing watermarks.

Signed-off-by: Martin Liu <[email protected]>
---
 include/trace/events/kmem.h | 33 +++++++++++++++++++++++++++++++++
 mm/page_alloc.c             |  1 +
 2 files changed, 34 insertions(+)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index b37eb0a7060f..5fd392dae503 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -342,6 +342,39 @@ TRACE_EVENT(mm_alloc_contig_migrate_range_info,
                  __entry->nr_mapped)
 );
 
+TRACE_EVENT(mm_setup_per_zone_wmarks,
+
+       TP_PROTO(struct zone *zone),
+
+       TP_ARGS(zone),
+
+       TP_STRUCT__entry(
+               __field(int, node_id)
+               __string(name, zone->name)
+               __field(unsigned long, watermark_min)
+               __field(unsigned long, watermark_low)
+               __field(unsigned long, watermark_high)
+               __field(unsigned long, watermark_promo)
+       ),
+
+       TP_fast_assign(
+               __entry->node_id = zone->zone_pgdat->node_id;
+               __assign_str(name);
+               __entry->watermark_min = zone->_watermark[WMARK_MIN];
+               __entry->watermark_low = zone->_watermark[WMARK_LOW];
+               __entry->watermark_high = zone->_watermark[WMARK_HIGH];
+               __entry->watermark_promo = zone->_watermark[WMARK_PROMO];
+       ),
+
+       TP_printk("node_id=%d zone name=%s watermark min=%lu low=%lu high=%lu promo=%lu",
+                 __entry->node_id,
+                 __get_str(name),
+                 __entry->watermark_min,
+                 __entry->watermark_low,
+                 __entry->watermark_high,
+                 __entry->watermark_promo)
+);
+
 /*
  * Required for uniquely and securely identifying mm in rss_stat tracepoint.
  */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 579789600a3c..50893061db66 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5920,6 +5920,7 @@ static void __setup_per_zone_wmarks(void)
                zone->_watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
                zone->_watermark[WMARK_HIGH] = low_wmark_pages(zone) + tmp;
                zone->_watermark[WMARK_PROMO] = high_wmark_pages(zone) + tmp;
+               trace_mm_setup_per_zone_wmarks(zone);
 
                spin_unlock_irqrestore(&zone->lock, flags);
        }
-- 
2.49.0.rc0.332.g42c0ae87b1-goog


Reply via email to