On Thu, 16 Oct 2025 16:46:19 +1100 Balbir Singh <[email protected]> wrote:
> Add tracepoints for debugging device migration flow in migrate_device.c. > This is helpful in debugging how long migration took (time can be > tracked backwards from migrate_device_finalize to migrate_vma_setup). > > A combination of these events along with existing thp:*, exceptions:* > and migrate:* is very useful for debugging issues related to > migration. > > Cc: Steven Rostedt <[email protected]> > Cc: Masami Hiramatsu <[email protected]> > Cc: Mathieu Desnoyers <[email protected]> > Cc: Andrew Morton <[email protected]> > Cc: David Hildenbrand <[email protected]> > Cc: Zi Yan <[email protected]> > Cc: Joshua Hahn <[email protected]> > Cc: Rakie Kim <[email protected]> > Cc: Byungchul Park <[email protected]> > Cc: Gregory Price <[email protected]> > Cc: Ying Huang <[email protected]> > Cc: Alistair Popple <[email protected]> > > Signed-off-by: Balbir Singh <[email protected]> > --- > > Sample output from hmm-tests > > hmm-tests-855 [002] 50.042792: migrate_vma_setup: > start=0x7f2908a00000 end=0x7f2908c00000 nr_pages=512 > hmm-tests-855 [002] 50.042800: set_migration_pmd: > addr=7f2908a00000, pmd=dfffffffd39ffe00 > hmm-tests-855 [002] 50.042801: migrate_vma_collect_skip: > start=0x7f2908a01000 end=0x7f2908c00000 > hmm-tests-855 [002] 50.042802: migrate_vma_collect: > start=0x7f2908a00000 end=0x7f2908c00000 npages=512 > hmm-tests-855 [002] 50.061929: migrate_device_pages: npages=512 > migrated=512 > hmm-tests-855 [002] 50.062345: remove_migration_pmd: > addr=7f2908a00000, pmd=efffffe00403fe00 > hmm-tests-855 [002] 50.062371: migrate_vma_finalize: npages=512 Looks like some of these tracepoints can be combined via classes: > hmm-tests-855 [002] 50.042792: migrate_vma_setup: > start=0x7f2908a00000 end=0x7f2908c00000 nr_pages=512 > hmm-tests-855 [002] 50.042802: migrate_vma_collect: > start=0x7f2908a00000 end=0x7f2908c00000 npages=512 Is there a difference between "nr_pages" and "npages"? 
> hmm-tests-855 [002] 50.042800: set_migration_pmd: > addr=7f2908a00000, pmd=dfffffffd39ffe00 > hmm-tests-855 [002] 50.062345: remove_migration_pmd: > addr=7f2908a00000, pmd=efffffe00403fe00 Each TRACE_EVENT() is equivalent to: DECLARE_EVENT_CLASS(event, ...) DEFINE_EVENT(event, event, ...) Where a class is around 4-5K in size, and the DEFINE_EVENT is between 500 and 1k in size. By using a single DECLARE_EVENT_CLASS() for multiple events, you can save several thousand bytes of memory. > > > include/trace/events/migrate_device.h | 196 ++++++++++++++++++++++++++ > mm/migrate_device.c | 11 ++ > 2 files changed, 207 insertions(+) > create mode 100644 include/trace/events/migrate_device.h > > diff --git a/include/trace/events/migrate_device.h > b/include/trace/events/migrate_device.h > new file mode 100644 > index 000000000000..9b2782800ea9 > --- /dev/null > +++ b/include/trace/events/migrate_device.h > @@ -0,0 +1,196 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES > + */ > +#undef TRACE_SYSTEM > +#define TRACE_SYSTEM migrate_device > + > +#if !defined(_TRACE_MIGRATE_DEVICE_H) || defined(TRACE_HEADER_MULTI_READ) > +#define _TRACE_MIGRATE_DEVICE_H > + > +#include <linux/tracepoint.h> > +#include <linux/migrate.h> > + > +TRACE_EVENT(migrate_vma_setup, > + > + TP_PROTO(unsigned long start, unsigned long end, unsigned long > nr_pages), > + > + TP_ARGS(start, end, nr_pages), > + > + TP_STRUCT__entry( > + __field(unsigned long, start) > + __field(unsigned long, end) > + __field(unsigned long, nr_pages) > + ), > + > + TP_fast_assign( > + __entry->start = start; > + __entry->end = end; > + __entry->nr_pages = nr_pages; > + ), > + > + TP_printk("start=0x%lx end=0x%lx nr_pages=%lu", > + __entry->start, __entry->end, __entry->nr_pages) > +); Make the above into: DECLARE_EVENT_CLASS(migrate_vma_pages_template, TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages), TP_ARGS(start, end, nr_pages), 
TP_STRUCT__entry( __field(unsigned long, start) __field(unsigned long, end) __field(unsigned long, nr_pages) ), TP_fast_assign( __entry->start = start; __entry->end = end; __entry->nr_pages = nr_pages; ), TP_printk("start=0x%lx end=0x%lx nr_pages=%lu", __entry->start, __entry->end, __entry->nr_pages) ); DEFINE_EVENT(migrate_vma_pages_template, migrate_vma_setup, TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages), TP_ARGS(start, end, nr_pages)); DEFINE_EVENT(migrate_vma_pages_template, migrate_vma_collect, TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages), TP_ARGS(start, end, nr_pages)); DEFINE_EVENT(migrate_vma_pages_template, migrate_vma_collect_hole, TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages), TP_ARGS(start, end, nr_pages)); > + > +TRACE_EVENT(migrate_vma_collect_skip, > + > + TP_PROTO(unsigned long start, unsigned long end), > + > + TP_ARGS(start, end), > + > + TP_STRUCT__entry( > + __field(unsigned long, start) > + __field(unsigned long, end) > + ), > + > + TP_fast_assign( > + __entry->start = start; > + __entry->end = end; > + ), > + > + TP_printk("start=0x%lx end=0x%lx", __entry->start, __entry->end) > +); > + > +TRACE_EVENT(migrate_vma_unmap, > + > + TP_PROTO(unsigned long npages, unsigned long cpages), > + > + TP_ARGS(npages, cpages), > + > + TP_STRUCT__entry( > + __field(unsigned long, npages) > + __field(unsigned long, cpages) > + ), > + > + TP_fast_assign( > + __entry->npages = npages; > + __entry->cpages = cpages; > + ), > + > + TP_printk("npages=%lu cpages=%lu", > + __entry->npages, __entry->cpages) > +); > + > +TRACE_EVENT(migrate_device_pages, > + > + TP_PROTO(unsigned long npages, unsigned long migrated), > + > + TP_ARGS(npages, migrated), > + > + TP_STRUCT__entry( > + __field(unsigned long, npages) > + __field(unsigned long, migrated) > + ), > + > + TP_fast_assign( > + __entry->npages = npages; > + __entry->migrated = migrated; > + ), > + > + TP_printk("npages=%lu 
migrated=%lu", > + __entry->npages, __entry->migrated) > +); The above two could be converted to: DECLARE_EVENT_CLASS(migrate_vma_device_template, TP_PROTO(unsigned long npages, unsigned long cpage_migrate), TP_ARGS(npages, cpage_migrate), TP_STRUCT__entry( __field(unsigned long, npages) __field(unsigned long, cpage_migrated) ), TP_fast_assign( __entry->npages = npages; __entry->cpage_migrated = cpage_migrate; ), TP_printk("npages=%lu migrated=%lu", __entry->npages, __entry->cpage_migrated) ); DEFINE_EVENT(migrate_vma_device_template, migrate_device_pages, TP_PROTO(unsigned long npages, unsigned long cpage_migrate), TP_ARGS(npages, cpage_migrate)); DEFINE_EVENT_PRINT(migrate_vma_device_template, migrate_vma_unmap, TP_PROTO(unsigned long npages, unsigned long cpage_migrate), TP_ARGS(npages, cpage_migrate), TP_printk("npages=%lu cpages=%lu", __entry->npages, __entry->cpage_migrated)); Where the second one will show a different print format. > + > +TRACE_EVENT(migrate_vma_pages, > + > + TP_PROTO(unsigned long npages, unsigned long start, unsigned long end), > + > + TP_ARGS(npages, start, end), > + > + TP_STRUCT__entry( > + __field(unsigned long, npages) > + __field(unsigned long, start) > + __field(unsigned long, end) > + ), > + > + TP_fast_assign( > + __entry->npages = npages; > + __entry->start = start; > + __entry->end = end; > + ), > + > + TP_printk("npages=%lu start=0x%lx end=0x%lx", > + __entry->npages, __entry->start, __entry->end) Is there a reason npages is not at the end? 
Otherwise you can save even more memory with: DEFINE_EVENT(migrate_vma_pages_template, migrate_vma_pages, TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages), TP_ARGS(start, end, nr_pages)); > +); > + > +TRACE_EVENT(migrate_device_finalize, > + > + TP_PROTO(unsigned long npages), > + > + TP_ARGS(npages), > + > + TP_STRUCT__entry( > + __field(unsigned long, npages) > + ), > + > + TP_fast_assign( > + __entry->npages = npages; > + ), > + > + TP_printk("npages=%lu", __entry->npages) > +); > + > +TRACE_EVENT(migrate_vma_finalize, > + > + TP_PROTO(unsigned long npages), > + > + TP_ARGS(npages), > + > + TP_STRUCT__entry( > + __field(unsigned long, npages) > + ), > + > + TP_fast_assign( > + __entry->npages = npages; > + ), > + > + TP_printk("npages=%lu", __entry->npages) > +); The above two can be converted to: DECLARE_EVENT_CLASS(migrate_finalize_template, TP_PROTO(unsigned long npages), TP_ARGS(npages), TP_STRUCT__entry( __field(unsigned long, npages) ), TP_fast_assign( __entry->npages = npages; ), TP_printk("npages=%lu", __entry->npages) ); DEFINE_EVENT(migrate_finalize_template, migrate_device_finalize, TP_PROTO(unsigned long npages), TP_ARGS(npages)); DEFINE_EVENT(migrate_finalize_template, migrate_vma_finalize, TP_PROTO(unsigned long npages), TP_ARGS(npages)); -- Steve > +#endif /* _TRACE_MIGRATE_DEVICE_H */ > + > +#include <trace/define_trace.h> > diff --git a/mm/migrate_device.c b/mm/migrate_device.c > index fa42d2ebd024..c869b272e85a 100644 > --- a/mm/migrate_device.c > +++ b/mm/migrate_device.c > @@ -18,6 +18,9 @@ > #include <asm/tlbflush.h> > #include "internal.h" > > +#define CREATE_TRACE_POINTS > +#include <trace/events/migrate_device.h> > + > static int migrate_vma_collect_skip(unsigned long start, > unsigned long end, > struct mm_walk *walk) > @@ -25,6 +28,8 @@ static int migrate_vma_collect_skip(unsigned long start, > struct migrate_vma *migrate = walk->private; > unsigned long addr; > > + trace_migrate_vma_collect_skip(start, end); > + 
> for (addr = start; addr < end; addr += PAGE_SIZE) { > migrate->dst[migrate->npages] = 0; > migrate->src[migrate->npages++] = 0; > @@ -69,6 +74,7 @@ static int migrate_vma_collect_hole(unsigned long start, > migrate->cpages++; > } > > + trace_migrate_vma_collect_hole(start, end, migrate->npages); > return 0; > } > > @@ -517,6 +523,7 @@ static void migrate_vma_collect(struct migrate_vma > *migrate) > > mmu_notifier_invalidate_range_end(&range); > migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT); > + trace_migrate_vma_collect(migrate->start, migrate->end, > migrate->npages); > } > > /* > @@ -748,6 +755,8 @@ int migrate_vma_setup(struct migrate_vma *args) > if (args->fault_page && !PageLocked(args->fault_page)) > return -EINVAL; > > + trace_migrate_vma_setup(args->start, args->end, nr_pages); > + > memset(args->src, 0, sizeof(*args->src) * nr_pages); > args->cpages = 0; > args->npages = 0; > @@ -1259,6 +1268,7 @@ EXPORT_SYMBOL(migrate_device_pages); > void migrate_vma_pages(struct migrate_vma *migrate) > { > __migrate_device_pages(migrate->src, migrate->dst, migrate->npages, > migrate); > + trace_migrate_device_pages(migrate->npages, migrate->npages); > } > EXPORT_SYMBOL(migrate_vma_pages); > > @@ -1312,6 +1322,7 @@ static void __migrate_device_finalize(unsigned long > *src_pfns, > folio_put(dst); > } > } > + trace_migrate_vma_finalize(npages); > } > > /*
