On Sat, Nov 15, 2025 at 3:41 PM Namhyung Kim <[email protected]> wrote: > > Add a new callchain record mode option for deferred callchains. For now > it only works with FP (frame-pointer) mode. > > And add the missing feature detection logic to clear the flag on old > kernels. > > $ perf record --call-graph fp,defer -vv true > ... > ------------------------------------------------------------ > perf_event_attr: > type 0 (PERF_TYPE_HARDWARE) > size 136 > config 0 (PERF_COUNT_HW_CPU_CYCLES) > { sample_period, sample_freq } 4000 > sample_type IP|TID|TIME|CALLCHAIN|PERIOD > read_format ID|LOST > disabled 1 > inherit 1 > mmap 1 > comm 1 > freq 1 > enable_on_exec 1 > task 1 > sample_id_all 1 > mmap2 1 > comm_exec 1 > ksymbol 1 > bpf_event 1 > defer_callchain 1 > defer_output 1 > ------------------------------------------------------------ > sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8 > sys_perf_event_open failed, error -22 > switching off deferred callchain support > > Signed-off-by: Namhyung Kim <[email protected]>
Reviewed-by: Ian Rogers <[email protected]> Thanks, Ian > --- > tools/perf/Documentation/perf-config.txt | 3 +++ > tools/perf/Documentation/perf-record.txt | 4 ++++ > tools/perf/util/callchain.c | 16 +++++++++++++--- > tools/perf/util/callchain.h | 1 + > tools/perf/util/evsel.c | 19 +++++++++++++++++++ > tools/perf/util/evsel.h | 1 + > 6 files changed, 41 insertions(+), 3 deletions(-) > > diff --git a/tools/perf/Documentation/perf-config.txt > b/tools/perf/Documentation/perf-config.txt > index c6f33565966735fe..642d1c490d9e3bcd 100644 > --- a/tools/perf/Documentation/perf-config.txt > +++ b/tools/perf/Documentation/perf-config.txt > @@ -452,6 +452,9 @@ Variables > kernel space is controlled not by this option but by the > kernel config (CONFIG_UNWINDER_*). > > + The 'defer' mode can be used with 'fp' mode to enable deferred > + user callchains (like 'fp,defer'). > + > call-graph.dump-size:: > The size of stack to dump in order to do post-unwinding. > Default is 8192 (byte). > When using dwarf into record-mode, the default size will be > used if omitted. > diff --git a/tools/perf/Documentation/perf-record.txt > b/tools/perf/Documentation/perf-record.txt > index 067891bd7da6edc8..e8b9aadbbfa50574 100644 > --- a/tools/perf/Documentation/perf-record.txt > +++ b/tools/perf/Documentation/perf-record.txt > @@ -325,6 +325,10 @@ OPTIONS > by default. User can change the number by passing it after comma > like "--call-graph fp,32". > > + Also "defer" can be used with "fp" (like "--call-graph fp,defer") to > + enable deferred user callchain which will collect user-space > callchains > + when the thread returns to the user space. > + > -q:: > --quiet:: > Don't print any warnings or messages, useful for scripting. > diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c > index d7b7eef740b9d6ed..2884187ccbbecfdc 100644 > --- a/tools/perf/util/callchain.c > +++ b/tools/perf/util/callchain.c > @@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct > callchain_param *param) > if (tok) { > unsigned long size; > > - size = strtoul(tok, &name, 0); > - if (size < (unsigned) sysctl__max_stack()) > - param->max_stack = size; > + if (!strncmp(tok, "defer", sizeof("defer"))) { > + param->defer = true; > + } else { > + size = strtoul(tok, &name, 0); > + if (size < (unsigned) > sysctl__max_stack()) > + param->max_stack = size; > + } > } > break; > > @@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct > callchain_param *param) > } while (0); > > free(buf); > + > + if (param->defer && param->record_mode != CALLCHAIN_FP) { > + pr_err("callchain: deferred callchain only works with FP\n"); > + return -EINVAL; > + } > + > return ret; > } > > diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h > index 86ed9e4d04f9ee7b..d5ae4fbb7ce5fa44 100644 > --- a/tools/perf/util/callchain.h > +++ b/tools/perf/util/callchain.h > @@ -98,6 +98,7 @@ extern bool dwarf_callchain_users; > > struct callchain_param { > bool enabled; > + bool defer; > enum perf_call_graph_mode record_mode; > u32 dump_size; > enum chain_mode mode; > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c > index 5ee3e7dee93fbbcb..7772ee9cfe3ac1c7 100644 > --- a/tools/perf/util/evsel.c > +++ b/tools/perf/util/evsel.c > @@ -1065,6 +1065,9 @@ static void __evsel__config_callchain(struct evsel > *evsel, struct record_opts *o > pr_info("Disabling user space callchains for function trace > event.\n"); > attr->exclude_callchain_user = 1; > } > + > + if (param->defer && !attr->exclude_callchain_user) > + attr->defer_callchain = 1; > } > > void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, > @@ -1511,6 +1514,7 @@ void evsel__config(struct evsel *evsel, struct > record_opts *opts, > attr->mmap2 = track && !perf_missing_features.mmap2; > attr->comm = track; > attr->build_id = track && opts->build_id; > + attr->defer_output = track && callchain->defer; > > /* > * ksymbol is tracked separately with text poke because it needs to be > @@ -2199,6 +2203,10 @@ static int __evsel__prepare_open(struct evsel *evsel, > struct perf_cpu_map *cpus, > > static void evsel__disable_missing_features(struct evsel *evsel) > { > + if (perf_missing_features.defer_callchain && > evsel->core.attr.defer_callchain) > + evsel->core.attr.defer_callchain = 0; > + if (perf_missing_features.defer_callchain && > evsel->core.attr.defer_output) > + evsel->core.attr.defer_output = 0; > if (perf_missing_features.inherit_sample_read && > evsel->core.attr.inherit && > (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) > evsel->core.attr.inherit = 0; > @@ -2473,6 +2481,13 @@ static bool evsel__detect_missing_features(struct > evsel *evsel, struct perf_cpu > > /* Please add new feature detection here. */ > > + attr.defer_callchain = true; > + if (has_attr_feature(&attr, /*flags=*/0)) > + goto found; > + perf_missing_features.defer_callchain = true; > + pr_debug2("switching off deferred callchain support\n"); > + attr.defer_callchain = false; > + > attr.inherit = true; > attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID; > if (has_attr_feature(&attr, /*flags=*/0)) > @@ -2584,6 +2599,10 @@ static bool evsel__detect_missing_features(struct > evsel *evsel, struct perf_cpu > errno = old_errno; > > check: > + if ((evsel->core.attr.defer_callchain || > evsel->core.attr.defer_output) && > + perf_missing_features.defer_callchain) > + return true; > + > if (evsel->core.attr.inherit && > (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && > perf_missing_features.inherit_sample_read) > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h > index 3ae4ac8f9a37e009..a08130ff2e47a887 100644 > --- a/tools/perf/util/evsel.h > +++ b/tools/perf/util/evsel.h > @@ -221,6 +221,7 @@ struct perf_missing_features { > bool branch_counters; > bool aux_action; > bool inherit_sample_read; > + bool defer_callchain; > }; > > extern struct perf_missing_features perf_missing_features; > -- > 2.52.0.rc1.455.g30608eb744-goog >
