On Sat, Nov 15, 2025 at 3:41 PM Namhyung Kim <[email protected]> wrote:
>
> Add a new callchain record mode option for deferred callchains.  For now
> it only works with FP (frame-pointer) mode.
>
> And add the missing feature detection logic to clear the flag on old
> kernels.
>
>   $ perf record --call-graph fp,defer -vv true
>   ...
>   ------------------------------------------------------------
>   perf_event_attr:
>     type                             0 (PERF_TYPE_HARDWARE)
>     size                             136
>     config                           0 (PERF_COUNT_HW_CPU_CYCLES)
>     { sample_period, sample_freq }   4000
>     sample_type                      IP|TID|TIME|CALLCHAIN|PERIOD
>     read_format                      ID|LOST
>     disabled                         1
>     inherit                          1
>     mmap                             1
>     comm                             1
>     freq                             1
>     enable_on_exec                   1
>     task                             1
>     sample_id_all                    1
>     mmap2                            1
>     comm_exec                        1
>     ksymbol                          1
>     bpf_event                        1
>     defer_callchain                  1
>     defer_output                     1
>   ------------------------------------------------------------
>   sys_perf_event_open: pid 162755  cpu 0  group_fd -1  flags 0x8
>   sys_perf_event_open failed, error -22
>   switching off deferred callchain support
>
> Signed-off-by: Namhyung Kim <[email protected]>

Reviewed-by: Ian Rogers <[email protected]>

Thanks,
Ian

> ---
>  tools/perf/Documentation/perf-config.txt |  3 +++
>  tools/perf/Documentation/perf-record.txt |  4 ++++
>  tools/perf/util/callchain.c              | 16 +++++++++++++---
>  tools/perf/util/callchain.h              |  1 +
>  tools/perf/util/evsel.c                  | 19 +++++++++++++++++++
>  tools/perf/util/evsel.h                  |  1 +
>  6 files changed, 41 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
> index c6f33565966735fe..642d1c490d9e3bcd 100644
> --- a/tools/perf/Documentation/perf-config.txt
> +++ b/tools/perf/Documentation/perf-config.txt
> @@ -452,6 +452,9 @@ Variables
>                 kernel space is controlled not by this option but by the
>                 kernel config (CONFIG_UNWINDER_*).
>
> +               The 'defer' mode can be used with 'fp' mode to enable deferred
> +               user callchains (like 'fp,defer').
> +
>         call-graph.dump-size::
>                 The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
>                 When using dwarf into record-mode, the default size will be used if omitted.
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index 067891bd7da6edc8..e8b9aadbbfa50574 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -325,6 +325,10 @@ OPTIONS
>         by default.  User can change the number by passing it after comma
>         like "--call-graph fp,32".
>
> +       Also "defer" can be used with "fp" (like "--call-graph fp,defer") to
> +       enable deferred user callchain which will collect user-space callchains
> +       when the thread returns to the user space.
> +
>  -q::
>  --quiet::
>         Don't print any warnings or messages, useful for scripting.
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index d7b7eef740b9d6ed..2884187ccbbecfdc 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct 
> callchain_param *param)
>                         if (tok) {
>                                 unsigned long size;
>
> -                               size = strtoul(tok, &name, 0);
> -                               if (size < (unsigned) sysctl__max_stack())
> -                                       param->max_stack = size;
> +                               if (!strncmp(tok, "defer", sizeof("defer"))) {
> +                                       param->defer = true;
> +                               } else {
> +                                       size = strtoul(tok, &name, 0);
> +                                       if (size < (unsigned) 
> sysctl__max_stack())
> +                                               param->max_stack = size;
> +                               }
>                         }
>                         break;
>
> @@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct 
> callchain_param *param)
>         } while (0);
>
>         free(buf);
> +
> +       if (param->defer && param->record_mode != CALLCHAIN_FP) {
> +               pr_err("callchain: deferred callchain only works with FP\n");
> +               return -EINVAL;
> +       }
> +
>         return ret;
>  }
>
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 86ed9e4d04f9ee7b..d5ae4fbb7ce5fa44 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -98,6 +98,7 @@ extern bool dwarf_callchain_users;
>
>  struct callchain_param {
>         bool                    enabled;
> +       bool                    defer;
>         enum perf_call_graph_mode record_mode;
>         u32                     dump_size;
>         enum chain_mode         mode;
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 5ee3e7dee93fbbcb..7772ee9cfe3ac1c7 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1065,6 +1065,9 @@ static void __evsel__config_callchain(struct evsel 
> *evsel, struct record_opts *o
>                 pr_info("Disabling user space callchains for function trace 
> event.\n");
>                 attr->exclude_callchain_user = 1;
>         }
> +
> +       if (param->defer && !attr->exclude_callchain_user)
> +               attr->defer_callchain = 1;
>  }
>
>  void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
> @@ -1511,6 +1514,7 @@ void evsel__config(struct evsel *evsel, struct 
> record_opts *opts,
>         attr->mmap2    = track && !perf_missing_features.mmap2;
>         attr->comm     = track;
>         attr->build_id = track && opts->build_id;
> +       attr->defer_output = track && callchain->defer;
>
>         /*
>          * ksymbol is tracked separately with text poke because it needs to be
> @@ -2199,6 +2203,10 @@ static int __evsel__prepare_open(struct evsel *evsel, 
> struct perf_cpu_map *cpus,
>
>  static void evsel__disable_missing_features(struct evsel *evsel)
>  {
> +       if (perf_missing_features.defer_callchain && 
> evsel->core.attr.defer_callchain)
> +               evsel->core.attr.defer_callchain = 0;
> +       if (perf_missing_features.defer_callchain && 
> evsel->core.attr.defer_output)
> +               evsel->core.attr.defer_output = 0;
>         if (perf_missing_features.inherit_sample_read && 
> evsel->core.attr.inherit &&
>             (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
>                 evsel->core.attr.inherit = 0;
> @@ -2473,6 +2481,13 @@ static bool evsel__detect_missing_features(struct 
> evsel *evsel, struct perf_cpu
>
>         /* Please add new feature detection here. */
>
> +       attr.defer_callchain = true;
> +       if (has_attr_feature(&attr, /*flags=*/0))
> +               goto found;
> +       perf_missing_features.defer_callchain = true;
> +       pr_debug2("switching off deferred callchain support\n");
> +       attr.defer_callchain = false;
> +
>         attr.inherit = true;
>         attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
>         if (has_attr_feature(&attr, /*flags=*/0))
> @@ -2584,6 +2599,10 @@ static bool evsel__detect_missing_features(struct 
> evsel *evsel, struct perf_cpu
>         errno = old_errno;
>
>  check:
> +       if ((evsel->core.attr.defer_callchain || 
> evsel->core.attr.defer_output) &&
> +           perf_missing_features.defer_callchain)
> +               return true;
> +
>         if (evsel->core.attr.inherit &&
>             (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
>             perf_missing_features.inherit_sample_read)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 3ae4ac8f9a37e009..a08130ff2e47a887 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -221,6 +221,7 @@ struct perf_missing_features {
>         bool branch_counters;
>         bool aux_action;
>         bool inherit_sample_read;
> +       bool defer_callchain;
>  };
>
>  extern struct perf_missing_features perf_missing_features;
> --
> 2.52.0.rc1.455.g30608eb744-goog
>

Reply via email to