Add a new callchain record mode option for deferred callchains.  For now
it only works with FP (frame-pointer) mode.

Also add the missing-feature detection logic so that the new attr flags
(defer_callchain and defer_output) are cleared on old kernels that do not
support them.

  $ perf record --call-graph fp,defer -vv true
  ...
  ------------------------------------------------------------
  perf_event_attr:
    type                             0 (PERF_TYPE_HARDWARE)
    size                             136
    config                           0 (PERF_COUNT_HW_CPU_CYCLES)
    { sample_period, sample_freq }   4000
    sample_type                      IP|TID|TIME|CALLCHAIN|PERIOD
    read_format                      ID|LOST
    disabled                         1
    inherit                          1
    mmap                             1
    comm                             1
    freq                             1
    enable_on_exec                   1
    task                             1
    sample_id_all                    1
    mmap2                            1
    comm_exec                        1
    ksymbol                          1
    bpf_event                        1
    defer_callchain                  1
    defer_output                     1
  ------------------------------------------------------------
  sys_perf_event_open: pid 162755  cpu 0  group_fd -1  flags 0x8
  sys_perf_event_open failed, error -22
  switching off deferred callchain support

Signed-off-by: Namhyung Kim <[email protected]>
---
 tools/perf/Documentation/perf-config.txt |  3 +++
 tools/perf/Documentation/perf-record.txt |  4 ++++
 tools/perf/util/callchain.c              | 16 +++++++++++++---
 tools/perf/util/callchain.h              |  1 +
 tools/perf/util/evsel.c                  | 19 +++++++++++++++++++
 tools/perf/util/evsel.h                  |  1 +
 6 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c6f33565966735fe..642d1c490d9e3bcd 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -452,6 +452,9 @@ Variables
                kernel space is controlled not by this option but by the
                kernel config (CONFIG_UNWINDER_*).
 
+               The 'defer' mode can be used with 'fp' mode to enable deferred
+               user callchains (like 'fp,defer').
+
        call-graph.dump-size::
                The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
                When using dwarf into record-mode, the default size will be used if omitted.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 067891bd7da6edc8..e8b9aadbbfa50574 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -325,6 +325,10 @@ OPTIONS
        by default.  User can change the number by passing it after comma
        like "--call-graph fp,32".
 
+       Also "defer" can be used with "fp" (like "--call-graph fp,defer") to
+       enable deferred user callchains, which are collected when the thread
+       returns to user space.
+
 -q::
 --quiet::
        Don't print any warnings or messages, useful for scripting.
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index d7b7eef740b9d6ed..2884187ccbbecfdc 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct 
callchain_param *param)
                        if (tok) {
                                unsigned long size;
 
-                               size = strtoul(tok, &name, 0);
-                               if (size < (unsigned) sysctl__max_stack())
-                                       param->max_stack = size;
+                               if (!strncmp(tok, "defer", sizeof("defer"))) {
+                                       param->defer = true;
+                               } else {
+                                       size = strtoul(tok, &name, 0);
+                                       if (size < (unsigned) 
sysctl__max_stack())
+                                               param->max_stack = size;
+                               }
                        }
                        break;
 
@@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct 
callchain_param *param)
        } while (0);
 
        free(buf);
+
+       if (param->defer && param->record_mode != CALLCHAIN_FP) {
+               pr_err("callchain: deferred callchain only works with FP\n");
+               return -EINVAL;
+       }
+
        return ret;
 }
 
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 86ed9e4d04f9ee7b..d5ae4fbb7ce5fa44 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -98,6 +98,7 @@ extern bool dwarf_callchain_users;
 
 struct callchain_param {
        bool                    enabled;
+       bool                    defer;
        enum perf_call_graph_mode record_mode;
        u32                     dump_size;
        enum chain_mode         mode;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 5ee3e7dee93fbbcb..7772ee9cfe3ac1c7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1065,6 +1065,9 @@ static void __evsel__config_callchain(struct evsel 
*evsel, struct record_opts *o
                pr_info("Disabling user space callchains for function trace 
event.\n");
                attr->exclude_callchain_user = 1;
        }
+
+       if (param->defer && !attr->exclude_callchain_user)
+               attr->defer_callchain = 1;
 }
 
 void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
@@ -1511,6 +1514,7 @@ void evsel__config(struct evsel *evsel, struct 
record_opts *opts,
        attr->mmap2    = track && !perf_missing_features.mmap2;
        attr->comm     = track;
        attr->build_id = track && opts->build_id;
+       attr->defer_output = track && callchain->defer;
 
        /*
         * ksymbol is tracked separately with text poke because it needs to be
@@ -2199,6 +2203,10 @@ static int __evsel__prepare_open(struct evsel *evsel, 
struct perf_cpu_map *cpus,
 
 static void evsel__disable_missing_features(struct evsel *evsel)
 {
+       if (perf_missing_features.defer_callchain && 
evsel->core.attr.defer_callchain)
+               evsel->core.attr.defer_callchain = 0;
+       if (perf_missing_features.defer_callchain && 
evsel->core.attr.defer_output)
+               evsel->core.attr.defer_output = 0;
        if (perf_missing_features.inherit_sample_read && 
evsel->core.attr.inherit &&
            (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
                evsel->core.attr.inherit = 0;
@@ -2473,6 +2481,13 @@ static bool evsel__detect_missing_features(struct evsel 
*evsel, struct perf_cpu
 
        /* Please add new feature detection here. */
 
+       attr.defer_callchain = true;
+       if (has_attr_feature(&attr, /*flags=*/0))
+               goto found;
+       perf_missing_features.defer_callchain = true;
+       pr_debug2("switching off deferred callchain support\n");
+       attr.defer_callchain = false;
+
        attr.inherit = true;
        attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
        if (has_attr_feature(&attr, /*flags=*/0))
@@ -2584,6 +2599,10 @@ static bool evsel__detect_missing_features(struct evsel 
*evsel, struct perf_cpu
        errno = old_errno;
 
 check:
+       if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) 
&&
+           perf_missing_features.defer_callchain)
+               return true;
+
        if (evsel->core.attr.inherit &&
            (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
            perf_missing_features.inherit_sample_read)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3ae4ac8f9a37e009..a08130ff2e47a887 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -221,6 +221,7 @@ struct perf_missing_features {
        bool branch_counters;
        bool aux_action;
        bool inherit_sample_read;
+       bool defer_callchain;
 };
 
 extern struct perf_missing_features perf_missing_features;
-- 
2.52.0.rc1.455.g30608eb744-goog


Reply via email to