On Fri, Jul 21, 2017 at 02:12:12PM +0200, Jiri Olsa wrote:
> Make perf stat use  group read if there  are groups
> defined. The group read will get the values for all
> member of groups within a single syscall instead of
> calling read syscall for every event.
> 
> We can see considerable less amount of kernel cycles
> spent on single group read, than reading each event
> separately, like for following perf stat command:
> 
>   # perf stat -e {cycles,instructions} -I 10 -a sleep 1
> 
> Monitored with "perf stat -r 5 -e '{cycles:u,cycles:k}'"
> 
> Before:
> 
>         24,325,676      cycles:u
>        297,040,775      cycles:k
> 
>        1.038554134 seconds time elapsed
> 
> After:
>         25,034,418      cycles:u
>        158,256,395      cycles:k
> 
>        1.036864497 seconds time elapsed
> 
> The perf_evsel__open fallback changes contributed by Andi Kleen.
> 
> Link: http://lkml.kernel.org/n/tip-b6g8qarwvptr81cqdtfst...@git.kernel.org
> Signed-off-by: Jiri Olsa <jo...@kernel.org>
> ---
>  tools/perf/builtin-stat.c | 30 +++++++++++++++++++++++++++---
>  tools/perf/util/counts.h  |  1 +
>  tools/perf/util/evsel.c   | 10 ++++++++++
>  3 files changed, 38 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 48ac53b199fc..866da7aa54bf 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -213,10 +213,20 @@ static void perf_stat__reset_stats(void)
>  static int create_perf_stat_counter(struct perf_evsel *evsel)
>  {
>       struct perf_event_attr *attr = &evsel->attr;
> +     struct perf_evsel *leader = evsel->leader;
>  
> -     if (stat_config.scale)
> +     if (stat_config.scale) {
>               attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
>                                   PERF_FORMAT_TOTAL_TIME_RUNNING;
> +     }
> +
> +     /*
> +      * The event is part of non trivial group, let's enable
> +      * the group read (for leader) and ID retrieval for all
> +      * members.
> +      */
> +     if (leader->nr_members > 1)
> +             attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

I just wonder ID is really necessary.  Doesn't it have same order we
can traverse with the for_each_group_member()?

Thanks,
Namhyung


>  
>       attr->inherit = !no_inherit;
>  
> @@ -333,13 +343,21 @@ static int read_counter(struct perf_evsel *counter)
>                       struct perf_counts_values *count;
>  
>                       count = perf_counts(counter->counts, cpu, thread);
> -                     if (perf_evsel__read(counter, cpu, thread, count)) {
> +
> +                     /*
> +                      * The leader's group read loads data into its group 
> members
> +                      * (via perf_evsel__read_counter) and sets threir 
> count->loaded.
> +                      */
> +                     if (!count->loaded &&
> +                         perf_evsel__read_counter(counter, cpu, thread)) {
>                               counter->counts->scaled = -1;
>                               perf_counts(counter->counts, cpu, thread)->ena 
> = 0;
>                               perf_counts(counter->counts, cpu, thread)->run 
> = 0;
>                               return -1;
>                       }
>  
> +                     count->loaded = false;
> +
>                       if (STAT_RECORD) {
>                               if (perf_evsel__write_stat_event(counter, cpu, 
> thread, count)) {
>                                       pr_err("failed to write stat event\n");
> @@ -559,6 +577,11 @@ static int store_counter_ids(struct perf_evsel *counter)
>       return __store_counter_ids(counter, cpus, threads);
>  }
>  
> +static bool perf_evsel__should_store_id(struct perf_evsel *counter)
> +{
> +     return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
> +}
> +
>  static int __run_perf_stat(int argc, const char **argv)
>  {
>       int interval = stat_config.interval;
> @@ -631,7 +654,8 @@ static int __run_perf_stat(int argc, const char **argv)
>               if (l > unit_width)
>                       unit_width = l;
>  
> -             if (STAT_RECORD && store_counter_ids(counter))
> +             if (perf_evsel__should_store_id(counter) &&
> +                 store_counter_ids(counter))
>                       return -1;
>       }
>  
> diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
> index 34d8baaf558a..cb45a6aecf9d 100644
> --- a/tools/perf/util/counts.h
> +++ b/tools/perf/util/counts.h
> @@ -12,6 +12,7 @@ struct perf_counts_values {
>               };
>               u64 values[3];
>       };
> +     bool    loaded;
>  };
>  
>  struct perf_counts {
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 89aecf3a35c7..3735c9e0080d 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -49,6 +49,7 @@ static struct {
>       bool clockid_wrong;
>       bool lbr_flags;
>       bool write_backward;
> +     bool group_read;
>  } perf_missing_features;
>  
>  static clockid_t clockid;
> @@ -1321,6 +1322,7 @@ perf_evsel__set_count(struct perf_evsel *counter, int 
> cpu, int thread,
>       count->val    = val;
>       count->ena    = ena;
>       count->run    = run;
> +     count->loaded = true;
>  }
>  
>  static int
> @@ -1677,6 +1679,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct 
> cpu_map *cpus,
>       if (perf_missing_features.lbr_flags)
>               evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS 
> |
>                                    PERF_SAMPLE_BRANCH_NO_CYCLES);
> +     if (perf_missing_features.group_read && evsel->attr.inherit)
> +             evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
>  retry_sample_id:
>       if (perf_missing_features.sample_id_all)
>               evsel->attr.sample_id_all = 0;
> @@ -1832,6 +1836,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct 
> cpu_map *cpus,
>               perf_missing_features.lbr_flags = true;
>               pr_debug2("switching off branch sample type no 
> (cycles/flags)\n");
>               goto fallback_missing_features;
> +     } else if (!perf_missing_features.group_read &&
> +                 evsel->attr.inherit &&
> +                (evsel->attr.read_format & PERF_FORMAT_GROUP)) {
> +             perf_missing_features.group_read = true;
> +             pr_debug2("switching off group read\n");
> +             goto fallback_missing_features;
>       }
>  out_close:
>       do {
> -- 
> 2.9.4
> 

Reply via email to