Re: [PATCH] perf stat: improve readability of shadow stats

2021-03-04 Thread Changbin Du
On Tue, Mar 02, 2021 at 09:25:24AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Tue, Mar 02, 2021 at 11:08:40AM +0900, Namhyung Kim escreveu:
> > On Tue, Mar 2, 2021 at 4:19 AM Jiri Olsa  wrote:
> > >
> > > On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> > > > This does follow two changes:
> > > >   1) Select appropriate unit between K/M/G.
> > > >   2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> > > >
> > > > $ sudo ./perf stat -a -- sleep 1
> > > >
> > > > Before: Unit 'M' is selected even the number is very small.
> > > >  Performance counter stats for 'system wide':
> > > >
> > > >   4,003.06 msec cpu-clock #3.998 CPUs 
> > > > utilized
> > > > 16,179  context-switches  #0.004 M/sec
> > > >161  cpu-migrations#0.040 K/sec
> > > >  4,699  page-faults   #0.001 M/sec
> > > >  6,135,801,925  cycles#1.533 GHz
> > > >   (83.21%)
> > > >  5,783,308,491  stalled-cycles-frontend   #   94.26% frontend 
> > > > cycles idle (83.21%)
> > > >  4,543,694,050  stalled-cycles-backend#   74.05% backend 
> > > > cycles idle  (66.49%)
> > > >  4,720,130,587  instructions  #0.77  insn per 
> > > > cycle
> > > >   #1.23  stalled 
> > > > cycles per insn  (83.28%)
> > > >753,848,078  branches  #  188.318 M/sec  
> > > >   (83.61%)
> > > > 37,457,747  branch-misses #4.97% of all 
> > > > branches  (83.48%)
> > > >
> > > >1.001283725 seconds time elapsed
> > > >
> > > > After:
> > > > $ sudo ./perf stat -a -- sleep 2
> > > >
> > > >  Performance counter stats for 'system wide':
> > > >
> > > >   8,003.20 msec cpu-clock #3.998 CPUs 
> > > > utilized
> > > >  9,768  context-switches  #1.221 K/cpu-sec
> > > >164  cpu-migrations#   20.492  /cpu-sec
> > >
> > > should you remove also the leading '/' in ' /cpu-sec' ?
> > 
> > The change looks good.  And I think we should keep '/' otherwise it'd be
> > more confusing.
> 
> Perhaps:
> 
>8,003.20 msec cpu-clock #3.998 CPUs utilized
>   9,768  context-switches  #1.221 K/cpu-sec
> 164  cpu-migrations#   20.492 /cpu-sec
> 
> ?
>
sure. Now it is:
$ sudo ./perf stat 
[sudo] password for changbin: 
^C
 Performance counter stats for 'system wide':

  6,046.61 msec cpu-clock #3.999 CPUs utilized  

 7,569  context-switches  #1.252 K/sec  

   587  cpu-migrations#   97.079 /sec   

   229  page-faults   #   37.872 /sec   

 1,484,855,385  cycles#0.246 GHz

 2,112,863,483  stalled-cycles-frontend   #  142.29% frontend cycles 
idle   
 1,980,746,623  stalled-cycles-backend#  133.40% backend cycles 
idle
   454,477,035  instructions  #0.31  insn per cycle 

  #4.65  stalled cycles per 
insn
89,992,521  branches  #   14.883 M/sec  

 6,196,599  branch-misses #6.89% of all branches


   1.512029866 seconds time elapsed

 
> - Arnaldo
>  
> > >
> > >
> > > SNIP
> > >
> > > > @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct 
> > > > perf_stat_config *config,
> > > >   generic_metric(config, evsel->metric_expr, 
> > > > evsel->metric_events, NULL,
> > > >   evsel->name, evsel->metric_name, NULL, 1, 
> > > > cpu, out, st);
> > > >   } else if (runtime_stat_n(st, STAT_NSECS, cpu, ) != 0) {
> > > > - char unit = 'M';
> > > > + char unit = ' ';
> > > >   char unit_buf[10];
> > > >
> > > >   total = runtime_stat_avg(st, STAT_NSECS, cpu, );
> > > > -
> > > >   if (total)
> > > > - ratio = 1000.0 * avg / total;
> > > > - if (ratio < 0.001) {
> > > > - ratio *= 1000;
> > > > - unit = 'K';
> > > > - }
> > > > - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> > > > + ratio = convert_unit_double(10.0 * avg / 
> > > > total, );
> > > > +
> > > > + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
> > > >   print_metric(config, ctxp, NULL, "%8.3f", unit_buf, 
> > > > ratio);
> > >
> > > hum this will change -x output that people parse, so I don't think we 

Re: [PATCH] perf stat: improve readability of shadow stats

2021-03-04 Thread Changbin Du
On Mon, Mar 01, 2021 at 08:19:20PM +0100, Jiri Olsa wrote:
> On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> > This does follow two changes:
> >   1) Select appropriate unit between K/M/G.
> >   2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> > 
> > $ sudo ./perf stat -a -- sleep 1
> > 
> > Before: Unit 'M' is selected even the number is very small.
> >  Performance counter stats for 'system wide':
> > 
> >   4,003.06 msec cpu-clock #3.998 CPUs utilized
> > 16,179  context-switches  #0.004 M/sec
> >161  cpu-migrations#0.040 K/sec
> >  4,699  page-faults   #0.001 M/sec
> >  6,135,801,925  cycles#1.533 GHz
> >   (83.21%)
> >  5,783,308,491  stalled-cycles-frontend   #   94.26% frontend 
> > cycles idle (83.21%)
> >  4,543,694,050  stalled-cycles-backend#   74.05% backend cycles 
> > idle  (66.49%)
> >  4,720,130,587  instructions  #0.77  insn per cycle
> >   #1.23  stalled cycles 
> > per insn  (83.28%)
> >753,848,078  branches  #  188.318 M/sec  
> >   (83.61%)
> > 37,457,747  branch-misses #4.97% of all 
> > branches  (83.48%)
> > 
> >1.001283725 seconds time elapsed
> > 
> > After:
> > $ sudo ./perf stat -a -- sleep 2
> > 
> >  Performance counter stats for 'system wide':
> > 
> >   8,003.20 msec cpu-clock #3.998 CPUs utilized
> >  9,768  context-switches  #1.221 K/cpu-sec
> >164  cpu-migrations#   20.492  /cpu-sec
> 
> should you remove also the leading '/' in ' /cpu-sec' ?
> 
> 
> SNIP
> 
> > @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct 
> > perf_stat_config *config,
> > generic_metric(config, evsel->metric_expr, 
> > evsel->metric_events, NULL,
> > evsel->name, evsel->metric_name, NULL, 1, cpu, 
> > out, st);
> > } else if (runtime_stat_n(st, STAT_NSECS, cpu, ) != 0) {
> > -   char unit = 'M';
> > +   char unit = ' ';
> > char unit_buf[10];
> >  
> > total = runtime_stat_avg(st, STAT_NSECS, cpu, );
> > -
> > if (total)
> > -   ratio = 1000.0 * avg / total;
> > -   if (ratio < 0.001) {
> > -   ratio *= 1000;
> > -   unit = 'K';
> > -   }
> > -   snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> > +   ratio = convert_unit_double(10.0 * avg / total, 
> > );
> > +
> > +   snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
> > print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
> 
> hum this will change -x output that people parse, so I don't think we can do 
> that
okay, so skip this change and keep another.

-- 
Cheers,
Changbin Du


Re: [PATCH] perf stat: improve readability of shadow stats

2021-03-02 Thread Arnaldo Carvalho de Melo
Em Tue, Mar 02, 2021 at 11:08:40AM +0900, Namhyung Kim escreveu:
> On Tue, Mar 2, 2021 at 4:19 AM Jiri Olsa  wrote:
> >
> > On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> > > This does follow two changes:
> > >   1) Select appropriate unit between K/M/G.
> > >   2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> > >
> > > $ sudo ./perf stat -a -- sleep 1
> > >
> > > Before: Unit 'M' is selected even the number is very small.
> > >  Performance counter stats for 'system wide':
> > >
> > >   4,003.06 msec cpu-clock #3.998 CPUs utilized
> > > 16,179  context-switches  #0.004 M/sec
> > >161  cpu-migrations#0.040 K/sec
> > >  4,699  page-faults   #0.001 M/sec
> > >  6,135,801,925  cycles#1.533 GHz  
> > > (83.21%)
> > >  5,783,308,491  stalled-cycles-frontend   #   94.26% frontend 
> > > cycles idle (83.21%)
> > >  4,543,694,050  stalled-cycles-backend#   74.05% backend 
> > > cycles idle  (66.49%)
> > >  4,720,130,587  instructions  #0.77  insn per 
> > > cycle
> > >   #1.23  stalled 
> > > cycles per insn  (83.28%)
> > >753,848,078  branches  #  188.318 M/sec
> > > (83.61%)
> > > 37,457,747  branch-misses #4.97% of all 
> > > branches  (83.48%)
> > >
> > >1.001283725 seconds time elapsed
> > >
> > > After:
> > > $ sudo ./perf stat -a -- sleep 2
> > >
> > >  Performance counter stats for 'system wide':
> > >
> > >   8,003.20 msec cpu-clock #3.998 CPUs utilized
> > >  9,768  context-switches  #1.221 K/cpu-sec
> > >164  cpu-migrations#   20.492  /cpu-sec
> >
> > should you remove also the leading '/' in ' /cpu-sec' ?
> 
> The change looks good.  And I think we should keep '/' otherwise it'd be
> more confusing.

Perhaps:

   8,003.20 msec cpu-clock #3.998 CPUs utilized
  9,768  context-switches  #1.221 K/cpu-sec
164  cpu-migrations#   20.492 /cpu-sec

?

- Arnaldo
 
> >
> >
> > SNIP
> >
> > > @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct 
> > > perf_stat_config *config,
> > >   generic_metric(config, evsel->metric_expr, 
> > > evsel->metric_events, NULL,
> > >   evsel->name, evsel->metric_name, NULL, 1, 
> > > cpu, out, st);
> > >   } else if (runtime_stat_n(st, STAT_NSECS, cpu, ) != 0) {
> > > - char unit = 'M';
> > > + char unit = ' ';
> > >   char unit_buf[10];
> > >
> > >   total = runtime_stat_avg(st, STAT_NSECS, cpu, );
> > > -
> > >   if (total)
> > > - ratio = 1000.0 * avg / total;
> > > - if (ratio < 0.001) {
> > > - ratio *= 1000;
> > > - unit = 'K';
> > > - }
> > > - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> > > + ratio = convert_unit_double(10.0 * avg / 
> > > total, );
> > > +
> > > + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
> > >   print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
> >
> > hum this will change -x output that people parse, so I don't think we can 
> > do that
> 
> Agreed.
> 
> >
> > >   } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
> > >   print_smi_cost(config, cpu, out, st, );
> > > diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
> > > index a46762aec4c9..ac13b5ecde31 100644
> > > --- a/tools/perf/util/units.c
> > > +++ b/tools/perf/util/units.c
> > > @@ -55,6 +55,28 @@ unsigned long convert_unit(unsigned long value, char 
> > > *unit)
> > >   return value;
> > >  }
> > >
> > > +double convert_unit_double(double value, char *unit)
> > > +{
> > > + *unit = ' ';
> > > +
> > > + if (value > 1000.0) {
> > > + value /= 1000.0;
> > > + *unit = 'K';
> > > + }
> > > +
> > > + if (value > 1000.0) {
> > > + value /= 1000.0;
> > > + *unit = 'M';
> > > + }
> > > +
> > > + if (value > 1000.0) {
> > > + value /= 1000.0;
> > > + *unit = 'G';
> > > + }
> > > +
> > > + return value;
> > > +}
> >
> > we have convert_unit function just above doing the same only with
> > unsigned long.. let's have one base function with double values and
> > another one casting the result to unsigned long
> 
> Sounds good.
> 
> Thanks,
> Namhyung

-- 

- Arnaldo


Re: [PATCH] perf stat: improve readability of shadow stats

2021-03-02 Thread kajoljain



On 3/1/21 10:54 PM, Changbin Du wrote:
> This does follow two changes:
>   1) Select appropriate unit between K/M/G.
>   2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> 
> $ sudo ./perf stat -a -- sleep 1
> 
> Before: Unit 'M' is selected even the number is very small.
>  Performance counter stats for 'system wide':
> 
>   4,003.06 msec cpu-clock #3.998 CPUs utilized
> 16,179  context-switches  #0.004 M/sec
>161  cpu-migrations#0.040 K/sec
>  4,699  page-faults   #0.001 M/sec
>  6,135,801,925  cycles#1.533 GHz  
> (83.21%)
>  5,783,308,491  stalled-cycles-frontend   #   94.26% frontend cycles 
> idle (83.21%)
>  4,543,694,050  stalled-cycles-backend#   74.05% backend cycles 
> idle  (66.49%)
>  4,720,130,587  instructions  #0.77  insn per cycle
>   #1.23  stalled cycles 
> per insn  (83.28%)
>753,848,078  branches  #  188.318 M/sec
> (83.61%)
> 37,457,747  branch-misses #4.97% of all branches  
> (83.48%)
> 
>1.001283725 seconds time elapsed
> 
> After:
> $ sudo ./perf stat -a -- sleep 2
> 
>  Performance counter stats for 'system wide':
> 
>   8,003.20 msec cpu-clock #3.998 CPUs utilized
>  9,768  context-switches  #1.221 K/cpu-sec
>164  cpu-migrations#   20.492  /cpu-sec
> 74,146  page-faults   #9.265 K/cpu-sec
> 19,008,796,806  cycles#2.375 GHz  
> (83.21%)
> 14,789,443,853  stalled-cycles-frontend   #   77.80% frontend cycles 
> idle (83.29%)
> 11,867,812,064  stalled-cycles-backend#   62.43% backend cycles 
> idle  (66.80%)
>  9,898,252,603  instructions  #0.52  insn per cycle
>   #1.49  stalled cycles 
> per insn  (83.41%)
>  2,063,251,998  branches  #  257.803 M/cpu-sec
> (83.41%)
> 86,941,704  branch-misses #4.21% of all branches  
> (83.30%)
> 
>2.001743706 seconds time elapsed
> 
> Signed-off-by: Changbin Du 
> ---
>  tools/perf/util/stat-shadow.c | 13 +
>  tools/perf/util/units.c   | 22 ++
>  tools/perf/util/units.h   |  1 +
>  3 files changed, 28 insertions(+), 8 deletions(-)
> 
> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> index 6ccf21a72f06..786b5ef512d8 100644
> --- a/tools/perf/util/stat-shadow.c
> +++ b/tools/perf/util/stat-shadow.c
> @@ -9,6 +9,7 @@
>  #include "expr.h"
>  #include "metricgroup.h"
>  #include "cgroup.h"
> +#include "units.h"
>  #include 
>  

>  /*
> @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct 
> perf_stat_config *config,
>   generic_metric(config, evsel->metric_expr, 
> evsel->metric_events, NULL,
>   evsel->name, evsel->metric_name, NULL, 1, cpu, 
> out, st);
>   } else if (runtime_stat_n(st, STAT_NSECS, cpu, ) != 0) {
> - char unit = 'M';
> + char unit = ' ';
>   char unit_buf[10];
>  
>   total = runtime_stat_avg(st, STAT_NSECS, cpu, );
> -
>   if (total)
> - ratio = 1000.0 * avg / total;
> - if (ratio < 0.001) {
> - ratio *= 1000;
> - unit = 'K';
> - }
> - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> + ratio = convert_unit_double(10.0 * avg / total, 
> );
> +
> + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
>   print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
>   } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
>   print_smi_cost(config, cpu, out, st, );
> diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
> index a46762aec4c9..ac13b5ecde31 100644
> --- a/tools/perf/util/units.c
> +++ b/tools/perf/util/units.c
> @@ -55,6 +55,28 @@ unsigned long convert_unit(unsigned long value, char *unit)
>   return value;
>  }
>

Hi Changbin,
  Since we are using added function `convert_unit_double` just in 
stat-shadow.c,
I think there is no need to add it in units.h, we can directly create static 
func inside `stat-shadow.c`itself.

Thanks,
Kajol Jain
  
> +double convert_unit_double(double value, char *unit)
> +{
> + *unit = ' ';
> +
> + if (value > 1000.0) {
> + value /= 1000.0;
> + *unit = 'K';
> + }
> +
> + if (value > 1000.0) {
> + value /= 1000.0;
> + 

Re: [PATCH] perf stat: improve readability of shadow stats

2021-03-01 Thread Namhyung Kim
On Tue, Mar 2, 2021 at 4:19 AM Jiri Olsa  wrote:
>
> On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> > This does follow two changes:
> >   1) Select appropriate unit between K/M/G.
> >   2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> >
> > $ sudo ./perf stat -a -- sleep 1
> >
> > Before: Unit 'M' is selected even the number is very small.
> >  Performance counter stats for 'system wide':
> >
> >   4,003.06 msec cpu-clock #3.998 CPUs utilized
> > 16,179  context-switches  #0.004 M/sec
> >161  cpu-migrations#0.040 K/sec
> >  4,699  page-faults   #0.001 M/sec
> >  6,135,801,925  cycles#1.533 GHz
> >   (83.21%)
> >  5,783,308,491  stalled-cycles-frontend   #   94.26% frontend 
> > cycles idle (83.21%)
> >  4,543,694,050  stalled-cycles-backend#   74.05% backend cycles 
> > idle  (66.49%)
> >  4,720,130,587  instructions  #0.77  insn per cycle
> >   #1.23  stalled cycles 
> > per insn  (83.28%)
> >753,848,078  branches  #  188.318 M/sec  
> >   (83.61%)
> > 37,457,747  branch-misses #4.97% of all 
> > branches  (83.48%)
> >
> >1.001283725 seconds time elapsed
> >
> > After:
> > $ sudo ./perf stat -a -- sleep 2
> >
> >  Performance counter stats for 'system wide':
> >
> >   8,003.20 msec cpu-clock #3.998 CPUs utilized
> >  9,768  context-switches  #1.221 K/cpu-sec
> >164  cpu-migrations#   20.492  /cpu-sec
>
> should you remove also the leading '/' in ' /cpu-sec' ?

The change looks good.  And I think we should keep '/' otherwise it'd be
more confusing.

>
>
> SNIP
>
> > @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct 
> > perf_stat_config *config,
> >   generic_metric(config, evsel->metric_expr, 
> > evsel->metric_events, NULL,
> >   evsel->name, evsel->metric_name, NULL, 1, 
> > cpu, out, st);
> >   } else if (runtime_stat_n(st, STAT_NSECS, cpu, ) != 0) {
> > - char unit = 'M';
> > + char unit = ' ';
> >   char unit_buf[10];
> >
> >   total = runtime_stat_avg(st, STAT_NSECS, cpu, );
> > -
> >   if (total)
> > - ratio = 1000.0 * avg / total;
> > - if (ratio < 0.001) {
> > - ratio *= 1000;
> > - unit = 'K';
> > - }
> > - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> > + ratio = convert_unit_double(10.0 * avg / 
> > total, );
> > +
> > + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
> >   print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
>
> hum this will change -x output that people parse, so I don't think we can do 
> that

Agreed.

>
> >   } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
> >   print_smi_cost(config, cpu, out, st, );
> > diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
> > index a46762aec4c9..ac13b5ecde31 100644
> > --- a/tools/perf/util/units.c
> > +++ b/tools/perf/util/units.c
> > @@ -55,6 +55,28 @@ unsigned long convert_unit(unsigned long value, char 
> > *unit)
> >   return value;
> >  }
> >
> > +double convert_unit_double(double value, char *unit)
> > +{
> > + *unit = ' ';
> > +
> > + if (value > 1000.0) {
> > + value /= 1000.0;
> > + *unit = 'K';
> > + }
> > +
> > + if (value > 1000.0) {
> > + value /= 1000.0;
> > + *unit = 'M';
> > + }
> > +
> > + if (value > 1000.0) {
> > + value /= 1000.0;
> > + *unit = 'G';
> > + }
> > +
> > + return value;
> > +}
>
> we have convert_unit function just above doing the same only with
> unsigned long.. let's have one base function with double values and
> another one casting the result to unsigned long

Sounds good.

Thanks,
Namhyung


Re: [PATCH] perf stat: improve readability of shadow stats

2021-03-01 Thread Jiri Olsa
On Tue, Mar 02, 2021 at 01:24:02AM +0800, Changbin Du wrote:
> This does follow two changes:
>   1) Select appropriate unit between K/M/G.
>   2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.
> 
> $ sudo ./perf stat -a -- sleep 1
> 
> Before: Unit 'M' is selected even the number is very small.
>  Performance counter stats for 'system wide':
> 
>   4,003.06 msec cpu-clock #3.998 CPUs utilized
> 16,179  context-switches  #0.004 M/sec
>161  cpu-migrations#0.040 K/sec
>  4,699  page-faults   #0.001 M/sec
>  6,135,801,925  cycles#1.533 GHz  
> (83.21%)
>  5,783,308,491  stalled-cycles-frontend   #   94.26% frontend cycles 
> idle (83.21%)
>  4,543,694,050  stalled-cycles-backend#   74.05% backend cycles 
> idle  (66.49%)
>  4,720,130,587  instructions  #0.77  insn per cycle
>   #1.23  stalled cycles 
> per insn  (83.28%)
>753,848,078  branches  #  188.318 M/sec
> (83.61%)
> 37,457,747  branch-misses #4.97% of all branches  
> (83.48%)
> 
>1.001283725 seconds time elapsed
> 
> After:
> $ sudo ./perf stat -a -- sleep 2
> 
>  Performance counter stats for 'system wide':
> 
>   8,003.20 msec cpu-clock #3.998 CPUs utilized
>  9,768  context-switches  #1.221 K/cpu-sec
>164  cpu-migrations#   20.492  /cpu-sec

should you remove also the leading '/' in ' /cpu-sec' ?


SNIP

> @@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct 
> perf_stat_config *config,
>   generic_metric(config, evsel->metric_expr, 
> evsel->metric_events, NULL,
>   evsel->name, evsel->metric_name, NULL, 1, cpu, 
> out, st);
>   } else if (runtime_stat_n(st, STAT_NSECS, cpu, ) != 0) {
> - char unit = 'M';
> + char unit = ' ';
>   char unit_buf[10];
>  
>   total = runtime_stat_avg(st, STAT_NSECS, cpu, );
> -
>   if (total)
> - ratio = 1000.0 * avg / total;
> - if (ratio < 0.001) {
> - ratio *= 1000;
> - unit = 'K';
> - }
> - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
> + ratio = convert_unit_double(10.0 * avg / total, 
> );
> +
> + snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
>   print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);

hum this will change -x output that people parse, so I don't think we can do 
that

>   } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
>   print_smi_cost(config, cpu, out, st, );
> diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
> index a46762aec4c9..ac13b5ecde31 100644
> --- a/tools/perf/util/units.c
> +++ b/tools/perf/util/units.c
> @@ -55,6 +55,28 @@ unsigned long convert_unit(unsigned long value, char *unit)
>   return value;
>  }
>  
> +double convert_unit_double(double value, char *unit)
> +{
> + *unit = ' ';
> +
> + if (value > 1000.0) {
> + value /= 1000.0;
> + *unit = 'K';
> + }
> +
> + if (value > 1000.0) {
> + value /= 1000.0;
> + *unit = 'M';
> + }
> +
> + if (value > 1000.0) {
> + value /= 1000.0;
> + *unit = 'G';
> + }
> +
> + return value;
> +}

we have convert_unit function just above doing the same only with
unsigned long.. let's have one base function with double values and
another one casting the result to unsigned long

jirka



[PATCH] perf stat: improve readability of shadow stats

2021-03-01 Thread Changbin Du
This does follow two changes:
  1) Select appropriate unit between K/M/G.
  2) Use 'cpu-sec' instead of 'sec' to state this is not the wall-time.

$ sudo ./perf stat -a -- sleep 1

Before: Unit 'M' is selected even the number is very small.
 Performance counter stats for 'system wide':

  4,003.06 msec cpu-clock #3.998 CPUs utilized
16,179  context-switches  #0.004 M/sec
   161  cpu-migrations#0.040 K/sec
 4,699  page-faults   #0.001 M/sec
 6,135,801,925  cycles#1.533 GHz
  (83.21%)
 5,783,308,491  stalled-cycles-frontend   #   94.26% frontend cycles 
idle (83.21%)
 4,543,694,050  stalled-cycles-backend#   74.05% backend cycles 
idle  (66.49%)
 4,720,130,587  instructions  #0.77  insn per cycle
  #1.23  stalled cycles per 
insn  (83.28%)
   753,848,078  branches  #  188.318 M/sec  
  (83.61%)
37,457,747  branch-misses #4.97% of all branches
  (83.48%)

   1.001283725 seconds time elapsed

After:
$ sudo ./perf stat -a -- sleep 2

 Performance counter stats for 'system wide':

  8,003.20 msec cpu-clock #3.998 CPUs utilized
 9,768  context-switches  #1.221 K/cpu-sec
   164  cpu-migrations#   20.492  /cpu-sec
74,146  page-faults   #9.265 K/cpu-sec
19,008,796,806  cycles#2.375 GHz
  (83.21%)
14,789,443,853  stalled-cycles-frontend   #   77.80% frontend cycles 
idle (83.29%)
11,867,812,064  stalled-cycles-backend#   62.43% backend cycles 
idle  (66.80%)
 9,898,252,603  instructions  #0.52  insn per cycle
  #1.49  stalled cycles per 
insn  (83.41%)
 2,063,251,998  branches  #  257.803 M/cpu-sec  
  (83.41%)
86,941,704  branch-misses #4.21% of all branches
  (83.30%)

   2.001743706 seconds time elapsed

Signed-off-by: Changbin Du 
---
 tools/perf/util/stat-shadow.c | 13 +
 tools/perf/util/units.c   | 22 ++
 tools/perf/util/units.h   |  1 +
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 6ccf21a72f06..786b5ef512d8 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -9,6 +9,7 @@
 #include "expr.h"
 #include "metricgroup.h"
 #include "cgroup.h"
+#include "units.h"
 #include 
 
 /*
@@ -1270,18 +1271,14 @@ void perf_stat__print_shadow_stats(struct 
perf_stat_config *config,
generic_metric(config, evsel->metric_expr, 
evsel->metric_events, NULL,
evsel->name, evsel->metric_name, NULL, 1, cpu, 
out, st);
} else if (runtime_stat_n(st, STAT_NSECS, cpu, ) != 0) {
-   char unit = 'M';
+   char unit = ' ';
char unit_buf[10];
 
total = runtime_stat_avg(st, STAT_NSECS, cpu, );
-
if (total)
-   ratio = 1000.0 * avg / total;
-   if (ratio < 0.001) {
-   ratio *= 1000;
-   unit = 'K';
-   }
-   snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+   ratio = convert_unit_double(10.0 * avg / total, 
);
+
+   snprintf(unit_buf, sizeof(unit_buf), "%c/cpu-sec", unit);
print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
print_smi_cost(config, cpu, out, st, );
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
index a46762aec4c9..ac13b5ecde31 100644
--- a/tools/perf/util/units.c
+++ b/tools/perf/util/units.c
@@ -55,6 +55,28 @@ unsigned long convert_unit(unsigned long value, char *unit)
return value;
 }
 
+double convert_unit_double(double value, char *unit)
+{
+   *unit = ' ';
+
+   if (value > 1000.0) {
+   value /= 1000.0;
+   *unit = 'K';
+   }
+
+   if (value > 1000.0) {
+   value /= 1000.0;
+   *unit = 'M';
+   }
+
+   if (value > 1000.0) {
+   value /= 1000.0;
+   *unit = 'G';
+   }
+
+   return value;
+}
+
 int unit_number__scnprintf(char *buf, size_t size, u64 n)
 {
char unit[4] = "BKMG";
diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h
index 99263b6a23f7..b3ace67ac16f 100644
--- a/tools/perf/util/units.h
+++ b/tools/perf/util/units.h
@@ -13,6 +13,7 @@ struct parse_tag {
 unsigned long