On Fri, Sep 04, 2015 at 03:45:13PM -0700, Andi Kleen wrote: > From: Andi Kleen <a...@linux.intel.com> > > Abstract the printing of shadow metrics. Instead of every > metric calling fprintf directly and taking care of indentation, > use two call backs: one to print metrics and another to > start a new line. > > This will allow adding metrics to CSV mode and also > using them for other purposes. > > The computation of padding is now done in the central > callback, instead of every metric doing it manually. > This makes it easier to add new metrics. > > v2: Refactor functions, printout now does more. Move > shadow printing. Improve fallback callbacks. Don't > use void * callback data. > v3: Remove unnecessary hunk. Add typedef for new_line > Signed-off-by: Andi Kleen <a...@linux.intel.com>
before this patch: [jolsa@krava perf]$ ./perf stat -e cycles -I 1000 -x, yes > /dev/null 28.002841829,2907511555,,cycles,1000612137,100.00 29.002944242,2893835898,,cycles,1000615661,100.00 30.003043253,2910405767,,cycles,1000618513,100.00 31.003140585,2909645607,,cycles,1000621442,100.00 32.003244286,2910397988,,cycles,1000624150,100.00 33.003347139,2908927486,,cycles,1000626890,100.00 ^C 33.581210913,1677490047,,cycles,988203059,100.00 33.581315820,157136,,cycles,959970233,100.00 yes: Interrupt after this patch: [jolsa@krava perf]$ ./perf stat -e cycles -I 1000 -x, yes > /dev/null 1.000101739,3074999967,,cycles # Ghz ,1000247731,100.00 2.000223351,3019361712,,cycles # Ghz ,999945890,100.00 3.000327235,3012966500,,cycles # Ghz ,999805678,100.00 4.000430983,3009829158,,cycles # Ghz ,999871044,100.00 I think the reason is that this patch does too much and needs to be separated to smaller changes, like: - introduce the print_metrics interface for perf_stat__print_shadow_stats - introduce printout function - introduce support for printing metrics in csv in interval mode - and enable it or any other way you find feasible *easy to review*.. I cannot ack it at this state jirka > --- > tools/perf/builtin-stat.c | 100 ++++++++++++++------ > tools/perf/util/stat-shadow.c | 212 > +++++++++++++++++++++++------------------- > tools/perf/util/stat.h | 15 ++- > 3 files changed, 197 insertions(+), 130 deletions(-) > SNIP > @@ -544,12 +581,32 @@ static void abs_printout(int id, int nr, struct > perf_evsel *evsel, double avg) > > if (evsel->cgrp) > fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); > +} > > - if (csv_output || stat_config.interval) > - return; now you printout metrics for interval/csv > +static void printout(int id, int nr, struct perf_evsel *counter, double uval) > +{ > + struct outstate os = { .fh = stat_config.output }; > + struct perf_stat_output_ctx out; > + print_metric_t pm = print_metric_std; > + void (*nl)(void *); > + > + if (stat_config.aggr_mode == AGGR_NONE) > + nl = new_line_no_aggr_std; > + else > + nl = new_line_std; > > - perf_stat__print_shadow_stats(output, evsel, avg, cpu, > - stat_config.aggr_mode); > + if (nsec_counter(counter)) > + nsec_printout(id, nr, counter, uval); > + else > + abs_printout(id, nr, counter, uval); > + > + out.print_metric = pm; > + out.new_line = nl; > + out.ctx = &os; > + perf_stat__print_shadow_stats(counter, uval, > + stat_config.aggr_mode == AGGR_GLOBAL ? 0 : > + cpu_map__id_to_cpu(id), > + &out); > } > > static void print_aggr(char *prefix) > @@ -605,12 +662,7 @@ static void print_aggr(char *prefix) > continue; > } > uval = val * counter->scale; > - > - if (nsec_counter(counter)) > - nsec_printout(id, nr, counter, uval); > - else > - abs_printout(id, nr, counter, uval); > - > + printout(id, nr, counter, uval); > if (!csv_output) > print_noise(counter, 1.0); > > @@ -641,11 +693,7 @@ static void print_aggr_thread(struct perf_evsel > *counter, char *prefix) > fprintf(output, "%s", prefix); > > uval = val * counter->scale; > - > - if (nsec_counter(counter)) > - nsec_printout(thread, 0, counter, uval); > - else > - abs_printout(thread, 0, counter, uval); > + printout(thread, 0, counter, uval); > > if (!csv_output) > print_noise(counter, 1.0); > @@ -695,11 +743,7 @@ static void print_counter_aggr(struct perf_evsel > *counter, char *prefix) > } > > uval = avg * counter->scale; > - > - if (nsec_counter(counter)) > - nsec_printout(-1, 0, counter, uval); > - else > - abs_printout(-1, 0, counter, uval); > + printout(-1, 0, counter, uval); > > print_noise(counter, avg); > > @@ -752,15 +796,9 @@ static void print_counter(struct perf_evsel *counter, > char *prefix) > } > > uval = val * counter->scale; > - > - if (nsec_counter(counter)) > - nsec_printout(cpu, 0, counter, uval); > - else > - abs_printout(cpu, 0, counter, uval); > - > + printout(cpu, 0, counter, uval); > if (!csv_output) > print_noise(counter, 1.0); > - print_running(run, ena); > > fputc('\n', output); > } > diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c > index 625ab3b..d7d27b5 100644 > --- a/tools/perf/util/stat-shadow.c > +++ b/tools/perf/util/stat-shadow.c > @@ -137,9 +137,10 @@ static const char *get_ratio_color(enum grc_type type, > double ratio) > return color; > } > > -static void print_stalled_cycles_frontend(FILE *out, int cpu, > +static void print_stalled_cycles_frontend(int cpu, > struct perf_evsel *evsel > - __maybe_unused, double avg) > + __maybe_unused, double avg, > + struct perf_stat_output_ctx *out) > { > double total, ratio = 0.0; > const char *color; > @@ -152,14 +153,13 @@ static void print_stalled_cycles_frontend(FILE *out, > int cpu, > > color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); > > - fprintf(out, " # "); > - color_fprintf(out, color, "%6.2f%%", ratio); > - fprintf(out, " frontend cycles idle "); > + out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", > ratio); > } > > -static void print_stalled_cycles_backend(FILE *out, int cpu, > +static void print_stalled_cycles_backend(int cpu, > struct perf_evsel *evsel > - __maybe_unused, double avg) > + __maybe_unused, double avg, > + struct perf_stat_output_ctx *out) > { > double total, ratio = 0.0; > const char *color; > @@ -172,14 +172,13 @@ static void print_stalled_cycles_backend(FILE *out, int > cpu, > > color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); > > - fprintf(out, " # "); > - color_fprintf(out, color, "%6.2f%%", ratio); > - fprintf(out, " backend cycles idle "); > + out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", > ratio); > } > > -static void print_branch_misses(FILE *out, int cpu, > +static void print_branch_misses(int cpu, > struct perf_evsel *evsel __maybe_unused, > - double avg) > + double avg, > + struct perf_stat_output_ctx *out) > { > double total, ratio = 0.0; > const char *color; > @@ -192,14 +191,13 @@ static void print_branch_misses(FILE *out, int cpu, > > color = get_ratio_color(GRC_CACHE_MISSES, ratio); > > - fprintf(out, " # "); > - color_fprintf(out, color, "%6.2f%%", ratio); > - fprintf(out, " of all branches "); > + out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); > } > > -static void print_l1_dcache_misses(FILE *out, int cpu, > +static void print_l1_dcache_misses(int cpu, > struct perf_evsel *evsel __maybe_unused, > - double avg) > + double avg, > + struct perf_stat_output_ctx *out) > { > double total, ratio = 0.0; > const char *color; > @@ -212,14 +210,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu, > > color = get_ratio_color(GRC_CACHE_MISSES, ratio); > > - fprintf(out, " # "); > - color_fprintf(out, color, "%6.2f%%", ratio); > - fprintf(out, " of all L1-dcache hits "); > + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", > ratio); > } > > -static void print_l1_icache_misses(FILE *out, int cpu, > +static void print_l1_icache_misses(int cpu, > struct perf_evsel *evsel __maybe_unused, > - double avg) > + double avg, > + struct perf_stat_output_ctx *out) > { > double total, ratio = 0.0; > const char *color; > @@ -231,15 +228,13 @@ static void print_l1_icache_misses(FILE *out, int cpu, > ratio = avg / total * 100.0; > > color = get_ratio_color(GRC_CACHE_MISSES, ratio); > - > - fprintf(out, " # "); > - color_fprintf(out, color, "%6.2f%%", ratio); > - fprintf(out, " of all L1-icache hits "); > + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", > ratio); > } > > -static void print_dtlb_cache_misses(FILE *out, int cpu, > +static void print_dtlb_cache_misses(int cpu, > struct perf_evsel *evsel __maybe_unused, > - double avg) > + double avg, > + struct perf_stat_output_ctx *out) > { > double total, ratio = 0.0; > const char *color; > @@ -251,15 +246,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu, > ratio = avg / total * 100.0; > > color = get_ratio_color(GRC_CACHE_MISSES, ratio); > - > - fprintf(out, " # "); > - color_fprintf(out, color, "%6.2f%%", ratio); > - fprintf(out, " of all dTLB cache hits "); > + out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", > ratio); > } > > -static void print_itlb_cache_misses(FILE *out, int cpu, > +static void print_itlb_cache_misses(int cpu, > struct perf_evsel *evsel __maybe_unused, > - double avg) > + double avg, > + struct perf_stat_output_ctx *out) > { > double total, ratio = 0.0; > const char *color; > @@ -271,15 +264,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu, > ratio = avg / total * 100.0; > > color = get_ratio_color(GRC_CACHE_MISSES, ratio); > - > - fprintf(out, " # "); > - color_fprintf(out, color, "%6.2f%%", ratio); > - fprintf(out, " of all iTLB cache hits "); > + out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", > ratio); > } > > -static void print_ll_cache_misses(FILE *out, int cpu, > +static void print_ll_cache_misses(int cpu, > struct perf_evsel *evsel __maybe_unused, > - double avg) > + double avg, > + struct perf_stat_output_ctx *out) > { > double total, ratio = 0.0; > const char *color; > @@ -291,15 +282,15 @@ static void print_ll_cache_misses(FILE *out, int cpu, > ratio = avg / total * 100.0; > > color = get_ratio_color(GRC_CACHE_MISSES, ratio); > - > - fprintf(out, " # "); > - color_fprintf(out, color, "%6.2f%%", ratio); > - fprintf(out, " of all LL-cache hits "); > + out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", > ratio); > } > > -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, > - double avg, int cpu, enum aggr_mode aggr) > +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, > + double avg, int cpu, > + struct perf_stat_output_ctx *out) > { > + void *ctxp = out->ctx; > + print_metric_t print_metric = out->print_metric; > double total, ratio = 0.0, total2; > int ctx = evsel_context(evsel); > > @@ -307,117 +298,144 @@ void perf_stat__print_shadow_stats(FILE *out, struct > perf_evsel *evsel, > total = avg_stats(&runtime_cycles_stats[ctx][cpu]); > if (total) { > ratio = avg / total; > - fprintf(out, " # %5.2f insns per cycle ", > ratio); > + print_metric(ctxp, NULL, "%7.2f ", > + "insn per cycle", ratio); > } else { > - fprintf(out, " "); > + print_metric(ctxp, NULL, NULL, "insn per cycle", 0); > } > total = > avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); > total = max(total, > avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); > > if (total && avg) { > ratio = total / avg; > - fprintf(out, "\n"); > - if (aggr == AGGR_NONE) > - fprintf(out, " "); > - fprintf(out, " > # %5.2f stalled cycles per insn", ratio); > + out->new_line(ctxp); > + print_metric(ctxp, NULL, "%7.2f ", > + "stalled cycles per insn", > + ratio); > + } else { > + print_metric(ctxp, NULL, NULL, > + "stalled cycles per insn", 0); > } > - > - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && > - runtime_branches_stats[ctx][cpu].n != 0) { > - print_branch_misses(out, cpu, evsel, avg); > + } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { > + if (runtime_branches_stats[ctx][cpu].n != 0) > + print_branch_misses(cpu, evsel, avg, out); > + else > + print_metric(ctxp, NULL, NULL, "of all branches", 0); > } else if ( > evsel->attr.type == PERF_TYPE_HW_CACHE && > evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | > ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | > - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16)) && > - runtime_l1_dcache_stats[ctx][cpu].n != 0) { > - print_l1_dcache_misses(out, cpu, evsel, avg); > + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16))) { > + if (runtime_l1_dcache_stats[ctx][cpu].n != 0) > + print_l1_dcache_misses(cpu, evsel, avg, out); > + else > + print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", > 0); > } else if ( > evsel->attr.type == PERF_TYPE_HW_CACHE && > evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | > ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | > - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16)) && > - runtime_l1_icache_stats[ctx][cpu].n != 0) { > - print_l1_icache_misses(out, cpu, evsel, avg); > + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16))) { > + if (runtime_l1_icache_stats[ctx][cpu].n != 0) > + print_l1_icache_misses(cpu, evsel, avg, out); > + else > + print_metric(ctxp, NULL, NULL, "of all L1-icache hits", > 0); > } else if ( > evsel->attr.type == PERF_TYPE_HW_CACHE && > evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | > ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | > - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16)) && > - runtime_dtlb_cache_stats[ctx][cpu].n != 0) { > - print_dtlb_cache_misses(out, cpu, evsel, avg); > + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16))) { > + if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) > + print_dtlb_cache_misses(cpu, evsel, avg, out); > + else > + print_metric(ctxp, NULL, NULL, "of all dTLB cache > hits", 0); > } else if ( > evsel->attr.type == PERF_TYPE_HW_CACHE && > evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | > ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | > - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16)) && > - runtime_itlb_cache_stats[ctx][cpu].n != 0) { > - print_itlb_cache_misses(out, cpu, evsel, avg); > + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16))) { > + if (runtime_itlb_cache_stats[ctx][cpu].n != 0) > + print_itlb_cache_misses(cpu, evsel, avg, out); > + else > + print_metric(ctxp, NULL, NULL, "of all iTLB cache > hits", 0); > } else if ( > evsel->attr.type == PERF_TYPE_HW_CACHE && > evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | > ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | > - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16)) && > - runtime_ll_cache_stats[ctx][cpu].n != 0) { > - print_ll_cache_misses(out, cpu, evsel, avg); > - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && > - runtime_cacherefs_stats[ctx][cpu].n != 0) { > + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << > 16))) { > + if (runtime_ll_cache_stats[ctx][cpu].n != 0) > + print_ll_cache_misses(cpu, evsel, avg, out); > + else > + print_metric(ctxp, NULL, NULL, "of all LL-cache hits", > 0); > + } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { > total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); > > if (total) > ratio = avg * 100 / total; > > - fprintf(out, " # %8.3f %% of all cache refs ", ratio); > - > + if (runtime_cacherefs_stats[ctx][cpu].n != 0) > + print_metric(ctxp, NULL, "%8.3f %%", > + "of all cache refs", ratio); > + else > + print_metric(ctxp, NULL, NULL, "of all cache refs", 0); > } else if (perf_evsel__match(evsel, HARDWARE, > HW_STALLED_CYCLES_FRONTEND)) { > - print_stalled_cycles_frontend(out, cpu, evsel, avg); > + print_stalled_cycles_frontend(cpu, evsel, avg, out); > } else if (perf_evsel__match(evsel, HARDWARE, > HW_STALLED_CYCLES_BACKEND)) { > - print_stalled_cycles_backend(out, cpu, evsel, avg); > + print_stalled_cycles_backend(cpu, evsel, avg, out); > } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { > total = avg_stats(&runtime_nsecs_stats[cpu]); > > if (total) { > ratio = avg / total; > - fprintf(out, " # %8.3f GHz ", ratio); > + print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); > } else { > - fprintf(out, " "); > + print_metric(ctxp, NULL, NULL, "Ghz", 0); > } > } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { > total = avg_stats(&runtime_cycles_stats[ctx][cpu]); > if (total) > - fprintf(out, > - " # %5.2f%% transactional cycles ", > - 100.0 * (avg / total)); > + print_metric(ctxp, NULL, > + "%7.2f%%", "transactional cycles", > + 100.0 * (avg / total)); > + else > + print_metric(ctxp, NULL, NULL, "transactional cycles", > + 0); > } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { > total = avg_stats(&runtime_cycles_stats[ctx][cpu]); > total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); > if (total2 < avg) > total2 = avg; > if (total) > - fprintf(out, > - " # %5.2f%% aborted cycles ", > + print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", > 100.0 * ((total2-avg) / total)); > - } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && > - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { > + else > + print_metric(ctxp, NULL, NULL, "aborted cycles", 0); > + } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { > total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); > > if (avg) > ratio = total / avg; > > - fprintf(out, " # %8.0f cycles / transaction ", ratio); > - } else if (perf_stat_evsel__is(evsel, ELISION_START) && > - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { > + if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) > + print_metric(ctxp, NULL, "%8.0f", > + "cycles / transaction", ratio); > + else > + print_metric(ctxp, NULL, NULL, "cycles / transaction", > + 0); > + } else if (perf_stat_evsel__is(evsel, ELISION_START)) { > total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); > > if (avg) > ratio = total / avg; > - > - fprintf(out, " # %8.0f cycles / elision ", ratio); > - } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) && > - (ratio = avg_stats(&walltime_nsecs_stats)) != 0) { > - fprintf(out, " # %8.3f CPUs utilized ", avg / ratio); > + print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); > + } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) { > + if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) > + print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", > + avg / ratio); > + else > + print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); > } else if (runtime_nsecs_stats[cpu].n != 0) { > char unit = 'M'; > + char unit_buf[10]; > > total = avg_stats(&runtime_nsecs_stats[cpu]); > > @@ -427,9 +445,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct > perf_evsel *evsel, > ratio *= 1000; > unit = 'K'; > } > - > - fprintf(out, " # %8.3f %c/sec ", ratio, unit); > + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); > + print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); > } else { > - fprintf(out, " "); > + print_metric(ctxp, NULL, NULL, NULL, 0); > } > } > diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h > index 62448c8..e1899f6 100644 > --- a/tools/perf/util/stat.h > +++ b/tools/perf/util/stat.h > @@ -67,11 +67,22 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel); > > extern struct stats walltime_nsecs_stats; > > +typedef void (*print_metric_t)(void *ctx, const char *color, const char > *unit, > + const char *fmt, double val); > +typedef void (*new_line_t )(void *ctx); > + > void perf_stat__reset_shadow_stats(void); > void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, > int cpu); > -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, > - double avg, int cpu, enum aggr_mode aggr); > +struct perf_stat_output_ctx { > + void *ctx; > + print_metric_t print_metric; > + new_line_t new_line; > +}; > + > +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, > + double avg, int cpu, > + struct perf_stat_output_ctx *out); > > void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); > int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); > -- > 2.4.3 > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/