From: Andi Kleen <a...@linux.intel.com>

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, and Cycles in aborted transactions
(using the intx and intx_checkpoint qualifiers), as well as
Transaction Starts and Elision Starts, to compute the average transaction
length.

This gives a reasonable overview of the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |    3 +
 tools/perf/builtin-stat.c              |  104 +++++++++++++++++++++++++++++---
 2 files changed, 99 insertions(+), 8 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fa173b..6e55bd9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -108,7 +108,10 @@ with it.  --append may be used here.  Examples:
      3>results  perf stat --log-fd 3          -- $cmd
      3>>results perf stat --log-fd 3 --append -- $cmd
 
+-T::
+--transaction::
 
+Print statistics of transactional execution.  Implies --group.
 
 EXAMPLES
 --------
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 861f0ae..2364605 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -64,6 +64,9 @@
 #define CNTR_NOT_SUPPORTED     "<not supported>"
 #define CNTR_NOT_COUNTED       "<not counted>"
 
+#define is_intx(e)             ((e)->attr.intx && !(e)->attr.intx_checkpointed)
+#define is_intx_cp(e)          ((e)->attr.intx && (e)->attr.intx_checkpointed)
+
 static struct perf_event_attr default_attrs[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             
},
@@ -171,7 +174,21 @@ static struct perf_event_attr very_very_detailed_attrs[] = 
{
        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          
},
 };
 
+/*
+ * Transactional memory stats (-T)
+ * Must run as a group.
+ */
+static struct perf_event_attr transaction_attrs[] = {
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             
},
 
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS           
},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES             
},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, .intx = 1  
},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES,
+    .intx = 1, .intx_checkpointed = 1 },
+  { .type = PERF_TYPE_HW_TRANSACTION, .config = 
PERF_COUNT_HW_TRANSACTION_START        },
+  { .type = PERF_TYPE_HW_TRANSACTION, .config = PERF_COUNT_HW_ELISION_START    
},
+};
 
 static struct perf_evlist      *evsel_list;
 
@@ -187,6 +204,7 @@ static bool                 no_aggr                         
= false;
 static pid_t                   child_pid                       = -1;
 static bool                    null_run                        =  false;
 static int                     detailed_run                    =  0;
+static bool                    transaction_run                 =  false;
 static bool                    sync_run                        =  false;
 static bool                    big_num                         =  true;
 static int                     big_num_opt                     =  -1;
@@ -275,7 +293,11 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_intx_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_intxcp_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static int create_perf_stat_counter(struct perf_evsel *evsel,
                                    struct perf_evsel *first)
@@ -350,10 +372,18 @@ static void update_shadow_stats(struct perf_evsel 
*counter, u64 *count)
 {
        if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
                update_stats(&runtime_nsecs_stats[0], count[0]);
-       else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-               update_stats(&runtime_cycles_stats[0], count[0]);
-       else if (perf_evsel__match(counter, HARDWARE, 
HW_STALLED_CYCLES_FRONTEND))
-               update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) {
+               if (is_intx(counter))
+                       update_stats(&runtime_cycles_intx_stats[0], count[0]);
+               else if (is_intx_cp(counter))
+                       update_stats(&runtime_cycles_intxcp_stats[0], count[0]);
+               else
+                       update_stats(&runtime_cycles_stats[0], count[0]);
+       } else if (perf_evsel__match(counter, HW_TRANSACTION,
+                                    HW_TRANSACTION_START))
+               update_stats(&runtime_transaction_stats[0], count[0]);
+       else if (perf_evsel__match(counter, HW_TRANSACTION, HW_ELISION_START))
+               update_stats(&runtime_elision_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, 
HW_STALLED_CYCLES_BACKEND))
                update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
@@ -774,7 +804,7 @@ static void print_ll_cache_misses(int cpu, struct 
perf_evsel *evsel __used, doub
 
 static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
-       double total, ratio = 0.0;
+       double total, ratio = 0.0, total2;
        char cpustr[16] = { '\0', };
        const char *fmt;
 
@@ -868,12 +898,50 @@ static void abs_printout(int cpu, struct perf_evsel 
*evsel, double avg)
        } else if (perf_evsel__match(evsel, HARDWARE, 
HW_STALLED_CYCLES_BACKEND)) {
                print_stalled_cycles_backend(cpu, evsel, avg);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
-               total = avg_stats(&runtime_nsecs_stats[cpu]);
+               if (is_intx(evsel)) {
+                       total = avg_stats(&runtime_cycles_stats[cpu]);
+                       if (total)
+                               fprintf(output,
+                                       " #   %5.2f%% transactional          ",
+                                       100.0 * (avg / total));
+               } else if (is_intx_cp(evsel)) {
+                       total = avg_stats(&runtime_cycles_stats[cpu]);
+                       total2 = avg_stats(&runtime_cycles_intx_stats[cpu]);
+                       if (total)
+                               fprintf(output,
+                                       " #   %5.2f%% aborted cycles         ",
+                                       100.0 * ((total2-avg) / total));
+               } else {
+                       total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+                       if (total)
+                               ratio = 1.0 * avg / total;
+
+                       fprintf(output, " # %8.3f GHz                    ", 
+                                       ratio);
+               }
+       } else if (perf_evsel__match(evsel, HW_TRANSACTION,
+                                    HW_TRANSACTION_START) &&
+                  avg > 0 &&
+                  runtime_cycles_intx_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_cycles_intx_stats[cpu]);
+
+               if (total)
+                       ratio = total / avg;
+
+               fprintf(output, " # %8.0f cycles / transaction ", ratio);
+
+       } else if (perf_evsel__match(evsel, HW_TRANSACTION,
+                                     HW_ELISION_START) &&
+                  avg > 0 &&
+                  runtime_cycles_intx_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_cycles_intx_stats[cpu]);
 
                if (total)
-                       ratio = 1.0 * avg / total;
+                       ratio = total / avg;
+
+               fprintf(output, " # %8.0f cycles / elision     ", ratio);
 
-               fprintf(output, " # %8.3f GHz                    ", ratio);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                char unit = 'M';
 
@@ -1068,6 +1136,16 @@ static int stat__set_big_num(const struct option *opt 
__used,
        return 0;
 }
 
+/* Must force groups for transactions */
+static int stat__parse_transaction(const struct option *opt __used,
+                                  const char *str __used,
+                                  int unset __used)
+{
+       transaction_run = true;
+       group = true;
+       return 0;
+}
+
 static bool append_file;
 
 static const struct option options[] = {
@@ -1115,6 +1193,9 @@ static const struct option options[] = {
        OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
        OPT_INTEGER(0, "log-fd", &output_fd,
                    "log output to fd, instead of stderr"),
+       OPT_CALLBACK_NOOPT('T', "transaction", NULL, NULL,
+                    "capture hardware transaction success",
+                    stat__parse_transaction),
        OPT_END()
 };
 
@@ -1128,6 +1209,13 @@ static int add_default_attributes(void)
        if (null_run)
                return 0;
 
+       if (transaction_run) {
+               if (perf_evlist__add_attrs_array(evsel_list, 
+                                                transaction_attrs) < 0)
+                       return -1;
+               return 0;
+       }
+
        if (!evsel_list->nr_entries) {
                if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 
0)
                        return -1;
-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to