Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-21 Thread Andi Kleen
On Wed, Aug 21, 2013 at 10:15:25AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Aug 15, 2013 at 06:42:03PM +0200, Andi Kleen escreveu:
> > 
> > Here's an updated patch.
>  
> > perf, tools: Add perf stat --transaction v4
> > @@ -1419,6 +1559,8 @@ int cmd_stat(int argc, const char **argv, const char 
> > *prefix __maybe_unused)
> >  "aggregate counts per processor socket", AGGR_SOCKET),
> > OPT_SET_UINT(0, "per-core", &aggr_mode,
> >  "aggregate counts per physical processor core", AGGR_CORE),
> > +   OPT_BOOLEAN('T', "transaction", &transaction_run,
> > +   "hardware transaction statistics"),
> > OPT_END()
> 
> Clashes with another patch, from you, that is in Ingo's perf/core as
> well, that adds -D, so the above hunk doesn't apply:

I moved the new option to the beginning to avoid this problem

---

perf, tools: Add perf stat --transaction v5

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction
length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
v3: Port to latest tree
v4: Remove merge error. Avoid linear walks for comparisons. Check
transaction_run earlier. Minor fixes.
v5: Move option to avoid conflict. Improve description.
Signed-off-by: Andi Kleen 
---
 tools/perf/Documentation/perf-stat.txt |   5 ++
 tools/perf/builtin-stat.c  | 144 -
 tools/perf/util/evsel.h|   6 ++
 tools/perf/util/pmu.c  |  16 
 tools/perf/util/pmu.h  |   1 +
 5 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..40bc65a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical 
processor.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..6bd90e4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/pmu.h"
 #include "util/event.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/,"
+   "cpu/el-start/,"
+   "cpu/cycles-ct/"
+   "}"
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/"
+   "}"
+};
+
+/* must match transaction_attrs and the beginning limited_attrs */
+enum {
+   T_TASK_CLOCK,
+   T_INSTRUCTIONS,
+   T_CYCLES,
+   T_CYCLES_IN_TX,
+   T_TRANSACTION_START,
+   T_ELISION_START,
+   T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist  *evsel_list;
 
 static struct perf_target  target = {
@@ -90,6 +126,7 @@ static enum aggr_modeaggr_mode   
= AGGR_GLOBAL;
 static volatile pid_t  child_pid   = -1;
 static boolnull_run=  false;
 static int detailed_run=  0;
+static booltransaction_run;
 static boolbig_num =  true;
 static int big_num_opt =  -1;
 static const char  *csv_sep= NULL;
@@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct 

Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-21 Thread Arnaldo Carvalho de Melo
Em Thu, Aug 15, 2013 at 06:42:03PM +0200, Andi Kleen escreveu:
> 
> Here's an updated patch.
 
> perf, tools: Add perf stat --transaction v4
> @@ -1419,6 +1559,8 @@ int cmd_stat(int argc, const char **argv, const char 
> *prefix __maybe_unused)
>"aggregate counts per processor socket", AGGR_SOCKET),
>   OPT_SET_UINT(0, "per-core", &aggr_mode,
>"aggregate counts per physical processor core", AGGR_CORE),
> + OPT_BOOLEAN('T', "transaction", &transaction_run,
> + "hardware transaction statistics"),
>   OPT_END()

Clashes with another patch, from you, that is in Ingo's perf/core as
well, that adds -D, so the above hunk doesn't apply:

OPT_UINTEGER('I', "interval-print", &interval,
"print counts at regular interval in ms (>= 100)"),
OPT_SET_UINT(0, "per-socket", &aggr_mode,
 "aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-core", &aggr_mode,
 "aggregate counts per physical processor core", AGGR_CORE),
OPT_UINTEGER('D', "delay", &initial_delay,
 "ms to wait before starting measurement after program 
start"),
OPT_END()
};

- Arnaldo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-21 Thread Arnaldo Carvalho de Melo
Em Thu, Aug 15, 2013 at 06:42:03PM +0200, Andi Kleen escreveu:
 
 Here's an updated patch.
 
 perf, tools: Add perf stat --transaction v4
 @@ -1419,6 +1559,8 @@ int cmd_stat(int argc, const char **argv, const char 
 *prefix __maybe_unused)
aggregate counts per processor socket, AGGR_SOCKET),
   OPT_SET_UINT(0, per-core, aggr_mode,
aggregate counts per physical processor core, AGGR_CORE),
 + OPT_BOOLEAN('T', transaction, transaction_run,
 + hardware transaction statistics),
   OPT_END()

Clashes with another patch, from you, that is in Ingo's perf/core as
well, that adds -D, so the above hunk doesn't apply:

OPT_UINTEGER('I', interval-print, interval,
print counts at regular interval in ms (= 100)),
OPT_SET_UINT(0, per-socket, aggr_mode,
 aggregate counts per processor socket, AGGR_SOCKET),
OPT_SET_UINT(0, per-core, aggr_mode,
 aggregate counts per physical processor core, AGGR_CORE),
OPT_UINTEGER('D', delay, initial_delay,
 ms to wait before starting measurement after program 
start),
OPT_END()
};

- Arnaldo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-21 Thread Andi Kleen
On Wed, Aug 21, 2013 at 10:15:25AM -0300, Arnaldo Carvalho de Melo wrote:
 Em Thu, Aug 15, 2013 at 06:42:03PM +0200, Andi Kleen escreveu:
  
  Here's an updated patch.
  
  perf, tools: Add perf stat --transaction v4
  @@ -1419,6 +1559,8 @@ int cmd_stat(int argc, const char **argv, const char 
  *prefix __maybe_unused)
   aggregate counts per processor socket, AGGR_SOCKET),
  OPT_SET_UINT(0, per-core, aggr_mode,
   aggregate counts per physical processor core, AGGR_CORE),
  +   OPT_BOOLEAN('T', transaction, transaction_run,
  +   hardware transaction statistics),
  OPT_END()
 
 Clashes with another patch, from you, that is in Ingo's perf/core as
 well, that adds -D, so the above hunk doesn't apply:

I moved the new option to the beginning to avoid this problem

---

perf, tools: Add perf stat --transaction v5

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction
length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
v3: Port to latest tree
v4: Remove merge error. Avoid linear walks for comparisons. Check
transaction_run earlier. Minor fixes.
v5: Move option to avoid conflict. Improve description.
Signed-off-by: Andi Kleen a...@linux.intel.com
---
 tools/perf/Documentation/perf-stat.txt |   5 ++
 tools/perf/builtin-stat.c  | 144 -
 tools/perf/util/evsel.h|   6 ++
 tools/perf/util/pmu.c  |  16 
 tools/perf/util/pmu.h  |   1 +
 5 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..40bc65a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical 
processor.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..6bd90e4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include util/util.h
 #include util/parse-options.h
 #include util/parse-events.h
+#include util/pmu.h
 #include util/event.h
 #include util/evlist.h
 #include util/evsel.h
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+   task-clock,
+   {
+   instructions,
+   cycles,
+   cpu/cycles-t/,
+   cpu/tx-start/,
+   cpu/el-start/,
+   cpu/cycles-ct/
+   }
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+   task-clock,
+   {
+   instructions,
+   cycles,
+   cpu/cycles-t/,
+   cpu/tx-start/
+   }
+};
+
+/* must match transaction_attrs and the beginning limited_attrs */
+enum {
+   T_TASK_CLOCK,
+   T_INSTRUCTIONS,
+   T_CYCLES,
+   T_CYCLES_IN_TX,
+   T_TRANSACTION_START,
+   T_ELISION_START,
+   T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist  *evsel_list;
 
 static struct perf_target  target = {
@@ -90,6 +126,7 @@ static enum aggr_modeaggr_mode   
= AGGR_GLOBAL;
 static volatile pid_t  child_pid   = -1;
 static boolnull_run=  false;
 static int detailed_run=  0;
+static booltransaction_run;
 static boolbig_num =  true;
 static int big_num_opt =  -1;
 static const char  *csv_sep= NULL;
@@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];

Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Andi Kleen

Here's an updated patch.

perf, tools: Add perf stat --transaction v4

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction
length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
v3: Port to latest tree
v4: Remove merge error. Avoid linear walks for comparisons.
Check transaction_run
earlier. Minor fixes.
Signed-off-by: Andi Kleen 
---
 tools/perf/Documentation/perf-stat.txt |   5 ++
 tools/perf/builtin-stat.c  | 144 -
 tools/perf/util/evsel.h|   6 ++
 tools/perf/util/pmu.c  |  16 
 tools/perf/util/pmu.h  |   1 +
 5 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..40bc65a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical 
processor.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..d008384 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/pmu.h"
 #include "util/event.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/,"
+   "cpu/el-start/,"
+   "cpu/cycles-ct/"
+   "}"
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/"
+   "}"
+};
+
+/* must match transaction_attrs and the beginning limited_attrs */
+enum {
+   T_TASK_CLOCK,
+   T_INSTRUCTIONS,
+   T_CYCLES,
+   T_CYCLES_IN_TX,
+   T_TRANSACTION_START,
+   T_ELISION_START,
+   T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist  *evsel_list;
 
 static struct perf_target  target = {
@@ -90,6 +126,7 @@ static enum aggr_modeaggr_mode   
= AGGR_GLOBAL;
 static volatile pid_t  child_pid   = -1;
 static boolnull_run=  false;
 static int detailed_run=  0;
+static booltransaction_run;
 static boolbig_num =  true;
 static int big_num_opt =  -1;
 static const char  *csv_sep= NULL;
@@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -235,6 +275,11 @@ static void perf_stat__reset_stats(struct perf_evlist 
*evlist)
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+   memset(runtime_cycles_in_tx_stats, 0,
+   sizeof(runtime_cycles_in_tx_stats));
+   memset(runtime_transaction_stats, 0,
+   sizeof(runtime_transaction_stats));
+   

Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Arnaldo Carvalho de Melo
Em Thu, Aug 15, 2013 at 04:29:25PM +0200, Andi Kleen escreveu:
> > >   * Update various tracking values we maintain to print
> > >   * more semantic information such as miss/hit ratios,
> > > @@ -283,8 +340,12 @@ static void update_shadow_stats(struct perf_evsel 
> > > *counter, u64 *count)
> > >   update_stats(&runtime_nsecs_stats[0], count[0]);
> > >   else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
> > >   update_stats(&runtime_cycles_stats[0], count[0]);
> > > - else if (perf_evsel__match(counter, HARDWARE, 
> > > HW_STALLED_CYCLES_FRONTEND))
> > > - update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
> > 
> > Why remove the test for HW_STALLED_CYCLES_FRONTEND?
> 
> Hmm that was probably a merge error.
> 
> I'll fix & resend.

Ok
 
> > Got it why it doesn't need to account for the '{' in the array ;-)
> > 
> > While this works and isn't in any fast path, I find it ugly with all
> > this looping in nth_evsel.
> > 
> > Why not:
> > 
> > } else if (evsel->idx == T_CYCLES_IN_TX)) &&
> > 
> > ? I guess this works as you expect, no?
> 
> I had some problems with people using -T, but also setting custom
> events, that is why I added the extra comparison

But it'd fail the same, as if you go from the first entry in the
evlist->entries linked list, looping while incrementing a number, that
number should match the nth entry ->idx member.

I'll try to experiment with this when you resend with the parts you
agreed should be changed.
 
> The event lists are small enough that it's not really noticeable.
> I suppose could set up an array once.

It's not about performance hits, they are negligible, but about using
something that should provide equivalent results.

- Arnaldo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Andi Kleen
> >   * Update various tracking values we maintain to print
> >   * more semantic information such as miss/hit ratios,
> > @@ -283,8 +340,12 @@ static void update_shadow_stats(struct perf_evsel 
> > *counter, u64 *count)
> > update_stats(&runtime_nsecs_stats[0], count[0]);
> > else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
> > update_stats(&runtime_cycles_stats[0], count[0]);
> > -   else if (perf_evsel__match(counter, HARDWARE, 
> > HW_STALLED_CYCLES_FRONTEND))
> > -   update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
> 
> Why remove the test for HW_STALLED_CYCLES_FRONTEND?

Hmm that was probably a merge error.

I'll fix & resend.

> Got it why it doesn't need to account for the '{' in the array ;-)
> 
> While this works and isn't in any fast path, I find it ugly with all
> this looping in nth_evsel.
> 
> Why not:
> 
>   } else if (evsel->idx == T_CYCLES_IN_TX)) &&
> 
> ? I guess this works as you expect, no?

I had some problems with people using -T, but also setting custom
events, that is why I added the extra comparison

The event lists are small enough that it's not really noticeable.
I suppose I could set up an array once.

-Andi
-- 
a...@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Arnaldo Carvalho de Melo
Em Thu, Aug 15, 2013 at 04:06:33PM +0200, Andi Kleen escreveu:
> Anyways I use this option pretty heavily and the results are good
> to my knowledge.

Have you seen the comments about using evsel->idx?

- Arnaldo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Andi Kleen
> > +/* Default events used for perf stat -T */
> > +static const char * const transaction_attrs[] = {
> > +   "task-clock",
> > +   "{"
> > +   "instructions,"
> > +   "cycles,"
> > +   "cpu/cycles-t/,"
> > +   "cpu/tx-start/,"
> > +   "cpu/el-start/,"
> > +   "cpu/cycles-ct/"
> > +   "}"
> > +};
> > +
> > +/* More limited version when the CPU does not have all events. */
> > +static const char * const transaction_limited_attrs[] = {
> > +   "task-clock",
> > +   "{"
> > +   "instructions,"
> > +   "cycles,"
> > +   "cpu/cycles-t/,"
> > +   "cpu/tx-start/"
> > +   "}"
> > +};
> > +
> > +/* must match the transaction_attrs above */
> 
> Match in what way? It kinda matches the first one (transaction_attrs):

The second is just the beginning of the first.

The { } don't count for matches.

For the limited run the comparisons of the elements that are not there
fail.

> 
> enum {
>   T_TASK_CLOCK,== "task-clock",
>   T_INSTRUCTIONS,  == "instructions,"
>   T_CYCLES,== "cycles,"
>   T_CYCLES_IN_TX,  ~= "cpu/cycles-t/,"
>   T_TRANSACTION_START, != "cpu/tx-start/,"
>   T_ELISION_START, ~= "cpu/el-start/,"
>   T_CYCLES_IN_TX_CP,   != "cpu/cycles-ct/"
> };

I did a quick test of the fallback path by manually disabling the events,
and it seemed to work, but it's really for POWER based on Michael E's feedback.

> 
> Also the enum numbers won't match the array positions due to the '{'
> grouping (?) entries, so, without looking further, how can this match?
> Reading on...

The match is on the result array.  The results don't contain the { } 
as individual elements.

Anyways I use this option pretty heavily and the results are good
to my knowledge.

-Andi

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Arnaldo Carvalho de Melo
Em Wed, Aug 14, 2013 at 11:34:27AM -0700, Andi Kleen escreveu:
> From: Andi Kleen 
> 
> Add support to perf stat to print the basic transactional execution 
> statistics:
> Total cycles, Cycles in Transaction, Cycles in aborted transsactions
> using the in_tx and in_tx_checkpoint qualifiers.
> Transaction Starts and Elision Starts, to compute the average transaction 
> length.
> 
> This is a reasonable overview over the success of the transactions.
> 
> Enable with a new --transaction / -T option.
> 
> This requires measuring these events in a group, since they depend on each
> other.
> 
> This is implemented by using TM sysfs events exported by the kernel
> 
> v2: Only print the extended statistics when the option is enabled.
> This avoids negative output when the user specifies the -T events
> in separate groups.
> v3: Port to latest tree
> Signed-off-by: Andi Kleen 
> ---
>  tools/perf/Documentation/perf-stat.txt |   5 ++
>  tools/perf/builtin-stat.c  | 132 
> -
>  tools/perf/util/evsel.h|   6 ++
>  tools/perf/util/pmu.c  |  16 
>  tools/perf/util/pmu.h  |   1 +
>  5 files changed, 157 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/perf/Documentation/perf-stat.txt 
> b/tools/perf/Documentation/perf-stat.txt
> index 2fe87fb..40bc65a 100644
> --- a/tools/perf/Documentation/perf-stat.txt
> +++ b/tools/perf/Documentation/perf-stat.txt
> @@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
> cores.  To enable this mod
>  use --per-core in addition to -a. (system-wide).  The output includes the
>  core number and the number of online logical processors on that physical 
> processor.
>  
> +-T::
> +--transaction::
> +
> +Print statistics of transactional execution if supported.
> +
>  EXAMPLES
>  
>  
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 352fbd7..d68bf93 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -46,6 +46,7 @@
>  #include "util/util.h"
>  #include "util/parse-options.h"
>  #include "util/parse-events.h"
> +#include "util/pmu.h"
>  #include "util/event.h"
>  #include "util/evlist.h"
>  #include "util/evsel.h"
> @@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
> char *prefix);
>  static void print_counter(struct perf_evsel *counter, char *prefix);
>  static void print_aggr(char *prefix);
>  
> +/* Default events used for perf stat -T */
> +static const char * const transaction_attrs[] = {
> + "task-clock",
> + "{"
> + "instructions,"
> + "cycles,"
> + "cpu/cycles-t/,"
> + "cpu/tx-start/,"
> + "cpu/el-start/,"
> + "cpu/cycles-ct/"
> + "}"
> +};
> +
> +/* More limited version when the CPU does not have all events. */
> +static const char * const transaction_limited_attrs[] = {
> + "task-clock",
> + "{"
> + "instructions,"
> + "cycles,"
> + "cpu/cycles-t/,"
> + "cpu/tx-start/"
> + "}"
> +};
> +
> +/* must match the transaction_attrs above */

Match in what way? It kinda matches the first one (transaction_attrs):

enum {
T_TASK_CLOCK,== "task-clock",
T_INSTRUCTIONS,  == "instructions,"
T_CYCLES,== "cycles,"
T_CYCLES_IN_TX,  ~= "cpu/cycles-t/,"
T_TRANSACTION_START, != "cpu/tx-start/,"
T_ELISION_START, ~= "cpu/el-start/,"
T_CYCLES_IN_TX_CP,   != "cpu/cycles-ct/"
};

Also the enum numbers won't match the array positions due to the '{'
grouping (?) entries, so, without looking further, how can this match?
Reading on...

Also:

~=  Kinda matches
==  Matches
!=  Doesn't look like matching, does it?

:-)

>  static struct perf_evlist*evsel_list;
>  
>  static struct perf_targettarget = {
> @@ -90,6 +126,7 @@ static enum aggr_mode  aggr_mode   
> = AGGR_GLOBAL;
>  static volatile pid_tchild_pid   = -1;
>  static bool  null_run=  false;
>  static int   detailed_run=  0;
> +static bool  transaction_run;
>  static bool  big_num =  true;
>  static int   big_num_opt =  -1;
>  static const char*csv_sep= NULL;
> @@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
>  static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
>  static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
>  static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
> +static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
>  static struct stats walltime_nsecs_stats;
> +static struct stats runtime_transaction_stats[MAX_NR_CPUS];
> +static struct stats runtime_elision_stats[MAX_NR_CPUS];
>  
>  static void perf_stat__reset_stats(struct perf_evlist *evlist)
>  

Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Peter Zijlstra
On Wed, Aug 14, 2013 at 11:34:27AM -0700, Andi Kleen wrote:
> From: Andi Kleen 
> 
> Add support to perf stat to print the basic transactional execution 
> statistics:
> Total cycles, Cycles in Transaction, Cycles in aborted transsactions
> using the in_tx and in_tx_checkpoint qualifiers.
> Transaction Starts and Elision Starts, to compute the average transaction 
> length.
> 
> This is a reasonable overview over the success of the transactions.
> 
> Enable with a new --transaction / -T option.
> 
> This requires measuring these events in a group, since they depend on each
> other.
> 
> This is implemented by using TM sysfs events exported by the kernel
> 
> v2: Only print the extended statistics when the option is enabled.
> This avoids negative output when the user specifies the -T events
> in separate groups.
> v3: Port to latest tree
> Signed-off-by: Andi Kleen 
> ---
>  tools/perf/Documentation/perf-stat.txt |   5 ++
>  tools/perf/builtin-stat.c  | 132 
> -
>  tools/perf/util/evsel.h|   6 ++
>  tools/perf/util/pmu.c  |  16 
>  tools/perf/util/pmu.h  |   1 +
>  5 files changed, 157 insertions(+), 3 deletions(-)

Arnaldo, ACK on this?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Peter Zijlstra
On Wed, Aug 14, 2013 at 11:34:27AM -0700, Andi Kleen wrote:
 From: Andi Kleen a...@linux.intel.com
 
 Add support to perf stat to print the basic transactional execution 
 statistics:
 Total cycles, Cycles in Transaction, Cycles in aborted transsactions
 using the in_tx and in_tx_checkpoint qualifiers.
 Transaction Starts and Elision Starts, to compute the average transaction 
 length.
 
 This is a reasonable overview over the success of the transactions.
 
 Enable with a new --transaction / -T option.
 
 This requires measuring these events in a group, since they depend on each
 other.
 
 This is implemented by using TM sysfs events exported by the kernel
 
 v2: Only print the extended statistics when the option is enabled.
 This avoids negative output when the user specifies the -T events
 in separate groups.
 v3: Port to latest tree
 Signed-off-by: Andi Kleen a...@linux.intel.com
 ---
  tools/perf/Documentation/perf-stat.txt |   5 ++
  tools/perf/builtin-stat.c  | 132 
 -
  tools/perf/util/evsel.h|   6 ++
  tools/perf/util/pmu.c  |  16 
  tools/perf/util/pmu.h  |   1 +
  5 files changed, 157 insertions(+), 3 deletions(-)

Arnaldo, ACK on this?
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Arnaldo Carvalho de Melo
Em Wed, Aug 14, 2013 at 11:34:27AM -0700, Andi Kleen escreveu:
 From: Andi Kleen a...@linux.intel.com
 
 Add support to perf stat to print the basic transactional execution 
 statistics:
 Total cycles, Cycles in Transaction, Cycles in aborted transsactions
 using the in_tx and in_tx_checkpoint qualifiers.
 Transaction Starts and Elision Starts, to compute the average transaction 
 length.
 
 This is a reasonable overview over the success of the transactions.
 
 Enable with a new --transaction / -T option.
 
 This requires measuring these events in a group, since they depend on each
 other.
 
 This is implemented by using TM sysfs events exported by the kernel
 
 v2: Only print the extended statistics when the option is enabled.
 This avoids negative output when the user specifies the -T events
 in separate groups.
 v3: Port to latest tree
 Signed-off-by: Andi Kleen a...@linux.intel.com
 ---
  tools/perf/Documentation/perf-stat.txt |   5 ++
  tools/perf/builtin-stat.c  | 132 
 -
  tools/perf/util/evsel.h|   6 ++
  tools/perf/util/pmu.c  |  16 
  tools/perf/util/pmu.h  |   1 +
  5 files changed, 157 insertions(+), 3 deletions(-)
 
 diff --git a/tools/perf/Documentation/perf-stat.txt 
 b/tools/perf/Documentation/perf-stat.txt
 index 2fe87fb..40bc65a 100644
 --- a/tools/perf/Documentation/perf-stat.txt
 +++ b/tools/perf/Documentation/perf-stat.txt
 @@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
 cores.  To enable this mod
  use --per-core in addition to -a. (system-wide).  The output includes the
  core number and the number of online logical processors on that physical 
 processor.
  
 +-T::
 +--transaction::
 +
 +Print statistics of transactional execution if supported.
 +
  EXAMPLES
  
  
 diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
 index 352fbd7..d68bf93 100644
 --- a/tools/perf/builtin-stat.c
 +++ b/tools/perf/builtin-stat.c
 @@ -46,6 +46,7 @@
  #include util/util.h
  #include util/parse-options.h
  #include util/parse-events.h
 +#include util/pmu.h
  #include util/event.h
  #include util/evlist.h
  #include util/evsel.h
 @@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
 char *prefix);
  static void print_counter(struct perf_evsel *counter, char *prefix);
  static void print_aggr(char *prefix);
  
 +/* Default events used for perf stat -T */
 +static const char * const transaction_attrs[] = {
 + task-clock,
 + {
 + instructions,
 + cycles,
 + cpu/cycles-t/,
 + cpu/tx-start/,
 + cpu/el-start/,
 + cpu/cycles-ct/
 + }
 +};
 +
 +/* More limited version when the CPU does not have all events. */
 +static const char * const transaction_limited_attrs[] = {
 + task-clock,
 + {
 + instructions,
 + cycles,
 + cpu/cycles-t/,
 + cpu/tx-start/
 + }
 +};
 +
 +/* must match the transaction_attrs above */

Match in what way? It kinda matches the first one (transaction_attrs):

enum {
T_TASK_CLOCK,== task-clock,
T_INSTRUCTIONS,  == instructions,
T_CYCLES,== cycles,
T_CYCLES_IN_TX,  ~= cpu/cycles-t/,
T_TRANSACTION_START, != cpu/tx-start/,
T_ELISION_START, ~= cpu/el-start/,
T_CYCLES_IN_TX_CP,   != cpu/cycles-ct/
};

Also the enum numbers won't match the array positions due to the '{'
grouping (?) entries, so, without looking further, how can this match?
Reading on...

Also:

~=  Kinda matches
==  Matches
!=  Doesn't look like matching, does it?

:-)

  static struct perf_evlist*evsel_list;
  
  static struct perf_targettarget = {
 @@ -90,6 +126,7 @@ static enum aggr_mode  aggr_mode   
 = AGGR_GLOBAL;
  static volatile pid_tchild_pid   = -1;
  static bool  null_run=  false;
  static int   detailed_run=  0;
 +static bool  transaction_run;
  static bool  big_num =  true;
  static int   big_num_opt =  -1;
  static const char*csv_sep= NULL;
 @@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
  static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
  static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
  static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
 +static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
  static struct stats walltime_nsecs_stats;
 +static struct stats runtime_transaction_stats[MAX_NR_CPUS];
 +static struct stats runtime_elision_stats[MAX_NR_CPUS];
  
  static void perf_stat__reset_stats(struct perf_evlist *evlist)
  {
 @@ -235,6 +275,11 @@ static void perf_stat__reset_stats(struct perf_evlist 
 *evlist)
   memset(runtime_ll_cache_stats, 0, 

Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Andi Kleen
  +/* Default events used for perf stat -T */
  +static const char * const transaction_attrs[] = {
  +   task-clock,
  +   {
  +   instructions,
  +   cycles,
  +   cpu/cycles-t/,
  +   cpu/tx-start/,
  +   cpu/el-start/,
  +   cpu/cycles-ct/
  +   }
  +};
  +
  +/* More limited version when the CPU does not have all events. */
  +static const char * const transaction_limited_attrs[] = {
  +   task-clock,
  +   {
  +   instructions,
  +   cycles,
  +   cpu/cycles-t/,
  +   cpu/tx-start/
  +   }
  +};
  +
  +/* must match the transaction_attrs above */
 
 Match in what way? It kinda matches the first one (transaction_attrs):

The second is just the beginning of the first.

The { } don't count for matches.

For the limited run the comparisons of the elements that are not there
fail.

 
 enum {
   T_TASK_CLOCK,== task-clock,
   T_INSTRUCTIONS,  == instructions,
   T_CYCLES,== cycles,
   T_CYCLES_IN_TX,  ~= cpu/cycles-t/,
   T_TRANSACTION_START, != cpu/tx-start/,
   T_ELISION_START, ~= cpu/el-start/,
   T_CYCLES_IN_TX_CP,   != cpu/cycles-ct/
 };

I did a quick test of the fallback path by manually disabling the events,
and it seemed to work, but it's really for POWER based on Michael E's feedback.

 
 Also the enum numbers won't match the array positions due to the '{'
 grouping (?) entries, so, without looking further, how can this match?
 Reading on...

The match is on the result array.  The results don't contain the { } 
as individual elements.

Anyways I use this option pretty heavily and the results are good
to my knowledge.

-Andi

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Arnaldo Carvalho de Melo
Em Thu, Aug 15, 2013 at 04:06:33PM +0200, Andi Kleen escreveu:
 Anyways I use this option pretty heavily and the results are good
 to my knowledge.

Have you seen the comments about using evsel->idx?

- Arnaldo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Andi Kleen
* Update various tracking values we maintain to print
* more semantic information such as miss/hit ratios,
  @@ -283,8 +340,12 @@ static void update_shadow_stats(struct perf_evsel 
  *counter, u64 *count)
  update_stats(runtime_nsecs_stats[0], count[0]);
  else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
  update_stats(runtime_cycles_stats[0], count[0]);
  -   else if (perf_evsel__match(counter, HARDWARE, 
  HW_STALLED_CYCLES_FRONTEND))
  -   update_stats(runtime_stalled_cycles_front_stats[0], count[0]);
 
 Why remove the test for HW_STALLED_CYCLES_FRONTEND?

Hmm that was probably a merge error.

I'll fix & resend.

 Got it why it doesn't need to account for the '{' in the array ;-)
 
 While this works and isn't in any fast path, I find it ugly with all
 this looping in nth_evsel.
 
 Why not:
 
   } else if (evsel->idx == T_CYCLES_IN_TX)) 
 
 ? I guess this works as you expect, no?

I had some problems with people using -T, but also setting custom
events, that is why I added the extra comparison

The event lists are small enough that it's not really noticeable.
I suppose could set up an array once.

-Andi
-- 
a...@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Arnaldo Carvalho de Melo
Em Thu, Aug 15, 2013 at 04:29:25PM +0200, Andi Kleen escreveu:
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
   @@ -283,8 +340,12 @@ static void update_shadow_stats(struct perf_evsel 
   *counter, u64 *count)
 update_stats(runtime_nsecs_stats[0], count[0]);
 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 update_stats(runtime_cycles_stats[0], count[0]);
   - else if (perf_evsel__match(counter, HARDWARE, 
   HW_STALLED_CYCLES_FRONTEND))
   - update_stats(runtime_stalled_cycles_front_stats[0], count[0]);
  
  Why remove the test for HW_STALLED_CYCLES_FRONTEND?
 
 Hmm that was probably a merge error.
 
 I'll fix & resend.

Ok
 
  Got it why it doesn't need to account for the '{' in the array ;-)
  
  While this works and isn't in any fast path, I find it ugly with all
  this looping in nth_evsel.
  
  Why not:
  
  } else if (evsel->idx == T_CYCLES_IN_TX)) 
  
  ? I guess this works as you expect, no?
 
 I had some problems with people using -T, but also setting custom
 events, that is why I added the extra comparison

But it'd fail the same, as if you go from the first entry in the
evlist->entries linked list, looping while incrementing a number, that
number should match the nth entry's ->idx member.

I'll try to experiment with this when you resend with the parts you
agreed should be changed.
 
 The event lists are small enough that it's not really noticeable.
 I suppose could set up an array once.

It's not about performance hits, they are negligible, but about using
something that should provide equivalent results.

- Arnaldo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-15 Thread Andi Kleen

Here's an updated patch.

perf, tools: Add perf stat --transaction v4

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction
length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
v3: Port to latest tree
v4: Remove merge error. Avoid linear walks for comparisons.
Check transaction_run
earlier. Minor fixes.
Signed-off-by: Andi Kleen a...@linux.intel.com
---
 tools/perf/Documentation/perf-stat.txt |   5 ++
 tools/perf/builtin-stat.c  | 144 -
 tools/perf/util/evsel.h|   6 ++
 tools/perf/util/pmu.c  |  16 
 tools/perf/util/pmu.h  |   1 +
 5 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..40bc65a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical 
processor.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..d008384 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include util/util.h
 #include util/parse-options.h
 #include util/parse-events.h
+#include util/pmu.h
 #include util/event.h
 #include util/evlist.h
 #include util/evsel.h
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+   task-clock,
+   {
+   instructions,
+   cycles,
+   cpu/cycles-t/,
+   cpu/tx-start/,
+   cpu/el-start/,
+   cpu/cycles-ct/
+   }
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+   task-clock,
+   {
+   instructions,
+   cycles,
+   cpu/cycles-t/,
+   cpu/tx-start/
+   }
+};
+
+/* must match transaction_attrs and the beginning limited_attrs */
+enum {
+   T_TASK_CLOCK,
+   T_INSTRUCTIONS,
+   T_CYCLES,
+   T_CYCLES_IN_TX,
+   T_TRANSACTION_START,
+   T_ELISION_START,
+   T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist  *evsel_list;
 
 static struct perf_target  target = {
@@ -90,6 +126,7 @@ static enum aggr_modeaggr_mode   
= AGGR_GLOBAL;
 static volatile pid_t  child_pid   = -1;
 static boolnull_run=  false;
 static int detailed_run=  0;
+static booltransaction_run;
 static boolbig_num =  true;
 static int big_num_opt =  -1;
 static const char  *csv_sep= NULL;
@@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -235,6 +275,11 @@ static void perf_stat__reset_stats(struct perf_evlist 
*evlist)
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+   memset(runtime_cycles_in_tx_stats, 0,
+   sizeof(runtime_cycles_in_tx_stats));
+   memset(runtime_transaction_stats, 0,
+   sizeof(runtime_transaction_stats));
+   memset(runtime_elision_stats, 0, 

[PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-14 Thread Andi Kleen
From: Andi Kleen 

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction 
length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
v3: Port to latest tree
Signed-off-by: Andi Kleen 
---
 tools/perf/Documentation/perf-stat.txt |   5 ++
 tools/perf/builtin-stat.c  | 132 -
 tools/perf/util/evsel.h|   6 ++
 tools/perf/util/pmu.c  |  16 
 tools/perf/util/pmu.h  |   1 +
 5 files changed, 157 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..40bc65a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical 
processor.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..d68bf93 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/pmu.h"
 #include "util/event.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/,"
+   "cpu/el-start/,"
+   "cpu/cycles-ct/"
+   "}"
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/"
+   "}"
+};
+
+/* must match the transaction_attrs above */
+enum {
+   T_TASK_CLOCK,
+   T_INSTRUCTIONS,
+   T_CYCLES,
+   T_CYCLES_IN_TX,
+   T_TRANSACTION_START,
+   T_ELISION_START,
+   T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist  *evsel_list;
 
 static struct perf_target  target = {
@@ -90,6 +126,7 @@ static enum aggr_modeaggr_mode   
= AGGR_GLOBAL;
 static volatile pid_t  child_pid   = -1;
 static boolnull_run=  false;
 static int detailed_run=  0;
+static booltransaction_run;
 static boolbig_num =  true;
 static int big_num_opt =  -1;
 static const char  *csv_sep= NULL;
@@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -235,6 +275,11 @@ static void perf_stat__reset_stats(struct perf_evlist 
*evlist)
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+   memset(runtime_cycles_in_tx_stats, 0,
+   sizeof(runtime_cycles_in_tx_stats));
+   memset(runtime_transaction_stats, 0,
+   sizeof(runtime_transaction_stats));
+   memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
memset(_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
 }
 
@@ -272,6 +317,18 @@ static inline int nsec_counter(struct 

[PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-14 Thread Andi Kleen
From: Andi Kleen a...@linux.intel.com

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction 
length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
v3: Port to latest tree
Signed-off-by: Andi Kleen a...@linux.intel.com
---
 tools/perf/Documentation/perf-stat.txt |   5 ++
 tools/perf/builtin-stat.c  | 132 -
 tools/perf/util/evsel.h|   6 ++
 tools/perf/util/pmu.c  |  16 
 tools/perf/util/pmu.h  |   1 +
 5 files changed, 157 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..40bc65a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical 
processor.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..d68bf93 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include util/util.h
 #include util/parse-options.h
 #include util/parse-events.h
+#include util/pmu.h
 #include util/event.h
 #include util/evlist.h
 #include util/evsel.h
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+   task-clock,
+   {
+   instructions,
+   cycles,
+   cpu/cycles-t/,
+   cpu/tx-start/,
+   cpu/el-start/,
+   cpu/cycles-ct/
+   }
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+   task-clock,
+   {
+   instructions,
+   cycles,
+   cpu/cycles-t/,
+   cpu/tx-start/
+   }
+};
+
+/* must match the transaction_attrs above */
+enum {
+   T_TASK_CLOCK,
+   T_INSTRUCTIONS,
+   T_CYCLES,
+   T_CYCLES_IN_TX,
+   T_TRANSACTION_START,
+   T_ELISION_START,
+   T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist  *evsel_list;
 
 static struct perf_target  target = {
@@ -90,6 +126,7 @@ static enum aggr_modeaggr_mode   
= AGGR_GLOBAL;
 static volatile pid_t  child_pid   = -1;
 static boolnull_run=  false;
 static int detailed_run=  0;
+static booltransaction_run;
 static boolbig_num =  true;
 static int big_num_opt =  -1;
 static const char  *csv_sep= NULL;
@@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -235,6 +275,11 @@ static void perf_stat__reset_stats(struct perf_evlist 
*evlist)
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+   memset(runtime_cycles_in_tx_stats, 0,
+   sizeof(runtime_cycles_in_tx_stats));
+   memset(runtime_transaction_stats, 0,
+   sizeof(runtime_transaction_stats));
+   memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
memset(walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
 }
 
@@ -272,6 +317,18 @@ static inline int nsec_counter(struct 

[PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-08 Thread Andi Kleen
From: Andi Kleen 

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction 
length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
v3: Port to latest tree
Signed-off-by: Andi Kleen 
---
 tools/perf/Documentation/perf-stat.txt |   5 ++
 tools/perf/builtin-stat.c  | 132 -
 tools/perf/util/evsel.h|   6 ++
 tools/perf/util/pmu.c  |  16 
 tools/perf/util/pmu.h  |   1 +
 5 files changed, 157 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..40bc65a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical 
processor.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..d68bf93 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/pmu.h"
 #include "util/event.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/,"
+   "cpu/el-start/,"
+   "cpu/cycles-ct/"
+   "}"
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/"
+   "}"
+};
+
+/* must match the transaction_attrs above */
+enum {
+   T_TASK_CLOCK,
+   T_INSTRUCTIONS,
+   T_CYCLES,
+   T_CYCLES_IN_TX,
+   T_TRANSACTION_START,
+   T_ELISION_START,
+   T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist  *evsel_list;
 
 static struct perf_target  target = {
@@ -90,6 +126,7 @@ static enum aggr_modeaggr_mode   
= AGGR_GLOBAL;
 static volatile pid_t  child_pid   = -1;
 static boolnull_run=  false;
 static int detailed_run=  0;
+static booltransaction_run;
 static boolbig_num =  true;
 static int big_num_opt =  -1;
 static const char  *csv_sep= NULL;
@@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -235,6 +275,11 @@ static void perf_stat__reset_stats(struct perf_evlist 
*evlist)
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+   memset(runtime_cycles_in_tx_stats, 0,
+   sizeof(runtime_cycles_in_tx_stats));
+   memset(runtime_transaction_stats, 0,
+   sizeof(runtime_transaction_stats));
+   memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
memset(_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
 }
 
@@ -272,6 +317,18 @@ static inline int nsec_counter(struct 

[PATCH 4/4] perf, tools: Add perf stat --transaction v3

2013-08-08 Thread Andi Kleen
From: Andi Kleen a...@linux.intel.com

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction 
length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
v3: Port to latest tree
Signed-off-by: Andi Kleen a...@linux.intel.com
---
 tools/perf/Documentation/perf-stat.txt |   5 ++
 tools/perf/builtin-stat.c  | 132 -
 tools/perf/util/evsel.h|   6 ++
 tools/perf/util/pmu.c  |  16 
 tools/perf/util/pmu.h  |   1 +
 5 files changed, 157 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..40bc65a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical 
processor.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..d68bf93 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include util/util.h
 #include util/parse-options.h
 #include util/parse-events.h
+#include util/pmu.h
 #include util/event.h
 #include util/evlist.h
 #include util/evsel.h
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+   task-clock,
+   {
+   instructions,
+   cycles,
+   cpu/cycles-t/,
+   cpu/tx-start/,
+   cpu/el-start/,
+   cpu/cycles-ct/
+   }
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+   task-clock,
+   {
+   instructions,
+   cycles,
+   cpu/cycles-t/,
+   cpu/tx-start/
+   }
+};
+
+/* must match the transaction_attrs above */
+enum {
+   T_TASK_CLOCK,
+   T_INSTRUCTIONS,
+   T_CYCLES,
+   T_CYCLES_IN_TX,
+   T_TRANSACTION_START,
+   T_ELISION_START,
+   T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist  *evsel_list;
 
 static struct perf_target  target = {
@@ -90,6 +126,7 @@ static enum aggr_modeaggr_mode   
= AGGR_GLOBAL;
 static volatile pid_t  child_pid   = -1;
 static boolnull_run=  false;
 static int detailed_run=  0;
+static booltransaction_run;
 static boolbig_num =  true;
 static int big_num_opt =  -1;
 static const char  *csv_sep= NULL;
@@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -235,6 +275,11 @@ static void perf_stat__reset_stats(struct perf_evlist 
*evlist)
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+   memset(runtime_cycles_in_tx_stats, 0,
+   sizeof(runtime_cycles_in_tx_stats));
+   memset(runtime_transaction_stats, 0,
+   sizeof(runtime_transaction_stats));
+   memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
memset(walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
 }
 
@@ -272,6 +317,18 @@ static inline int nsec_counter(struct