Re: [Qemu-devel] [PATCH v5 02/10] accel: collecting TB execution count

2019-08-15 Thread Alex Bennée


vandersonmr  writes:

> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
> enabled, then we instrument the start code of this TB
> to atomically count the number of times it is executed.
> We count both the number of "normal" executions and atomic
> executions of a TB.
>
> The execution count of the TB is stored in its respective
> TBS.
>
> All TBStatistics are created by default with the flags from
> default_tbstats_flag.
>
> Signed-off-by: Vanderson M. do Rosario 
> ---
>  accel/tcg/cpu-exec.c  |  4 
>  accel/tcg/tb-stats.c  |  5 +
>  accel/tcg/tcg-runtime.c   |  7 +++
>  accel/tcg/tcg-runtime.h   |  2 ++
>  accel/tcg/translate-all.c |  7 +++
>  accel/tcg/translator.c|  1 +
>  include/exec/gen-icount.h |  9 +
>  include/exec/tb-stats.h   | 19 +++
>  util/log.c|  1 +
>  9 files changed, 55 insertions(+)
>
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 6c85c3ee1e..e54be69499 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -252,6 +252,10 @@ void cpu_exec_step_atomic(CPUState *cpu)
>
>  start_exclusive();
>
> +if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
> +tb->tb_stats->executions.atomic++;
> +}
> +
>  /* Since we got here, we know that parallel_cpus must be true.  */
>  parallel_cpus = false;
>  in_exclusive_region = true;
> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> index 02844717cb..3489133e9e 100644
> --- a/accel/tcg/tb-stats.c
> +++ b/accel/tcg/tb-stats.c
> @@ -37,3 +37,8 @@ bool tb_stats_collection_paused(void)
>  {
>  return tcg_collect_tb_stats == TB_STATS_PAUSED;
>  }
> +
> +uint32_t get_default_tbstats_flag(void)
> +{
> +return default_tbstats_flag;
> +}
> diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
> index 8a1e408e31..6f4aafba11 100644
> --- a/accel/tcg/tcg-runtime.c
> +++ b/accel/tcg/tcg-runtime.c
> @@ -167,3 +167,10 @@ void HELPER(exit_atomic)(CPUArchState *env)
>  {
>  cpu_loop_exit_atomic(env_cpu(env), GETPC());
>  }
> +
> +void HELPER(inc_exec_freq)(void *ptr)
> +{
> +TBStatistics *stats = (TBStatistics *) ptr;
> +g_assert(stats);
> +atomic_inc(>executions.normal);
> +}
> diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
> index 4fa61b49b4..bf0b75dbe8 100644
> --- a/accel/tcg/tcg-runtime.h
> +++ b/accel/tcg/tcg-runtime.h
> @@ -28,6 +28,8 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, 
> env)
>
>  DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>
> +DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
> +
>  #ifdef CONFIG_SOFTMMU
>
>  DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index b7bccacd3b..df08d183df 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1785,6 +1785,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>   */
>  if (tb_stats_collection_enabled()) {
>  tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags, tb);
> +uint32_t flag = get_default_tbstats_flag();
> +
> +if (qemu_log_in_addr_range(tb->pc)) {

Minor nit: the compiler should spot it but you could move the flag
fetching inside this block.

> +if (flag & TB_EXEC_STATS) {
> +tb->tb_stats->stats_enabled |= TB_EXEC_STATS;
> +}
> +}
>  } else {
>  tb->tb_stats = NULL;
>  }
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 9226a348a3..396a11e828 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -46,6 +46,7 @@ void translator_loop(const TranslatorOps *ops, 
> DisasContextBase *db,
>
>  ops->init_disas_context(db, cpu);
>  tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
> +gen_tb_exec_count(tb);
>
>  /* Reset the temp count so that we can identify leaks */
>  tcg_clear_temp_count();
> diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
> index f7669b6841..b3efe41894 100644
> --- a/include/exec/gen-icount.h
> +++ b/include/exec/gen-icount.h
> @@ -7,6 +7,15 @@
>
>  static TCGOp *icount_start_insn;
>
> +static inline void gen_tb_exec_count(TranslationBlock *tb)
> +{
> +if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
> +TCGv_ptr ptr = tcg_const_ptr(tb->tb_stats);
> +gen_helper_inc_exec_freq(ptr);
> +tcg_temp_free_ptr(ptr);
> +}
> +}
> +
>  static inline void gen_tb_start(TranslationBlock *tb)
>  {
>  TCGv_i32 count, imm;
> diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
> index cc8f8a6ce6..0265050b79 100644
> --- a/include/exec/tb-stats.h
> +++ b/include/exec/tb-stats.h
> @@ -6,6 +6,9 @@
>  #include "exec/tb-context.h"
>  #include "tcg.h"
>
> +#define tb_stats_enabled(tb, JIT_STATS) \
> +(tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
> +
>  typedef struct TBStatistics TBStatistics;
>

[Qemu-devel] [PATCH v5 02/10] accel: collecting TB execution count

2019-08-14 Thread vandersonmr
If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of this TB
to atomically count the number of times it is executed.
We count both the number of "normal" executions and atomic
executions of a TB.

The execution count of the TB is stored in its respective
TBS.

All TBStatistics are created by default with the flags from
default_tbstats_flag.

Signed-off-by: Vanderson M. do Rosario 
---
 accel/tcg/cpu-exec.c  |  4 
 accel/tcg/tb-stats.c  |  5 +
 accel/tcg/tcg-runtime.c   |  7 +++
 accel/tcg/tcg-runtime.h   |  2 ++
 accel/tcg/translate-all.c |  7 +++
 accel/tcg/translator.c|  1 +
 include/exec/gen-icount.h |  9 +
 include/exec/tb-stats.h   | 19 +++
 util/log.c|  1 +
 9 files changed, 55 insertions(+)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 6c85c3ee1e..e54be69499 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -252,6 +252,10 @@ void cpu_exec_step_atomic(CPUState *cpu)
 
 start_exclusive();
 
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+tb->tb_stats->executions.atomic++;
+}
+
 /* Since we got here, we know that parallel_cpus must be true.  */
 parallel_cpus = false;
 in_exclusive_region = true;
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 02844717cb..3489133e9e 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -37,3 +37,8 @@ bool tb_stats_collection_paused(void)
 {
 return tcg_collect_tb_stats == TB_STATS_PAUSED;
 }
+
+uint32_t get_default_tbstats_flag(void)
+{
+return default_tbstats_flag;
+}
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index 8a1e408e31..6f4aafba11 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -167,3 +167,10 @@ void HELPER(exit_atomic)(CPUArchState *env)
 {
 cpu_loop_exit_atomic(env_cpu(env), GETPC());
 }
+
+void HELPER(inc_exec_freq)(void *ptr)
+{
+TBStatistics *stats = (TBStatistics *) ptr;
+g_assert(stats);
+atomic_inc(>executions.normal);
+}
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 4fa61b49b4..bf0b75dbe8 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -28,6 +28,8 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
 
 DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
 
+DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
+
 #ifdef CONFIG_SOFTMMU
 
 DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index b7bccacd3b..df08d183df 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1785,6 +1785,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
  */
 if (tb_stats_collection_enabled()) {
 tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags, tb);
+uint32_t flag = get_default_tbstats_flag();
+
+if (qemu_log_in_addr_range(tb->pc)) {
+if (flag & TB_EXEC_STATS) {
+tb->tb_stats->stats_enabled |= TB_EXEC_STATS;
+}
+}
 } else {
 tb->tb_stats = NULL;
 }
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 9226a348a3..396a11e828 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -46,6 +46,7 @@ void translator_loop(const TranslatorOps *ops, 
DisasContextBase *db,
 
 ops->init_disas_context(db, cpu);
 tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
+gen_tb_exec_count(tb);
 
 /* Reset the temp count so that we can identify leaks */
 tcg_clear_temp_count();
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index f7669b6841..b3efe41894 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -7,6 +7,15 @@
 
 static TCGOp *icount_start_insn;
 
+static inline void gen_tb_exec_count(TranslationBlock *tb)
+{
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+TCGv_ptr ptr = tcg_const_ptr(tb->tb_stats);
+gen_helper_inc_exec_freq(ptr);
+tcg_temp_free_ptr(ptr);
+}
+}
+
 static inline void gen_tb_start(TranslationBlock *tb)
 {
 TCGv_i32 count, imm;
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index cc8f8a6ce6..0265050b79 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -6,6 +6,9 @@
 #include "exec/tb-context.h"
 #include "tcg.h"
 
+#define tb_stats_enabled(tb, JIT_STATS) \
+(tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
+
 typedef struct TBStatistics TBStatistics;
 
 /*
@@ -22,6 +25,15 @@ struct TBStatistics {
 uint32_t flags;
 /* cs_base isn't included in the hash but we do check for matches */
 target_ulong cs_base;
+
+uint32_t stats_enabled;
+
+/* Execution stats */
+struct {
+unsigned long normal;
+unsigned long atomic;
+} executions;
+
 /* current TB linked to this TBStatistics */