Re: [Qemu-devel] [PATCH v4 1/7] accel: introducing TBStatistics structure

2019-07-26 Thread Alex Bennée


vandersonmr  writes:

> To store statistics for each TB we created a TBStatistics structure
> which is linked with the TBs. The TBStatistics can stay alive after
> tb_flush and be relinked to a regenerated TB. So the statistics can
> be accumulated even through flushes.
>
> TBStatistics will be also referred to as TBS or tbstats.
>
> Signed-off-by: Vanderson M. do Rosario 
> ---
>  accel/tcg/translate-all.c | 57 +++
>  include/exec/exec-all.h   | 15 +++
>  include/exec/tb-context.h | 12 +
>  include/exec/tb-hash.h|  7 +
>  include/exec/tb-stats.h   | 29 
>  include/qemu/log.h|  6 +
>  util/log.c| 28 +++
>  7 files changed, 143 insertions(+), 11 deletions(-)
>  create mode 100644 include/exec/tb-stats.h
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 5d1e08b169..a574890a80 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1118,6 +1118,23 @@ static inline void code_gen_alloc(size_t tb_size)
>  }
>  }
>
> +/*
> + * This is the more or less the same compare,

We should reference "same compare as tb_cmp"

> but the data persists
> + * over tb_flush. We also aggregate the various variations of cflags
> + * under one record and ignore the details of page overlap (although
> + * we can count it).
> + */
> +bool tb_stats_cmp(const void *ap, const void *bp)
> +{
> +const TBStatistics *a = ap;
> +const TBStatistics *b = bp;
> +
> +return a->phys_pc == b->phys_pc &&
> +a->pc == b->pc &&
> +a->cs_base == b->cs_base &&
> +a->flags == b->flags;
> +}
> +
>  static bool tb_cmp(const void *ap, const void *bp)
>  {
>  const TranslationBlock *a = ap;
> @@ -1137,6 +1154,9 @@ static void tb_htable_init(void)
>  unsigned int mode = QHT_MODE_AUTO_RESIZE;
>
>  qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
> +if (tb_stats_collection_enabled()) {
> +qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode);
> +}
>  }
>
>  /* Must be called before using the QEMU cpus. 'tb_size' is the size
> @@ -1666,6 +1686,32 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t 
> phys_pc,
>  return tb;
>  }
>
> +static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
> +  target_ulong cs_base, uint32_t flags)
> +{
> +TBStatistics *new_stats = g_new0(TBStatistics, 1);
> +uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
> +void *existing_stats = NULL;
> +new_stats->phys_pc = phys_pc;
> +new_stats->pc = pc;
> +new_stats->cs_base = cs_base;
> +new_stats->flags = flags;
> +
> +qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
> +
> +if (unlikely(existing_stats)) {
> +/*
> + * If there is already a TBStatistic for this TB from a previous 
> flush
> + * then just make the new TB point to the older TBStatistic
> + */
> +g_free(new_stats);
> +return existing_stats;
> +} else {
> +return new_stats;
> +}
> +}
> +
> +
>  /* Called with mmap_lock held for user mode emulation.  */
>  TranslationBlock *tb_gen_code(CPUState *cpu,
>target_ulong pc, target_ulong cs_base,
> @@ -1732,6 +1778,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>  ti = profile_getclock();
>  #endif
>
> +/*
> + * We want to fetch the stats structure before we start code
> + * generation so we can count interesting things about this
> + * generation.
> + */
> +if (tb_stats_collection_enabled()) {
> +tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
> +} else {
> +tb->tb_stats = NULL;
> +}
> +
>  tcg_func_start(tcg_ctx);
>
>  tcg_ctx->cpu = env_cpu(env);
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 16034ee651..24bd6a0a0c 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -22,21 +22,11 @@
>
>  #include "exec/tb-context.h"
>  #include "sysemu/cpus.h"
> +#include "exec/tb-stats.h"
>
>  /* allow to see translation results - the slowdown should be negligible, so 
> we leave it */
>  #define DEBUG_DISAS
>
> -/* Page tracking code uses ram addresses in system mode, and virtual
> -   addresses in userspace mode.  Define tb_page_addr_t to be an appropriate
> -   type.  */
> -#if defined(CONFIG_USER_ONLY)
> -typedef abi_ulong tb_page_addr_t;
> -#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx
> -#else
> -typedef ram_addr_t tb_page_addr_t;
> -#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
> -#endif
> -

Why are we moving this? I suspect it would be better to split the move
into a separate patch for the purposes of bisection.

>  #include "qemu/log.h"
>
>  void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int 
> max_insns);
> @@ -403,6 +393,9 @@ struct TranslationBlock {
>  uintptr_t jmp_list_head;
>  uintptr_t 

[Qemu-devel] [PATCH v4 1/7] accel: introducing TBStatistics structure

2019-07-19 Thread vandersonmr
To store statistics for each TB we created a TBStatistics structure
which is linked with the TBs. The TBStatistics can stay alive after
tb_flush and be relinked to a regenerated TB. So the statistics can
be accumulated even through flushes.

TBStatistics will be also referred to as TBS or tbstats.

Signed-off-by: Vanderson M. do Rosario 
---
 accel/tcg/translate-all.c | 57 +++
 include/exec/exec-all.h   | 15 +++
 include/exec/tb-context.h | 12 +
 include/exec/tb-hash.h|  7 +
 include/exec/tb-stats.h   | 29 
 include/qemu/log.h|  6 +
 util/log.c| 28 +++
 7 files changed, 143 insertions(+), 11 deletions(-)
 create mode 100644 include/exec/tb-stats.h

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 5d1e08b169..a574890a80 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1118,6 +1118,23 @@ static inline void code_gen_alloc(size_t tb_size)
 }
 }
 
+/*
+ * This is more or less the same compare as tb_cmp, but the data persists
+ * over tb_flush. We also aggregate the various variations of cflags
+ * under one record and ignore the details of page overlap (although
+ * we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+const TBStatistics *a = ap;
+const TBStatistics *b = bp;
+
+return a->phys_pc == b->phys_pc &&
+a->pc == b->pc &&
+a->cs_base == b->cs_base &&
+a->flags == b->flags;
+}
+
 static bool tb_cmp(const void *ap, const void *bp)
 {
 const TranslationBlock *a = ap;
@@ -1137,6 +1154,9 @@ static void tb_htable_init(void)
 unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
 qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+if (tb_stats_collection_enabled()) {
+qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode);
+}
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -1666,6 +1686,32 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
 return tb;
 }
 
+static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
+  target_ulong cs_base, uint32_t flags)
+{
+TBStatistics *new_stats = g_new0(TBStatistics, 1);
+uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
+void *existing_stats = NULL;
+new_stats->phys_pc = phys_pc;
+new_stats->pc = pc;
+new_stats->cs_base = cs_base;
+new_stats->flags = flags;
+
+qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
+
+if (unlikely(existing_stats)) {
+/*
+ * If there is already a TBStatistics for this TB from a previous flush
+ * then just make the new TB point to the older TBStatistics
+ */
+g_free(new_stats);
+return existing_stats;
+} else {
+return new_stats;
+}
+}
+
+
 /* Called with mmap_lock held for user mode emulation.  */
 TranslationBlock *tb_gen_code(CPUState *cpu,
   target_ulong pc, target_ulong cs_base,
@@ -1732,6 +1778,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 ti = profile_getclock();
 #endif
 
+/*
+ * We want to fetch the stats structure before we start code
+ * generation so we can count interesting things about this
+ * generation.
+ */
+if (tb_stats_collection_enabled()) {
+tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+} else {
+tb->tb_stats = NULL;
+}
+
 tcg_func_start(tcg_ctx);
 
 tcg_ctx->cpu = env_cpu(env);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 16034ee651..24bd6a0a0c 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -22,21 +22,11 @@
 
 #include "exec/tb-context.h"
 #include "sysemu/cpus.h"
+#include "exec/tb-stats.h"
 
 /* allow to see translation results - the slowdown should be negligible, so we leave it */
 #define DEBUG_DISAS
 
-/* Page tracking code uses ram addresses in system mode, and virtual
-   addresses in userspace mode.  Define tb_page_addr_t to be an appropriate
-   type.  */
-#if defined(CONFIG_USER_ONLY)
-typedef abi_ulong tb_page_addr_t;
-#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx
-#else
-typedef ram_addr_t tb_page_addr_t;
-#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
-#endif
-
 #include "qemu/log.h"
 
 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns);
@@ -403,6 +393,9 @@ struct TranslationBlock {
 uintptr_t jmp_list_head;
 uintptr_t jmp_list_next[2];
 uintptr_t jmp_dest[2];
+
+/* Pointer to a struct where statistics for the TB are stored */
+TBStatistics *tb_stats;
 };
 
 extern bool parallel_cpus;
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index feb585e0a7..3cfb62a338 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -23,6 +23,17 @@
 #include "qemu/thread.h"
 #include "qemu/qht.h"
 
+/* Page tracking code uses ram addresses