Re: [Qemu-devel] [PATCH v5 01/10] accel: introducing TBStatistics structure

2019-08-15 Thread Alex Bennée


vandersonmr  writes:

> To store statistics for each TB, we created a TBStatistics structure
> which is linked with the TBs. TBStatistics can stay alive after
> tb_flush and be relinked to a regenerated TB. So the statistics can
> be accumulated even through flushes.
>
> The goal is to have all present and future qemu/tcg statistics and
> meta-data stored in this new structure.
>
> Signed-off-by: Vanderson M. do Rosario 

Reviewed-by: Alex Bennée 

> ---
>  accel/tcg/Makefile.objs  |  2 +-
>  accel/tcg/perf/Makefile.objs |  1 +
>  accel/tcg/tb-stats.c | 39 
>  accel/tcg/translate-all.c| 57 
>  include/exec/exec-all.h  | 15 +++---
>  include/exec/tb-context.h| 12 
>  include/exec/tb-hash.h   |  7 +
>  include/exec/tb-stats.h  | 43 +++
>  util/log.c   |  2 ++
>  9 files changed, 166 insertions(+), 12 deletions(-)
>  create mode 100644 accel/tcg/perf/Makefile.objs
>  create mode 100644 accel/tcg/tb-stats.c
>  create mode 100644 include/exec/tb-stats.h
>
> diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
> index d381a02f34..49ffe81b5d 100644
> --- a/accel/tcg/Makefile.objs
> +++ b/accel/tcg/Makefile.objs
> @@ -2,7 +2,7 @@ obj-$(CONFIG_SOFTMMU) += tcg-all.o
>  obj-$(CONFIG_SOFTMMU) += cputlb.o
>  obj-y += tcg-runtime.o tcg-runtime-gvec.o
>  obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
> -obj-y += translator.o
> +obj-y += translator.o tb-stats.o
>
>  obj-$(CONFIG_USER_ONLY) += user-exec.o
>  obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
> diff --git a/accel/tcg/perf/Makefile.objs b/accel/tcg/perf/Makefile.objs
> new file mode 100644
> index 0000000000..f82fba35e5
> --- /dev/null
> +++ b/accel/tcg/perf/Makefile.objs
> @@ -0,0 +1 @@
> +obj-y += jitdump.o
> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> new file mode 100644
> index 0000000000..02844717cb
> --- /dev/null
> +++ b/accel/tcg/tb-stats.c
> @@ -0,0 +1,39 @@
> +#include "qemu/osdep.h"
> +
> +#include "disas/disas.h"
> +
> +#include "exec/tb-stats.h"
> +
> +void init_tb_stats_htable_if_not(void)
> +{
> +if (tb_stats_collection_enabled() && !tb_ctx.tb_stats.map) {
> +qht_init(&tb_ctx.tb_stats, tb_stats_cmp,
> +CODE_GEN_HTABLE_SIZE, QHT_MODE_AUTO_RESIZE);
> +}
> +}
> +
> +void enable_collect_tb_stats(void)
> +{
> +init_tb_stats_htable_if_not();
> +tcg_collect_tb_stats = TB_STATS_RUNNING;
> +}
> +
> +void disable_collect_tb_stats(void)
> +{
> +tcg_collect_tb_stats = TB_STATS_PAUSED;
> +}
> +
> +void pause_collect_tb_stats(void)
> +{
> +tcg_collect_tb_stats = TB_STATS_STOPPED;
> +}
> +
> +bool tb_stats_collection_enabled(void)
> +{
> +return tcg_collect_tb_stats == TB_STATS_RUNNING;
> +}
> +
> +bool tb_stats_collection_paused(void)
> +{
> +return tcg_collect_tb_stats == TB_STATS_PAUSED;
> +}
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 5d1e08b169..b7bccacd3b 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1118,6 +1118,23 @@ static inline void code_gen_alloc(size_t tb_size)
>  }
>  }
>
> +/*
> + * This is more or less the same comparison as tb_cmp(), but the
> + * data persists over tb_flush. We also aggregate the various
> + * variations of cflags under one record and ignore the details of
> + * page overlap (although we can count it).
> + */
> +bool tb_stats_cmp(const void *ap, const void *bp)
> +{
> +const TBStatistics *a = ap;
> +const TBStatistics *b = bp;
> +
> +return a->phys_pc == b->phys_pc &&
> +a->pc == b->pc &&
> +a->cs_base == b->cs_base &&
> +a->flags == b->flags;
> +}
> +
>  static bool tb_cmp(const void *ap, const void *bp)
>  {
>  const TranslationBlock *a = ap;
> @@ -1137,6 +1154,7 @@ static void tb_htable_init(void)
>  unsigned int mode = QHT_MODE_AUTO_RESIZE;
>
>  qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
> +init_tb_stats_htable_if_not();
>  }
>
>  /* Must be called before using the QEMU cpus. 'tb_size' is the size
> @@ -1666,6 +1684,34 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
>  return tb;
>  }
>
> +static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
> +  target_ulong cs_base, uint32_t flags,
> +  TranslationBlock *current_tb)
> +{
> +TBStatistics *new_stats = g_new0(TBStatistics, 1);
> +uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
> +void *existing_stats = NULL;
> +new_stats->phys_pc = phys_pc;
> +new_stats->pc = pc;
> +new_stats->cs_base = cs_base;
> +new_stats->flags = flags;
> +new_stats->tb = current_tb;
> +
> +qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
> +
> +if (unlikely(existing_stats)) {
> +/*
> + * If there is already a TBStatistic for this TB from a previous 
> 

[Qemu-devel] [PATCH v5 01/10] accel: introducing TBStatistics structure

2019-08-14 Thread vandersonmr
To store statistics for each TB, we created a TBStatistics structure
which is linked with the TBs. TBStatistics can stay alive after
tb_flush and be relinked to a regenerated TB. So the statistics can
be accumulated even through flushes.

The goal is to have all present and future qemu/tcg statistics and
meta-data stored in this new structure.

Signed-off-by: Vanderson M. do Rosario 
---
 accel/tcg/Makefile.objs  |  2 +-
 accel/tcg/perf/Makefile.objs |  1 +
 accel/tcg/tb-stats.c | 39 
 accel/tcg/translate-all.c| 57 
 include/exec/exec-all.h  | 15 +++---
 include/exec/tb-context.h| 12 
 include/exec/tb-hash.h   |  7 +
 include/exec/tb-stats.h  | 43 +++
 util/log.c   |  2 ++
 9 files changed, 166 insertions(+), 12 deletions(-)
 create mode 100644 accel/tcg/perf/Makefile.objs
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 include/exec/tb-stats.h

diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
index d381a02f34..49ffe81b5d 100644
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -2,7 +2,7 @@ obj-$(CONFIG_SOFTMMU) += tcg-all.o
 obj-$(CONFIG_SOFTMMU) += cputlb.o
 obj-y += tcg-runtime.o tcg-runtime-gvec.o
 obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
-obj-y += translator.o
+obj-y += translator.o tb-stats.o
 
 obj-$(CONFIG_USER_ONLY) += user-exec.o
 obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
diff --git a/accel/tcg/perf/Makefile.objs b/accel/tcg/perf/Makefile.objs
new file mode 100644
index 0000000000..f82fba35e5
--- /dev/null
+++ b/accel/tcg/perf/Makefile.objs
@@ -0,0 +1 @@
+obj-y += jitdump.o
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
new file mode 100644
index 0000000000..02844717cb
--- /dev/null
+++ b/accel/tcg/tb-stats.c
@@ -0,0 +1,39 @@
+#include "qemu/osdep.h"
+
+#include "disas/disas.h"
+
+#include "exec/tb-stats.h"
+
+void init_tb_stats_htable_if_not(void)
+{
+if (tb_stats_collection_enabled() && !tb_ctx.tb_stats.map) {
+qht_init(&tb_ctx.tb_stats, tb_stats_cmp,
+CODE_GEN_HTABLE_SIZE, QHT_MODE_AUTO_RESIZE);
+}
+}
+
+void enable_collect_tb_stats(void)
+{
+init_tb_stats_htable_if_not();
+tcg_collect_tb_stats = TB_STATS_RUNNING;
+}
+
+void disable_collect_tb_stats(void)
+{
+tcg_collect_tb_stats = TB_STATS_PAUSED;
+}
+
+void pause_collect_tb_stats(void)
+{
+tcg_collect_tb_stats = TB_STATS_STOPPED;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+return tcg_collect_tb_stats == TB_STATS_RUNNING;
+}
+
+bool tb_stats_collection_paused(void)
+{
+return tcg_collect_tb_stats == TB_STATS_PAUSED;
+}
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 5d1e08b169..b7bccacd3b 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1118,6 +1118,23 @@ static inline void code_gen_alloc(size_t tb_size)
 }
 }
 
+/*
+ * This is more or less the same comparison as tb_cmp(), but the
+ * data persists over tb_flush. We also aggregate the various
+ * variations of cflags under one record and ignore the details of
+ * page overlap (although we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+const TBStatistics *a = ap;
+const TBStatistics *b = bp;
+
+return a->phys_pc == b->phys_pc &&
+a->pc == b->pc &&
+a->cs_base == b->cs_base &&
+a->flags == b->flags;
+}
+
 static bool tb_cmp(const void *ap, const void *bp)
 {
 const TranslationBlock *a = ap;
@@ -1137,6 +1154,7 @@ static void tb_htable_init(void)
 unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
 qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+init_tb_stats_htable_if_not();
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -1666,6 +1684,34 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
 return tb;
 }
 
+static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
+  target_ulong cs_base, uint32_t flags,
+  TranslationBlock *current_tb)
+{
+TBStatistics *new_stats = g_new0(TBStatistics, 1);
+uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
+void *existing_stats = NULL;
+new_stats->phys_pc = phys_pc;
+new_stats->pc = pc;
+new_stats->cs_base = cs_base;
+new_stats->flags = flags;
+new_stats->tb = current_tb;
+
+qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
+
+if (unlikely(existing_stats)) {
+/*
+ * If there is already a TBStatistic for this TB from a previous flush
+ * then just make the new TB point to the older TBStatistic
+ */
+g_free(new_stats);
+return existing_stats;
+} else {
+return new_stats;
+}
+}
+
+
 /* Called with mmap_lock held for user mode emulation.  */
 TranslationBlock *tb_gen_code(CPUState *cpu,