Re: [Qemu-devel] [PATCH v4 1/7] accel: introducing TBStatistics structure
vandersonmr writes: > To store statistics for each TB we created a TBStatistics structure > which is linked with the TBs. The TBStatistics can stay alive after > tb_flush and be relinked to a regenerated TB. So the statistics can > be accumulated even through flushes. > > TBStatistics will be also referred to as TBS or tbstats. > > Signed-off-by: Vanderson M. do Rosario > --- > accel/tcg/translate-all.c | 57 +++ > include/exec/exec-all.h | 15 +++ > include/exec/tb-context.h | 12 + > include/exec/tb-hash.h| 7 + > include/exec/tb-stats.h | 29 > include/qemu/log.h| 6 + > util/log.c| 28 +++ > 7 files changed, 143 insertions(+), 11 deletions(-) > create mode 100644 include/exec/tb-stats.h > > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index 5d1e08b169..a574890a80 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -1118,6 +1118,23 @@ static inline void code_gen_alloc(size_t tb_size) > } > } > > +/* > + * This is the more or less the same compare, We should reference "same compare as tb_cmp" > but the data persists > + * over tb_flush. We also aggregate the various variations of cflags > + * under one record and ignore the details of page overlap (although > + * we can count it). > + */ > +bool tb_stats_cmp(const void *ap, const void *bp) > +{ > +const TBStatistics *a = ap; > +const TBStatistics *b = bp; > + > +return a->phys_pc == b->phys_pc && > +a->pc == b->pc && > +a->cs_base == b->cs_base && > +a->flags == b->flags; > +} > + > static bool tb_cmp(const void *ap, const void *bp) > { > const TranslationBlock *a = ap; > @@ -1137,6 +1154,9 @@ static void tb_htable_init(void) > unsigned int mode = QHT_MODE_AUTO_RESIZE; > > qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode); > +if (tb_stats_collection_enabled()) { > +qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode); > +} > } > > /* Must be called before using the QEMU cpus.
'tb_size' is the size > @@ -1666,6 +1686,32 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t > phys_pc, > return tb; > } > > +static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc, > + target_ulong cs_base, uint32_t flags) > +{ > +TBStatistics *new_stats = g_new0(TBStatistics, 1); > +uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags); > +void *existing_stats = NULL; > +new_stats->phys_pc = phys_pc; > +new_stats->pc = pc; > +new_stats->cs_base = cs_base; > +new_stats->flags = flags; > + > +qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats); > + > +if (unlikely(existing_stats)) { > +/* > + * If there is already a TBStatistic for this TB from a previous > flush > + * then just make the new TB point to the older TBStatistic > + */ > +g_free(new_stats); > +return existing_stats; > +} else { > +return new_stats; > +} > +} > + > + > /* Called with mmap_lock held for user mode emulation. */ > TranslationBlock *tb_gen_code(CPUState *cpu, >target_ulong pc, target_ulong cs_base, > @@ -1732,6 +1778,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > ti = profile_getclock(); > #endif > > +/* > + * We want to fetch the stats structure before we start code > + * generation so we can count interesting things about this > + * generation. > + */ > +if (tb_stats_collection_enabled()) { > +tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags); > +} else { > +tb->tb_stats = NULL; > +} > + > tcg_func_start(tcg_ctx); > > tcg_ctx->cpu = env_cpu(env); > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index 16034ee651..24bd6a0a0c 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -22,21 +22,11 @@ > > #include "exec/tb-context.h" > #include "sysemu/cpus.h" > +#include "exec/tb-stats.h" > > /* allow to see translation results - the slowdown should be negligible, so > we leave it */ > #define DEBUG_DISAS > > -/* Page tracking code uses ram addresses in system mode, and virtual > - addresses in userspace mode.
Define tb_page_addr_t to be an appropriate > - type. */ > -#if defined(CONFIG_USER_ONLY) > -typedef abi_ulong tb_page_addr_t; > -#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx > -#else > -typedef ram_addr_t tb_page_addr_t; > -#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT > -#endif > - Why are we moving this? I suspect it would be better to split the move into a separate patch for the purposes of bisection. > #include "qemu/log.h" > > void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int > max_insns); > @@ -403,6 +393,9 @@ struct TranslationBlock { > uintptr_t jmp_list_head; > uintptr_t
[Qemu-devel] [PATCH v4 1/7] accel: introducing TBStatistics structure
To store statistics for each TB we created a TBStatistics structure which is linked with the TBs. The TBStatistics can stay alive after tb_flush and be relinked to a regenerated TB. So the statistics can be accumulated even through flushes. TBStatistics will be also referred to as TBS or tbstats. Signed-off-by: Vanderson M. do Rosario --- accel/tcg/translate-all.c | 57 +++ include/exec/exec-all.h | 15 +++ include/exec/tb-context.h | 12 + include/exec/tb-hash.h| 7 + include/exec/tb-stats.h | 29 include/qemu/log.h| 6 + util/log.c| 28 +++ 7 files changed, 143 insertions(+), 11 deletions(-) create mode 100644 include/exec/tb-stats.h diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 5d1e08b169..a574890a80 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1118,6 +1118,23 @@ static inline void code_gen_alloc(size_t tb_size) } } +/* + * This is the more or less the same compare, but the data persists + * over tb_flush. We also aggregate the various variations of cflags + * under one record and ignore the details of page overlap (although + * we can count it). + */ +bool tb_stats_cmp(const void *ap, const void *bp) +{ +const TBStatistics *a = ap; +const TBStatistics *b = bp; + +return a->phys_pc == b->phys_pc && +a->pc == b->pc && +a->cs_base == b->cs_base && +a->flags == b->flags; +} + static bool tb_cmp(const void *ap, const void *bp) { const TranslationBlock *a = ap; @@ -1137,6 +1154,9 @@ static void tb_htable_init(void) unsigned int mode = QHT_MODE_AUTO_RESIZE; qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode); +if (tb_stats_collection_enabled()) { +qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode); +} } /* Must be called before using the QEMU cpus.
'tb_size' is the size @@ -1666,6 +1686,32 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, return tb; } +static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc, + target_ulong cs_base, uint32_t flags) +{ +TBStatistics *new_stats = g_new0(TBStatistics, 1); +uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags); +void *existing_stats = NULL; +new_stats->phys_pc = phys_pc; +new_stats->pc = pc; +new_stats->cs_base = cs_base; +new_stats->flags = flags; + +qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats); + +if (unlikely(existing_stats)) { +/* + * If there is already a TBStatistic for this TB from a previous flush + * then just make the new TB point to the older TBStatistic + */ +g_free(new_stats); +return existing_stats; +} else { +return new_stats; +} +} + + /* Called with mmap_lock held for user mode emulation. */ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, @@ -1732,6 +1778,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, ti = profile_getclock(); #endif +/* + * We want to fetch the stats structure before we start code + * generation so we can count interesting things about this + * generation. + */ +if (tb_stats_collection_enabled()) { +tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags); +} else { +tb->tb_stats = NULL; +} + tcg_func_start(tcg_ctx); tcg_ctx->cpu = env_cpu(env); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 16034ee651..24bd6a0a0c 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -22,21 +22,11 @@ #include "exec/tb-context.h" #include "sysemu/cpus.h" +#include "exec/tb-stats.h" /* allow to see translation results - the slowdown should be negligible, so we leave it */ #define DEBUG_DISAS -/* Page tracking code uses ram addresses in system mode, and virtual - addresses in userspace mode. Define tb_page_addr_t to be an appropriate - type.
*/ -#if defined(CONFIG_USER_ONLY) -typedef abi_ulong tb_page_addr_t; -#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx -#else -typedef ram_addr_t tb_page_addr_t; -#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT -#endif - #include "qemu/log.h" void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns); @@ -403,6 +393,9 @@ struct TranslationBlock { uintptr_t jmp_list_head; uintptr_t jmp_list_next[2]; uintptr_t jmp_dest[2]; + +/* Pointer to a struct where statistics from the TB is stored */ +TBStatistics *tb_stats; }; extern bool parallel_cpus; diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index feb585e0a7..3cfb62a338 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -23,6 +23,17 @@ #include "qemu/thread.h" #include "qemu/qht.h" +/* Page tracking code uses ram addresses