A new hash map was added to store the accumulated execution frequency of the TBs even after tb_flush events. A dump function was also added as a way to visualize these frequencies.
Signed-off-by: vandersonmr <vanderson...@gmail.com> --- accel/tcg/translate-all.c | 59 +++++++++++++++++++++++++++++++++++++++ accel/tcg/translate-all.h | 2 ++ exec.c | 7 +++++ include/exec/exec-all.h | 3 ++ include/exec/tb-context.h | 9 ++++++ include/qom/cpu.h | 2 ++ 6 files changed, 82 insertions(+) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 5d1e08b169..0bc670ffad 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1118,6 +1118,12 @@ static inline void code_gen_alloc(size_t tb_size) } } +static bool statistics_cmp(const void* ap, const void *bp) { + const TBStatistics *a = ap; + const TBStatistics *b = bp; + return a->pc == b->pc && a->cs_base == b->cs_base && a->flags == b->flags; +} + static bool tb_cmp(const void *ap, const void *bp) { const TranslationBlock *a = ap; @@ -1137,6 +1143,7 @@ static void tb_htable_init(void) unsigned int mode = QHT_MODE_AUTO_RESIZE; qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode); + qht_init(&tb_ctx.tb_statistics, statistics_cmp, CODE_GEN_HTABLE_SIZE, QHT_MODE_AUTO_RESIZE); } /* Must be called before using the QEMU cpus. 'tb_size' is the size @@ -1228,6 +1235,53 @@ static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data) return false; } +static void do_tb_dump_exec_freq(void *p, uint32_t hash, void *userp) +{ +#if TARGET_LONG_SIZE == 8 + TBStatistics *tbs = p; + printf("%016lx\t%lu\n", tbs->pc, tbs->total_exec_freq); +#elif TARGET_LONG_SIZE == 4 + TBStatistics *tbs = p; + printf("%016x\t%lu\n", tbs->pc, tbs->total_exec_freq); +#endif +} + +void tb_dump_all_exec_freq(void) +{ + tb_read_exec_freq(); + qht_iter(&tb_ctx.tb_statistics, do_tb_dump_exec_freq, NULL); +} + +static void do_tb_read_exec_freq(void *p, uint32_t hash, void *userp) +{ + TranslationBlock *tb = p; + TBStatistics tbscmp; + tbscmp.pc = tb->pc; + tbscmp.cs_base = tb->cs_base; + tbscmp.flags = tb->flags; + + TBStatistics *tbs = qht_lookup(userp, &tbscmp, hash); + + uint64_t exec_freq = tb_get_and_reset_exec_freq((TranslationBlock*) p); + + if (tbs) { + tbs->total_exec_freq += exec_freq; + } else { + void *existing; + tbs = malloc(sizeof(TBStatistics)); + tbs->total_exec_freq = exec_freq; + tbs->pc = tb->pc; + tbs->cs_base = tb->cs_base; + tbs->flags = tb->flags; + qht_insert(userp, tbs, hash, &existing); + } +} + +void tb_read_exec_freq(void) +{ + qht_iter(&tb_ctx.htable, do_tb_read_exec_freq, &tb_ctx.tb_statistics); +} + /* flush all the translation blocks */ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) { @@ -1252,6 +1306,10 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) cpu_tb_jmp_cache_clear(cpu); } + if (enable_freq_count) { + tb_read_exec_freq(); + } + qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE); page_flush_tb(); @@ -1723,6 +1781,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->flags = flags; tb->cflags = cflags; tb->trace_vcpu_dstate = *cpu->trace_dstate; + tb->exec_freq = 0; tcg_ctx->tb_cflags = cflags; tb_overflow: diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h index 64f5fd9a05..e13088c36d 100644 --- a/accel/tcg/translate-all.h +++ b/accel/tcg/translate-all.h @@ -32,6 +32,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access); void tb_check_watchpoint(CPUState *cpu); +extern bool enable_freq_count; + #ifdef CONFIG_USER_ONLY int page_unprotect(target_ulong address, uintptr_t pc); #endif diff --git a/exec.c b/exec.c index e7622d1956..9b64a012ee 100644 --- a/exec.c +++ b/exec.c @@ -1013,6 +1013,13 @@ const char *parse_cpu_option(const char *cpu_option) return cpu_type; } +uint64_t tb_get_and_reset_exec_freq(TranslationBlock *tb) +{ + uint64_t exec_freq = atomic_load_acquire(&tb->exec_freq); + atomic_store_release(&tb->exec_freq, 0); + return exec_freq; +} + #if defined(CONFIG_USER_ONLY) void tb_invalidate_phys_addr(target_ulong addr) { diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index a80407622e..5d3d829d18 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -513,4 +513,7 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, /* vl.c */ extern int singlestep; +void tb_read_exec_freq(void); +void tb_dump_all_exec_freq(void); + #endif diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index feb585e0a7..ba518d47a0 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -28,6 +28,14 @@ typedef struct TranslationBlock TranslationBlock; typedef struct TBContext TBContext; +typedef struct TBStatistics TBStatistics; + +struct TBStatistics { + target_ulong pc; + target_ulong cs_base; + uint32_t flags; + uint64_t total_exec_freq; +}; struct TBContext { @@ -35,6 +43,7 @@ struct TBContext { /* statistics */ unsigned tb_flush_count; + struct qht tb_statistics; }; extern TBContext tb_ctx; diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 5ee0046b62..593c1f1137 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -474,6 +474,8 @@ static inline void cpu_tb_jmp_cache_clear(CPUState *cpu) } } +uint64_t tb_get_and_reset_exec_freq(struct TranslationBlock*); + /** * qemu_tcg_mttcg_enabled: * Check whether we are running MultiThread TCG or not. -- 2.22.0