Alex Bennée <alex.ben...@linaro.org> writes:
> vandersonmr <vanderson...@gmail.com> writes: > >> Adding info [tbs|tb|coverset] commands to HMP. >> These commands allow the exploration of TBs >> generated by the TCG. Understand which one >> hotter, with more guest/host instructions... >> and examine their guest, host and IR code. >> >> The goal of this command is to allow the dynamic exploration >> of TCG behavior and code quality. Therefore, for now, a >> corresponding QMP command is not worthwhile. >> >> Signed-off-by: vandersonmr <vanderson...@gmail.com> >> --- >> accel/tcg/tb-stats.c | 275 +++++++++++++++++++++++++++++++++++ >> hmp-commands-info.hx | 23 +++ >> include/exec/tb-stats.h | 37 +++++ >> include/qemu/log-for-trace.h | 2 + >> include/qemu/log.h | 1 + >> linux-user/exit.c | 4 + >> monitor/misc.c | 71 +++++++++ >> util/log.c | 26 +++- >> 8 files changed, 431 insertions(+), 8 deletions(-) >> >> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c >> index 6c330e1b02..85a16cd563 100644 >> --- a/accel/tcg/tb-stats.c >> +++ b/accel/tcg/tb-stats.c >> @@ -212,3 +212,278 @@ void do_hmp_tbstats_safe(CPUState *cpu, >> run_on_cpu_data icmd) >> g_free(cmdinfo); >> } >> >> +static void collect_tb_stats(void *p, uint32_t hash, void *userp) >> +{ >> + last_search = g_list_prepend(last_search, p); >> +} >> + >> +static void dump_tb_header(TBStatistics *tbs) >> +{ >> + unsigned g = tbs->translations.total ? >> + tbs->code.num_guest_inst / tbs->translations.total : 0; >> + unsigned ops = tbs->translations.total ? >> + tbs->code.num_tcg_ops / tbs->translations.total : 0; >> + unsigned ops_opt = tbs->translations.total ? >> + tbs->code.num_tcg_ops_opt / tbs->translations.total : 0; >> + unsigned h = tbs->translations.total ? >> + tbs->code.num_host_inst / tbs->translations.total : 0; >> + unsigned spills = tbs->translations.total ? >> + tbs->code.spills / tbs->translations.total : 0; >> + >> + float guest_host_prop = g ? ((float) h / g) : 0; >> + >> + qemu_log("TB%d: phys:0x"TB_PAGE_ADDR_FMT" virt:0x"TARGET_FMT_lx >> + " flags:%#08x (trans:%lu uncached:%lu exec:%lu ints: g:%u >> op:%u op_opt:%u h:%u h/g:%.2f spills:%d)\n", >> + tbs->display_id, >> + tbs->phys_pc, tbs->pc, tbs->flags, >> + tbs->translations.total, tbs->translations.uncached, >> + tbs->executions.total, g, ops, ops_opt, h, guest_host_prop, >> + spills); >> +} >> + >> +static gint >> +inverse_sort_tbs(gconstpointer p1, gconstpointer p2, gpointer psort_by) >> +{ >> + const TBStatistics *tbs1 = (TBStatistics *) p1; >> + const TBStatistics *tbs2 = (TBStatistics *) p2; >> + int sort_by = *((int *) psort_by); >> + unsigned long c1 = 0; >> + unsigned long c2 = 0; >> + >> + if (likely(sort_by == SORT_BY_SPILLS)) { >> + c1 = tbs1->code.spills; >> + c2 = tbs2->code.spills; >> + } else if (likely(sort_by == SORT_BY_HOTNESS)) { >> + c1 = tbs1->executions.total; >> + c2 = tbs2->executions.total; >> + } else if (likely(sort_by == SORT_BY_HG)) { >> + if (tbs1->code.num_guest_inst == 0) { >> + return -1; >> + } >> + if (tbs2->code.num_guest_inst == 0) { >> + return 1; >> + } >> + >> + float a = (float) tbs1->code.num_host_inst / >> tbs1->code.num_guest_inst; >> + float b = (float) tbs2->code.num_host_inst / >> tbs2->code.num_guest_inst; >> + c1 = a <= b ? 0 : 1; >> + c2 = a <= b ? 1 : 0; >> + } >> + >> + return c1 < c2 ? 1 : c1 == c2 ? 0 : -1; >> +} >> + >> +static void do_dump_coverset_info(int percentage) >> +{ >> + uint64_t total_exec_count = 0; >> + uint64_t covered_exec_count = 0; >> + unsigned coverset_size = 0; >> + int id = 1; >> + GList *i; >> + >> + g_list_free(last_search); >> + last_search = NULL; >> + >> + qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL); >> + >> + int sort_by = SORT_BY_HOTNESS; >> + last_search = g_list_sort_with_data(last_search, inverse_sort_tbs, >> &sort_by); >> + >> + if (!last_search) { >> + qemu_log("No data collected yet\n"); >> + return; >> + } >> + >> + /* Compute total execution count for all tbs */ >> + for (i = last_search; i; i = i->next) { >> + TBStatistics *tbs = (TBStatistics *) i->data; >> + total_exec_count += tbs->executions.total * >> tbs->code.num_guest_inst; >> + } >> + >> + for (i = last_search; i; i = i->next) { >> + TBStatistics *tbs = (TBStatistics *) i->data; >> + covered_exec_count += tbs->executions.total * >> tbs->code.num_guest_inst; >> + tbs->display_id = id++; >> + coverset_size++; >> + dump_tb_header(tbs); >> + >> + /* Iterate and display tbs until reach the percentage count cover */ >> + if (((double) covered_exec_count / total_exec_count) > >> + ((double) percentage / 100)) { >> + break; >> + } >> + } >> + >> + qemu_log("\n------------------------------\n"); >> + qemu_log("# of TBs to reach %d%% of the total of guest insts exec: >> %u\t", >> + percentage, coverset_size); >> + qemu_log("Total of guest insts exec: %lu\n", total_exec_count); >> + qemu_log("\n------------------------------\n"); >> + >> + /* free the unused bits */ >> + if (i) { >> + if (i->next) { >> + i->next->prev = NULL; >> + } >> + g_list_free(i->next); >> + i->next = NULL; >> + } >> +} >> + >> +static void do_dump_tbs_info(int count, int sort_by) >> +{ >> + int id = 1; >> + GList *i; >> + >> + g_list_free(last_search); >> + last_search = NULL; >> + >> + qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL); >> + >> + last_search = g_list_sort_with_data(last_search, inverse_sort_tbs, >> + &sort_by); >> + >> + if (!last_search) { >> + qemu_printf("No data collected yet!\n"); >> + return; >> + } >> + >> + for (i = last_search; i && count--; i = i->next) { >> + TBStatistics *tbs = (TBStatistics *) i->data; >> + tbs->display_id = id++; >> + dump_tb_header(tbs); >> + } >> + >> + /* free the unused bits */ >> + if (i) { >> + if (i->next) { >> + i->next->prev = NULL; >> + } >> + g_list_free(i->next); >> + i->next = NULL; >> + } >> +} >> + >> +static void >> +do_dump_coverset_info_safe(CPUState *cpu, run_on_cpu_data percentage) >> +{ >> + qemu_log_to_monitor(true); >> + do_dump_coverset_info(percentage.host_int); >> + qemu_log_to_monitor(false); >> +} >> + >> +struct tbs_dump_info { >> + int count; >> + int sort_by; >> +}; >> + >> +static void do_dump_tbs_info_safe(CPUState *cpu, run_on_cpu_data tbdi) >> +{ >> + struct tbs_dump_info *info = tbdi.host_ptr; >> + qemu_log_to_monitor(true); >> + do_dump_tbs_info(info->count, info->sort_by); >> + qemu_log_to_monitor(false); >> + g_free(info); >> +} >> + >> +/* >> + * When we dump_tbs_info on a live system via the HMP we want to >> + * ensure the system is quiessent before we start outputting stuff. >> + * Otherwise we could pollute the output with other logging output. >> + */ >> +void dump_coverset_info(int percentage, bool use_monitor) >> +{ >> + if (use_monitor) { >> + async_safe_run_on_cpu(first_cpu, do_dump_coverset_info_safe, >> + RUN_ON_CPU_HOST_INT(percentage)); >> + } else { >> + do_dump_coverset_info(percentage); >> + } >> +} >> + >> +void dump_tbs_info(int count, int sort_by, bool use_monitor) >> +{ >> + if (use_monitor) { >> + struct tbs_dump_info *tbdi = g_new(struct tbs_dump_info, 1); >> + tbdi->count = count; >> + tbdi->sort_by = sort_by; >> + async_safe_run_on_cpu(first_cpu, do_dump_tbs_info_safe, >> + RUN_ON_CPU_HOST_PTR(tbdi)); >> + } else { >> + do_dump_tbs_info(count, sort_by); >> + } >> +} >> + >> +static void do_tb_dump_with_statistics(TBStatistics *tbs, int log_flags) >> +{ >> + CPUState *cpu = current_cpu; >> + uint32_t cflags = curr_cflags() | CF_NOCACHE; >> + int old_log_flags = qemu_loglevel; >> + TranslationBlock *tb = NULL; >> + >> + qemu_set_log(log_flags); >> + >> + qemu_log("\n------------------------------\n"); >> + dump_tb_header(tbs); >> + >> + if (sigsetjmp(cpu->jmp_env, 0) == 0) { >> + mmap_lock(); >> + tb = tb_gen_code(cpu, tbs->pc, tbs->cs_base, tbs->flags, cflags); >> + tb_phys_invalidate(tb, -1); >> + mmap_unlock(); >> + } else { >> + /* >> + * The mmap_lock is dropped by tb_gen_code if it runs out of >> + * memory. >> + */ >> + fprintf(stderr, "%s: dbg failed!\n", __func__); >> + assert_no_pages_locked(); >> + } > > So this fails if we can't generate a TB as tcg_tb_remove(NULL) will barf > (this was the crash I was seeing). However with two additional patches > from my plugin series: > > translate-all: use cpu_in_exclusive_work_context() in tb_flush > translate-all: use cpu_in_exclusive_work_context() in tb_flush > > We can do a simple repeat operation: > > @@ -420,6 +421,7 @@ static void do_tb_dump_with_statistics(TBStatistics *tbs, > int log_flags) > > qemu_log("\n------------------------------\n"); > dump_tb_header(tbs); > + redo: > > if (sigsetjmp(cpu->jmp_env, 0) == 0) { > mmap_lock(); > @@ -433,6 +435,7 @@ static void do_tb_dump_with_statistics(TBStatistics *tbs, > int log_flags) > */ > fprintf(stderr, "%s: dbg failed!\n", __func__); > assert_no_pages_locked(); > + goto redo; > } > > Because the tb_flush triggered by the generation code will have occured > straight away. > > I'd suggest cherry picking those two patches into your next series. Hmm not quite enough because we need to handle the case that we cannot re-translate because the page has been mapped out. > >> + >> + qemu_set_log(old_log_flags); >> + >> + tcg_tb_remove(tb); >> +} >> + >> +struct tb_dump_info { >> + int id; >> + int log_flags; >> + bool use_monitor; >> +}; >> + >> +static void do_dump_tb_info_safe(CPUState *cpu, run_on_cpu_data info) >> +{ >> + struct tb_dump_info *tbdi = (struct tb_dump_info *) info.host_ptr; >> + GList *iter; >> + >> + if (!last_search) { >> + qemu_printf("no search on record"); >> + return; >> + } >> + qemu_log_to_monitor(tbdi->use_monitor); >> + >> + for (iter = last_search; iter; iter = g_list_next(iter)) { >> + TBStatistics *tbs = iter->data; >> + if (tbs->display_id == tbdi->id) { >> + do_tb_dump_with_statistics(tbs, tbdi->log_flags); >> + break; >> + } >> + } >> + qemu_log_to_monitor(false); >> + g_free(tbdi); >> +} >> + >> +void dump_tb_info(int id, int log_mask, bool use_monitor) >> +{ >> + struct tb_dump_info *tbdi = g_new(struct tb_dump_info, 1); >> + >> + tbdi->id = id; >> + tbdi->log_flags = log_mask; >> + tbdi->use_monitor = use_monitor; >> + >> + async_safe_run_on_cpu(first_cpu, do_dump_tb_info_safe, >> + RUN_ON_CPU_HOST_PTR(tbdi)); >> + >> + /* tbdi free'd by do_dump_tb_info_safe */ >> +} >> + >> diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx >> index c59444c461..761c76619b 100644 >> --- a/hmp-commands-info.hx >> +++ b/hmp-commands-info.hx >> @@ -289,6 +289,29 @@ ETEXI >> .help = "show dynamic compiler info", >> .cmd = hmp_info_jit, >> }, >> + { >> + .name = "tbs", >> + .args_type = "number:i?,sortedby:s?", >> + .params = "[number sortedby]", >> + .help = "show a [number] translated blocks sorted by >> [sortedby]" >> + "sortedby opts: hotness hg", >> + .cmd = hmp_info_tbs, >> + }, >> + { >> + .name = "tb", >> + .args_type = "id:i,flags:s?", >> + .params = "id [log1[,...] flags]", >> + .help = "show information about one translated block by id", >> + .cmd = hmp_info_tb, >> + }, >> + { >> + .name = "coverset", >> + .args_type = "number:i?", >> + .params = "[number]", >> + .help = "show hottest translated blocks neccesary to cover" >> + "[number]% of the execution count", >> + .cmd = hmp_info_coverset, >> + }, >> #endif >> >> STEXI >> diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h >> index d1debd3262..e2ab1068c3 100644 >> --- a/include/exec/tb-stats.h >> +++ b/include/exec/tb-stats.h >> @@ -57,6 +57,9 @@ struct TBStatistics { >> unsigned long uncached; >> unsigned long spanning; >> } translations; >> + >> + /* HMP information - used for referring to previous search */ >> + int display_id; >> }; >> >> bool tb_stats_cmp(const void *ap, const void *bp); >> @@ -73,4 +76,38 @@ struct TbstatsCommand { >> >> void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd); >> >> +/** >> + * dump_coverset_info: report the hottest blocks to cover n% of execution >> + * >> + * @percentage: cover set percentage >> + * @use_monitor: redirect output to monitor >> + * >> + * Report the hottest blocks to either the log or monitor >> + */ >> +void dump_coverset_info(int percentage, bool use_monitor); >> + >> + >> +/** >> + * dump_tbs_info: report the hottest blocks >> + * >> + * @count: the limit of hotblocks >> + * @sort_by: property in which the dump will be sorted >> + * @use_monitor: redirect output to monitor >> + * >> + * Report the hottest blocks to either the log or monitor >> + */ >> +void dump_tbs_info(int count, int sort_by, bool use_monitor); >> + >> +/** >> + * dump_tb_info: dump information about one TB >> + * >> + * @id: the display id of the block (from previous search) >> + * @mask: the temporary logging mask >> + * @Use_monitor: redirect output to monitor >> + * >> + * Re-run a translation of a block at addr for the purposes of debug output >> + */ >> +void dump_tb_info(int id, int log_mask, bool use_monitor); >> + >> + >> #endif >> diff --git a/include/qemu/log-for-trace.h b/include/qemu/log-for-trace.h >> index 2f0a5b080e..d65eb83037 100644 >> --- a/include/qemu/log-for-trace.h >> +++ b/include/qemu/log-for-trace.h >> @@ -21,6 +21,8 @@ >> /* Private global variable, don't use */ >> extern int qemu_loglevel; >> >> +extern int32_t max_num_hot_tbs_to_dump; >> + >> #define LOG_TRACE (1 << 15) >> >> /* Returns true if a bit is set in the current loglevel mask */ >> diff --git a/include/qemu/log.h b/include/qemu/log.h >> index 47071d72c7..27ba267faa 100644 >> --- a/include/qemu/log.h >> +++ b/include/qemu/log.h >> @@ -114,6 +114,7 @@ typedef struct QEMULogItem { >> extern const QEMULogItem qemu_log_items[]; >> >> void qemu_set_log(int log_flags); >> +void qemu_log_to_monitor(bool enable); >> void qemu_log_needs_buffers(void); >> void qemu_set_log_filename(const char *filename, Error **errp); >> void qemu_set_dfilter_ranges(const char *ranges, Error **errp); >> diff --git a/linux-user/exit.c b/linux-user/exit.c >> index bdda720553..7226104959 100644 >> --- a/linux-user/exit.c >> +++ b/linux-user/exit.c >> @@ -28,6 +28,10 @@ extern void __gcov_dump(void); >> >> void preexit_cleanup(CPUArchState *env, int code) >> { >> + if (tb_stats_collection_enabled()) { >> + dump_tbs_info(max_num_hot_tbs_to_dump, SORT_BY_HOTNESS, false); >> + } >> + >> #ifdef TARGET_GPROF >> _mcleanup(); >> #endif >> diff --git a/monitor/misc.c b/monitor/misc.c >> index aea9b0db4f..78ada8aa0e 100644 >> --- a/monitor/misc.c >> +++ b/monitor/misc.c >> @@ -499,6 +499,77 @@ static void hmp_tbstats(Monitor *mon, const QDict >> *qdict) >> >> } >> >> +static void hmp_info_tbs(Monitor *mon, const QDict *qdict) >> +{ >> + int n; >> + const char *s = NULL; >> + if (!tcg_enabled()) { >> + error_report("TB information is only available with accel=tcg"); >> + return; >> + } >> + if (!tb_ctx.tb_stats.map) { >> + error_report("no TB information recorded"); >> + return; >> + } >> + >> + n = qdict_get_try_int(qdict, "number", 10); >> + s = qdict_get_try_str(qdict, "sortedby"); >> + >> + int sortedby = 0; >> + if (s == NULL || strcmp(s, "hotness") == 0) { >> + sortedby = SORT_BY_HOTNESS; >> + } else if (strcmp(s, "hg") == 0) { >> + sortedby = SORT_BY_HG; >> + } else if (strcmp(s, "spills") == 0) { >> + sortedby = SORT_BY_SPILLS; >> + } >> + >> + dump_tbs_info(n, sortedby, true); >> +} >> + >> +static void hmp_info_tb(Monitor *mon, const QDict *qdict) >> +{ >> + const int id = qdict_get_int(qdict, "id"); >> + const char *flags = qdict_get_try_str(qdict, "flags"); >> + int mask; >> + >> + if (!tcg_enabled()) { >> + error_report("TB information is only available with accel=tcg"); >> + return; >> + } >> + >> + mask = flags ? qemu_str_to_log_mask(flags) : CPU_LOG_TB_IN_ASM; >> + >> + if (!mask) { >> + help_cmd(mon, "log"); >> + return; >> + } >> + >> + dump_tb_info(id, mask, true); >> +} >> + >> +static void hmp_info_coverset(Monitor *mon, const QDict *qdict) >> +{ >> + int n; >> + if (!tcg_enabled()) { >> + error_report("TB information is only available with accel=tcg"); >> + return; >> + } >> + if (!tb_stats_collection_enabled()) { >> + error_report("TB information not being recorded"); >> + return; >> + } >> + >> + n = qdict_get_try_int(qdict, "number", 90); >> + >> + if (n < 0 || n > 100) { >> + error_report("Coverset percentage should be between 0 and 100"); >> + return; >> + } >> + >> + dump_coverset_info(n, true); >> +} >> + >> static void hmp_info_jit(Monitor *mon, const QDict *qdict) >> { >> if (!tcg_enabled()) { >> diff --git a/util/log.c b/util/log.c >> index 8109d19afb..7bfcbe3703 100644 >> --- a/util/log.c >> +++ b/util/log.c >> @@ -19,6 +19,7 @@ >> >> #include "qemu/osdep.h" >> #include "qemu/log.h" >> +#include "qemu/qemu-print.h" >> #include "qemu/range.h" >> #include "qemu/error-report.h" >> #include "qapi/error.h" >> @@ -31,25 +32,34 @@ int qemu_loglevel; >> static int log_append = 0; >> static GArray *debug_regions; >> int32_t max_num_hot_tbs_to_dump; >> +static bool to_monitor; >> >> /* Return the number of characters emitted. */ >> int qemu_log(const char *fmt, ...) >> { >> int ret = 0; >> - if (qemu_logfile) { >> - va_list ap; >> - va_start(ap, fmt); >> + va_list ap; >> + va_start(ap, fmt); >> + >> + if (to_monitor) { >> + ret = qemu_vprintf(fmt, ap); >> + } else if (qemu_logfile) { >> ret = vfprintf(qemu_logfile, fmt, ap); >> - va_end(ap); >> + } >> + va_end(ap); >> >> - /* Don't pass back error results. */ >> - if (ret < 0) { >> - ret = 0; >> - } >> + /* Don't pass back error results. */ >> + if (ret < 0) { >> + ret = 0; >> } >> return ret; >> } >> >> +void qemu_log_to_monitor(bool enable) >> +{ >> + to_monitor = enable; >> +} >> + >> static bool log_uses_own_buffers; >> >> /* enable or disable low levels log */ -- Alex Bennée