On Feb 16 10:34, Alex Bennée wrote: > > Aaron Lindsay <aa...@os.amperecomputing.com> writes: > > > On Feb 12 16:04, Alex Bennée wrote: > >> Do you see two stores or one store? I think I got the sense the wrong > >> way around because the store is instrumented before the mmu code, > >> hence should be skipped on a re-instrumented block. > > > > I only see one store between the instruction callback for the store and > > the instruction callback for the subsequent instruction. > > I've posted: > > Subject: [PATCH v3 00/23] plugins/next pre-PR (hwprofile, regression > fixes, icount count fix) > Date: Sat, 13 Feb 2021 13:03:02 +0000 > Message-Id: <20210213130325.14781-1-alex.ben...@linaro.org> > > which I think solves it. Could you have a look?
Just did, and it looks good to me - Thanks! -Aaron > > > > -Aaron > > > >> On Fri, 12 Feb 2021 at 15:41, Aaron Lindsay > >> <aa...@os.amperecomputing.com> wrote: > >> > > >> > On Feb 12 14:43, Alex Bennée wrote: > >> > > Aaron Lindsay <aa...@os.amperecomputing.com> writes: > >> > > > On Feb 10 22:10, Alex Bennée wrote: > >> > > >> When icount is enabled and we recompile an MMIO access we end up > >> > > >> double counting the instruction execution. To avoid this we > >> > > >> introduce > >> > > >> the CF_NOINSTR cflag which disables instrumentation for the next TB. > >> > > >> As this is part of the hashed compile flags we will only execute the > >> > > >> generated TB while coming out of a cpu_io_recompile. > >> > > > > >> > > > Unfortunately this patch works a little too well! > >> > > > > >> > > > With this change, the memory access callbacks registered via > >> > > > `qemu_plugin_register_vcpu_mem_cb()` are never called for the > >> > > > re-translated instruction making the IO access, since we've disabled > >> > > > all > >> > > > instrumentation. > >> > > > > >> > > > Is it possible to selectively disable only instruction callbacks > >> > > > using > >> > > > this mechanism, while still allowing others that would not yet have > >> > > > been > >> > > > called for the re-translated instruction? > >> > > > >> > > Can you try the following fugly patch on top of this series: > >> > > >> > This patch does allow me to successfully observe memory callbacks for > >> > stores in this case. It seems from looking at the patch that you > >> > intentionally only allowed memory callbacks for stores in this case, and > >> > I still don't see any callbacks for loads. 
> >> > > >> > -Aaron > >> > > >> > > --8<---------------cut here---------------start------------->8--- > >> > > diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h > >> > > index 4834a9e2f4..b1b72b5d90 100644 > >> > > --- a/include/exec/plugin-gen.h > >> > > +++ b/include/exec/plugin-gen.h > >> > > @@ -19,7 +19,7 @@ struct DisasContextBase; > >> > > > >> > > #ifdef CONFIG_PLUGIN > >> > > > >> > > -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb); > >> > > +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, > >> > > bool supress); > >> > > void plugin_gen_tb_end(CPUState *cpu); > >> > > void plugin_gen_insn_start(CPUState *cpu, const struct > >> > > DisasContextBase *db); > >> > > void plugin_gen_insn_end(void); > >> > > @@ -41,7 +41,7 @@ static inline void plugin_insn_append(const void > >> > > *from, size_t size) > >> > > #else /* !CONFIG_PLUGIN */ > >> > > > >> > > static inline > >> > > -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb) > >> > > +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, > >> > > bool supress) > >> > > { > >> > > return false; > >> > > } > >> > > diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h > >> > > index 841deed79c..2a26a2277f 100644 > >> > > --- a/include/qemu/plugin.h > >> > > +++ b/include/qemu/plugin.h > >> > > @@ -92,6 +92,7 @@ struct qemu_plugin_dyn_cb { > >> > > }; > >> > > }; > >> > > > >> > > +/* Internal context for instrumenting an instruction */ > >> > > struct qemu_plugin_insn { > >> > > GByteArray *data; > >> > > uint64_t vaddr; > >> > > @@ -99,6 +100,7 @@ struct qemu_plugin_insn { > >> > > GArray *cbs[PLUGIN_N_CB_TYPES][PLUGIN_N_CB_SUBTYPES]; > >> > > bool calls_helpers; > >> > > bool mem_helper; > >> > > + bool store_only; > >> > > }; > >> > > > >> > > /* > >> > > @@ -128,6 +130,7 @@ static inline struct qemu_plugin_insn > >> > > *qemu_plugin_insn_alloc(void) > >> > > return insn; > >> > > } > >> > > > >> > > +/* Internal 
context for this TranslationBlock */ > >> > > struct qemu_plugin_tb { > >> > > GPtrArray *insns; > >> > > size_t n; > >> > > @@ -135,6 +138,7 @@ struct qemu_plugin_tb { > >> > > uint64_t vaddr2; > >> > > void *haddr1; > >> > > void *haddr2; > >> > > + bool store_only; > >> > > GArray *cbs[PLUGIN_N_CB_SUBTYPES]; > >> > > }; > >> > > > >> > > diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c > >> > > index 8a1bb801e0..137b91282e 100644 > >> > > --- a/accel/tcg/plugin-gen.c > >> > > +++ b/accel/tcg/plugin-gen.c > >> > > @@ -842,7 +842,7 @@ static void plugin_gen_inject(const struct > >> > > qemu_plugin_tb *plugin_tb) > >> > > pr_ops(); > >> > > } > >> > > > >> > > -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb) > >> > > +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, > >> > > bool store_only) > >> > > { > >> > > struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; > >> > > bool ret = false; > >> > > @@ -855,6 +855,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const > >> > > TranslationBlock *tb) > >> > > ptb->vaddr2 = -1; > >> > > get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1); > >> > > ptb->haddr2 = NULL; > >> > > + ptb->store_only = store_only; > >> > > > >> > > plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB); > >> > > } > >> > > diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c > >> > > index 14d1ea795d..082f2c8ee1 100644 > >> > > --- a/accel/tcg/translator.c > >> > > +++ b/accel/tcg/translator.c > >> > > @@ -58,7 +58,7 @@ void translator_loop(const TranslatorOps *ops, > >> > > DisasContextBase *db, > >> > > ops->tb_start(db, cpu); > >> > > tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ > >> > > > >> > > - plugin_enabled = !(tb_cflags(db->tb) & CF_NOINSTR) && > >> > > plugin_gen_tb_start(cpu, tb); > >> > > + plugin_enabled = plugin_gen_tb_start(cpu, tb, tb_cflags(db->tb) & > >> > > CF_NOINSTR); > >> > > > >> > > while (true) { > >> > > db->num_insns++; > >> > > @@ -100,6 +100,8 
@@ void translator_loop(const TranslatorOps *ops, > >> > > DisasContextBase *db, > >> > > gen_io_start(); > >> > > ops->translate_insn(db, cpu); > >> > > } else { > >> > > + /* we should only see NOINSTR for io_recompile */ > >> > > + g_assert(!(tb_cflags(db->tb) & CF_NOINSTR)); > >> > > ops->translate_insn(db, cpu); > >> > > } > >> > > > >> > > diff --git a/plugins/api.c b/plugins/api.c > >> > > index 5dc8e6f934..ac8475707d 100644 > >> > > --- a/plugins/api.c > >> > > +++ b/plugins/api.c > >> > > @@ -84,15 +84,19 @@ void qemu_plugin_register_vcpu_tb_exec_cb(struct > >> > > qemu_plugin_tb *tb, > >> > > enum qemu_plugin_cb_flags > >> > > flags, > >> > > void *udata) > >> > > { > >> > > - plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR], > >> > > - cb, flags, udata); > >> > > + if (!tb->store_only) { > >> > > + plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR], > >> > > + cb, flags, udata); > >> > > + } > >> > > } > >> > > > >> > > void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb > >> > > *tb, > >> > > enum qemu_plugin_op op, > >> > > void *ptr, uint64_t imm) > >> > > { > >> > > - plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, > >> > > imm); > >> > > + if (!tb->store_only) { > >> > > + plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, > >> > > ptr, imm); > >> > > + } > >> > > } > >> > > > >> > > void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn > >> > > *insn, > >> > > @@ -100,16 +104,20 @@ void > >> > > qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn, > >> > > enum qemu_plugin_cb_flags > >> > > flags, > >> > > void *udata) > >> > > { > >> > > - > >> > > plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR], > >> > > - cb, flags, udata); > >> > > + if (!insn->store_only) { > >> > > + > >> > > plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR], > >> > > + cb, flags, udata); > >> > > + } > >> > > } > >> > > > >> > > 
void qemu_plugin_register_vcpu_insn_exec_inline(struct > >> > > qemu_plugin_insn *insn, > >> > > enum qemu_plugin_op > >> > > op, > >> > > void *ptr, uint64_t > >> > > imm) > >> > > { > >> > > - > >> > > plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE], > >> > > - 0, op, ptr, imm); > >> > > + if (!insn->store_only) { > >> > > + > >> > > plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE], > >> > > + 0, op, ptr, imm); > >> > > + } > >> > > } > >> > > > >> > > > >> > > @@ -120,8 +128,13 @@ void qemu_plugin_register_vcpu_mem_cb(struct > >> > > qemu_plugin_insn *insn, > >> > > enum qemu_plugin_mem_rw rw, > >> > > void *udata) > >> > > { > >> > > - > >> > > plugin_register_vcpu_mem_cb(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR], > >> > > - cb, flags, rw, udata); > >> > > + if (insn->store_only && (rw & QEMU_PLUGIN_MEM_W)) { > >> > > + > >> > > plugin_register_vcpu_mem_cb(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR], > >> > > + cb, flags, QEMU_PLUGIN_MEM_W, > >> > > udata); > >> > > + } else { > >> > > + > >> > > plugin_register_vcpu_mem_cb(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR], > >> > > + cb, flags, rw, udata); > >> > > + } > >> > > } > >> > > > >> > > void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn > >> > > *insn, > >> > > @@ -129,8 +142,10 @@ void qemu_plugin_register_vcpu_mem_inline(struct > >> > > qemu_plugin_insn *insn, > >> > > enum qemu_plugin_op op, > >> > > void *ptr, > >> > > uint64_t imm) > >> > > { > >> > > - > >> > > plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE], > >> > > - rw, op, ptr, imm); > >> > > + if (!insn->store_only) { > >> > > + > >> > > plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE], > >> > > + rw, op, ptr, imm); > >> > > + } > >> > > } > >> > > > >> > > void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id, > >> > > @@ -181,10 +196,13 @@ uint64_t qemu_plugin_tb_vaddr(const struct > >> > > qemu_plugin_tb *tb) > >> > > struct 
qemu_plugin_insn * > >> > > qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx) > >> > > { > >> > > + struct qemu_plugin_insn *insn; > >> > > if (unlikely(idx >= tb->n)) { > >> > > return NULL; > >> > > } > >> > > - return g_ptr_array_index(tb->insns, idx); > >> > > + insn = g_ptr_array_index(tb->insns, idx); > >> > > + insn->store_only = tb->store_only; > >> > > + return insn; > >> > > } > >> > > > >> > > /* > >> > > --8<---------------cut here---------------end--------------->8--- > >> > > > >> > > -- > >> > > Alex Bennée > >> > >> > >> > >> -- > >> Alex Bennée > >> KVM/QEMU Hacker for Linaro > > > -- > Alex Bennée