Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-28 Thread Emilio G. Cota
On Wed, Nov 28, 2018 at 12:40:23 +, Alex Bennée wrote:
> I was envisioning something more like the following so all the plugin
> gubins could be kept in the core code:
(snip)
>  static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
>  bool sctlr_b)
>  {
> -uint32_t insn = cpu_ldl_code(env, addr);
> -if (bswap_code(sctlr_b)) {
> -return bswap32(insn);
> -}
> -return insn;
> +return translator_ld32(env, addr, bswap_code(sctlr_b));
>  }
> 
>  /* Ditto, for a halfword (Thumb) instruction */
>  static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
>   bool sctlr_b)
>  {
> -uint16_t insn;
>  #ifndef CONFIG_USER_ONLY
>  /* In big-endian (BE32) mode, adjacent Thumb instructions have been 
> swapped
> within each word.  Undo that now.  */
> @@ -46,11 +40,7 @@ static inline uint16_t arm_lduw_code(CPUARMState *env, 
> target_ulong addr,
>  addr ^= 2;
>  }
>  #endif
> -insn = cpu_lduw_code(env, addr);
> -if (bswap_code(sctlr_b)) {
> -return bswap16(insn);
> -}
> -return insn;
> +return translator_ld16(env, addr, bswap_code(sctlr_b));
>  }

I like this, thanks.

However, for Thumb I think we still need to call qemu_plugin_insn_append
directly:

@@ -13304,11 +13306,16 @@ static void thumb_tr_translate_insn(DisasContextBase 
*dcbase, CPUState *cpu)
 insn = arm_lduw_code(env, dc->pc, dc->sctlr_b);
 is_16bit = thumb_insn_is_16bit(dc, insn);
 dc->pc += 2;
-if (!is_16bit) {
+if (is_16bit) {
+uint16_t insn16 = insn;
+
+qemu_plugin_insn_append(tcg_ctx->plugin_insn, &insn16, sizeof(insn16));
+} else {
 uint32_t insn2 = arm_lduw_code(env, dc->pc, dc->sctlr_b);

 insn = insn << 16 | insn2;
 dc->pc += 2;
+qemu_plugin_insn_append(tcg_ctx->plugin_insn, &insn, sizeof(insn));
 }
 
Otherwise we might mess up the contents of 32-bit insns.

Thanks,

E.



Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-28 Thread Alex Bennée


Emilio G. Cota  writes:

> On Mon, Nov 26, 2018 at 20:38:25 -0500, Emilio G. Cota wrote:
>> On Mon, Nov 26, 2018 at 11:30:25 -0800, Richard Henderson wrote:
>> > On 11/26/18 11:07 AM, Emilio G. Cota wrote:
>> > > The main reason why I added the qemu_plugin_insn_append calls
>> > > was to avoid reading the instructions twice from guest memory,
>> > > because I was worried that doing so might somehow alter the
>> > > guest's execution, e.g. what if we read a cross-page instruction,
>> > > and both pages mapped to the same TLB entry? We'd end up having
>> > > more TLB misses because instrumentation was enabled.
>> >
>> > A better solution for this, I think is to change direct calls from
>> >
>> >   cpu_ldl_code(env, pc);
>> > to
>> >   translator_ldl_code(dc_base, env, pc);
>> >
>> > instead of passing around a new argument separate from DisasContextBase?
>>
>> I think this + diff'ing pc_next should work to figure out the
>> contents and size of each instruction.
>
> I just tried doing things this way.
>
> For some targets like i386, the translator_ld* helpers work
> great; the instruction contents are copied, and through
> the helpers we get the sizes of the instructions as well.
>
> For ARM though (and maybe others, I haven't gone
> through all of them yet), arm_ldl_code does the following:
>
> /* Load an instruction and return it in the standard little-endian order */
> static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
> bool sctlr_b)
> {
> uint32_t insn = cpu_ldl_code(env, addr);
> if (bswap_code(sctlr_b)) {
> return bswap32(insn);
> }
> return insn;
> }
>
> To avoid altering the signature of .translate_insn, I've modified
> arm_ldl_code directly, as follows:
>
>  uint32_t insn = cpu_ldl_code(env, addr);
> +
>  if (bswap_code(sctlr_b)) {
> -return bswap32(insn);
> +insn = bswap32(insn);
> +}
> +if (tcg_ctx->plugin_insn) {
> +qemu_plugin_insn_append(tcg_ctx->plugin_insn, &insn, sizeof(insn));
>  }
>  return insn;
>  }
>
> (NB. tcg_ctx->plugin_insn is updated by translator_loop
> on every iteration.)
>
> Let me know if you think I should do this differently.

I was envisioning something more like the following so all the plugin
gubins could be kept in the core code:

accel/tcg/translator.c| 25 +
include/exec/translator.h | 10 ++
target/arm/arm_ldst.h | 16 +++-

modified   accel/tcg/translator.c
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
+#include "qemu/bswap.h"
 #include "cpu.h"
 #include "tcg/tcg.h"
 #include "tcg/tcg-op.h"
@@ -18,6 +19,30 @@
 #include "exec/log.h"
 #include "exec/translator.h"
 #include "exec/plugin-gen.h"
+#include "exec/cpu_ldst.h"
+
+/*
+ * Translator Code Load Helpers
+ */
+
+uint16_t translator_ld16(CPUArchState *env, target_ulong ptr, bool bswap)
+{
+uint16_t insn = cpu_lduw_code(env, ptr);
+if (bswap) {
+insn = bswap16(insn);
+}
+return insn;
+}
+
+uint32_t translator_ld32(CPUArchState *env, target_ulong ptr, bool bswap)
+{
+uint32_t insn = cpu_ldl_code(env, ptr);
+if (bswap) {
+insn = bswap32(insn);
+}
+return insn;
+}
+

 /* Pairs with tcg_clear_temp_count.
To be called by #TranslatorOps.{translate_insn,tb_stop} if
modified   include/exec/translator.h
@@ -147,4 +147,14 @@ void translator_loop(const TranslatorOps *ops, 
DisasContextBase *db,

 void translator_loop_temp_check(DisasContextBase *db);

+/*
+ * Translator Code Load Helpers
+ *
+ * These allow the core translator code to track where code is being
+ * loaded from.
+ */
+
+uint16_t translator_ld16(CPUArchState *env, target_ulong ptr, bool bswap);
+uint32_t translator_ld32(CPUArchState *env, target_ulong ptr, bool bswap);
+
 #endif  /* EXEC__TRANSLATOR_H */
modified   target/arm/arm_ldst.h
@@ -20,25 +20,19 @@
 #ifndef ARM_LDST_H
 #define ARM_LDST_H

-#include "exec/cpu_ldst.h"
-#include "qemu/bswap.h"
+#include "exec/translator.h"

 /* Load an instruction and return it in the standard little-endian order */
 static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
 bool sctlr_b)
 {
-uint32_t insn = cpu_ldl_code(env, addr);
-if (bswap_code(sctlr_b)) {
-return bswap32(insn);
-}
-return insn;
+return translator_ld32(env, addr, bswap_code(sctlr_b));
 }

 /* Ditto, for a halfword (Thumb) instruction */
 static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
  bool sctlr_b)
 {
-uint16_t insn;
 #ifndef CONFIG_USER_ONLY
 /* In big-endian (BE32) mode, adjacent Thumb instructions have been swapped
within each word.  Undo that now.  */
@@ -46,11 +40,7 @@ static inline uint16_t arm_lduw_code(CPUARMState *env, 
target_ulong addr,
 addr ^= 2;
 }
 #endif
-insn = cpu_lduw_code(env, add

Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-27 Thread Emilio G. Cota
On Tue, Nov 27, 2018 at 19:54:02 -0500, Emilio G. Cota wrote:
> To avoid altering the signature of .translate_insn, I've modified
> arm_ldl_code directly, as follows:
> 
>  uint32_t insn = cpu_ldl_code(env, addr);
> +
>  if (bswap_code(sctlr_b)) {
> -return bswap32(insn);
> +insn = bswap32(insn);
> +}
> +if (tcg_ctx->plugin_insn) {
> +qemu_plugin_insn_append(tcg_ctx->plugin_insn, &insn, sizeof(insn));
>  }
>  return insn;
>  }

Turns out it got even more complicated with thumb, since instructions
can be 16 or 32 bits.

I ended up with the appended (qemu_plugin_insn_append() returns
when the first argument is NULL).

Emilio

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 88195ab949..e6caaff976 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -38,6 +38,7 @@
 #include "trace-tcg.h"
 #include "translate-a64.h"
 #include "qemu/atomic128.h"
+#include "qemu/plugin.h"
 
 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
@@ -13321,6 +13322,7 @@ static void disas_a64_insn(CPUARMState *env, 
DisasContext *s)
 uint32_t insn;
 
 insn = arm_ldl_code(env, s->pc, s->sctlr_b);
+qemu_plugin_insn_append(tcg_ctx->plugin_insn, &insn, sizeof(insn));
 s->insn = insn;
 s->pc += 4;
 
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 7c4675ffd8..7523257b85 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -36,6 +36,7 @@
 
 #include "trace-tcg.h"
 #include "exec/log.h"
+#include "qemu/plugin.h"
 
 
 #define ENABLE_ARCH_4Tarm_dc_feature(s, ARM_FEATURE_V4T)
@@ -13234,6 +13235,7 @@ static void arm_tr_translate_insn(DisasContextBase 
*dcbase, CPUState *cpu)
 }
 
 insn = arm_ldl_code(env, dc->pc, dc->sctlr_b);
+qemu_plugin_insn_append(tcg_ctx->plugin_insn, &insn, sizeof(insn));
 dc->insn = insn;
 dc->pc += 4;
 disas_arm_insn(dc, insn);
@@ -13304,11 +13306,16 @@ static void thumb_tr_translate_insn(DisasContextBase 
*dcbase, CPUState *cpu)
 insn = arm_lduw_code(env, dc->pc, dc->sctlr_b);
 is_16bit = thumb_insn_is_16bit(dc, insn);
 dc->pc += 2;
-if (!is_16bit) {
+if (is_16bit) {
+uint16_t insn16 = insn;
+
+qemu_plugin_insn_append(tcg_ctx->plugin_insn, &insn16, sizeof(insn16));
+} else {
 uint32_t insn2 = arm_lduw_code(env, dc->pc, dc->sctlr_b);
 
 insn = insn << 16 | insn2;
 dc->pc += 2;
+qemu_plugin_insn_append(tcg_ctx->plugin_insn, &insn, sizeof(insn));
 }
 dc->insn = insn;
 



Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-27 Thread Emilio G. Cota
On Mon, Nov 26, 2018 at 20:38:25 -0500, Emilio G. Cota wrote:
> On Mon, Nov 26, 2018 at 11:30:25 -0800, Richard Henderson wrote:
> > On 11/26/18 11:07 AM, Emilio G. Cota wrote:
> > > The main reason why I added the qemu_plugin_insn_append calls
> > > was to avoid reading the instructions twice from guest memory,
> > > because I was worried that doing so might somehow alter the
> > > guest's execution, e.g. what if we read a cross-page instruction,
> > > and both pages mapped to the same TLB entry? We'd end up having
> > > more TLB misses because instrumentation was enabled.
> > 
> > A better solution for this, I think is to change direct calls from
> > 
> >   cpu_ldl_code(env, pc);
> > to
> >   translator_ldl_code(dc_base, env, pc);
> > 
> > instead of passing around a new argument separate from DisasContextBase?
> 
> I think this + diff'ing pc_next should work to figure out the
> contents and size of each instruction.

I just tried doing things this way.

For some targets like i386, the translator_ld* helpers work
great; the instruction contents are copied, and through
the helpers we get the sizes of the instructions as well.

For ARM though (and maybe others, I haven't gone
through all of them yet), arm_ldl_code does the following:

/* Load an instruction and return it in the standard little-endian order */
static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
bool sctlr_b)
{
uint32_t insn = cpu_ldl_code(env, addr);
if (bswap_code(sctlr_b)) {
return bswap32(insn);
}
return insn;
}

To avoid altering the signature of .translate_insn, I've modified
arm_ldl_code directly, as follows:

 uint32_t insn = cpu_ldl_code(env, addr);
+
 if (bswap_code(sctlr_b)) {
-return bswap32(insn);
+insn = bswap32(insn);
+}
+if (tcg_ctx->plugin_insn) {
+qemu_plugin_insn_append(tcg_ctx->plugin_insn, &insn, sizeof(insn));
 }
 return insn;
 }

(NB. tcg_ctx->plugin_insn is updated by translator_loop
on every iteration.)

Let me know if you think I should do this differently.

Thanks,

Emilio



Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-27 Thread Pavel Dovgalyuk
> From: Emilio G. Cota [mailto:c...@braap.org]
> On Mon, Nov 26, 2018 at 10:27:12 -0800, Richard Henderson wrote:
> > On 11/26/18 6:52 AM, Alex Bennée wrote:
> > > I'm not convinced this is the best way to go about it. We end up having
> > > to sprinkle the plugin calls into each decoder rather than keeping all
> > > the infrastructure in the common main loop. However the common loop will
> > > need to know the total number of bytes decoded so we could change the
> > > declaration to:
> > >
> > >   int (*translate_insn)(DisasContextBase *db, CPUState *cpu);
> > >
> > > and return the number of bytes decoded.
> >
> > Returning the number of bytes is more difficult than simply just
> >
> > old_pc = db->pc_next;
> > opc->translate_insn(db, cpu);
> > bytes = db->pc_next - old_pc;
> >
> > requiring no target changes at all.
> 
> The main reason why I added the qemu_plugin_insn_append calls
> was to avoid reading the instructions twice from guest memory,
> because I was worried that doing so might somehow alter the
> guest's execution, e.g. what if we read a cross-page instruction,
> and both pages mapped to the same TLB entry? We'd end up having
> more TLB misses because instrumentation was enabled.

In our plugins we use cpu_debug_rw function.
But I think that your example with mapping of the page and simultaneous
unmapping of the previous is impossible, because both pages should
be available to the translator for creating the TB.
The translation immediately interrupted with TLB miss and repeated
again after mapping. It means that the cross-page instruction
should not be unmapped until it completely executes.

Pavel Dovgalyuk




Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-26 Thread Emilio G. Cota
On Mon, Nov 26, 2018 at 11:30:25 -0800, Richard Henderson wrote:
> On 11/26/18 11:07 AM, Emilio G. Cota wrote:
> > The main reason why I added the qemu_plugin_insn_append calls
> > was to avoid reading the instructions twice from guest memory,
> > because I was worried that doing so might somehow alter the
> > guest's execution, e.g. what if we read a cross-page instruction,
> > and both pages mapped to the same TLB entry? We'd end up having
> > more TLB misses because instrumentation was enabled.
> 
> A better solution for this, I think is to change direct calls from
> 
>   cpu_ldl_code(env, pc);
> to
>   translator_ldl_code(dc_base, env, pc);
> 
> instead of passing around a new argument separate from DisasContextBase?

I think this + diff'ing pc_next should work to figure out the
contents and size of each instruction.

I'll do it this way in v2.

Thanks,

Emilio



Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-26 Thread Richard Henderson
On 11/26/18 11:07 AM, Emilio G. Cota wrote:
> The main reason why I added the qemu_plugin_insn_append calls
> was to avoid reading the instructions twice from guest memory,
> because I was worried that doing so might somehow alter the
> guest's execution, e.g. what if we read a cross-page instruction,
> and both pages mapped to the same TLB entry? We'd end up having
> more TLB misses because instrumentation was enabled.

A better solution for this, I think is to change direct calls from

  cpu_ldl_code(env, pc);
to
  translator_ldl_code(dc_base, env, pc);

instead of passing around a new argument separate from DisasContextBase?


r~



Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-26 Thread Richard Henderson
On 11/26/18 10:56 AM, Alex Bennée wrote:
> 
> 
> On Mon, 26 Nov 2018, 18:27 Richard Henderson   wrote:
> 
> On 11/26/18 6:52 AM, Alex Bennée wrote:
> > I'm not convinced this is the best way to go about it. We end up having
> > to sprinkle the plugin calls into each decoder rather than keeping all
> > the infrastructure in the common main loop. However the common loop will
> > need to know the total number of bytes decoded so we could change the
> > declaration to:
> >
> >   int (*translate_insn)(DisasContextBase *db, CPUState *cpu);
> >
> > and return the number of bytes decoded.
> 
> Returning the number of bytes is more difficult than simply just
> 
>     old_pc = db->pc_next;
>     opc->translate_insn(db, cpu);
>     bytes = db->pc_next - old_pc;
> 
> requiring no target changes at all.
> 
> 
> If that's always true then great, but what happens with direct branches?

pc_next is still updated by the size of the branch, not it's destination;
db->is_jmp will be != DISAS_NEXT, ending the TB.


r~




Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-26 Thread Emilio G. Cota
On Mon, Nov 26, 2018 at 10:27:12 -0800, Richard Henderson wrote:
> On 11/26/18 6:52 AM, Alex Bennée wrote:
> > I'm not convinced this is the best way to go about it. We end up having
> > to sprinkle the plugin calls into each decoder rather than keeping all
> > the infrastructure in the common main loop. However the common loop will
> > need to know the total number of bytes decoded so we could change the
> > declaration to:
> > 
> >   int (*translate_insn)(DisasContextBase *db, CPUState *cpu);
> > 
> > and return the number of bytes decoded. 
> 
> Returning the number of bytes is more difficult than simply just
> 
> old_pc = db->pc_next;
> opc->translate_insn(db, cpu);
> bytes = db->pc_next - old_pc;
> 
> requiring no target changes at all.

The main reason why I added the qemu_plugin_insn_append calls
was to avoid reading the instructions twice from guest memory,
because I was worried that doing so might somehow alter the
guest's execution, e.g. what if we read a cross-page instruction,
and both pages mapped to the same TLB entry? We'd end up having
more TLB misses because instrumentation was enabled.

If you think that's not really a concern, we could just re-do
the reads in the translator loop and get the size as above.

Thanks,

Emilio



Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-26 Thread Alex Bennée
On Mon, 26 Nov 2018, 18:27 Richard Henderson  On 11/26/18 6:52 AM, Alex Bennée wrote:
> > I'm not convinced this is the best way to go about it. We end up having
> > to sprinkle the plugin calls into each decoder rather than keeping all
> > the infrastructure in the common main loop. However the common loop will
> > need to know the total number of bytes decoded so we could change the
> > declaration to:
> >
> >   int (*translate_insn)(DisasContextBase *db, CPUState *cpu);
> >
> > and return the number of bytes decoded.
>
> Returning the number of bytes is more difficult than simply just
>
> old_pc = db->pc_next;
> opc->translate_insn(db, cpu);
> bytes = db->pc_next - old_pc;
>
> requiring no target changes at all.
>

If that's always true then great, but what happens with direct branches?

>
> r~
>


Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-26 Thread Richard Henderson
On 11/26/18 6:52 AM, Alex Bennée wrote:
> I'm not convinced this is the best way to go about it. We end up having
> to sprinkle the plugin calls into each decoder rather than keeping all
> the infrastructure in the common main loop. However the common loop will
> need to know the total number of bytes decoded so we could change the
> declaration to:
> 
>   int (*translate_insn)(DisasContextBase *db, CPUState *cpu);
> 
> and return the number of bytes decoded. 

Returning the number of bytes is more difficult than simply just

old_pc = db->pc_next;
opc->translate_insn(db, cpu);
bytes = db->pc_next - old_pc;

requiring no target changes at all.


r~



Re: [Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-11-26 Thread Alex Bennée


Emilio G. Cota  writes:

> Signed-off-by: Emilio G. Cota 
> ---
>  include/exec/translator.h   | 4 +++-
>  accel/tcg/translator.c  | 4 ++--
>  target/alpha/translate.c| 3 ++-
>  target/arm/translate-a64.c  | 3 ++-
>  target/arm/translate.c  | 6 --
>  target/hppa/translate.c | 3 ++-
>  target/i386/translate.c | 3 ++-
>  target/m68k/translate.c | 3 ++-
>  target/mips/translate.c | 3 ++-
>  target/openrisc/translate.c | 3 ++-
>  target/ppc/translate.c  | 3 ++-
>  target/riscv/translate.c| 3 ++-
>  target/s390x/translate.c| 3 ++-
>  target/sh4/translate.c  | 3 ++-
>  target/sparc/translate.c| 3 ++-
>  target/xtensa/translate.c   | 3 ++-
>  16 files changed, 35 insertions(+), 18 deletions(-)
>
> diff --git a/include/exec/translator.h b/include/exec/translator.h
> index 71e7b2c347..a28147b3dd 100644
> --- a/include/exec/translator.h
> +++ b/include/exec/translator.h
> @@ -20,6 +20,7 @@
>
>
>  #include "exec/exec-all.h"
> +#include "qemu/plugin.h"
>  #include "tcg/tcg.h"
>
>
> @@ -112,7 +113,8 @@ typedef struct TranslatorOps {
>  void (*insn_start)(DisasContextBase *db, CPUState *cpu);
>  bool (*breakpoint_check)(DisasContextBase *db, CPUState *cpu,
>   const CPUBreakpoint *bp);
> -void (*translate_insn)(DisasContextBase *db, CPUState *cpu);
> +void (*translate_insn)(DisasContextBase *db, CPUState *cpu,
> +   struct qemu_plugin_insn *plugin_insn);

I'm not convinced this is the best way to go about it. We end up having
to sprinkle the plugin calls into each decoder rather than keeping all
the infrastructure in the common main loop. However the common loop will
need to know the total number of bytes decoded so we could change the
declaration to:

  int (*translate_insn)(DisasContextBase *db, CPUState *cpu);

and return the number of bytes decoded. It would mean a minor
inefficiency in having to re-read the instruction bytes into a buffer in
preparation for passing to the plugin but it would all at least be in
one place.

--
Alex Bennée



[Qemu-devel] [RFC 23/48] translator: add plugin_insn argument to translate_insn

2018-10-25 Thread Emilio G. Cota
Signed-off-by: Emilio G. Cota 
---
 include/exec/translator.h   | 4 +++-
 accel/tcg/translator.c  | 4 ++--
 target/alpha/translate.c| 3 ++-
 target/arm/translate-a64.c  | 3 ++-
 target/arm/translate.c  | 6 --
 target/hppa/translate.c | 3 ++-
 target/i386/translate.c | 3 ++-
 target/m68k/translate.c | 3 ++-
 target/mips/translate.c | 3 ++-
 target/openrisc/translate.c | 3 ++-
 target/ppc/translate.c  | 3 ++-
 target/riscv/translate.c| 3 ++-
 target/s390x/translate.c| 3 ++-
 target/sh4/translate.c  | 3 ++-
 target/sparc/translate.c| 3 ++-
 target/xtensa/translate.c   | 3 ++-
 16 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/include/exec/translator.h b/include/exec/translator.h
index 71e7b2c347..a28147b3dd 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -20,6 +20,7 @@
 
 
 #include "exec/exec-all.h"
+#include "qemu/plugin.h"
 #include "tcg/tcg.h"
 
 
@@ -112,7 +113,8 @@ typedef struct TranslatorOps {
 void (*insn_start)(DisasContextBase *db, CPUState *cpu);
 bool (*breakpoint_check)(DisasContextBase *db, CPUState *cpu,
  const CPUBreakpoint *bp);
-void (*translate_insn)(DisasContextBase *db, CPUState *cpu);
+void (*translate_insn)(DisasContextBase *db, CPUState *cpu,
+   struct qemu_plugin_insn *plugin_insn);
 void (*tb_stop)(DisasContextBase *db, CPUState *cpu);
 void (*disas_log)(const DisasContextBase *db, CPUState *cpu);
 } TranslatorOps;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index afd0a49ea6..8591e4b72a 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -101,10 +101,10 @@ void translator_loop(const TranslatorOps *ops, 
DisasContextBase *db,
 && (tb_cflags(db->tb) & CF_LAST_IO)) {
 /* Accept I/O on the last instruction.  */
 gen_io_start();
-ops->translate_insn(db, cpu);
+ops->translate_insn(db, cpu, NULL);
 gen_io_end();
 } else {
-ops->translate_insn(db, cpu);
+ops->translate_insn(db, cpu, NULL);
 }
 
 /* Stop translation if translate_insn so indicated.  */
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index 25cd95931d..72a302e102 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -2983,7 +2983,8 @@ static bool alpha_tr_breakpoint_check(DisasContextBase 
*dcbase, CPUState *cpu,
 return true;
 }
 
-static void alpha_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
+static void alpha_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu,
+struct qemu_plugin_insn *plugin_insn)
 {
 DisasContext *ctx = container_of(dcbase, DisasContext, base);
 CPUAlphaState *env = cpu->env_ptr;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index bb9c4d8ac7..8b1e20dd59 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -13937,7 +13937,8 @@ static bool 
aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
 return true;
 }
 
-static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
+static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu,
+  struct qemu_plugin_insn *plugin_insn)
 {
 DisasContext *dc = container_of(dcbase, DisasContext, base);
 CPUARMState *env = cpu->env_ptr;
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 1b4bacb522..2fd32a2684 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -12787,7 +12787,8 @@ static void arm_post_translate_insn(DisasContext *dc)
 translator_loop_temp_check(&dc->base);
 }
 
-static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
+static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu,
+  struct qemu_plugin_insn *plugin_insn)
 {
 DisasContext *dc = container_of(dcbase, DisasContext, base);
 CPUARMState *env = cpu->env_ptr;
@@ -12854,7 +12855,8 @@ static bool thumb_insn_is_unconditional(DisasContext 
*s, uint32_t insn)
 return false;
 }
 
-static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
+static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu,
+struct qemu_plugin_insn *plugin_insn)
 {
 DisasContext *dc = container_of(dcbase, DisasContext, base);
 CPUARMState *env = cpu->env_ptr;
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index df9179e70f..6c2a7fbc46 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -4737,7 +4737,8 @@ static bool hppa_tr_breakpoint_check(DisasContextBase 
*dcbase, CPUState *cs,
 return true;
 }
 
-static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
+static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState