On Fri, Aug 8, 2025 at 7:27 PM Pierrick Bouvier <pierrick.bouv...@linaro.org> wrote: > > On 8/8/25 1:28 AM, Manos Pitsidianakis wrote: > > On Fri, 08 Aug 2025 05:06, Pierrick Bouvier <pierrick.bouv...@linaro.org> > > wrote: > >> We define a new CpuOps structure that will be used to implement tracking > >> independently of guest architecture. > >> > >> As well, we now instrument only instructions following ones that might > >> have touch the frame pointer. > > > > s/touch/touched > > > >> > >> Signed-off-by: Pierrick Bouvier <pierrick.bouv...@linaro.org> > >> --- > >> contrib/plugins/uftrace.c | 112 ++++++++++++++++++++++++++++++++++++-- > >> 1 file changed, 108 insertions(+), 4 deletions(-) > >> > >> diff --git a/contrib/plugins/uftrace.c b/contrib/plugins/uftrace.c > >> index d60c1077496..4b1a2f38143 100644 > >> --- a/contrib/plugins/uftrace.c > >> +++ b/contrib/plugins/uftrace.c > >> @@ -11,14 +11,94 @@ > >> > >> #include <qemu-plugin.h> > >> #include <glib.h> > >> +#include <stdio.h> > >> > >> QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; > >> > >> +typedef struct Cpu Cpu; > >> + > >> +typedef struct { > >> + void (*init)(Cpu *cpu); > >> + void (*end)(Cpu *cpu); > >> + uint64_t (*get_frame_pointer)(Cpu *cpu); > >> + bool (*does_insn_modify_frame_pointer)(const char *disas); > >> +} CpuOps; > >> + > >> typedef struct Cpu { > >> GByteArray *buf; > >> + CpuOps ops; > >> + void *arch; > >> } Cpu; > >> > >> +typedef struct { > >> + struct qemu_plugin_register *reg_fp; > >> +} Aarch64Cpu; > >> + > >> static struct qemu_plugin_scoreboard *score; > >> +static CpuOps arch_ops; > >> + > >> +static uint64_t cpu_read_register64(Cpu *cpu, struct qemu_plugin_register > >> *reg) > >> +{ > >> + GByteArray *buf = cpu->buf; > >> + g_byte_array_set_size(buf, 0); > >> + size_t sz = qemu_plugin_read_register(reg, buf); > >> + g_assert(sz == 8); > >> + g_assert(buf->len == 8); > >> + return *((uint64_t *) buf->data); > >> +} > >> + > >> +static struct qemu_plugin_register *plugin_find_register(const char *name) > >> +{ > >> + g_autoptr(GArray) regs = qemu_plugin_get_registers(); > > > > Question about the plugin API and not this patch per se, if the cpu is > > in a32/thumb mode does it still return the aarch64 registers? > > > > I didn't check this part, and it's a good question though. > It would be a massive headache from plugins point of view if registers > list could vary along execution. > > >> + for (int i = 0; i < regs->len; ++i) { > >> + qemu_plugin_reg_descriptor *reg; > >> + reg = &g_array_index(regs, qemu_plugin_reg_descriptor, i); > >> + if (!strcmp(reg->name, name)) { > >> + return reg->handle; > >> + } > >> + } > >> + return NULL; > >> +} > >> + > >> +static uint64_t aarch64_get_frame_pointer(Cpu *cpu_) > >> +{ > >> + Aarch64Cpu *cpu = cpu_->arch; > >> + return cpu_read_register64(cpu_, cpu->reg_fp); > >> +} > >> + > >> +static void aarch64_init(Cpu *cpu_) > >> +{ > >> + Aarch64Cpu *cpu = g_new0(Aarch64Cpu, 1); > >> + cpu_->arch = cpu; > >> + cpu->reg_fp = plugin_find_register("x29"); > >> + if (!cpu->reg_fp) { > >> + fprintf(stderr, "uftrace plugin: frame pointer register (x29) is > >> not " > >> + "available. Please use an AArch64 cpu (or -cpu > >> max).\n"); > >> + g_abort(); > >> + } > >> +} > >> + > >> +static void aarch64_end(Cpu *cpu) > >> +{ > >> + g_free(cpu->arch); > >> +} > >> + > >> +static bool aarch64_does_insn_modify_frame_pointer(const char *disas) > >> +{ > >> + /* > >> + * Check if current instruction concerns fp register "x29". > >> + * We add a prefix space to make sure we don't match addresses dump > >> + * in disassembly. > >> + */ > >> + return strstr(disas, " x29"); > > > > Hm is the whitespace before x29 guaranteed? Neat trick otherwise. > > > > At least for aarch64 disassembler, yes, from what I saw. > Either it's the first operand, and then there is a whitespace between > instruction name and it. Or it's another operand, and we always have a > whitespace after ','. > I don't think we'll change disassembler soon, but in case this looks too > fragile, we can always simply return yes if x29 is fine. In worst case, > we match some additional instructions, but it should not have a huge > performance hit. > > >> +} > >> + > >> +static CpuOps aarch64_ops = { > >> + .init = aarch64_init, > >> + .end = aarch64_end, > >> + .get_frame_pointer = aarch64_get_frame_pointer, > >> + .does_insn_modify_frame_pointer = > >> aarch64_does_insn_modify_frame_pointer, > >> +}; > >> > >> static void track_callstack(unsigned int cpu_index, void *udata) > >> { > >> @@ -28,20 +108,36 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct > >> qemu_plugin_tb *tb) > >> { > >> size_t n_insns = qemu_plugin_tb_n_insns(tb); > >> > >> + /* > >> + * We instrument all instructions following one that might have > >> updated > >> + * the frame pointer. We always instrument first instruction in > >> block, as > >> + * last executed instruction, in previous tb, may have modified it. > > > > Modified it how? > > > > I ran an assert that latest instruction of a block never contained " > x29", and it was triggered quickly. I don't remember exactly which > instruction triggered the assert. > > >> + */ > >> + bool instrument_insn = true; > >> for (int i = 0; i < n_insns; i++) { > >> struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); > >> > >> - uintptr_t pc = qemu_plugin_insn_vaddr(insn); > >> - qemu_plugin_register_vcpu_insn_exec_cb(insn, track_callstack, > >> - QEMU_PLUGIN_CB_R_REGS, > >> - (void *) pc); > >> + if (instrument_insn) { > >> + uintptr_t pc = qemu_plugin_insn_vaddr(insn); > >> + qemu_plugin_register_vcpu_insn_exec_cb(insn, track_callstack, > >> + QEMU_PLUGIN_CB_R_REGS, > >> + (void *) pc); > >> + instrument_insn = false; > >> + } > >> > >> + char *disas = qemu_plugin_insn_disas(insn); > >> + if (arch_ops.does_insn_modify_frame_pointer(disas)) { > >> + instrument_insn = true; > >> + } > > > > So if I understand correctly you check if an instruction needs to be > > instrumented and then do it in the next forloop. This means if the last > > insn needs to be instrumented too it is not done, is that ok? > > > > Yes, that is why we always instrument the first, to catch this. > There is no (current) way to instrument *after* instruction, and > probably hard to implement, considering an instruction might generate a > fault. So the only safe choice left is to instrument the first one of > any tb. >
Fair enough, thanks for the explanation :) Reviewed-by: Manos Pitsidianakis <manos.pitsidiana...@linaro.org> > >> } > >> } > >> > >> static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index) > >> { > >> Cpu *cpu = qemu_plugin_scoreboard_find(score, vcpu_index); > >> + cpu->ops = arch_ops; > >> + > >> + cpu->ops.init(cpu); > >> cpu->buf = g_byte_array_new(); > >> } > >> > >> @@ -65,6 +161,14 @@ QEMU_PLUGIN_EXPORT int > >> qemu_plugin_install(qemu_plugin_id_t id, > >> const qemu_info_t *info, > >> int argc, char **argv) > >> { > >> + if (!strcmp(info->target_name, "aarch64")) { > >> + arch_ops = aarch64_ops; > >> + } else { > >> + fprintf(stderr, "plugin uftrace: %s target is not supported\n", > >> + info->target_name); > >> + return 1; > >> + } > >> + > >> score = qemu_plugin_scoreboard_new(sizeof(Cpu)); > >> qemu_plugin_register_vcpu_init_cb(id, vcpu_init); > >> qemu_plugin_register_atexit_cb(id, at_exit, NULL); > >> -- > >> 2.47.2 > >> > > > > LGTM overall, it makes sense. >