We implement tracing, following uftrace format. Trace is flushed every 32 MB, so file operations don't impact performance at runtime.
A different trace is generated per cpu, and we ensure they have a unique name, based on vcpu_index, while keeping room for privilege level coming in next commit. Uftrace format is not officially documented, but it can be found here: https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909 Signed-off-by: Pierrick Bouvier <pierrick.bouv...@linaro.org> --- contrib/plugins/uftrace.c | 150 +++++++++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 1 deletion(-) diff --git a/contrib/plugins/uftrace.c b/contrib/plugins/uftrace.c index bb775916270..830967c215b 100644 --- a/contrib/plugins/uftrace.c +++ b/contrib/plugins/uftrace.c @@ -12,6 +12,13 @@ #include <qemu-plugin.h> #include <glib.h> #include <stdio.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <time.h> +#include <unistd.h> + +#define MiB (INT64_C(1) << 20) +#define NANOSECONDS_PER_SECOND 1000000000LL QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; @@ -24,6 +31,13 @@ typedef struct { uint64_t frame_pointer; } CallstackEntry; +typedef struct { + GArray *t; + GString *path; + GString *name; + uint32_t id; +} Trace; + typedef struct Cpu Cpu; typedef struct { @@ -34,6 +48,7 @@ typedef struct { } CpuOps; typedef struct Cpu { + Trace *trace; Callstack *cs; GByteArray *buf; CpuOps ops; @@ -44,9 +59,41 @@ typedef struct { struct qemu_plugin_register *reg_fp; } Aarch64Cpu; +typedef struct { + uint64_t timestamp; + uint64_t data; +} UftraceEntry; + +typedef enum { + UFTRACE_ENTRY, + UFTRACE_EXIT, + UFTRACE_LOST, + UFTRACE_EVENT +} UftraceRecordType; + static struct qemu_plugin_scoreboard *score; static CpuOps arch_ops; +static uint64_t gettime_ns(void) +{ +#ifdef _WIN32 + /* + * On Windows, timespec_get is available only with UCRT, but not with + * MinGW64 environment. Simplify by using only gettimeofday on this + * platform. This may result in a precision loss. + */ + struct timeval tv; + gettimeofday(&tv, NULL); + uint64_t now_ns = tv.tv_sec * NANOSECONDS_PER_SECOND + tv.tv_usec * 1000; +#else + /* We need nanosecond precision for short lived functions. */ + struct timespec ts; + timespec_get(&ts, TIME_UTC); + uint64_t now_ns = ts.tv_sec * NANOSECONDS_PER_SECOND + ts.tv_nsec; +#endif + return now_ns; +} + static Callstack *callstack_new(void) { Callstack *cs = g_new0(Callstack, 1); @@ -112,6 +159,86 @@ static CallstackEntry callstack_pop(Callstack *cs) return e; } +static Trace *trace_new(uint32_t id, GString *name) +{ + Trace *t = g_new0(Trace, 1); + t->t = g_array_new(false, false, sizeof(UftraceEntry)); + t->path = g_string_new(NULL); + g_string_append_printf(t->path, "./uftrace.data/%"PRIu32".dat", id); + t->name = g_string_new(name->str); + t->id = id; + return t; +} + +static void trace_free(Trace *t) +{ + g_assert(t->t->len == 0); + g_array_free(t->t, true); + t->t = NULL; + g_string_free(t->path, true); + t->path = NULL; + g_string_free(t->name, true); + t->name = NULL; + g_free(t); +} + +static void trace_flush(Trace *t, bool append) +{ + int create_dir = g_mkdir_with_parents("./uftrace.data", + S_IRWXU | S_IRWXG | S_IRWXO); + g_assert(create_dir == 0); + FILE *dat = fopen(t->path->str, append ? "a" : "w"); + g_assert(dat); + GArray *data = t->t; + if (data->len) { + size_t wrote = fwrite(data->data, sizeof(UftraceEntry), data->len, dat); + g_assert(wrote == data->len); + } + fclose(dat); + g_array_set_size(data, 0); +} + +static void trace_add_entry(Trace *t, uint64_t timestamp, uint64_t pc, + size_t depth, UftraceRecordType type) +{ + /* https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909 */ + const uint64_t record_magic = 0x5; + uint64_t data = type | record_magic << 3; + data += depth << 6; + data += pc << 16; + UftraceEntry e = {.timestamp = timestamp, .data = data}; + g_array_append_val(t->t, e); + if (t->t->len * sizeof(UftraceEntry) > 32 * MiB) { + trace_flush(t, true); + } +} + +static void trace_enter_function(Trace *t, uint64_t timestamp, + uint64_t pc, size_t depth) +{ + trace_add_entry(t, timestamp, pc, depth, UFTRACE_ENTRY); +} + +static void trace_exit_function(Trace *t, uint64_t timestamp, + uint64_t pc, size_t depth) +{ + trace_add_entry(t, timestamp, pc, depth, UFTRACE_EXIT); +} + +static void trace_enter_stack(Trace *t, Callstack *cs, uint64_t timestamp) +{ + for (size_t depth = 1; depth <= callstack_depth(cs); ++depth) { + trace_enter_function(t, timestamp, callstack_at(cs, depth)->pc, depth); + } +} + +static void trace_exit_stack(Trace *t, Callstack *cs, uint64_t timestamp) +{ + for (size_t depth = callstack_depth(cs); depth > 0; --depth) { + trace_exit_function(t, timestamp, callstack_at(cs, depth)->pc, depth); + } +} + static uint64_t cpu_read_register64(Cpu *cpu, struct qemu_plugin_register *reg) { GByteArray *buf = cpu->buf; @@ -223,7 +350,9 @@ static void track_callstack(unsigned int cpu_index, void *udata) { uint64_t pc = (uintptr_t) udata; Cpu *cpu = qemu_plugin_scoreboard_find(score, cpu_index); + uint64_t timestamp = gettime_ns(); Callstack *cs = cpu->cs; + Trace *t = cpu->trace; uint64_t fp = cpu->ops.get_frame_pointer(cpu); if (!fp && callstack_empty(cs)) { @@ -232,6 +361,7 @@ static void track_callstack(unsigned int cpu_index, void *udata) * long as a proper call does not happen. */ callstack_push(cs, (CallstackEntry){.frame_pointer = fp, .pc = pc}); + trace_enter_function(t, timestamp, pc, callstack_depth(cs)); return; } @@ -244,7 +374,8 @@ static void track_callstack(unsigned int cpu_index, void *udata) CallstackEntry caller = callstack_caller(cs); if (fp == caller.frame_pointer) { /* return */ - callstack_pop(cs); + CallstackEntry e = callstack_pop(cs); + trace_exit_function(t, timestamp, e.pc, callstack_depth(cs)); return; } @@ -252,12 +383,16 @@ static void track_callstack(unsigned int cpu_index, void *udata) if (caller_fp == top.frame_pointer) { /* call */ callstack_push(cs, (CallstackEntry){.frame_pointer = fp, .pc = pc}); + trace_enter_function(t, timestamp, pc, callstack_depth(cs)); return; } /* discontinuity, exit current stack and unwind new one */ + trace_exit_stack(t, cs, timestamp); callstack_clear(cs); + cpu_unwind_stack(cpu, fp, pc); + trace_enter_stack(t, cs, timestamp); } static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) @@ -297,6 +432,16 @@ static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index) cpu->ops.init(cpu); cpu->buf = g_byte_array_new(); + g_assert(vcpu_index < UINT32_MAX / 100); + /* trace_id is: cpu_number * 100 */ + uint32_t trace_id = (vcpu_index + 1) * 100; + + g_autoptr(GString) trace_name = g_string_new(NULL); + g_string_append_printf(trace_name, "cpu%u", vcpu_index); + cpu->trace = trace_new(trace_id, trace_name); + /* create/truncate trace file */ + trace_flush(cpu->trace, false); + cpu->cs = callstack_new(); } @@ -305,6 +450,7 @@ static void vcpu_end(unsigned int vcpu_index) Cpu *cpu = qemu_plugin_scoreboard_find(score, vcpu_index); g_byte_array_free(cpu->buf, true); + trace_free(cpu->trace); callstack_free(cpu->cs); memset(cpu, 0, sizeof(Cpu)); } @@ -312,6 +458,8 @@ static void vcpu_end(unsigned int vcpu_index) static void at_exit(qemu_plugin_id_t id, void *data) { for (size_t i = 0; i < qemu_plugin_num_vcpus(); ++i) { + Cpu *cpu = qemu_plugin_scoreboard_find(score, i); + trace_flush(cpu->trace, true); vcpu_end(i); } -- 2.47.2