Hi Pierrick,

Thanks again for your help.

> That's a good starting point.
> 
> > Although gva2gpa usually only works on either the user virt address or the 
> > kernel one, perhaps depending on what context the cpu is running in?
> 
> 
> I'm not familiar with our monitor command gva2gpa, but I guess it's
> relying on current page table set. Kernel and user space may have
> different ones (and additional ones depending on privilege level and
> architecture). To be able to write to user space, the kernel must still map
> this address in its own page table, so there should still be a correct
> virtual -> physical mapping from the kernel context.
>
> I'm not sure how those details are handled in the Linux kernel, but I know
> that kernel space is mapped to a specific partition of the address
> space, and user space to the rest. So user address space is accessible
> to the kernel (but not the opposite, for obvious reasons).
> Before the Meltdown exploit, it used to be the same page table (but with the
> U bit set to 0 on kernel entries). To mitigate it, there are now distinct
> page tables (KPTI) [1][2], but I don't think it's enabled by default
> because of the cost related to syscalls (flushing the TLB completely).
> 
> [1] https://www.kernel.org/doc/html/next/x86/pti.html
> [2] https://en.wikipedia.org/wiki/Kernel_page-table_isolation
> 
> There is no single, absolute answer to "Which physical address
> matches this virtual one?" throughout execution. It varies with the
> current context.

I read the intro to KPTI, and it seems to be mostly about not mapping the
kernel while running in user space; if I interpreted it correctly, in kernel
space (where I do my logging) the page table should still include both
mappings.
However, I checked with the monitor commands that when I'm in kernel context
(EL1), the user virtual address I just logged is reported as unmapped by
gva2gpa.
Of course a user page may be paged out, but I don't think that should happen
so quickly. I see that the code used by gva2gpa
(arm_cpu_get_phys_page_attrs_debug, if I traced it correctly) looks up an MMU
index based on the exception level, so that may have something to do with it,
but I have not had enough time to look into it in more detail.
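
For concreteness, this is the kind of monitor sequence I mean (the addresses
below are placeholders, not values from my actual trace):

  (qemu) cpu 0                        # select the vCPU that executed the copy
  (qemu) info registers               # PSTATE confirms we are currently at EL1
  (qemu) gva2gpa 0x0000aaaadeadb000   # user VA from my trace; reported unmapped here
  (qemu) xp /4gx 0x40001000           # once a GPA is known, inspect the physical page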

>
> Do you see any store at all (even with a different address)?

Yes, I see many stores too, just not to the addresses I expect.
In other examples I see only stores to the user buffer address but no loads
from the corresponding kernel buffer.
I just never see both in an interleaved manner, as should happen according to
the ARM memcpy assembly code used for the buffer copies in the kernel.
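
For reference, this is roughly the kind of offline filter I have in mind to
make the (missing) interleaving visible; the two physical address ranges are
command-line arguments, and the record layout just mirrors the plugin's
LogRecord struct, so it has to be read on the same host/ABI that wrote it
(a sketch, not a finished tool):

/* Print only the records that touch one of two physical ranges (kernel
 * buffer vs user buffer), so an interleaved load/store pattern from the
 * kernel memcpy would show up as alternating lines. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct LogRecord {
  uint64_t insn_count;
  char store;
  uint64_t address;
} LogRecord;

int main(int argc, char **argv) {
  if (argc != 6) {
    fprintf(stderr, "usage: %s LOG KBUF_START KBUF_END UBUF_START UBUF_END\n",
            argv[0]);
    return 1;
  }
  uint64_t ks = strtoull(argv[2], NULL, 0), ke = strtoull(argv[3], NULL, 0);
  uint64_t us = strtoull(argv[4], NULL, 0), ue = strtoull(argv[5], NULL, 0);
  FILE *f = fopen(argv[1], "rb");
  if (!f) {
    perror("fopen");
    return 1;
  }
  LogRecord r;
  while (fread(&r, sizeof(r), 1, f) == 1) {
    int in_k = r.address >= ks && r.address < ke;
    int in_u = r.address >= us && r.address < ue;
    if (in_k || in_u) {
      printf("%" PRIu64 " %s %s 0x%" PRIx64 "\n", r.insn_count,
             r.store ? "store" : "load ", in_k ? "kbuf" : "ubuf", r.address);
    }
  }
  fclose(f);
  return 0;
}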

> 
> If you don't see any store, the most obvious idea I have is to check if
> you use QEMU_PLUGIN_MEM_RW, and not only QEMU_PLUGIN_MEM_R, when
> registering the plugin memory callback.
> 
> From QEMU's perspective, the fact that those things happen from kernel or
> user space does not really matter. All it sees are loads/stores, and it
> instruments them.
> 

> If you can, please share your plugin code, or at least the memory
> callback setup to make sure everything is ok.

I added it to the end of this message.

> It's possible that what you try to observe is split amongst several
> vcpus. I'm not sure how Linux kernel deals with those copies, but if
> several kernel threads are involved, you won't see all side effects only
> by observing a single vcpu.
> 
> I would suggest to debug with -smp 1 first.

Although I log my memory trace per vCPU, I merge the per-vCPU logs offline at
the end, so the behavior I observe should include all vCPUs.
But testing with -smp 1 is definitely a good idea, just to be sure.
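
For context, the offline merge essentially just orders records by the global
insn_count the plugin stamps on them; a simplified sketch of it (not my exact
tool) looks like this:

/* Merge per-vCPU logs (exec.log.0, exec.log.1, ...) into one stream ordered
 * by the global insn_count.  Assumes the files are read on the same host/ABI
 * that wrote them (raw LogRecord structs, padding included). */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef struct LogRecord {
  uint64_t insn_count;
  char store;
  uint64_t address;
} LogRecord;

typedef struct Stream {
  FILE *f;
  LogRecord rec;
  int valid;
} Stream;

static void advance(Stream *s) {
  s->valid = fread(&s->rec, sizeof(s->rec), 1, s->f) == 1;
}

int main(int argc, char **argv) {
  Stream streams[64];
  int n = argc - 1;
  if (n < 1 || n > 64) {
    fprintf(stderr, "usage: %s exec.log.0 [exec.log.1 ...]\n", argv[0]);
    return 1;
  }
  for (int i = 0; i < n; i++) {
    streams[i].f = fopen(argv[i + 1], "rb");
    if (!streams[i].f) {
      perror(argv[i + 1]);
      return 1;
    }
    advance(&streams[i]);
  }
  for (;;) {
    int best = -1;
    for (int i = 0; i < n; i++) { /* pick the stream with the lowest count */
      if (streams[i].valid &&
          (best < 0 || streams[i].rec.insn_count < streams[best].rec.insn_count)) {
        best = i;
      }
    }
    if (best < 0) {
      break; /* all streams exhausted */
    }
    printf("%" PRIu64 " cpu%d %s 0x%" PRIx64 "\n", streams[best].rec.insn_count,
           best, streams[best].rec.store ? "store" : "load ",
           streams[best].rec.address);
    advance(&streams[best]);
  }
  for (int i = 0; i < n; i++) {
    fclose(streams[i].f);
  }
  return 0;
}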

I'll keep you updated if I find something.

Thanks and kind regards,
Yannis

------------- Code below -------------

#include <glib.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <qemu-plugin.h>

#define BUF_SIZE (1 * 1024 * 1024)

/* One trace record; written raw to the per-vCPU log (padding included). */
typedef struct LogRecord {
  uint64_t insn_count;
  char store;
  uint64_t address;
} LogRecord;

typedef struct CPU {
  char *last_exec;
  char *buf_start;
  FILE *logfile;
  LogRecord record;
} CPU;

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static CPU *cpus;
static int cpu_len;

static uint64_t insn_count;

/* Flush the per-vCPU buffer to its log file. */
static inline void buf_dump(CPU *cpu) {
  fwrite(cpu->buf_start, cpu->last_exec - cpu->buf_start, 1, cpu->logfile);
  cpu->last_exec = cpu->buf_start;
}

/* Append len bytes to the per-vCPU buffer, flushing first if it would
 * overflow. */
static inline void buf_write(CPU *cpu, void *value, size_t len) {
  if ((cpu->last_exec + len) > (cpu->buf_start + BUF_SIZE)) {
    buf_dump(cpu);
  }
  memcpy(cpu->last_exec, value, len);
  cpu->last_exec += len;
}

static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
                     uint64_t vaddr, void *udata) {
  if (!qemu_plugin_log_is_enabled()) {
    return;
  }
  CPU *c = cpus + cpu_index;
  c->record.store = qemu_plugin_mem_is_store(info);
  struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
  if (qemu_plugin_hwaddr_is_io(hwaddr)) {
    return; /* skip IO accesses: no RAM physical address to log */
  }
  uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
  c->record.address = addr;
  buf_write(c, &c->record, sizeof(LogRecord));
}

/* Stamp this vCPU's next record with a globally ordered instruction count. */
static void vcpu_insn_exec(unsigned int cpu_index, void *udata) {
  if (!qemu_plugin_log_is_enabled()) {
    return;
  }
  cpus[cpu_index].record.insn_count =
      __atomic_fetch_add(&insn_count, 1, __ATOMIC_SEQ_CST);
}

static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) {
  struct qemu_plugin_insn *insn;

  size_t n_insns = qemu_plugin_tb_n_insns(tb);
  for (size_t i = 0; i < n_insns; i++) {
    insn = qemu_plugin_tb_get_insn(tb, i);
    qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem, QEMU_PLUGIN_CB_NO_REGS,
                                     QEMU_PLUGIN_MEM_RW, NULL);
    qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
                                           QEMU_PLUGIN_CB_NO_REGS, NULL);
  }
}

static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index) {
  CPU *c;
  char filename[32];
  snprintf(filename, sizeof(filename), "exec.log.%u", vcpu_index);
  c = cpus + vcpu_index;
  c->logfile = fopen(filename, "w");
  c->buf_start = malloc(BUF_SIZE); // 1MB
  c->last_exec = c->buf_start;
}

static void plugin_exit(qemu_plugin_id_t id, void *p) {
  int i;
  for (i = 0; i < cpu_len; i++) {
    CPU *c = cpus + i;
    buf_dump(c);
    fclose(c->logfile);
    free(c->buf_start);
  }
}

QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                           const qemu_info_t *info, int argc,
                                           char **argv) {
  /* one CPU slot per possible vCPU (system emulation) */
  cpus = malloc(info->system.max_vcpus * sizeof(CPU));
  cpu_len = info->system.max_vcpus;
  /* Register init, translation block and exit callbacks */
  qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
  qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
  qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

  return 0;
}
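
In case it helps to reproduce: I build it roughly like this (names and paths
are just examples, against the qemu-plugin.h from my QEMU tree) and load it
with -plugin:

  gcc -O2 -g -shared -fPIC memlog.c -o libmemlog.so \
      $(pkg-config --cflags glib-2.0) -I$QEMU_SRC/include/qemu
  qemu-system-aarch64 ... -smp 1 -plugin ./libmemlog.so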

