Am 9. Oktober 2025 07:50:18 UTC schrieb Paolo Bonzini <[email protected]>:
>From: Magnus Kulke <[email protected]>
>
>Add the main vCPU execution loop for MSHV using the MSHV_RUN_VP ioctl.
>
>The execution loop handles guest entry and VM exits. There are handlers for
>memory r/w, PIO and MMIO to which the exit events are dispatched.
>
>In case of MMIO the i386 instruction decoder/emulator is invoked to
>perform the operation in user space.
>
>Signed-off-by: Magnus Kulke <[email protected]>
>Link: 
>https://lore.kernel.org/r/[email protected]
>Signed-off-by: Paolo Bonzini <[email protected]>
>---
> target/i386/mshv/mshv-cpu.c | 444 +++++++++++++++++++++++++++++++++++-
> 1 file changed, 442 insertions(+), 2 deletions(-)
>
>diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c
>index 33a3ce8b110..7edc032cea3 100644
>--- a/target/i386/mshv/mshv-cpu.c
>+++ b/target/i386/mshv/mshv-cpu.c
>@@ -1082,10 +1082,450 @@ void mshv_arch_amend_proc_features(
>     features->access_guest_idle_reg = 1;
> }
> 
>+static int set_memory_info(const struct hyperv_message *msg,
>+                           struct hv_x64_memory_intercept_message *info)
>+{
>+    if (msg->header.message_type != HVMSG_GPA_INTERCEPT
>+            && msg->header.message_type != HVMSG_UNMAPPED_GPA
>+            && msg->header.message_type != HVMSG_UNACCEPTED_GPA) {
>+        error_report("invalid message type");
>+        return -1;
>+    }
>+    memcpy(info, msg->payload, sizeof(*info));
>+
>+    return 0;
>+}
>+
>+static int emulate_instruction(CPUState *cpu,
>+                               const uint8_t *insn_bytes, size_t insn_len,
>+                               uint64_t gva, uint64_t gpa)
>+{
>+    X86CPU *x86_cpu = X86_CPU(cpu);
>+    CPUX86State *env = &x86_cpu->env;
>+    struct x86_decode decode = { 0 };
>+    int ret;
>+    x86_insn_stream stream = { .bytes = insn_bytes, .len = insn_len };
>+
>+    ret = mshv_load_regs(cpu);
>+    if (ret < 0) {
>+        error_report("failed to load registers");
>+        return -1;
>+    }
>+
>+    decode_instruction_stream(env, &decode, &stream);
>+    exec_instruction(env, &decode);
>+
>+    ret = mshv_store_regs(cpu);
>+    if (ret < 0) {
>+        error_report("failed to store registers");
>+        return -1;
>+    }
>+
>+    return 0;
>+}
>+
>+static int handle_mmio(CPUState *cpu, const struct hyperv_message *msg,
>+                       MshvVmExit *exit_reason)
>+{
>+    struct hv_x64_memory_intercept_message info = { 0 };
>+    size_t insn_len;
>+    uint8_t access_type;
>+    uint8_t *instruction_bytes;
>+    int ret;
>+
>+    ret = set_memory_info(msg, &info);
>+    if (ret < 0) {
>+        error_report("failed to convert message to memory info");
>+        return -1;
>+    }
>+    insn_len = info.instruction_byte_count;
>+    access_type = info.header.intercept_access_type;
>+
>+    if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_EXECUTE) {
>+        error_report("invalid intercept access type: execute");
>+        return -1;
>+    }
>+
>+    if (insn_len > 16) {
>+        error_report("invalid mmio instruction length: %zu", insn_len);
>+        return -1;
>+    }
>+
>+    trace_mshv_handle_mmio(info.guest_virtual_address,
>+                           info.guest_physical_address,
>+                           info.instruction_byte_count, access_type);
>+
>+    instruction_bytes = info.instruction_bytes;
>+
>+    ret = emulate_instruction(cpu, instruction_bytes, insn_len,
>+                              info.guest_virtual_address,
>+                              info.guest_physical_address);
>+    if (ret < 0) {
>+        error_report("failed to emulate mmio");
>+        return -1;
>+    }
>+
>+    *exit_reason = MshvVmExitIgnore;
>+
>+    return 0;
>+}
>+
>+static int set_ioport_info(const struct hyperv_message *msg,
>+                           hv_x64_io_port_intercept_message *info)
>+{
>+    if (msg->header.message_type != HVMSG_X64_IO_PORT_INTERCEPT) {
>+        error_report("Invalid message type");
>+        return -1;
>+    }
>+    memcpy(info, msg->payload, sizeof(*info));
>+
>+    return 0;
>+}
>+
>+static int set_x64_registers(const CPUState *cpu, const uint32_t *names,
>+                             const uint64_t *values)
>+{
>+
>+    hv_register_assoc assocs[2];
>+    int ret;
>+
>+    for (size_t i = 0; i < ARRAY_SIZE(assocs); i++) {
>+        assocs[i].name = names[i];
>+        assocs[i].value.reg64 = values[i];
>+    }
>+
>+    ret = mshv_set_generic_regs(cpu, assocs, ARRAY_SIZE(assocs));
>+    if (ret < 0) {
>+        error_report("failed to set x64 registers");
>+        return -1;
>+    }
>+
>+    return 0;
>+}
>+
>+static inline MemTxAttrs get_mem_attrs(bool is_secure_mode)
>+{
>+    MemTxAttrs memattr = {0};
>+    memattr.secure = is_secure_mode;
>+    return memattr;
>+}
>+
>+static void pio_read(uint64_t port, uint8_t *data, uintptr_t size,
>+                     bool is_secure_mode)
>+{
>+    int ret = 0;
>+    MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
>+    ret = address_space_rw(&address_space_io, port, memattr, (void *)data, 
>size,
>+                           false);
>+    if (ret != MEMTX_OK) {
>+        error_report("Failed to read from port %lx: %d", port, ret);
>+        abort();
>+    }
>+}
>+
>+static int pio_write(uint64_t port, const uint8_t *data, uintptr_t size,
>+                     bool is_secure_mode)
>+{
>+    int ret = 0;
>+    MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
>+    ret = address_space_rw(&address_space_io, port, memattr, (void *)data, 
>size,
>+                           true);
>+    return ret;
>+}
>+
>+static int handle_pio_non_str(const CPUState *cpu,
>+                              hv_x64_io_port_intercept_message *info)
>+{
>+    size_t len = info->access_info.access_size;
>+    uint8_t access_type = info->header.intercept_access_type;
>+    int ret;
>+    uint32_t val, eax;
>+    const uint32_t eax_mask =  0xffffffffu >> (32 - len * 8);
>+    size_t insn_len;
>+    uint64_t rip, rax;
>+    uint32_t reg_names[2];
>+    uint64_t reg_values[2];
>+    uint16_t port = info->port_number;
>+
>+    if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) {
>+        union {
>+            uint32_t u32;
>+            uint8_t bytes[4];
>+        } conv;
>+
>+        /* convert the first 4 bytes of rax to bytes */
>+        conv.u32 = (uint32_t)info->rax;
>+        /* secure mode is set to false */
>+        ret = pio_write(port, conv.bytes, len, false);
>+        if (ret < 0) {
>+            error_report("Failed to write to io port");
>+            return -1;
>+        }
>+    } else {
>+        uint8_t data[4] = { 0 };
>+        /* secure mode is set to false */
>+        pio_read(info->port_number, data, len, false);
>+
>+        /* Preserve high bits in EAX, but clear out high bits in RAX */
>+        val = *(uint32_t *)data;
>+        eax = (((uint32_t)info->rax) & ~eax_mask) | (val & eax_mask);
>+        info->rax = (uint64_t)eax;
>+    }
>+
>+    insn_len = info->header.instruction_length;
>+
>+    /* Advance RIP and update RAX */
>+    rip = info->header.rip + insn_len;
>+    rax = info->rax;
>+
>+    reg_names[0] = HV_X64_REGISTER_RIP;
>+    reg_values[0] = rip;
>+    reg_names[1] = HV_X64_REGISTER_RAX;
>+    reg_values[1] = rax;
>+
>+    ret = set_x64_registers(cpu, reg_names, reg_values);
>+    if (ret < 0) {
>+        error_report("Failed to set x64 registers");
>+        return -1;
>+    }
>+
>+    cpu->accel->dirty = false;
>+
>+    return 0;
>+}
>+
>+static int fetch_guest_state(CPUState *cpu)
>+{
>+    int ret;
>+
>+    ret = mshv_get_standard_regs(cpu);
>+    if (ret < 0) {
>+        error_report("Failed to get standard registers");
>+        return -1;
>+    }
>+
>+    ret = mshv_get_special_regs(cpu);
>+    if (ret < 0) {
>+        error_report("Failed to get special registers");
>+        return -1;
>+    }
>+
>+    return 0;
>+}
>+
>+static int read_memory(const CPUState *cpu, uint64_t initial_gva,
>+                       uint64_t initial_gpa, uint64_t gva, uint8_t *data,
>+                       size_t len)
>+{
>+    int ret;
>+    uint64_t gpa, flags;
>+
>+    if (gva == initial_gva) {
>+        gpa = initial_gpa;

This assignment is never read and this branch leaves `data` untouched...

>+    } else {
>+        flags = HV_TRANSLATE_GVA_VALIDATE_READ;
>+        ret = translate_gva(cpu, gva, &gpa, flags);
>+        if (ret < 0) {
>+            return -1;
>+        }
>+

while this block:

>+        ret = mshv_guest_mem_read(gpa, data, len, false, false);
>+        if (ret < 0) {
>+            error_report("failed to read guest mem");
>+            return -1;
>+        }

is only executed in the else branch which is inconsistent to write_memory(). Is 
that intended? If so, do we really need the unused assignment above?

Best regards,
Bernhard

>+    }
>+
>+    return 0;
>+}
>+
>+static int write_memory(const CPUState *cpu, uint64_t initial_gva,
>+                        uint64_t initial_gpa, uint64_t gva, const uint8_t 
>*data,
>+                        size_t len)
>+{
>+    int ret;
>+    uint64_t gpa, flags;
>+
>+    if (gva == initial_gva) {
>+        gpa = initial_gpa;
>+    } else {
>+        flags = HV_TRANSLATE_GVA_VALIDATE_WRITE;
>+        ret = translate_gva(cpu, gva, &gpa, flags);
>+        if (ret < 0) {
>+            error_report("failed to translate gva to gpa");
>+            return -1;
>+        }
>+    }
>+    ret = mshv_guest_mem_write(gpa, data, len, false);
>+    if (ret != MEMTX_OK) {
>+        error_report("failed to write to mmio");
>+        return -1;
>+    }
>+
>+    return 0;
>+}
>+
>+static int handle_pio_str_write(CPUState *cpu,
>+                                hv_x64_io_port_intercept_message *info,
>+                                size_t repeat, uint16_t port,
>+                                bool direction_flag)
>+{
>+    int ret;
>+    uint64_t src;
>+    uint8_t data[4] = { 0 };
>+    size_t len = info->access_info.access_size;
>+
>+    src = linear_addr(cpu, info->rsi, R_DS);
>+
>+    for (size_t i = 0; i < repeat; i++) {
>+        ret = read_memory(cpu, 0, 0, src, data, len);
>+        if (ret < 0) {
>+            error_report("Failed to read memory");
>+            return -1;
>+        }
>+        ret = pio_write(port, data, len, false);
>+        if (ret < 0) {
>+            error_report("Failed to write to io port");
>+            return -1;
>+        }
>+        src += direction_flag ? -len : len;
>+        info->rsi += direction_flag ? -len : len;
>+    }
>+
>+    return 0;
>+}
>+
>+static int handle_pio_str_read(CPUState *cpu,
>+                               hv_x64_io_port_intercept_message *info,
>+                               size_t repeat, uint16_t port,
>+                               bool direction_flag)
>+{
>+    int ret;
>+    uint64_t dst;
>+    size_t len = info->access_info.access_size;
>+    uint8_t data[4] = { 0 };
>+
>+    dst = linear_addr(cpu, info->rdi, R_ES);
>+
>+    for (size_t i = 0; i < repeat; i++) {
>+        pio_read(port, data, len, false);
>+
>+        ret = write_memory(cpu, 0, 0, dst, data, len);
>+        if (ret < 0) {
>+            error_report("Failed to write memory");
>+            return -1;
>+        }
>+        dst += direction_flag ? -len : len;
>+        info->rdi += direction_flag ? -len : len;
>+    }
>+
>+    return 0;
>+}
>+
>+static int handle_pio_str(CPUState *cpu, hv_x64_io_port_intercept_message 
>*info)
>+{
>+    uint8_t access_type = info->header.intercept_access_type;
>+    uint16_t port = info->port_number;
>+    bool repop = info->access_info.rep_prefix == 1;
>+    size_t repeat = repop ? info->rcx : 1;
>+    size_t insn_len = info->header.instruction_length;
>+    bool direction_flag;
>+    uint32_t reg_names[3];
>+    uint64_t reg_values[3];
>+    int ret;
>+    X86CPU *x86_cpu = X86_CPU(cpu);
>+    CPUX86State *env = &x86_cpu->env;
>+
>+    ret = fetch_guest_state(cpu);
>+    if (ret < 0) {
>+        error_report("Failed to fetch guest state");
>+        return -1;
>+    }
>+
>+    direction_flag = (env->eflags & DESC_E_MASK) != 0;
>+
>+    if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) {
>+        ret = handle_pio_str_write(cpu, info, repeat, port, direction_flag);
>+        if (ret < 0) {
>+            error_report("Failed to handle pio str write");
>+            return -1;
>+        }
>+        reg_names[0] = HV_X64_REGISTER_RSI;
>+        reg_values[0] = info->rsi;
>+    } else {
>+        ret = handle_pio_str_read(cpu, info, repeat, port, direction_flag);
>+        reg_names[0] = HV_X64_REGISTER_RDI;
>+        reg_values[0] = info->rdi;
>+    }
>+
>+    reg_names[1] = HV_X64_REGISTER_RIP;
>+    reg_values[1] = info->header.rip + insn_len;
>+    reg_names[2] = HV_X64_REGISTER_RAX;
>+    reg_values[2] = info->rax;
>+
>+    ret = set_x64_registers(cpu, reg_names, reg_values);
>+    if (ret < 0) {
>+        error_report("Failed to set x64 registers");
>+        return -1;
>+    }
>+
>+    cpu->accel->dirty = false;
>+
>+    return 0;
>+}
>+
>+static int handle_pio(CPUState *cpu, const struct hyperv_message *msg)
>+{
>+    struct hv_x64_io_port_intercept_message info = { 0 };
>+    int ret;
>+
>+    ret = set_ioport_info(msg, &info);
>+    if (ret < 0) {
>+        error_report("Failed to convert message to ioport info");
>+        return -1;
>+    }
>+
>+    if (info.access_info.string_op) {
>+        return handle_pio_str(cpu, &info);
>+    }
>+
>+    return handle_pio_non_str(cpu, &info);
>+}
>+
> int mshv_run_vcpu(int vm_fd, CPUState *cpu, hv_message *msg, MshvVmExit *exit)
> {
>-    error_report("unimplemented");
>-    abort();
>+    int ret;
>+    enum MshvVmExit exit_reason;
>+    int cpu_fd = mshv_vcpufd(cpu);
>+
>+    ret = ioctl(cpu_fd, MSHV_RUN_VP, msg);
>+    if (ret < 0) {
>+        return MshvVmExitShutdown;
>+    }
>+
>+    switch (msg->header.message_type) {
>+    case HVMSG_UNRECOVERABLE_EXCEPTION:
>+        return MshvVmExitShutdown;
>+    case HVMSG_UNMAPPED_GPA:
>+    case HVMSG_GPA_INTERCEPT:
>+        ret = handle_mmio(cpu, msg, &exit_reason);
>+        if (ret < 0) {
>+            error_report("failed to handle mmio");
>+            return -1;
>+        }
>+        return exit_reason;
>+    case HVMSG_X64_IO_PORT_INTERCEPT:
>+        ret = handle_pio(cpu, msg);
>+        if (ret < 0) {
>+            return MshvVmExitSpecial;
>+        }
>+        return MshvVmExitIgnore;
>+    default:
>+        break;
>+    }
>+
>+    *exit = MshvVmExitIgnore;
>+    return 0;
> }
> 
> void mshv_remove_vcpu(int vm_fd, int cpu_fd)

Reply via email to