On October 9, 2025 07:50:18 UTC, Paolo Bonzini <[email protected]> wrote:
>From: Magnus Kulke <[email protected]>
>
>Add the main vCPU execution loop for MSHV using the MSHV_RUN_VP ioctl.
>
>The execution loop handles guest entry and VM exits. There are handlers for
>memory r/w, PIO and MMIO to which the exit events are dispatched.
>
>In case of MMIO the i386 instruction decoder/emulator is invoked to
>perform the operation in user space.
>
>Signed-off-by: Magnus Kulke <[email protected]>
>Link: https://lore.kernel.org/r/[email protected]
>Signed-off-by: Paolo Bonzini <[email protected]>
>---
> target/i386/mshv/mshv-cpu.c | 444 +++++++++++++++++++++++++++++++++++-
> 1 file changed, 442 insertions(+), 2 deletions(-)
>
>diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c
>index 33a3ce8b110..7edc032cea3 100644
>--- a/target/i386/mshv/mshv-cpu.c
>+++ b/target/i386/mshv/mshv-cpu.c
>@@ -1082,10 +1082,450 @@ void mshv_arch_amend_proc_features(
>     features->access_guest_idle_reg = 1;
> }
>
>+static int set_memory_info(const struct hyperv_message *msg,
>+                           struct hv_x64_memory_intercept_message *info)
>+{
>+    if (msg->header.message_type != HVMSG_GPA_INTERCEPT
>+        && msg->header.message_type != HVMSG_UNMAPPED_GPA
>+        && msg->header.message_type != HVMSG_UNACCEPTED_GPA) {
>+        error_report("invalid message type");
>+        return -1;
>+    }
>+    memcpy(info, msg->payload, sizeof(*info));
>+
>+    return 0;
>+}
>+
>+static int emulate_instruction(CPUState *cpu,
>+                               const uint8_t *insn_bytes, size_t insn_len,
>+                               uint64_t gva, uint64_t gpa)
>+{
>+    X86CPU *x86_cpu = X86_CPU(cpu);
>+    CPUX86State *env = &x86_cpu->env;
>+    struct x86_decode decode = { 0 };
>+    int ret;
>+    x86_insn_stream stream = { .bytes = insn_bytes, .len = insn_len };
>+
>+    ret = mshv_load_regs(cpu);
>+    if (ret < 0) {
>+        error_report("failed to load registers");
>+        return -1;
>+    }
>+
>+    decode_instruction_stream(env, &decode, &stream);
>+    exec_instruction(env, &decode);
>+
>+    ret = mshv_store_regs(cpu);
>+    if (ret < 0) {
>+        error_report("failed to store registers");
>+        return -1;
>+    }
>+
>+    return 0;
>+}
>+
>+static int handle_mmio(CPUState *cpu, const struct hyperv_message *msg,
>+                       MshvVmExit *exit_reason)
>+{
>+    struct hv_x64_memory_intercept_message info = { 0 };
>+    size_t insn_len;
>+    uint8_t access_type;
>+    uint8_t *instruction_bytes;
>+    int ret;
>+
>+    ret = set_memory_info(msg, &info);
>+    if (ret < 0) {
>+        error_report("failed to convert message to memory info");
>+        return -1;
>+    }
>+    insn_len = info.instruction_byte_count;
>+    access_type = info.header.intercept_access_type;
>+
>+    if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_EXECUTE) {
>+        error_report("invalid intercept access type: execute");
>+        return -1;
>+    }
>+
>+    if (insn_len > 16) {
>+        error_report("invalid mmio instruction length: %zu", insn_len);
>+        return -1;
>+    }
>+
>+    trace_mshv_handle_mmio(info.guest_virtual_address,
>+                           info.guest_physical_address,
>+                           info.instruction_byte_count, access_type);
>+
>+    instruction_bytes = info.instruction_bytes;
>+
>+    ret = emulate_instruction(cpu, instruction_bytes, insn_len,
>+                              info.guest_virtual_address,
>+                              info.guest_physical_address);
>+    if (ret < 0) {
>+        error_report("failed to emulate mmio");
>+        return -1;
>+    }
>+
>+    *exit_reason = MshvVmExitIgnore;
>+
>+    return 0;
>+}
>+
>+static int set_ioport_info(const struct hyperv_message *msg,
>+                           hv_x64_io_port_intercept_message *info)
>+{
>+    if (msg->header.message_type != HVMSG_X64_IO_PORT_INTERCEPT) {
>+        error_report("Invalid message type");
>+        return -1;
>+    }
>+    memcpy(info, msg->payload, sizeof(*info));
>+
>+    return 0;
>+}
>+
>+static int set_x64_registers(const CPUState *cpu, const uint32_t *names,
>+                             const uint64_t *values)
>+{
>+
>+    hv_register_assoc assocs[2];
>+    int ret;
>+
>+    for (size_t i = 0; i < ARRAY_SIZE(assocs); i++) {
>+        assocs[i].name = names[i];
>+        assocs[i].value.reg64 = values[i];
>+    }
>+
>+    ret = mshv_set_generic_regs(cpu, assocs, ARRAY_SIZE(assocs));
>+    if (ret < 0) {
>+        error_report("failed to set x64 registers");
>+        return -1;
>+    }
>+
>+    return 0;
>+}
>+
>+static inline MemTxAttrs get_mem_attrs(bool is_secure_mode)
>+{
>+    MemTxAttrs memattr = {0};
>+    memattr.secure = is_secure_mode;
>+    return memattr;
>+}
>+
>+static void pio_read(uint64_t port, uint8_t *data, uintptr_t size,
>+                     bool is_secure_mode)
>+{
>+    int ret = 0;
>+    MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
>+    ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size,
>+                           false);
>+    if (ret != MEMTX_OK) {
>+        error_report("Failed to read from port %lx: %d", port, ret);
>+        abort();
>+    }
>+}
>+
>+static int pio_write(uint64_t port, const uint8_t *data, uintptr_t size,
>+                     bool is_secure_mode)
>+{
>+    int ret = 0;
>+    MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
>+    ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size,
>+                           true);
>+    return ret;
>+}
>+
>+static int handle_pio_non_str(const CPUState *cpu,
>+                              hv_x64_io_port_intercept_message *info)
>+{
>+    size_t len = info->access_info.access_size;
>+    uint8_t access_type = info->header.intercept_access_type;
>+    int ret;
>+    uint32_t val, eax;
>+    const uint32_t eax_mask = 0xffffffffu >> (32 - len * 8);
>+    size_t insn_len;
>+    uint64_t rip, rax;
>+    uint32_t reg_names[2];
>+    uint64_t reg_values[2];
>+    uint16_t port = info->port_number;
>+
>+    if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) {
>+        union {
>+            uint32_t u32;
>+            uint8_t bytes[4];
>+        } conv;
>+
>+        /* convert the first 4 bytes of rax to bytes */
>+        conv.u32 = (uint32_t)info->rax;
>+        /* secure mode is set to false */
>+        ret = pio_write(port, conv.bytes, len, false);
>+        if (ret < 0) {
>+            error_report("Failed to write to io port");
>+            return -1;
>+        }
>+    } else {
>+        uint8_t data[4] = { 0 };
>+        /* secure mode is set to false */
>+        pio_read(info->port_number, data, len, false);
>+
>+        /* Preserve high bits in EAX, but clear out high bits in RAX */
>+        val = *(uint32_t *)data;
>+        eax = (((uint32_t)info->rax) & ~eax_mask) | (val & eax_mask);
>+        info->rax = (uint64_t)eax;
>+    }
>+
>+    insn_len = info->header.instruction_length;
>+
>+    /* Advance RIP and update RAX */
>+    rip = info->header.rip + insn_len;
>+    rax = info->rax;
>+
>+    reg_names[0] = HV_X64_REGISTER_RIP;
>+    reg_values[0] = rip;
>+    reg_names[1] = HV_X64_REGISTER_RAX;
>+    reg_values[1] = rax;
>+
>+    ret = set_x64_registers(cpu, reg_names, reg_values);
>+    if (ret < 0) {
>+        error_report("Failed to set x64 registers");
>+        return -1;
>+    }
>+
>+    cpu->accel->dirty = false;
>+
>+    return 0;
>+}
>+
>+static int fetch_guest_state(CPUState *cpu)
>+{
>+    int ret;
>+
>+    ret = mshv_get_standard_regs(cpu);
>+    if (ret < 0) {
>+        error_report("Failed to get standard registers");
>+        return -1;
>+    }
>+
>+    ret = mshv_get_special_regs(cpu);
>+    if (ret < 0) {
>+        error_report("Failed to get special registers");
>+        return -1;
>+    }
>+
>+    return 0;
>+}
>+
>+static int read_memory(const CPUState *cpu, uint64_t initial_gva,
>+                       uint64_t initial_gpa, uint64_t gva, uint8_t *data,
>+                       size_t len)
>+{
>+    int ret;
>+    uint64_t gpa, flags;
>+
>+    if (gva == initial_gva) {
>+        gpa = initial_gpa;

This assignment is never read, and this branch leaves `data` untouched...

>+    } else {
>+        flags = HV_TRANSLATE_GVA_VALIDATE_READ;
>+        ret = translate_gva(cpu, gva, &gpa, flags);
>+        if (ret < 0) {
>+            return -1;
>+        }
>+

while this block:

>+        ret = mshv_guest_mem_read(gpa, data, len, false, false);
>+        if (ret < 0) {
>+            error_report("failed to read guest mem");
>+            return -1;
>+        }

is only executed in the else branch, which is inconsistent with write_memory(). Is that intended? If so, do we really need the unused assignment above?
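
If the read is meant to mirror write_memory(), I would have expected the
mshv_guest_mem_read() call to sit outside the if/else, roughly like this
(untested sketch, only to illustrate the shape; the error message text is
borrowed from write_memory()):

static int read_memory(const CPUState *cpu, uint64_t initial_gva,
                       uint64_t initial_gpa, uint64_t gva, uint8_t *data,
                       size_t len)
{
    int ret;
    uint64_t gpa, flags;

    if (gva == initial_gva) {
        /* translation is already known to the caller */
        gpa = initial_gpa;
    } else {
        flags = HV_TRANSLATE_GVA_VALIDATE_READ;
        ret = translate_gva(cpu, gva, &gpa, flags);
        if (ret < 0) {
            error_report("failed to translate gva to gpa");
            return -1;
        }
    }

    /* perform the read unconditionally, as write_memory() does for the write */
    ret = mshv_guest_mem_read(gpa, data, len, false, false);
    if (ret < 0) {
        error_report("failed to read guest mem");
        return -1;
    }

    return 0;
}

That would make the gpa assignment meaningful and fill `data` in both
branches. Alternatively, if the fast path is not wanted, dropping the
initial_gva/initial_gpa parameters together with the dead assignment
would work just as well.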
Best regards,
Bernhard

>+    }
>+
>+    return 0;
>+}
>+
>+static int write_memory(const CPUState *cpu, uint64_t initial_gva,
>+                        uint64_t initial_gpa, uint64_t gva, const uint8_t *data,
>+                        size_t len)
>+{
>+    int ret;
>+    uint64_t gpa, flags;
>+
>+    if (gva == initial_gva) {
>+        gpa = initial_gpa;
>+    } else {
>+        flags = HV_TRANSLATE_GVA_VALIDATE_WRITE;
>+        ret = translate_gva(cpu, gva, &gpa, flags);
>+        if (ret < 0) {
>+            error_report("failed to translate gva to gpa");
>+            return -1;
>+        }
>+    }
>+    ret = mshv_guest_mem_write(gpa, data, len, false);
>+    if (ret != MEMTX_OK) {
>+        error_report("failed to write to mmio");
>+        return -1;
>+    }
>+
>+    return 0;
>+}
>+
>+static int handle_pio_str_write(CPUState *cpu,
>+                                hv_x64_io_port_intercept_message *info,
>+                                size_t repeat, uint16_t port,
>+                                bool direction_flag)
>+{
>+    int ret;
>+    uint64_t src;
>+    uint8_t data[4] = { 0 };
>+    size_t len = info->access_info.access_size;
>+
>+    src = linear_addr(cpu, info->rsi, R_DS);
>+
>+    for (size_t i = 0; i < repeat; i++) {
>+        ret = read_memory(cpu, 0, 0, src, data, len);
>+        if (ret < 0) {
>+            error_report("Failed to read memory");
>+            return -1;
>+        }
>+        ret = pio_write(port, data, len, false);
>+        if (ret < 0) {
>+            error_report("Failed to write to io port");
>+            return -1;
>+        }
>+        src += direction_flag ? -len : len;
>+        info->rsi += direction_flag ? -len : len;
>+    }
>+
>+    return 0;
>+}
>+
>+static int handle_pio_str_read(CPUState *cpu,
>+                               hv_x64_io_port_intercept_message *info,
>+                               size_t repeat, uint16_t port,
>+                               bool direction_flag)
>+{
>+    int ret;
>+    uint64_t dst;
>+    size_t len = info->access_info.access_size;
>+    uint8_t data[4] = { 0 };
>+
>+    dst = linear_addr(cpu, info->rdi, R_ES);
>+
>+    for (size_t i = 0; i < repeat; i++) {
>+        pio_read(port, data, len, false);
>+
>+        ret = write_memory(cpu, 0, 0, dst, data, len);
>+        if (ret < 0) {
>+            error_report("Failed to write memory");
>+            return -1;
>+        }
>+        dst += direction_flag ? -len : len;
>+        info->rdi += direction_flag ? -len : len;
>+    }
>+
>+    return 0;
>+}
>+
>+static int handle_pio_str(CPUState *cpu, hv_x64_io_port_intercept_message *info)
>+{
>+    uint8_t access_type = info->header.intercept_access_type;
>+    uint16_t port = info->port_number;
>+    bool repop = info->access_info.rep_prefix == 1;
>+    size_t repeat = repop ? info->rcx : 1;
>+    size_t insn_len = info->header.instruction_length;
>+    bool direction_flag;
>+    uint32_t reg_names[3];
>+    uint64_t reg_values[3];
>+    int ret;
>+    X86CPU *x86_cpu = X86_CPU(cpu);
>+    CPUX86State *env = &x86_cpu->env;
>+
>+    ret = fetch_guest_state(cpu);
>+    if (ret < 0) {
>+        error_report("Failed to fetch guest state");
>+        return -1;
>+    }
>+
>+    direction_flag = (env->eflags & DESC_E_MASK) != 0;
>+
>+    if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) {
>+        ret = handle_pio_str_write(cpu, info, repeat, port, direction_flag);
>+        if (ret < 0) {
>+            error_report("Failed to handle pio str write");
>+            return -1;
>+        }
>+        reg_names[0] = HV_X64_REGISTER_RSI;
>+        reg_values[0] = info->rsi;
>+    } else {
>+        ret = handle_pio_str_read(cpu, info, repeat, port, direction_flag);
>+        reg_names[0] = HV_X64_REGISTER_RDI;
>+        reg_values[0] = info->rdi;
>+    }
>+
>+    reg_names[1] = HV_X64_REGISTER_RIP;
>+    reg_values[1] = info->header.rip + insn_len;
>+    reg_names[2] = HV_X64_REGISTER_RAX;
>+    reg_values[2] = info->rax;
>+
>+    ret = set_x64_registers(cpu, reg_names, reg_values);
>+    if (ret < 0) {
>+        error_report("Failed to set x64 registers");
>+        return -1;
>+    }
>+
>+    cpu->accel->dirty = false;
>+
>+    return 0;
>+}
>+
>+static int handle_pio(CPUState *cpu, const struct hyperv_message *msg)
>+{
>+    struct hv_x64_io_port_intercept_message info = { 0 };
>+    int ret;
>+
>+    ret = set_ioport_info(msg, &info);
>+    if (ret < 0) {
>+        error_report("Failed to convert message to ioport info");
>+        return -1;
>+    }
>+
>+    if (info.access_info.string_op) {
>+        return handle_pio_str(cpu, &info);
>+    }
>+
>+    return handle_pio_non_str(cpu, &info);
>+}
>+
> int mshv_run_vcpu(int vm_fd, CPUState *cpu, hv_message *msg, MshvVmExit *exit)
> {
>-    error_report("unimplemented");
>-    abort();
>+    int ret;
>+    enum MshvVmExit exit_reason;
>+    int cpu_fd = mshv_vcpufd(cpu);
>+
>+    ret = ioctl(cpu_fd, MSHV_RUN_VP, msg);
>+    if (ret < 0) {
>+        return MshvVmExitShutdown;
>+    }
>+
>+    switch (msg->header.message_type) {
>+    case HVMSG_UNRECOVERABLE_EXCEPTION:
>+        return MshvVmExitShutdown;
>+    case HVMSG_UNMAPPED_GPA:
>+    case HVMSG_GPA_INTERCEPT:
>+        ret = handle_mmio(cpu, msg, &exit_reason);
>+        if (ret < 0) {
>+            error_report("failed to handle mmio");
>+            return -1;
>+        }
>+        return exit_reason;
>+    case HVMSG_X64_IO_PORT_INTERCEPT:
>+        ret = handle_pio(cpu, msg);
>+        if (ret < 0) {
>+            return MshvVmExitSpecial;
>+        }
>+        return MshvVmExitIgnore;
>+    default:
>+        break;
>+    }
>+
>+    *exit = MshvVmExitIgnore;
>+    return 0;
> }
>
> void mshv_remove_vcpu(int vm_fd, int cpu_fd)
