Add the main vCPU execution loop for MSHV using the MSHV_RUN_VP ioctl. A translate_gva() hypercall wrapper is implemented. The execution loop handles guest entry and VM exits. Handlers for memory read/write, PIO and MMIO are provided, and the exit events are dispatched to them.
In case of MMIO the i386 instruction decoder/emulator is invoked to perform the operation in user space. Signed-off-by: Magnus Kulke <magnusku...@linux.microsoft.com> --- target/i386/mshv/mshv-cpu.c | 554 ++++++++++++++++++++++++++++++++++-- 1 file changed, 524 insertions(+), 30 deletions(-) diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c index fdc7e5e019..27c6cd6138 100644 --- a/target/i386/mshv/mshv-cpu.c +++ b/target/i386/mshv/mshv-cpu.c @@ -21,6 +21,7 @@ #include "qemu/typedefs.h" #include "system/mshv.h" +#include "system/address-spaces.h" #include "hw/hyperv/linux-mshv.h" #include "hw/hyperv/hvhdk_mini.h" #include "hw/hyperv/hvgdk.h" @@ -145,6 +146,34 @@ static void remove_cpu_guard(int cpu_fd) } } +static int translate_gva(int cpu_fd, uint64_t gva, uint64_t *gpa, + uint64_t flags) +{ + int ret; + union hv_translate_gva_result result = { 0 }; + + *gpa = 0; + mshv_translate_gva args = { + .gva = gva, + .flags = flags, + .gpa = (__u64 *)gpa, + .result = &result, + }; + + ret = ioctl(cpu_fd, MSHV_TRANSLATE_GVA, &args); + if (ret < 0) { + error_report("failed to invoke gpa->gva translation"); + return -errno; + } + if (result.result_code != HV_TRANSLATE_GVA_SUCCESS) { + error_report("failed to translate gva (" TARGET_FMT_lx ") to gpa", gva); + return -1; + + } + + return 0; +} + int mshv_set_generic_regs(int cpu_fd, hv_register_assoc *assocs, size_t n_regs) { struct mshv_vp_registers input = { @@ -1027,10 +1056,503 @@ void mshv_arch_amend_proc_features( features->access_guest_idle_reg = 1; } +static int set_memory_info(const struct hyperv_message *msg, + struct hv_x64_memory_intercept_message *info) +{ + if (msg->header.message_type != HVMSG_GPA_INTERCEPT + && msg->header.message_type != HVMSG_UNMAPPED_GPA + && msg->header.message_type != HVMSG_UNACCEPTED_GPA) { + error_report("invalid message type"); + return -1; + } + memcpy(info, msg->payload, sizeof(*info)); + + return 0; +} + +static int emulate_instruction(CPUState *cpu, + const 
uint8_t *insn_bytes, size_t insn_len, + uint64_t gva, uint64_t gpa) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + struct x86_decode decode = { 0 }; + int ret; + int cpu_fd = mshv_vcpufd(cpu); + QemuMutex *guard; + x86_insn_stream stream = { .bytes = insn_bytes, .len = insn_len }; + + guard = g_hash_table_lookup(cpu_guards, GUINT_TO_POINTER(cpu_fd)); + if (!guard) { + error_report("failed to get cpu guard"); + return -1; + } + + WITH_QEMU_LOCK_GUARD(guard) { + ret = mshv_load_regs(cpu); + if (ret < 0) { + error_report("failed to load registers"); + return -1; + } + + decode_instruction_stream(env, &decode, &stream); + exec_instruction(env, &decode); + + ret = mshv_store_regs(cpu); + if (ret < 0) { + error_report("failed to store registers"); + return -1; + } + } + + return 0; +} + +static int handle_mmio(CPUState *cpu, const struct hyperv_message *msg, + MshvVmExit *exit_reason) +{ + struct hv_x64_memory_intercept_message info = { 0 }; + size_t insn_len; + uint8_t access_type; + uint8_t *instruction_bytes; + int ret; + + ret = set_memory_info(msg, &info); + if (ret < 0) { + error_report("failed to convert message to memory info"); + return -1; + } + insn_len = info.instruction_byte_count; + access_type = info.header.intercept_access_type; + + if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_EXECUTE) { + error_report("invalid intercept access type: execute"); + return -1; + } + + if (insn_len > 16) { + error_report("invalid mmio instruction length: %zu", insn_len); + return -1; + } + + if (insn_len == 0) { + warn_report("mmio instruction buffer empty"); + } + + instruction_bytes = info.instruction_bytes; + + ret = emulate_instruction(cpu, instruction_bytes, insn_len, + info.guest_virtual_address, + info.guest_physical_address); + if (ret < 0) { + error_report("failed to emulate mmio"); + return -1; + } + + *exit_reason = MshvVmExitIgnore; + + return 0; +} + +static int handle_unmapped_mem(int vm_fd, CPUState *cpu, + const struct 
hyperv_message *msg, + MshvVmExit *exit_reason) +{ + struct hv_x64_memory_intercept_message info = { 0 }; + int ret; + + ret = set_memory_info(msg, &info); + if (ret < 0) { + error_report("failed to convert message to memory info"); + return -1; + } + + return handle_mmio(cpu, msg, exit_reason); +} + +static int set_ioport_info(const struct hyperv_message *msg, + hv_x64_io_port_intercept_message *info) +{ + if (msg->header.message_type != HVMSG_X64_IO_PORT_INTERCEPT) { + error_report("Invalid message type"); + return -1; + } + memcpy(info, msg->payload, sizeof(*info)); + + return 0; +} + +typedef struct X64Registers { + const uint32_t *names; + const uint64_t *values; + uintptr_t count; +} X64Registers; + +static int set_x64_registers(int cpu_fd, const X64Registers *regs) +{ + size_t n_regs = regs->count; + struct hv_register_assoc *assocs; + + assocs = g_new0(hv_register_assoc, n_regs); + for (size_t i = 0; i < n_regs; i++) { + assocs[i].name = regs->names[i]; + assocs[i].value.reg64 = regs->values[i]; + } + int ret; + + ret = mshv_set_generic_regs(cpu_fd, assocs, n_regs); + g_free(assocs); + if (ret < 0) { + error_report("failed to set x64 registers"); + return -1; + } + + return 0; +} + +static inline MemTxAttrs get_mem_attrs(bool is_secure_mode) +{ + MemTxAttrs memattr = {0}; + memattr.secure = is_secure_mode; + return memattr; +} + +static void pio_read(uint64_t port, uint8_t *data, uintptr_t size, + bool is_secure_mode) +{ + int ret = 0; + MemTxAttrs memattr = get_mem_attrs(is_secure_mode); + ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size, + false); + if (ret != MEMTX_OK) { + error_report("Failed to read from port %lx: %d", port, ret); + abort(); + } +} + +static int pio_write(uint64_t port, const uint8_t *data, uintptr_t size, + bool is_secure_mode) +{ + int ret = 0; + MemTxAttrs memattr = get_mem_attrs(is_secure_mode); + ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size, + true); + return ret; +} + 
+static int handle_pio_non_str(const CPUState *cpu, + hv_x64_io_port_intercept_message *info) { + size_t len = info->access_info.access_size; + uint8_t access_type = info->header.intercept_access_type; + int ret; + uint32_t val, eax; + const uint32_t eax_mask = 0xffffffffu >> (32 - len * 8); + size_t insn_len; + uint64_t rip, rax; + uint32_t reg_names[2]; + uint64_t reg_values[2]; + struct X64Registers x64_regs = { 0 }; + uint16_t port = info->port_number; + int cpu_fd = mshv_vcpufd(cpu); + + if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) { + union { + uint32_t u32; + uint8_t bytes[4]; + } conv; + + /* convert the first 4 bytes of rax to bytes */ + conv.u32 = (uint32_t)info->rax; + /* secure mode is set to false */ + ret = pio_write(port, conv.bytes, len, false); + if (ret < 0) { + error_report("Failed to write to io port"); + return -1; + } + } else { + uint8_t data[4] = { 0 }; + /* secure mode is set to false */ + pio_read(info->port_number, data, len, false); + + /* Preserve high bits in EAX, but clear out high bits in RAX */ + val = *(uint32_t *)data; + eax = (((uint32_t)info->rax) & ~eax_mask) | (val & eax_mask); + info->rax = (uint64_t)eax; + } + + insn_len = info->header.instruction_length; + + /* Advance RIP and update RAX */ + rip = info->header.rip + insn_len; + rax = info->rax; + + reg_names[0] = HV_X64_REGISTER_RIP; + reg_values[0] = rip; + reg_names[1] = HV_X64_REGISTER_RAX; + reg_values[1] = rax; + + x64_regs.names = reg_names; + x64_regs.values = reg_values; + x64_regs.count = 2; + + ret = set_x64_registers(cpu_fd, &x64_regs); + if (ret < 0) { + error_report("Failed to set x64 registers"); + return -1; + } + + cpu->accel->dirty = false; + + return 0; +} + +static int fetch_guest_state(CPUState *cpu) +{ + int ret; + + ret = mshv_get_standard_regs(cpu); + if (ret < 0) { + error_report("Failed to get standard registers"); + return -1; + } + + ret = mshv_get_special_regs(cpu); + if (ret < 0) { + error_report("Failed to get special registers"); + 
return -1; + } + + return 0; +} + +static int read_memory(int cpu_fd, uint64_t initial_gva, uint64_t initial_gpa, + uint64_t gva, uint8_t *data, size_t len) +{ + int ret; + uint64_t gpa, flags; + + if (gva == initial_gva) { + gpa = initial_gpa; + } else { + flags = HV_TRANSLATE_GVA_VALIDATE_READ; + ret = translate_gva(cpu_fd, gva, &gpa, flags); + if (ret < 0) { + return -1; + } + + ret = mshv_guest_mem_read(gpa, data, len, false, false); + if (ret < 0) { + error_report("failed to read guest mem"); + return -1; + } + } + + return 0; +} + +static int write_memory(int cpu_fd, uint64_t initial_gva, uint64_t initial_gpa, + uint64_t gva, const uint8_t *data, size_t len) +{ + int ret; + uint64_t gpa, flags; + + if (gva == initial_gva) { + gpa = initial_gpa; + } else { + flags = HV_TRANSLATE_GVA_VALIDATE_WRITE; + ret = translate_gva(cpu_fd, gva, &gpa, flags); + if (ret < 0) { + error_report("failed to translate gva to gpa"); + return -1; + } + } + ret = mshv_guest_mem_write(gpa, data, len, false); + if (ret != MEMTX_OK) { + error_report("failed to write to mmio"); + return -1; + } + + return 0; +} + +static int handle_pio_str_write(CPUState *cpu, + hv_x64_io_port_intercept_message *info, + size_t repeat, uint16_t port, + bool direction_flag) +{ + int ret; + uint64_t src; + uint8_t data[4] = { 0 }; + size_t len = info->access_info.access_size; + int cpu_fd = mshv_vcpufd(cpu); + + src = linear_addr(cpu, info->rsi, R_DS); + + for (size_t i = 0; i < repeat; i++) { + ret = read_memory(cpu_fd, 0, 0, src, data, len); + if (ret < 0) { + error_report("Failed to read memory"); + return -1; + } + ret = pio_write(port, data, len, false); + if (ret < 0) { + error_report("Failed to write to io port"); + return -1; + } + src += direction_flag ? -len : len; + info->rsi += direction_flag ? 
-len : len; + } + + return 0; +} + +static int handle_pio_str_read(CPUState *cpu, + hv_x64_io_port_intercept_message *info, + size_t repeat, uint16_t port, + bool direction_flag) +{ + int ret; + uint64_t dst; + size_t len = info->access_info.access_size; + uint8_t data[4] = { 0 }; + int cpu_fd = mshv_vcpufd(cpu); + + dst = linear_addr(cpu, info->rdi, R_ES); + + for (size_t i = 0; i < repeat; i++) { + pio_read(port, data, len, false); + + ret = write_memory(cpu_fd, 0, 0, dst, data, len); + if (ret < 0) { + error_report("Failed to write memory"); + return -1; + } + dst += direction_flag ? -len : len; + info->rdi += direction_flag ? -len : len; + } + + return 0; +} + +static int handle_pio_str(CPUState *cpu, + hv_x64_io_port_intercept_message *info) +{ + uint8_t access_type = info->header.intercept_access_type; + uint16_t port = info->port_number; + bool repop = info->access_info.rep_prefix == 1; + size_t repeat = repop ? info->rcx : 1; + size_t insn_len = info->header.instruction_length; + bool direction_flag; + uint32_t reg_names[3]; + uint64_t reg_values[3]; + int ret; + struct X64Registers x64_regs = { 0 }; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + int cpu_fd = mshv_vcpufd(cpu); + + ret = fetch_guest_state(cpu); + if (ret < 0) { + error_report("Failed to fetch guest state"); + return -1; + } + + direction_flag = (env->eflags & DF) != 0; + + if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) { + ret = handle_pio_str_write(cpu, info, repeat, port, direction_flag); + if (ret < 0) { + error_report("Failed to handle pio str write"); + return -1; + } + reg_names[0] = HV_X64_REGISTER_RSI; + reg_values[0] = info->rsi; + } else { + ret = handle_pio_str_read(cpu, info, repeat, port, direction_flag); + reg_names[0] = HV_X64_REGISTER_RDI; + reg_values[0] = info->rdi; + } + + reg_names[1] = HV_X64_REGISTER_RIP; + reg_values[1] = info->header.rip + insn_len; + reg_names[2] = HV_X64_REGISTER_RAX; + reg_values[2] = info->rax; + + x64_regs.names = 
reg_names; + x64_regs.values = reg_values; + x64_regs.count = 2; + + ret = set_x64_registers(cpu_fd, &x64_regs); + if (ret < 0) { + error_report("Failed to set x64 registers"); + return -1; + } + + cpu->accel->dirty = false; + + return 0; +} + +static int handle_pio(CPUState *cpu, const struct hyperv_message *msg) +{ + struct hv_x64_io_port_intercept_message info = { 0 }; + int ret; + + ret = set_ioport_info(msg, &info); + if (ret < 0) { + error_report("Failed to convert message to ioport info"); + return -1; + } + + if (info.access_info.string_op) { + return handle_pio_str(cpu, &info); + } + + return handle_pio_non_str(cpu, &info); +} + int mshv_run_vcpu(int vm_fd, CPUState *cpu, hv_message *msg, MshvVmExit *exit) { - error_report("unimplemented"); - abort(); + int ret; + hv_message exit_msg = { 0 }; + enum MshvVmExit exit_reason; + int cpu_fd = mshv_vcpufd(cpu); + + ret = ioctl(cpu_fd, MSHV_RUN_VP, &exit_msg); + if (ret < 0) { + return MshvVmExitShutdown; + } + + switch (exit_msg.header.message_type) { + case HVMSG_UNRECOVERABLE_EXCEPTION: + *msg = exit_msg; + return MshvVmExitShutdown; + case HVMSG_UNMAPPED_GPA: + ret = handle_unmapped_mem(vm_fd, cpu, &exit_msg, &exit_reason); + if (ret < 0) { + error_report("failed to handle unmapped memory"); + return -1; + } + return exit_reason; + case HVMSG_GPA_INTERCEPT: + ret = handle_mmio(cpu, &exit_msg, &exit_reason); + if (ret < 0) { + error_report("failed to handle mmio"); + return -1; + } + return exit_reason; + case HVMSG_X64_IO_PORT_INTERCEPT: + ret = handle_pio(cpu, &exit_msg); + if (ret < 0) { + return MshvVmExitSpecial; + } + return MshvVmExitIgnore; + default: + msg = &exit_msg; + } + + *exit = MshvVmExitIgnore; + return 0; } void mshv_remove_vcpu(int vm_fd, int cpu_fd) @@ -1061,34 +1583,6 @@ int mshv_create_vcpu(int vm_fd, uint8_t vp_index, int *cpu_fd) return 0; } -static int translate_gva(int cpu_fd, uint64_t gva, uint64_t *gpa, - uint64_t flags) -{ - int ret; - union hv_translate_gva_result result = { 0 }; 
- - *gpa = 0; - mshv_translate_gva args = { - .gva = gva, - .flags = flags, - .gpa = (__u64 *)gpa, - .result = &result, - }; - - ret = ioctl(cpu_fd, MSHV_TRANSLATE_GVA, &args); - if (ret < 0) { - error_report("failed to invoke gpa->gva translation"); - return -errno; - } - if (result.result_code != HV_TRANSLATE_GVA_SUCCESS) { - error_report("failed to translate gva (" TARGET_FMT_lx ") to gpa", gva); - return -1; - - } - - return 0; -} - static int guest_mem_read_with_gva(const CPUState *cpu, uint64_t gva, uint8_t *data, uintptr_t size, bool fetch_instruction) -- 2.34.1