From: David Woodhouse <d...@amazon.co.uk> This finally comes with a mechanism for actually injecting events into the guest vCPU, with all the atomic-test-and-set that's involved in setting the bit in the shinfo, then the index in the vcpu_info, and injecting either the lapic vector as MSI, or letting KVM inject the bare vector.
Signed-off-by: David Woodhouse <d...@amazon.co.uk> --- hw/i386/kvm/xen_evtchn.c | 198 ++++++++++++++++++++++++++++++++++++++ hw/i386/kvm/xen_evtchn.h | 2 + include/sysemu/kvm_xen.h | 18 ++++ target/i386/kvm/xen-emu.c | 72 ++++++++++++++ 4 files changed, 290 insertions(+) create mode 100644 include/sysemu/kvm_xen.h diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index d4008e7ee1..50adef0864 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -21,10 +21,13 @@ #include "hw/sysbus.h" #include "hw/xen/xen.h" + #include "xen_evtchn.h" #include "xen_overlay.h" #include "sysemu/kvm.h" +#include "sysemu/kvm_xen.h" + #include <linux/kvm.h> #include "standard-headers/xen/memory.h" @@ -39,6 +42,41 @@ typedef struct XenEvtchnPort { uint16_t type_val; /* pirq# / virq# / remote port according to type */ } XenEvtchnPort; +/* 32-bit compatibility definitions, also used natively in 32-bit build */ +struct compat_arch_vcpu_info { + unsigned int cr2; + unsigned int pad[5]; +}; + +struct compat_vcpu_info { + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + uint16_t pad; + uint32_t evtchn_pending_sel; + struct compat_arch_vcpu_info arch; + struct vcpu_time_info time; +}; /* 64 bytes (x86) */ + +struct compat_arch_shared_info { + unsigned int max_pfn; + unsigned int pfn_to_mfn_frame_list_list; + unsigned int nmi_reason; + unsigned int p2m_cr3; + unsigned int p2m_vaddr; + unsigned int p2m_generation; + uint32_t wc_sec_hi; +}; + +struct compat_shared_info { + struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; + uint32_t evtchn_pending[32]; + uint32_t evtchn_mask[32]; + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; + uint32_t wc_nsec; + struct compat_arch_shared_info arch; +}; + #define COMPAT_EVTCHN_2L_NR_CHANNELS 1024 /* @@ -222,6 +260,144 @@ int xen_evtchn_status_op(struct evtchn_status *status) return 0; } +/* + * Never thought I'd hear myself say this, but C++ templates would be + * kind of nice here. + * + * template<class T> static int do_unmask_port(T *shinfo, ...); + */ +static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port, + bool do_unmask, struct shared_info *shinfo, + struct vcpu_info *vcpu_info) +{ + const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]); + typeof(shinfo->evtchn_pending[0]) mask; + int idx = port / bits_per_word; + int offset = port % bits_per_word; + + mask = 1UL << offset; + + if (idx >= bits_per_word) { + return -EINVAL; + } + + if (do_unmask) { + /* If this is a true unmask operation, clear the mask bit. If + * it was already unmasked, we have nothing further to do. */ + if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) { + return 0; + } + } else { + /* This is a pseudo-unmask for affinity changes. We don't + * change the mask bit, and if it's *masked* we have nothing + * else to do. */ + if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) { + return 0; + } + } + + /* If the event was not pending, we're done. */ + if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) { + return 0; + } + + /* Now on to the vcpu_info evtchn_pending_sel index... */ + mask = 1UL << idx; + + /* If a port in this word was already pending for this vCPU, all done. */ + if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) { + return 0; + } + + /* Set evtchn_upcall_pending for this vCPU */ + if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) { + return 0; + } + + kvm_xen_inject_vcpu_callback_vector(s->port_table[port].vcpu); + + return 0; +} + +static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port, + bool do_unmask, + struct compat_shared_info *shinfo, + struct compat_vcpu_info *vcpu_info) +{ + const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]); + typeof(shinfo->evtchn_pending[0]) mask; + int idx = port / bits_per_word; + int offset = port % bits_per_word; + + mask = 1UL << offset; + + if (idx >= bits_per_word) { + return -EINVAL; + } + + if (do_unmask) { + /* If this is a true unmask operation, clear the mask bit. If + * it was already unmasked, we have nothing further to do. */ + if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) { + return 0; + } + } else { + /* This is a pseudo-unmask for affinity changes. We don't + * change the mask bit, and if it's *masked* we have nothing + * else to do. */ + if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) { + return 0; + } + } + + /* If the event was not pending, we're done. */ + if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) { + return 0; + } + + /* Now on to the vcpu_info evtchn_pending_sel index... */ + mask = 1UL << idx; + + /* If a port in this word was already pending for this vCPU, all done. */ + if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) { + return 0; + } + + /* Set evtchn_upcall_pending for this vCPU */ + if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) { + return 0; + } + + kvm_xen_inject_vcpu_callback_vector(s->port_table[port].vcpu); + + return 0; +} + +static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask) +{ + void *vcpu_info, *shinfo; + + if (s->port_table[port].type == EVTCHNSTAT_closed) { + return -EINVAL; + } + + shinfo = xen_overlay_page_ptr(XENMAPSPACE_shared_info, 0); + if (!shinfo) { + return -ENOTSUP; + } + + vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu); + if (!vcpu_info) { + return -EINVAL; + } + + if (xen_is_long_mode()) { + return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info); + } else { + return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info); + } +} + static void free_port(XenEvtchnState *s, evtchn_port_t port) { s->port_table[port].type = EVTCHNSTAT_closed; @@ -272,3 +448,25 @@ int xen_evtchn_close_op(struct evtchn_close *close) return ret; } + +int xen_evtchn_unmask_op(struct evtchn_unmask *unmask) +{ + XenEvtchnState *s = xen_evtchn_singleton; + int ret; + + if (!s) { + return -ENOTSUP; + } + + if (!valid_port(unmask->port)) { + return -EINVAL; + } + + qemu_mutex_lock(&s->port_lock); + + ret = unmask_port(s, unmask->port, true); + + qemu_mutex_unlock(&s->port_lock); + + return ret; +} diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h index 4c00000315..2fb7d70043 100644 --- a/hw/i386/kvm/xen_evtchn.h +++ b/hw/i386/kvm/xen_evtchn.h @@ -15,5 +15,7 @@ int xen_evtchn_set_callback_param(uint64_t param); struct evtchn_status; struct evtchn_close; +struct evtchn_unmask; int xen_evtchn_status_op(struct evtchn_status *status); int xen_evtchn_close_op(struct evtchn_close *close); +int xen_evtchn_unmask_op(struct evtchn_unmask *unmask); diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h new file mode 100644 index 0000000000..ab629feb13 --- /dev/null +++ b/include/sysemu/kvm_xen.h @@ -0,0 +1,18 @@ +/* + * Xen HVM emulation support in KVM + * + * Copyright © 2019 Oracle and/or its affiliates. All rights reserved. + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_SYSEMU_KVM_XEN_H +#define QEMU_SYSEMU_KVM_XEN_H + +void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id); +void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id); + +#endif /* QEMU_SYSEMU_KVM_XEN_H */ diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index f57d99f9d6..51cb6bf052 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -15,10 +15,13 @@ #include "qemu/log.h" #include "hw/xen/xen.h" #include "sysemu/kvm_int.h" +#include "sysemu/kvm_xen.h" #include "kvm/kvm_i386.h" #include "exec/address-spaces.h" #include "xen-emu.h" #include "trace.h" +#include "hw/pci/msi.h" +#include "hw/i386/apic-msidef.h" #include "hw/i386/kvm/xen_overlay.h" #include "hw/i386/kvm/xen_evtchn.h" #include "sysemu/runstate.h" @@ -227,6 +230,63 @@ static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data) env->xen_vcpu_info_gpa); } + +static void *gpa_to_hva(uint64_t gpa) +{ + MemoryRegionSection mrs; + + mrs = memory_region_find(get_system_memory(), gpa, 1); + return !mrs.mr ? NULL : qemu_map_ram_ptr(mrs.mr->ram_block, + mrs.offset_within_region); +} + +void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id) +{ + CPUState *cs = qemu_get_cpu(vcpu_id); + CPUX86State *env; + uint64_t gpa; + + if (!cs) { + return NULL; + } + env = &X86_CPU(cs)->env; + + gpa = env->xen_vcpu_info_gpa; + if (gpa == UINT64_MAX) + gpa = env->xen_vcpu_info_default_gpa; + if (gpa == UINT64_MAX) + return NULL; + + return gpa_to_hva(gpa); +} + +void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id) +{ + CPUState *cs = qemu_get_cpu(vcpu_id); + uint8_t vector; + + if (!cs) { + return; + } + vector = X86_CPU(cs)->env.xen_vcpu_callback_vector; + + if (vector) { + /* The per-vCPU callback vector injected via lapic. Just + * deliver it as an MSI. */ + MSIMessage msg = { + .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id, + .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT), + }; + kvm_irqchip_send_msi(kvm_state, msg); + return; + } + + /* If the evtchn_upcall_pending field in the vcpu_info is set, then + * KVM will automatically deliver the vector on entering the vCPU + * so all we have to do is kick it out. */ + qemu_cpu_kick(cs); +} + static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data) { X86CPU *cpu = X86_CPU(cs); @@ -652,6 +712,18 @@ static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu, err = xen_evtchn_close_op(&close); break; } + case EVTCHNOP_unmask: { + struct evtchn_unmask unmask; + + qemu_build_assert(sizeof(unmask) == 4); + if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_unmask_op(&unmask); + break; + } default: return false; } -- 2.35.3