Intercepted MMIO is a fake device for intercepting a device's MMIO region. It prevents KVM from setting up page table entries for specific pages in the device's MMIO region, and instead emulates accesses to them using HIGHMEM-mapped kernel pages.
For MSI-X, we need to intercept the guest accessing to a MMIO page which stored MSI-X table, and what we write to real device's MMIO page is another story. Signed-off-by: Sheng Yang <[email protected]> --- arch/x86/kvm/Makefile | 2 +- arch/x86/kvm/mmu.c | 11 +++ arch/x86/kvm/paging_tmpl.h | 11 +++ include/linux/kvm.h | 3 + include/linux/kvm_host.h | 14 +++ virt/kvm/intercepted_mmio.c | 201 +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/intercepted_mmio.h | 23 +++++ virt/kvm/kvm_main.c | 7 ++ 8 files changed, 271 insertions(+), 1 deletions(-) create mode 100644 virt/kvm/intercepted_mmio.c create mode 100644 virt/kvm/intercepted_mmio.h diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index c023435..4ce3137 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,7 +3,7 @@ # common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o) + coalesced_mmio.o irq_comm.o intercepted_mmio.o) ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9871d9d..39bb483 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2096,6 +2096,17 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, kvm_release_pfn_clean(pfn); return 1; } + + /* + * We mapped MMIO in assigned device for performance, but we + * have to intercept some MMIO access + */ + if (kvm_is_mmio_pfn(pfn) && + kvm_intercept_mmio_pfn(vcpu->kvm, pfn)) { + kvm_release_pfn_clean(pfn); + return 1; + } + spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu, mmu_seq)) goto out_unlock; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d206401..7788beb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -440,6 +440,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, return 1; } + /* + * We mapped MMIO in assigned device for performance, but we + * have to intercept some MMIO 
access + */ + if (kvm_is_mmio_pfn(pfn) && + kvm_intercept_mmio_pfn(vcpu->kvm, pfn)) { + pgprintk("gfn %lx is intercepted mmio\n", walker.gfn); + kvm_release_pfn_clean(pfn); + return 1; + } + spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu, mmu_seq)) goto out_unlock; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index b091a86..c45b08d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -395,6 +395,9 @@ struct kvm_trace_rec { #endif #define KVM_CAP_SET_GUEST_DEBUG 23 #define KVM_CAP_GSI_MSG 24 +#if defined(CONFIG_X86) +#define KVM_CAP_INTERCEPTED_MMIO 25 +#endif /* * ioctls for VM fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 541ccaf..e4d6b99 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -137,6 +137,7 @@ struct kvm { struct mutex gsi_msg_lock; #define KVM_NR_GSI_MSG 256 DECLARE_BITMAP(gsi_msg_bitmap, KVM_NR_GSI_MSG); + struct kvm_intercepted_mmio_dev *intercepted_mmio_dev; }; /* The guest did something we don't support. 
*/ @@ -304,6 +305,13 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; +struct kvm_intercepted_mmio { + pfn_t pfn; + struct page *page; + struct kvm_assigned_dev_kernel *dev; + struct hlist_node link; +}; + struct kvm_assigned_dev_kernel { struct kvm_irq_ack_notifier ack_notifier; struct work_struct interrupt_work; @@ -323,6 +331,7 @@ struct kvm_assigned_dev_kernel { #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) unsigned long irq_requested_type; int irq_source_id; + struct kvm_intercepted_mmio msix_mmio; struct pci_dev *dev; struct kvm *kvm; }; @@ -346,6 +355,11 @@ struct kvm_gsi_msg *kvm_find_gsi_msg(struct kvm *kvm, u32 gsi); void kvm_free_gsi_msg(struct kvm *kvm, struct kvm_gsi_msg *gsi_msg); void kvm_free_gsi_msg_list(struct kvm *kvm); +int kvm_intercept_mmio_pfn(struct kvm *kvm, pfn_t pfn); +int kvm_register_intercept_mmio(struct kvm *kvm, + struct kvm_intercepted_mmio *mmio); +void kvm_unregister_intercept_mmio(struct kvm_intercepted_mmio *mmio); + #ifdef CONFIG_DMAR int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); diff --git a/virt/kvm/intercepted_mmio.c b/virt/kvm/intercepted_mmio.c new file mode 100644 index 0000000..693cdd7 --- /dev/null +++ b/virt/kvm/intercepted_mmio.c @@ -0,0 +1,201 @@ +/* + * KVM intercepted MMIO + * + * Copyright (c) 2008 Intel Corporation + * + * A fake device help to emulate one piece of MMIO region + * + * Author: Sheng Yang <[email protected]> + * + */ + +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/highmem.h> + +#include "iodev.h" +#include "intercepted_mmio.h" + +static struct kvm_intercepted_mmio *find_intercepted_mmio( + struct kvm_intercepted_mmio_dev *dev, pfn_t pfn) +{ + struct hlist_node *n; + struct kvm_intercepted_mmio *mmio; + + hlist_for_each_entry(mmio, n, &dev->mmio_list, link) + if (mmio->pfn == pfn) + return mmio; + return NULL; +} + +static int intercepted_mmio_in_range(struct kvm_io_device *this, + gpa_t addr, int len, 
int is_write) +{ + struct kvm_intercepted_mmio_dev *dev = + (struct kvm_intercepted_mmio_dev*)this->private; + pfn_t pfn; + int r = 0; + + pfn = gfn_to_pfn(dev->kvm, addr >> PAGE_SHIFT); + + if (is_error_pfn(pfn)) { + r = 0; + goto out; + } + + if (!kvm_is_mmio_pfn(pfn)) { + r = 0; + goto out; + } + + /* Notice we can't handle the accessing across the page */ + if (addr >> PAGE_SHIFT != (addr + len - 2) >> PAGE_SHIFT) { + printk(KERN_WARNING + "kvm: intercepted MMIO across the page! " + "gpa 0x%lx, length %d\n", (unsigned long)addr, len); + r = 0; + goto out; + } + + if (find_intercepted_mmio(dev, pfn)) + r = 1; +out: + kvm_release_pfn_clean(pfn); + return r; +} + +int kvm_intercept_mmio_pfn(struct kvm *kvm, pfn_t pfn) +{ + struct kvm_intercepted_mmio_dev *dev = kvm->intercepted_mmio_dev; + int r = 0; + + mutex_lock(&kvm->lock); + if (find_intercepted_mmio(dev, pfn)) + r = 1; + mutex_unlock(&kvm->lock); + + return r; +} + +static void intercepted_mmio_destructor(struct kvm_io_device *this) +{ + struct kvm_intercepted_mmio_dev *dev = + (struct kvm_intercepted_mmio_dev*)this->private; + struct kvm_intercepted_mmio *mmio; + struct hlist_node *n; + + hlist_for_each_entry(mmio, n, &dev->mmio_list, link) + __free_page(mmio->page); + kfree(this); +} + +static void intercepted_mmio_read(struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + struct kvm_intercepted_mmio_dev *dev = + (struct kvm_intercepted_mmio_dev*)this->private; + struct kvm_intercepted_mmio *mmio; + pfn_t pfn; + int offset = addr & ~PAGE_MASK; + void *va; + + pfn = gfn_to_pfn(dev->kvm, addr >> PAGE_SHIFT); + + /* We should already ensure that pfn is legal */ + if (is_error_pfn(pfn)) { + BUG(); + goto out; + } + + if (!kvm_is_mmio_pfn(pfn)) { + BUG(); + goto out; + } + + mmio = find_intercepted_mmio(dev, pfn); + + BUG_ON(!mmio); + BUG_ON(!mmio->page); + + va = kmap(mmio->page); + memcpy(val, (void *)((char *)va + offset), len); + kunmap(mmio->page); + +out: + kvm_release_pfn_clean(pfn); +} + 
+static void intercepted_mmio_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + struct kvm_intercepted_mmio_dev *dev = + (struct kvm_intercepted_mmio_dev*)this->private; + struct kvm_intercepted_mmio *mmio; + pfn_t pfn; + int offset = addr & ~PAGE_MASK; + void *va; + + pfn = gfn_to_pfn(dev->kvm, addr >> PAGE_SHIFT); + + /* We should already ensure that pfn is legal */ + if (is_error_pfn(pfn)) { + BUG(); + goto out; + } + + if (!kvm_is_mmio_pfn(pfn)) { + BUG(); + goto out; + } + + mmio = find_intercepted_mmio(dev, pfn); + + BUG_ON(!mmio); + BUG_ON(!mmio->page); + + va = kmap(mmio->page); + memcpy((void *)((char *)va + offset), val, len); + kunmap(mmio->page); + +out: + kvm_release_pfn_clean(pfn); +} + +int kvm_intercepted_mmio_init(struct kvm *kvm) +{ + struct kvm_intercepted_mmio_dev *dev; + + dev = kzalloc(sizeof(struct kvm_intercepted_mmio_dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + dev->dev.write = intercepted_mmio_write; + dev->dev.in_range = intercepted_mmio_in_range; + dev->dev.read = intercepted_mmio_read; + dev->dev.destructor = intercepted_mmio_destructor; + dev->dev.private = dev; + dev->kvm = kvm; + kvm->intercepted_mmio_dev = dev; + kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev); + INIT_HLIST_HEAD(&dev->mmio_list); + + return 0; +} + +/* Register intercepted MMIO, called with kvm->lock hold */ +int kvm_register_intercept_mmio(struct kvm *kvm, + struct kvm_intercepted_mmio *mmio) +{ + mmio->page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (!mmio->page) + return -ENOMEM; + + hlist_add_head(&mmio->link, &kvm->intercepted_mmio_dev->mmio_list); + return 0; +} + +/* Register intercepted MMIO, called with kvm->lock hold */ +void kvm_unregister_intercept_mmio(struct kvm_intercepted_mmio *mmio) +{ + __free_page(mmio->page); + hlist_del(&mmio->link); +} diff --git a/virt/kvm/intercepted_mmio.h b/virt/kvm/intercepted_mmio.h new file mode 100644 index 0000000..18fe10f --- /dev/null +++ 
b/virt/kvm/intercepted_mmio.h @@ -0,0 +1,23 @@ +/* + * KVM intercepted MMIO + * + * Copyright (c) 2008 Intel Corporation + * + * A fake device help to emulate one piece of MMIO region + * + * Author: Sheng Yang <[email protected]> + * + */ + +#include "iodev.h" + +#include <linux/kvm_host.h> +#include <linux/kvm.h> + +struct kvm_intercepted_mmio_dev { + struct kvm_io_device dev; + struct kvm *kvm; + struct hlist_head mmio_list; +}; + +int kvm_intercepted_mmio_init(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3955e4d..a5a9763 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -57,6 +57,10 @@ #include "irq.h" #endif +#ifdef KVM_CAP_INTERCEPTED_MMIO +#include "intercepted_mmio.h" +#endif + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -784,6 +788,9 @@ static struct kvm *kvm_create_vm(void) #endif INIT_HLIST_HEAD(&kvm->gsi_msg_list); mutex_init(&kvm->gsi_msg_lock); +#ifdef KVM_CAP_INTERCEPTED_MMIO + kvm_intercepted_mmio_init(kvm); +#endif out: return kvm; } -- 1.5.4.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
