Memory aliases with different memory types are a problem for the guest. For a
guest without an assigned device, the memory type of guest memory would always
be the same as the host's (WB); but with an assigned device, some parts of
memory may be used for DMA and set to an uncacheable memory type (UC/WC),
which conflicts with the host memory type and is therefore a potential issue.

Snooping control can guarantee cache correctness for memory that goes through
the DMA engine of VT-d.

Signed-off-by: Sheng Yang <[email protected]>
---
 arch/x86/include/asm/kvm_host.h |    5 ++++-
 arch/x86/kvm/mmu.c              |   16 +++++-----------
 arch/x86/kvm/svm.c              |    4 ++--
 arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++++++---
 virt/kvm/iommu.c                |   27 ++++++++++++++++++++++++---
 5 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ba6906f..b972889 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -395,6 +395,8 @@ struct kvm_arch{
        struct list_head active_mmu_pages;
        struct list_head assigned_dev_head;
        struct iommu_domain *iommu_domain;
+#define KVM_IOMMU_CACHE_COHERENCY      0x1
+       int iommu_flags;
        struct kvm_pic *vpic;
        struct kvm_ioapic *vioapic;
        struct kvm_pit *vpit;
@@ -523,7 +525,7 @@ struct kvm_x86_ops {
        int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
        int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
        int (*get_tdp_level)(void);
-       int (*get_mt_mask_shift)(void);
+       u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -551,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                          const void *val, int bytes);
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
                  gpa_t addr, unsigned long *ret);
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 extern bool tdp_enabled;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 55c6923..ea1c2aa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1606,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type 
*mtrr_state,
        return mtrr_state->def_type;
 }
 
-static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
        u8 mtrr;
 
@@ -1616,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t 
gfn)
                mtrr = MTRR_TYPE_WRBACK;
        return mtrr;
 }
+EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
 
 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -1688,16 +1689,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 
*shadow_pte,
                spte |= shadow_user_mask;
        if (largepage)
                spte |= PT_PAGE_SIZE_MASK;
-       if (tdp_enabled) {
-               if (!kvm_is_mmio_pfn(pfn)) {
-                       mt_mask = get_memory_type(vcpu, gfn) <<
-                               kvm_x86_ops->get_mt_mask_shift();
-                       mt_mask |= VMX_EPT_IGMT_BIT;
-               } else
-                       mt_mask = MTRR_TYPE_UNCACHABLE <<
-                               kvm_x86_ops->get_mt_mask_shift();
-               spte |= mt_mask;
-       }
+       if (tdp_enabled)
+               spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
+                       kvm_is_mmio_pfn(pfn));
 
        spte |= (u64)pfn << PAGE_SHIFT;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 053f3c5..2bbe1de 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2617,7 +2617,7 @@ static int get_npt_level(void)
 #endif
 }
 
-static int svm_get_mt_mask_shift(void)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
        return 0;
 }
@@ -2678,7 +2678,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 
        .set_tss_addr = svm_set_tss_addr,
        .get_tdp_level = get_npt_level,
-       .get_mt_mask_shift = svm_get_mt_mask_shift,
+       .get_mt_mask = svm_get_mt_mask,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 85db8b2..db853b6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3678,9 +3678,32 @@ static int get_ept_level(void)
        return VMX_EPT_DEFAULT_GAW + 1;
 }
 
-static int vmx_get_mt_mask_shift(void)
+static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
-       return VMX_EPT_MT_EPTE_SHIFT;
+       int ret;
+
+       /* For VT-d and EPT combination
+        * 1. MMIO: always map as UC
+        * 2. EPT with VT-d:
+        *   a. VT-d with snooping control feature: snooping control feature of
+        *      VT-d engine can guarantee the cache correctness. Just set it
+        *      to WB to keep consistent with host. So the same as item 3.
+        *   b. VT-d without snooping control feature: can't guarantee the
+        *      result, try to trust guest.
+        * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+        *    consistent with host MTRR
+        */
+       if (is_mmio)
+               ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
+       else if (vcpu->kvm->arch.iommu_domain &&
+               !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
+               ret = kvm_get_guest_memory_type(vcpu, gfn) <<
+                     VMX_EPT_MT_EPTE_SHIFT;
+       else
+               ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
+                       | VMX_EPT_IGMT_BIT;
+
+       return ret;
 }
 
 static struct kvm_x86_ops vmx_x86_ops = {
@@ -3737,7 +3760,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .interrupt_allowed = vmx_interrupt_allowed,
        .set_tss_addr = vmx_set_tss_addr,
        .get_tdp_level = get_ept_level,
-       .get_mt_mask_shift = vmx_get_mt_mask_shift,
+       .get_mt_mask = vmx_get_mt_mask,
 };
 
 static int __init vmx_init(void)
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c40375..1514758 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
        pfn_t pfn;
        int i, r = 0;
        struct iommu_domain *domain = kvm->arch.iommu_domain;
+       int flags;
 
        /* check if iommu exists and in use */
        if (!domain)
                return 0;
 
+       flags = IOMMU_READ | IOMMU_WRITE;
+       if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+               flags |= IOMMU_CACHE;
+
        for (i = 0; i < npages; i++) {
                /* check if already mapped */
                if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
                r = iommu_map_range(domain,
                                    gfn_to_gpa(gfn),
                                    pfn_to_hpa(pfn),
-                                   PAGE_SIZE,
-                                   IOMMU_READ | IOMMU_WRITE);
+                                   PAGE_SIZE, flags);
                if (r) {
                        printk(KERN_ERR "kvm_iommu_map_address:"
                               "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
 {
        struct pci_dev *pdev = NULL;
        struct iommu_domain *domain = kvm->arch.iommu_domain;
-       int r;
+       int r, last_flags;
 
        /* check if iommu exists and in use */
        if (!domain)
@@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
                return r;
        }
 
+       last_flags = kvm->arch.iommu_flags;
+       if (iommu_domain_has_cap(kvm->arch.iommu_domain,
+                                IOMMU_CAP_CACHE_COHERENCY))
+               kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+
+       /* Check if need to update IOMMU page table for guest memory */
+       if ((last_flags ^ kvm->arch.iommu_flags) ==
+                       KVM_IOMMU_CACHE_COHERENCY) {
+               kvm_iommu_unmap_memslots(kvm);
+               r = kvm_iommu_map_memslots(kvm);
+               if (r)
+                       goto out_unmap;
+       }
+
        printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
                assigned_dev->host_busnr,
                PCI_SLOT(assigned_dev->host_devfn),
                PCI_FUNC(assigned_dev->host_devfn));
 
        return 0;
+out_unmap:
+       kvm_iommu_unmap_memslots(kvm);
+       return r;
 }
 
 int kvm_deassign_device(struct kvm *kvm,
-- 
1.5.4.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to