[PATCH 2/2] KVM: Enable snooping control for supported hardware

2009-04-27 Thread Sheng Yang
Memory aliases with different memory types are a problem for the guest. For a
guest without an assigned device, the memory type of guest memory would always
be the same as the host's (WB); but with an assigned device, some part of memory
may be used for DMA and set to an uncacheable memory type (UC/WC), which would
conflict with the host memory type and thus be a potential issue.

Snooping control can guarantee the cache correctness of memory go through the
DMA engine of VT-d.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 arch/x86/include/asm/kvm_host.h |5 -
 arch/x86/kvm/mmu.c  |   16 +---
 arch/x86/kvm/svm.c  |4 ++--
 arch/x86/kvm/vmx.c  |   29 ++---
 virt/kvm/iommu.c|   27 ---
 5 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ba6906f..b972889 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -395,6 +395,8 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
+#define KVM_IOMMU_CACHE_COHERENCY  0x1
+   int iommu_flags;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
@@ -523,7 +525,7 @@ struct kvm_x86_ops {
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
-   int (*get_mt_mask_shift)(void);
+   u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -551,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
  const void *val, int bytes);
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
  gpa_t addr, unsigned long *ret);
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 extern bool tdp_enabled;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 55c6923..ea1c2aa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1606,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
	return mtrr_state->def_type;
 }
 
-static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
u8 mtrr;
 
@@ -1616,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
mtrr = MTRR_TYPE_WRBACK;
return mtrr;
 }
+EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
 
 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -1688,16 +1689,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= shadow_user_mask;
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
-	if (tdp_enabled) {
-		if (!kvm_is_mmio_pfn(pfn)) {
-			mt_mask = get_memory_type(vcpu, gfn) <<
-				kvm_x86_ops->get_mt_mask_shift();
-			mt_mask |= VMX_EPT_IGMT_BIT;
-		} else
-			mt_mask = MTRR_TYPE_UNCACHABLE <<
-				kvm_x86_ops->get_mt_mask_shift();
-		spte |= mt_mask;
-	}
+	if (tdp_enabled)
+		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
+			kvm_is_mmio_pfn(pfn));
 
	spte |= (u64)pfn << PAGE_SHIFT;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 053f3c5..2bbe1de 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2617,7 +2617,7 @@ static int get_npt_level(void)
 #endif
 }
 
-static int svm_get_mt_mask_shift(void)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
return 0;
 }
@@ -2678,7 +2678,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
-   .get_mt_mask_shift = svm_get_mt_mask_shift,
+   .get_mt_mask = svm_get_mt_mask,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 85db8b2..db853b6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3678,9 +3678,32 @@ static int get_ept_level(void)
return VMX_EPT_DEFAULT_GAW + 1;
 }
 
-static int vmx_get_mt_mask_shift(void)
+static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
-   return VMX_EPT_MT_EPTE_SHIFT;
+   int ret;
+
+   /* For VT-d and EPT combination
+* 1. MMIO: always map as UC
+* 2. EPT with VT-d:
+*   a. VT-d with snooping control feature: snooping control feature of
+*  VT-d engine can guarantee the cache correctness. Just set it
+*  to WB to keep consistent with host. So the same as item 3.
+*   b. VT-d without snooping control feature: can't 

[PATCH 2/2] KVM: Enable snooping control for supported hardware

2009-04-27 Thread Sheng Yang
Memory aliases with different memory types are a problem for the guest. For a
guest without an assigned device, the memory type of guest memory would always
be the same as the host's (WB); but with an assigned device, some part of memory
may be used for DMA and set to an uncacheable memory type (UC/WC), which would
conflict with the host memory type and thus be a potential issue.

Snooping control can guarantee the cache correctness of memory go through the
DMA engine of VT-d.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 arch/x86/include/asm/kvm_host.h |2 ++
 arch/x86/kvm/vmx.c  |   19 +--
 virt/kvm/iommu.c|   27 ---
 3 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b6d01a4..b972889 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -395,6 +395,8 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
+#define KVM_IOMMU_CACHE_COHERENCY  0x1
+   int iommu_flags;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7c4f5a3..79fc401 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3682,11 +3682,26 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
u64 ret;
 
+   /* For VT-d and EPT combination
+* 1. MMIO: always map as UC
+* 2. EPT with VT-d:
+*   a. VT-d without snooping control feature: can't guarantee the
+*  result, try to trust guest.
+*   b. VT-d with snooping control feature: snooping control feature of
+*  VT-d engine can guarantee the cache correctness. Just set it
+*  to WB to keep consistent with host. So the same as item 3.
+* 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+*consistent with host MTRR
+*/
	if (is_mmio)
		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
+	else if (vcpu->kvm->arch.iommu_domain &&
+		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
+		ret = kvm_get_guest_memory_type(vcpu, gfn) <<
+		      VMX_EPT_MT_EPTE_SHIFT;
	else
-		ret = (kvm_get_guest_memory_type(vcpu, gfn) <<
-			VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IGMT_BIT;
+		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
+			| VMX_EPT_IGMT_BIT;
 
return ret;
 }
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c40375..1514758 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
pfn_t pfn;
int i, r = 0;
	struct iommu_domain *domain = kvm->arch.iommu_domain;
+   int flags;
 
/* check if iommu exists and in use */
if (!domain)
return 0;
 
+   flags = IOMMU_READ | IOMMU_WRITE;
+	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+   flags |= IOMMU_CACHE;
+
	for (i = 0; i < npages; i++) {
/* check if already mapped */
if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
r = iommu_map_range(domain,
gfn_to_gpa(gfn),
pfn_to_hpa(pfn),
-   PAGE_SIZE,
-   IOMMU_READ | IOMMU_WRITE);
+   PAGE_SIZE, flags);
if (r) {
			printk(KERN_ERR "kvm_iommu_map_address:"
			       "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
 {
struct pci_dev *pdev = NULL;
	struct iommu_domain *domain = kvm->arch.iommu_domain;
-   int r;
+   int r, last_flags;
 
/* check if iommu exists and in use */
if (!domain)
@@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
return r;
}
 
+	last_flags = kvm->arch.iommu_flags;
+	if (iommu_domain_has_cap(kvm->arch.iommu_domain,
+				 IOMMU_CAP_CACHE_COHERENCY))
+		kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+
+	/* Check if need to update IOMMU page table for guest memory */
+	if ((last_flags ^ kvm->arch.iommu_flags) ==
+   KVM_IOMMU_CACHE_COHERENCY) {
+   kvm_iommu_unmap_memslots(kvm);
+   r = kvm_iommu_map_memslots(kvm);
+   if (r)
+   goto out_unmap;
+   }
+
	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
assigned_dev-host_busnr,