Memory aliases with different memory type is a problem for guest. For the guest
without assigned device, the memory type of guest memory would always been the
same as host(WB); but for the assigned device, some part of memory may be used
as DMA and then set to uncacheable memory type(UC/WC), which would be a
conflict of
host memory type then be a potential issue.
Snooping control can guarantee the cache correctness of memory go through the
DMA engine of VT-d.
Signed-off-by: Sheng Yang sh...@linux.intel.com
---
arch/x86/include/asm/kvm_host.h |5 -
arch/x86/kvm/mmu.c | 16 +---
arch/x86/kvm/svm.c |4 ++--
arch/x86/kvm/vmx.c | 29 ++---
virt/kvm/iommu.c| 27 ---
5 files changed, 61 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ba6906f..b972889 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -395,6 +395,8 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
+#define KVM_IOMMU_CACHE_COHERENCY 0x1
+ int iommu_flags;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
@@ -523,7 +525,7 @@ struct kvm_x86_ops {
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
- int (*get_mt_mask_shift)(void);
+ u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
};
extern struct kvm_x86_ops *kvm_x86_ops;
@@ -551,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
gpa_t addr, unsigned long *ret);
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
extern bool tdp_enabled;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 55c6923..ea1c2aa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1606,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type
*mtrr_state,
return mtrr_state-def_type;
}
-static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{
u8 mtrr;
@@ -1616,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t
gfn)
mtrr = MTRR_TYPE_WRBACK;
return mtrr;
}
+EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
@@ -1688,16 +1689,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64
*shadow_pte,
spte |= shadow_user_mask;
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
- if (tdp_enabled) {
- if (!kvm_is_mmio_pfn(pfn)) {
- mt_mask = get_memory_type(vcpu, gfn)
- kvm_x86_ops-get_mt_mask_shift();
- mt_mask |= VMX_EPT_IGMT_BIT;
- } else
- mt_mask = MTRR_TYPE_UNCACHABLE
- kvm_x86_ops-get_mt_mask_shift();
- spte |= mt_mask;
- }
+ if (tdp_enabled)
+ spte |= kvm_x86_ops-get_mt_mask(vcpu, gfn,
+ kvm_is_mmio_pfn(pfn));
spte |= (u64)pfn PAGE_SHIFT;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 053f3c5..2bbe1de 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2617,7 +2617,7 @@ static int get_npt_level(void)
#endif
}
-static int svm_get_mt_mask_shift(void)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
return 0;
}
@@ -2678,7 +2678,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
- .get_mt_mask_shift = svm_get_mt_mask_shift,
+ .get_mt_mask = svm_get_mt_mask,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 85db8b2..db853b6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3678,9 +3678,32 @@ static int get_ept_level(void)
return VMX_EPT_DEFAULT_GAW + 1;
}
-static int vmx_get_mt_mask_shift(void)
+static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
- return VMX_EPT_MT_EPTE_SHIFT;
+ int ret;
+
+ /* For VT-d and EPT combination
+* 1. MMIO: always map as UC
+* 2. EPT with VT-d:
+* a. VT-d with snooping control feature: snooping control feature of
+* VT-d engine can guarantee the cache correctness. Just set it
+* to WB to keep consistent with host. So the same as item 3.
+* b. VT-d without snooping control feature: can't