From: Christoffer Dall <cd...@cs.columbia.edu>

Handles guest faults in KVM by mapping in the corresponding user pages
in the 2nd stage page tables.

Introduces a new ARM-specific kernel memory type, PAGE_KVM_GUEST, and a
pgprot_guest variable used to map 2nd stage memory for KVM guests.

Signed-off-by: Christoffer Dall <c.d...@virtualopensystems.com>
---
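Note on the new attribute bits: in an LPAE stage-2 descriptor the memory
attributes live in MemAttr, bits [5:2], and the access permissions in HAP,
bits [7:6]. The following is a minimal standalone sketch (plain user-space
C with illustrative S2_* names, not kernel code) of how the bits behind
PAGE_KVM_GUEST compose into a read/write, write-back cacheable mapping:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the L_PTE2_* definitions added by this patch */
#define S2_READ		((uint64_t)1 << 6)	/* HAP read permission */
#define S2_WRITE	((uint64_t)1 << 7)	/* HAP write permission */
#define S2_NORM_WB	((uint64_t)3 << 4)	/* MemAttr[3:2]: Normal, outer WB */
#define S2_INNER_WB	((uint64_t)3 << 2)	/* MemAttr[1:0]: inner WB */

int main(void)
{
	/* Same composition as PAGE_KVM_GUEST: R/W, write-back cacheable */
	uint64_t prot = S2_READ | S2_WRITE | S2_NORM_WB | S2_INNER_WB;

	printf("stage-2 attribute bits: 0x%02llx\n",	/* prints 0xfc */
	       (unsigned long long)prot);
	return 0;
}
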
 arch/arm/include/asm/pgtable-3level.h |    8 ++
 arch/arm/include/asm/pgtable.h        |    4 +
 arch/arm/kvm/mmu.c                    |  107 ++++++++++++++++++++++++++++++++-
 arch/arm/mm/mmu.c                     |    3 +
 4 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index edc3cb9..6dc5331 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,6 +104,14 @@
  */
 #define L_PGD_SWAPPER          (_AT(pgdval_t, 1) << 55)        /* swapper_pg_dir entry */
 
+/*
+ * 2nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ            (_AT(pteval_t, 1) << 6) /* HAP[1] */
+#define L_PTE2_WRITE           (_AT(pteval_t, 1) << 7) /* HAP[2] */
+#define L_PTE2_NORM_WB         (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB        (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
+
 #ifndef __ASSEMBLY__
 
 #define pud_none(pud)          (!pud_val(pud))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 20025cc..778856b 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -76,6 +76,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
 
 extern pgprot_t                pgprot_user;
 extern pgprot_t                pgprot_kernel;
+extern pgprot_t                pgprot_guest;
 
 #define _MOD_PROT(p, b)        __pgprot(pgprot_val(p) | (b))
 
@@ -89,6 +90,9 @@ extern pgprot_t               pgprot_kernel;
 #define PAGE_KERNEL            _MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC       pgprot_kernel
 #define PAGE_HYP               _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST         _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+                                         L_PTE2_WRITE | L_PTE2_NORM_WB | \
+                                         L_PTE2_INNER_WB)
 
 #define __PAGE_NONE            __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
 #define __PAGE_SHARED          __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index f7a7b17..d468238 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -229,8 +229,111 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
        kvm->arch.pgd = NULL;
 }
 
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+                         gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+       pfn_t pfn;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte, new_pte;
+
+       pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+       if (is_error_pfn(pfn)) {
+               kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
+                               "corresponding host mapping",
+                               gfn, gfn << PAGE_SHIFT);
+               return -EFAULT;
+       }
+
+       /* Create 2nd stage page table mapping - Level 1 */
+       pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
+       pud = pud_offset(pgd, fault_ipa);
+       if (pud_none(*pud)) {
+               pmd = pmd_alloc_one(NULL, fault_ipa);
+               if (!pmd) {
+                       kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
+                       return -ENOMEM;
+               }
+               pud_populate(NULL, pud, pmd);
+               pmd += pmd_index(fault_ipa);
+       } else
+               pmd = pmd_offset(pud, fault_ipa);
+
+       /* Create 2nd stage page table mapping - Level 2 */
+       if (pmd_none(*pmd)) {
+               pte = pte_alloc_one_kernel(NULL, fault_ipa);
+               if (!pte) {
+                       kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
+                       return -ENOMEM;
+               }
+               pmd_populate_kernel(NULL, pmd, pte);
+               pte += pte_index(fault_ipa);
+       } else
+               pte = pte_offset_kernel(pmd, fault_ipa);
+
+       /* Create 2nd stage page table mapping - Level 3 */
+       new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+       set_pte_ext(pte, new_pte, 0);
+
+       return 0;
+}
+
+#define HSR_ABT_FS     (0x3f)
+#define HPFAR_MASK     (~0xf)
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu:      the VCPU pointer
+ * @run:       the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean that either the
+ * guest simply needs more memory and we must allocate an appropriate page or it
+ * can mean that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-       KVMARM_NOT_IMPLEMENTED();
-       return -EINVAL;
+       unsigned long hsr_ec;
+       unsigned long fault_status;
+       phys_addr_t fault_ipa;
+       struct kvm_memory_slot *memslot = NULL;
+       bool is_iabt;
+       gfn_t gfn;
+
+       hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+       is_iabt = (hsr_ec == HSR_EC_IABT);
+
+       /* Check that the second stage fault is a translation fault */
+       fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+       if ((fault_status & 0x3c) != 0x4) {
+               kvm_err(-EFAULT, "Unsupported fault status: %x",
+                               fault_status & 0x3c);
+               return -EFAULT;
+       }
+
+       fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+       gfn = fault_ipa >> PAGE_SHIFT;
+       if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+               if (is_iabt) {
+                       kvm_err(-EFAULT, "Inst. abort on I/O address");
+                       return -EFAULT;
+               }
+
+               kvm_msg("I/O address abort...");
+               KVMARM_NOT_IMPLEMENTED();
+               return -EINVAL;
+       }
+
+       memslot = gfn_to_memslot(vcpu->kvm, gfn);
+       if (!memslot->user_alloc) {
+               kvm_err(-EINVAL, "non user-alloc memslots not supported");
+               return -EINVAL;
+       }
+
+       return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
 }
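
For reference, the HSR/HPFAR decoding above works out as in this minimal
standalone sketch (plain user-space C with hypothetical register values,
not kernel code): HPFAR bits [31:4] hold IPA[39:12], so masking the low
nibble and shifting left by 8 yields the page-aligned faulting IPA, and
fault status values 0x4-0x7 in HSR[5:0] encode a translation fault:

#include <stdint.h>
#include <stdio.h>

#define HSR_ABT_FS	(0x3f)
#define HPFAR_MASK	(~0xf)

int main(void)
{
	uint32_t hsr   = 0x93c00007;	/* hypothetical HSR: data abort, FS = 0x07 */
	uint32_t hpfar = 0x00089ab0;	/* hypothetical HPFAR value */

	uint32_t fault_status = hsr & HSR_ABT_FS;
	uint64_t fault_ipa = (uint64_t)(hpfar & HPFAR_MASK) << 8;

	/* (fault_status & 0x3c) == 0x4 matches translation faults at any level */
	printf("fault status 0x%02x -> %s\n", fault_status,
	       (fault_status & 0x3c) == 0x4 ? "translation fault" : "unsupported");
	printf("page-aligned fault IPA: 0x%08llx\n",
	       (unsigned long long)fault_ipa);
	return 0;
}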
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index b836d6b..1aa6e2c 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -53,9 +53,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
 static unsigned int ecc_mask __initdata = 0;
 pgprot_t pgprot_user;
 pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
 
 EXPORT_SYMBOL(pgprot_user);
 EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
 
 struct cachepolicy {
        const char      policy[16];
@@ -503,6 +505,7 @@ static void __init build_mem_type_table(void)
        pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
        pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
                                 L_PTE_DIRTY | kern_pgprot);
+       pgprot_guest  = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
 
        mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
        mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
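
As a closing note, the three-level walk in user_mem_abort() resolves an
IPA by indexing one table per level. With LPAE 4K pages the level-2 and
level-3 indices are plain IPA bit-fields, as in this standalone sketch
(plain user-space C with a hypothetical IPA, not kernel code; the level-1
index width depends on the configured IPA size, so the 2-bit mask below
is purely illustrative):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t ipa = 0x089ab000;	/* hypothetical page-aligned fault IPA */

	/* LPAE, 4K granule: 512 8-byte entries per level-2/3 table */
	unsigned l1 = (unsigned)((ipa >> 30) & 0x3);	/* illustrative level-1 index */
	unsigned l2 = (unsigned)((ipa >> 21) & 0x1ff);	/* pmd_index() equivalent */
	unsigned l3 = (unsigned)((ipa >> 12) & 0x1ff);	/* pte_index() equivalent */

	printf("level 1: %u, level 2: %u, level 3: %u\n", l1, l2, l3);
	return 0;
}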
