From: Christoffer Dall <christoffer.d...@linaro.org>

Based on the AArch64.TranslationTableWalk() pseudocode in the ARM ARM,
implement a software walker for the stage 2 page tables set up by a
guest hypervisor.

Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
Signed-off-by: Jintack Lim <jintack....@linaro.org>
[maz: heavily reworked for future ARMv8.4-TTL support]
Signed-off-by: Marc Zyngier <m...@kernel.org>
---
 arch/arm64/include/asm/esr.h        |   1 +
 arch/arm64/include/asm/kvm_arm.h    |   2 +
 arch/arm64/include/asm/kvm_nested.h |  13 ++
 arch/arm64/kvm/nested.c             | 276 ++++++++++++++++++++++++++++
 4 files changed, 292 insertions(+)
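
Note for reviewers (not part of the commit message): below is a minimal
sketch of how a caller might consume the walker. Only kvm_walk_nested_s2()
and struct kvm_s2_trans come from this patch; kvm_inject_s2_fault() and
handle_l1_ipa_fault() are hypothetical helpers used purely for
illustration.

	/* Illustrative only -- not part of this patch. */
	static int handle_nested_fault(struct kvm_vcpu *vcpu, phys_addr_t gipa)
	{
		struct kvm_s2_trans trans;
		int ret;

		/* Walk the guest hypervisor's stage 2 (VTTBR_EL2/VTCR_EL2) */
		ret = kvm_walk_nested_s2(vcpu, gipa, &trans);
		if (ret < 0)
			return ret;	/* walk error, e.g. -EFAULT */

		if (ret) {
			/*
			 * The walk itself faulted: trans.esr describes the
			 * stage 2 fault to reflect back to the guest
			 * hypervisor (hypothetical injection helper).
			 */
			return kvm_inject_s2_fault(vcpu, trans.esr);
		}

		/*
		 * Success: trans.output is the L1 IPA backing gipa, and
		 * trans.readable/trans.writable carry the guest
		 * hypervisor's stage 2 permissions for it.
		 */
		return handle_l1_ipa_fault(vcpu, trans.output, &trans);
	}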

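Also for reviewers, a worked example of the walk arithmetic, assuming a
4K granule and a 40-bit input range (numbers illustrative, derived from
the formulas in walk_nested_s2_pgd()): with pgshift = 12, stride =
12 - 3 = 9. For t0sz = 24, input_size = 64 - 24 = 40, and with sl = 1
the walk starts at level = 2 - 1 = 1. The starting table resolves
start_size = 40 - ((3 - 1) * 9 + 12) = 10 bits of index (two
concatenated 4K tables, within the stride + 4 limit). At level 1,
addr_bottom = (3 - 1) * 9 + 12 = 30, so the index is IPA[39:30] and a
level 1 block would map 1UL << 30 = 1GB; at level 2 the index is
IPA[29:21] (2MB blocks), and at level 3 IPA[20:12] (4KB pages).
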
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index b15921dc889a..4373f4f3d58f 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -125,6 +125,7 @@
 #define ESR_ELx_CM             (UL(1) << ESR_ELx_CM_SHIFT)
 
 /* ISS field definitions for exceptions taken in to Hyp */
+#define ESR_ELx_FSC_ADDRSZ     (0x00)
 #define ESR_ELx_CV             (UL(1) << 24)
 #define ESR_ELx_COND_SHIFT     (20)
 #define ESR_ELx_COND_MASK      (UL(0xF) << ESR_ELx_COND_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cdee623ce92f..95dfef820cf1 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -261,6 +261,8 @@
 #define VTTBR_VMID_SHIFT  (UL(48))
 #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
+#define SCTLR_EE       (UL(1) << 25)
+
 /* Hyp System Trap Register */
 #define HSTR_EL2_T(x)  (1 << x)
 
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 88595447b598..3881e51d5a2d 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -17,6 +17,19 @@ extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm *kvm, u64 vttbr, u64 hcr);
 extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
 extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
 
+struct kvm_s2_trans {
+       phys_addr_t output;
+       unsigned long block_size;
+       bool writable;
+       bool readable;
+       int level;
+       u32 esr;
+       u64 upper_attr;
+};
+
+extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+                             struct kvm_s2_trans *result);
+
 int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe);
 extern bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg,
                            u64 control_bit);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 65dbc1a796a1..573bcfcfe53f 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -80,6 +80,282 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
        return ret;
 }
 
+struct s2_walk_info {
+       int          (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
+       void         *data;
+       u64          baddr;
+       unsigned int max_pa_bits;
+       unsigned int pgshift;
+       unsigned int pgsize;
+       unsigned int ps;
+       unsigned int sl;
+       unsigned int t0sz;
+       bool         be;
+       bool         el1_aarch32;
+};
+
+static unsigned int ps_to_output_size(unsigned int ps)
+{
+       switch (ps) {
+       case 0: return 32;
+       case 1: return 36;
+       case 2: return 40;
+       case 3: return 42;
+       case 4: return 44;
+       case 5:
+       default:
+               return 48;
+       }
+}
+
+static u32 compute_fsc(int level, u32 fsc)
+{
+       return fsc | (level & 0x3);
+}
+
+static int esr_s2_fault(struct kvm_vcpu *vcpu, int level, u32 fsc)
+{
+       u32 esr;
+
+       esr = kvm_vcpu_get_hsr(vcpu) & ~ESR_ELx_FSC;
+       esr |= compute_fsc(level, fsc);
+       return esr;
+}
+
+static int check_base_s2_limits(struct s2_walk_info *wi,
+                               int level, int input_size, int stride)
+{
+       int start_size;
+
+       /* Check translation limits */
+       switch (wi->pgsize) {
+       case SZ_64K:
+               if (level == 0 || (level == 1 && wi->max_pa_bits <= 42))
+                       return -EFAULT;
+               break;
+       case SZ_16K:
+               if (level == 0 || (level == 1 && wi->max_pa_bits <= 40))
+                       return -EFAULT;
+               break;
+       case SZ_4K:
+               if (level < 0 || (level == 0 && wi->max_pa_bits <= 42))
+                       return -EFAULT;
+               break;
+       }
+
+       /* Check input size limits */
+       if (input_size > wi->max_pa_bits &&
+           (!wi->el1_aarch32 || input_size > 40))
+               return -EFAULT;
+
+       /* Check number of entries in starting level table */
+       start_size = input_size - ((3 - level) * stride + wi->pgshift);
+       if (start_size < 1 || start_size > stride + 4)
+               return -EFAULT;
+
+       return 0;
+}
+
+/* Check if output is within boundaries */
+static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
+{
+       unsigned int output_size = ps_to_output_size(wi->ps);
+
+       if (output_size > wi->max_pa_bits)
+               output_size = wi->max_pa_bits;
+
+       if (output_size != 48 && (output & GENMASK_ULL(47, output_size)))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * This is essentially a C version of the pseudocode from the ARM ARM
+ * AArch64.TranslationTableWalk function. I strongly recommend reading
+ * that pseudocode when trying to understand this code.
+ *
+ * Must be called with the kvm->srcu read lock held
+ */
+static int walk_nested_s2_pgd(phys_addr_t ipa,
+                             struct s2_walk_info *wi, struct kvm_s2_trans *out)
+{
+       int first_block_level, level, stride, input_size, base_lower_bound;
+       phys_addr_t base_addr;
+       unsigned int addr_top, addr_bottom;
+       u64 desc;  /* page table entry */
+       int ret;
+       phys_addr_t paddr;
+
+       switch (wi->pgsize) {
+       case SZ_64K:
+       case SZ_16K:
+               level = 3 - wi->sl;
+               first_block_level = 2;
+               break;
+       case SZ_4K:
+               level = 2 - wi->sl;
+               first_block_level = 1;
+               break;
+       default:
+               /* GCC cannot tell the switch above is exhaustive */
+               unreachable();
+       }
+
+       stride = wi->pgshift - 3;
+       input_size = 64 - wi->t0sz;
+       if (input_size > 48 || input_size < 25)
+               return -EFAULT;
+
+       ret = check_base_s2_limits(wi, level, input_size, stride);
+       if (WARN_ON(ret))
+               return ret;
+
+       base_lower_bound = 3 + input_size - ((3 - level) * stride +
+                          wi->pgshift);
+       base_addr = wi->baddr & GENMASK_ULL(47, base_lower_bound);
+
+       if (check_output_size(wi, base_addr)) {
+               out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
+               return 1;
+       }
+
+       addr_top = input_size - 1;
+
+       while (1) {
+               phys_addr_t index;
+
+               addr_bottom = (3 - level) * stride + wi->pgshift;
+               index = (ipa & GENMASK_ULL(addr_top, addr_bottom))
+                       >> (addr_bottom - 3);
+
+               paddr = base_addr | index;
+               ret = wi->read_desc(paddr, &desc, wi->data);
+               if (ret < 0)
+                       return ret;
+
+               /*
+                * Handle reversed descriptors if endianness differs between the
+                * host and the guest hypervisor.
+                */
+               if (wi->be)
+                       desc = be64_to_cpu(desc);
+               else
+                       desc = le64_to_cpu(desc);
+
+               /* Check for valid descriptor at this point */
+               if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
+                       out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
+                       out->upper_attr = desc;
+                       return 1;
+               }
+
+               /* We're at the final level or block translation level */
+               if ((desc & 3) == 1 || level == 3)
+                       break;
+
+               if (check_output_size(wi, desc)) {
+                       out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
+                       out->upper_attr = desc;
+                       return 1;
+               }
+
+               base_addr = desc & GENMASK_ULL(47, wi->pgshift);
+
+               level += 1;
+               addr_top = addr_bottom - 1;
+       }
+
+       if (level < first_block_level) {
+               out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
+               out->upper_attr = desc;
+               return 1;
+       }
+
+       /*
+        * We don't use the contiguous bit in the stage-2 ptes, so skip check
+        * for misprogramming of the contiguous bit.
+        */
+
+       if (check_output_size(wi, desc)) {
+               out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
+               out->upper_attr = desc;
+               return 1;
+       }
+
+       if (!(desc & BIT(10))) {
+               out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
+               out->upper_attr = desc;
+               return 1;
+       }
+
+       /* Calculate and return the result */
+       paddr = (desc & GENMASK_ULL(47, addr_bottom)) |
+               (ipa & GENMASK_ULL(addr_bottom - 1, 0));
+       out->output = paddr;
+       out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
+       out->readable = desc & (0b01 << 6);
+       out->writable = desc & (0b10 << 6);
+       out->level = level;
+       out->upper_attr = desc & GENMASK_ULL(63, 52);
+       return 0;
+}
+
+static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
+{
+       struct kvm_vcpu *vcpu = data;
+
+       return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
+}
+
+static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
+{
+       wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
+
+       switch (vtcr & VTCR_EL2_TG0_MASK) {
+       case VTCR_EL2_TG0_4K:
+               wi->pgshift = 12;        break;
+       case VTCR_EL2_TG0_16K:
+               wi->pgshift = 14;        break;
+       case VTCR_EL2_TG0_64K:
+       default:
+               wi->pgshift = 16;        break;
+       }
+
+       wi->pgsize = 1UL << wi->pgshift;
+       wi->ps = (vtcr & VTCR_EL2_PS_MASK) >> VTCR_EL2_PS_SHIFT;
+       wi->sl = (vtcr & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT;
+       wi->max_pa_bits = VTCR_EL2_IPA(vtcr);
+}
+
+int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+                      struct kvm_s2_trans *result)
+{
+       u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
+       struct s2_walk_info wi;
+       int ret;
+
+       result->esr = 0;
+
+       if (!nested_virt_in_use(vcpu))
+               return 0;
+
+       wi.read_desc = read_guest_s2_desc;
+       wi.data = vcpu;
+       wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+
+       vtcr_to_walk_info(vtcr, &wi);
+
+       wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_EE;
+       wi.el1_aarch32 = vcpu_mode_is_32bit(vcpu);
+
+       ret = walk_nested_s2_pgd(gipa, &wi, result);
+       if (ret)
+               result->esr |= (kvm_vcpu_get_hsr(vcpu) & ~ESR_ELx_FSC);
+
+       return ret;
+}
+
 /* Must be called with kvm->lock held */
 struct kvm_s2_mmu *lookup_s2_mmu(struct kvm *kvm, u64 vttbr, u64 hcr)
 {
-- 
2.20.1
