From: Jack Thomson <[email protected]>

Add an arm64 nested-virt selftest for KVM_PRE_FAULT_MEMORY. The guest
enters vEL1 and exits to userspace with a nested/shadow stage-2 MMU as
the vCPU's last-run context.

Before prefaulting, userspace enables HCR_EL2.VM and points VTTBR_EL2 at
an empty nested stage-2 root. A prefault implementation that incorrectly
treats the userspace GPA as an L2 IPA will fail the ioctl; the correct
path swaps to the canonical stage-2 and succeeds.

Restore the original nested state before resuming the guest, then touch
the prefaulted range to check that vEL1 still runs correctly.

Signed-off-by: Jack Thomson <[email protected]>
---
 tools/testing/selftests/kvm/Makefile.kvm      |   1 +
 .../kvm/arm64/nv_pre_fault_memory_test.c      | 200 ++++++++++++++++++
 2 files changed, 201 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/arm64/nv_pre_fault_memory_test.c

diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 4609d8f23e38..63d79245b47d 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -170,6 +170,7 @@ TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
 TEST_GEN_PROGS_arm64 += arm64/hello_el2
 TEST_GEN_PROGS_arm64 += arm64/host_sve
 TEST_GEN_PROGS_arm64 += arm64/hypercalls
+TEST_GEN_PROGS_arm64 += arm64/nv_pre_fault_memory_test
 TEST_GEN_PROGS_arm64 += arm64/external_aborts
 TEST_GEN_PROGS_arm64 += arm64/page_fault_test
 TEST_GEN_PROGS_arm64 += arm64/psci_test
diff --git a/tools/testing/selftests/kvm/arm64/nv_pre_fault_memory_test.c b/tools/testing/selftests/kvm/arm64/nv_pre_fault_memory_test.c
new file mode 100644
index 000000000000..2bbd5540599c
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/nv_pre_fault_memory_test.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * nv_pre_fault_memory_test - Test KVM_PRE_FAULT_MEMORY on a vCPU whose
+ * last-run context is nested.
+ *
+ * The guest starts at vEL2, mirrors its EL2 translation regime into the
+ * real EL1 registers, drops HCR_EL2.TGE and ERETs to vEL1, then exits to
+ * userspace from vEL1 so that the vCPU's last-run context selects a
+ * shadow stage-2 MMU. Userspace then enables an empty nested stage-2
+ * before prefaulting. Prefaulting must target the canonical stage-2,
+ * regardless of the vCPU's nested state.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+#include <linux/sizes.h>
+
+#define TEST_MEM_SLOT          10
+#define NESTED_S2_ROOT_SLOT    11
+#define TEST_MEM_SIZE          SZ_2M
+#define TEST_MEM_GPA           SZ_1G
+#define NESTED_S2_ROOT_GPA     (TEST_MEM_GPA + TEST_MEM_SIZE)
+
+struct nested_s2_state {
+       u64 hcr_el2;    /* HCR_EL2 saved for restore_nested_s2() */
+       u64 vttbr_el2;  /* VTTBR_EL2 saved for restore_nested_s2() */
+};
+
+static void guest_el1_code(void)
+{
+       u64 offset;
+
+       GUEST_ASSERT_EQ(get_current_el(), 1);
+
+       /* Exit to userspace from vEL1 so the nested context is last-run. */
+       GUEST_SYNC(1);
+
+       /*
+        * Read from each 4K step of the prefaulted range. vstage-2 is off,
+        * so the shadow stage-2 is a 1:1 view of the canonical IPA space.
+        */
+       for (offset = 0; offset < TEST_MEM_SIZE; offset += SZ_4K)
+               READ_ONCE(*(u64 *)(TEST_MEM_GPA + offset));
+
+       GUEST_DONE();
+}
+
+static void guest_code(void)
+{
+       u64 sp;
+
+       GUEST_ASSERT_EQ(get_current_el(), 2);
+
+       /*
+        * Mirror the EL2 translation regime into the real EL1 registers so
+        * that vEL1 runs on the test's stage-1 page tables. With E2H=1, the
+        * _EL1 accessors read the EL2 registers, and the _EL12 accessors
+        * write the real EL1 registers.
+        */
+       write_sysreg_s(read_sysreg(sctlr_el1), SYS_SCTLR_EL12);
+       write_sysreg_s(read_sysreg(tcr_el1), SYS_TCR_EL12);
+       write_sysreg_s(read_sysreg(ttbr0_el1), SYS_TTBR0_EL12);
+       write_sysreg_s(read_sysreg(mair_el1), SYS_MAIR_EL12);
+       write_sysreg_s(read_sysreg(cpacr_el1), SYS_CPACR_EL12);
+
+       /* Run vEL1 on the same stack. */
+       asm volatile("mov %0, sp" : "=r"(sp));
+       write_sysreg(sp, sp_el1);
+
+       /*
+        * Drop TGE so that vEL1 is a nested context rather than host EL0.
+        * KVM backs it with a shadow stage-2 MMU even though vstage-2 is
+        * disabled (HCR_EL2.VM=0).
+        */
+       write_sysreg(read_sysreg(hcr_el2) & ~HCR_EL2_TGE, hcr_el2);
+       isb();
+
+       write_sysreg(PSR_MODE_EL1h | PSR_F_BIT | PSR_I_BIT | PSR_A_BIT |
+                    PSR_D_BIT, spsr_el2);
+       write_sysreg((u64)guest_el1_code, elr_el2);
+       asm volatile("eret");   /* lands in guest_el1_code() at EL1h */
+
+       GUEST_ASSERT(false);    /* only reached if the ERET falls through */
+}
+
+static void pre_fault(struct kvm_vcpu *vcpu, u64 gpa, u64 size)
+{
+       struct kvm_pre_fault_memory range = {
+               .gpa = gpa,
+               .size = size,
+       };
+       int ret;
+
+       do {
+               ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
+       } while (ret < 0 && errno == EINTR);    /* retry when interrupted */
+
+       TEST_ASSERT(!ret, "KVM_PRE_FAULT_MEMORY failed, ret: %d errno: %d",
+                   ret, errno);
+       TEST_ASSERT_EQ(range.size, 0);  /* expect nothing left to prefault */
+}
+
+static struct nested_s2_state enable_empty_nested_s2(struct kvm_vcpu *vcpu)
+{
+       struct nested_s2_state state = {
+               .hcr_el2 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2)),
+               .vttbr_el2 = vcpu_get_reg(vcpu,
+                                          KVM_ARM64_SYS_REG(SYS_VTTBR_EL2)),
+       };
+
+       TEST_ASSERT(!(state.hcr_el2 & HCR_EL2_TGE),
+                   "vCPU should be in nested/vEL1 context");
+
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VTTBR_EL2),
+                    NESTED_S2_ROOT_GPA);       /* root the test never fills */
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2),
+                    state.hcr_el2 | HCR_EL2_VM);       /* nested stage-2 on */
+
+       return state;   /* pre-change values, for restore_nested_s2() */
+}
+
+static void restore_nested_s2(struct kvm_vcpu *vcpu,
+                             struct nested_s2_state *state)
+{
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), state->hcr_el2);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VTTBR_EL2),
+                    state->vttbr_el2); /* undo the empty-root swap */
+}
+
+int main(void)
+{
+       struct nested_s2_state s2;
+       struct kvm_vcpu_init init;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       u64 npages;
+
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY));
+
+       vm = vm_create(1);
+
+       kvm_get_default_vcpu_target(vm, &init);
+       init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+       vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+       kvm_arch_vm_finalize_vcpus(vm);
+
+       npages = TEST_MEM_SIZE / vm->page_size;
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, TEST_MEM_GPA,
+                                   TEST_MEM_SLOT, npages, 0);
+       virt_map(vm, TEST_MEM_GPA, TEST_MEM_GPA, npages);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   NESTED_S2_ROOT_GPA, NESTED_S2_ROOT_SLOT,
+                                   1, 0);
+
+       /* Run until the guest, now at vEL1, issues GUEST_SYNC(1). */
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               TEST_ASSERT_EQ(uc.args[1], 1);
+               break;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       default:
+               TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+       }
+
+       /*
+        * The vCPU's last-run context is vEL1, backed by a shadow stage-2
+        * MMU. Enable nested stage-2 with an empty root so that the ioctl
+        * fails if it tries to interpret the userspace GPA as an L2 IPA.
+        * Prefault in two halves so that the second ioctl exercises a
+        * repeated shadow-MMU attach and canonical stage-2 swap.
+        */
+       s2 = enable_empty_nested_s2(vcpu);
+       pre_fault(vcpu, TEST_MEM_GPA, TEST_MEM_SIZE / 2);
+       pre_fault(vcpu, TEST_MEM_GPA + TEST_MEM_SIZE / 2, TEST_MEM_SIZE / 2);
+       restore_nested_s2(vcpu, &s2);
+
+       /* Resume at vEL1; the guest reads the range, then GUEST_DONE(). */
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_DONE:
+               break;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       default:
+               TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+       }
+
+       kvm_vm_free(vm);
+       return 0;
+}
-- 
2.43.0


Reply via email to