On Wed, 25 Mar 2026 00:36:18 +0000,
Wei-Lin Chang <[email protected]> wrote:
>
> The API is designed for userspace to first call prepare_{l2_stack,
> hyp_state, eret_destination, nested_sync_handler}, with a function
> supplied to prepare_eret_destination() to be run in L2. Then run_l2()
> can be called in L1 to run the given function in L2.
>
> Signed-off-by: Wei-Lin Chang <[email protected]>
> ---
> tools/testing/selftests/kvm/Makefile.kvm | 2 +
> .../selftests/kvm/include/arm64/nested.h | 18 ++++++
> .../testing/selftests/kvm/lib/arm64/nested.c | 61 +++++++++++++++++++
> .../selftests/kvm/lib/arm64/nested_asm.S | 35 +++++++++++
> 4 files changed, 116 insertions(+)
> create mode 100644 tools/testing/selftests/kvm/include/arm64/nested.h
> create mode 100644 tools/testing/selftests/kvm/lib/arm64/nested.c
> create mode 100644 tools/testing/selftests/kvm/lib/arm64/nested_asm.S
>
> diff --git a/tools/testing/selftests/kvm/Makefile.kvm
> b/tools/testing/selftests/kvm/Makefile.kvm
> index 98da9fa4b8b7..5e681e8e0cd7 100644
> --- a/tools/testing/selftests/kvm/Makefile.kvm
> +++ b/tools/testing/selftests/kvm/Makefile.kvm
> @@ -34,6 +34,8 @@ LIBKVM_arm64 += lib/arm64/gic.c
> LIBKVM_arm64 += lib/arm64/gic_v3.c
> LIBKVM_arm64 += lib/arm64/gic_v3_its.c
> LIBKVM_arm64 += lib/arm64/handlers.S
> +LIBKVM_arm64 += lib/arm64/nested.c
> +LIBKVM_arm64 += lib/arm64/nested_asm.S
> LIBKVM_arm64 += lib/arm64/processor.c
> LIBKVM_arm64 += lib/arm64/spinlock.c
> LIBKVM_arm64 += lib/arm64/ucall.c
> diff --git a/tools/testing/selftests/kvm/include/arm64/nested.h
> b/tools/testing/selftests/kvm/include/arm64/nested.h
> new file mode 100644
> index 000000000000..739ff2ee0161
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/include/arm64/nested.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * ARM64 Nested virtualization defines
> + */
> +
> +#ifndef SELFTEST_KVM_NESTED_H
> +#define SELFTEST_KVM_NESTED_H
> +
> +void prepare_l2_stack(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
> +void prepare_hyp_state(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
> +void prepare_eret_destination(struct kvm_vm *vm, struct kvm_vcpu *vcpu, void
> *l2_pc);
> +void prepare_nested_sync_handler(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
> +
> +void run_l2(void);
> +void after_hvc(void);
> +void do_hvc(void);
> +
> +#endif /* SELFTEST_KVM_NESTED_H */
> diff --git a/tools/testing/selftests/kvm/lib/arm64/nested.c
> b/tools/testing/selftests/kvm/lib/arm64/nested.c
> new file mode 100644
> index 000000000000..111d02f44cfe
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/lib/arm64/nested.c
> @@ -0,0 +1,61 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * ARM64 Nested virtualization helpers
> + */
> +
> +#include "kvm_util.h"
> +#include "nested.h"
> +#include "processor.h"
> +#include "test_util.h"
> +
> +#include <asm/sysreg.h>
> +
> +static void hvc_handler(struct ex_regs *regs)
> +{
> + GUEST_ASSERT_EQ(get_current_el(), 2);
> + GUEST_PRINTF("hvc handler\n");
> + regs->pstate = PSR_MODE_EL2h | PSR_D_BIT | PSR_A_BIT | PSR_I_BIT |
> PSR_F_BIT;
> + regs->pc = (u64)after_hvc;
> +}
> +
> +void prepare_l2_stack(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> +{
> + size_t l2_stack_size;
> + uint64_t l2_stack_paddr;
> +
> + l2_stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS *
> vm->page_size :
> + vm->page_size;
Please use symbolic constants. Also, this looks wrong if the default
stack size is 32k and the page size is 16k. You probably want to
express a stack size directly, rather than a number of pages.
> + l2_stack_paddr = __vm_phy_pages_alloc(vm, l2_stack_size / vm->page_size,
> + 0, 0, false);
> + vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), l2_stack_paddr +
> l2_stack_size);
> +}
> +
> +void prepare_hyp_state(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> +{
> + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), HCR_EL2_RW);
Surely the E2H value matters. Or are you planning to only run this on
configurations that hardcode E2H==0? That'd be pretty limiting.
> +}
> +
> +void prepare_eret_destination(struct kvm_vm *vm, struct kvm_vcpu *vcpu, void
> *l2_pc)
> +{
> + vm_paddr_t do_hvc_paddr = addr_gva2gpa(vm, (vm_vaddr_t)do_hvc);
> + vm_paddr_t l2_pc_paddr = addr_gva2gpa(vm, (vm_vaddr_t)l2_pc);
> +
> + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SPSR_EL2), PSR_MODE_EL1h |
> + PSR_D_BIT |
> + PSR_A_BIT |
> + PSR_I_BIT |
> + PSR_F_BIT);
> + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ELR_EL2), l2_pc_paddr);
> + /* HACK: use TPIDR_EL2 to pass address, see run_l2() in nested_asm.S */
> + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL2), do_hvc_paddr);
> +}
> +
> +void prepare_nested_sync_handler(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> +{
> + if (!vm->handlers) {
> + vm_init_descriptor_tables(vm);
> + vcpu_init_descriptor_tables(vcpu);
> + }
> + vm_install_sync_handler(vm, VECTOR_SYNC_LOWER_64,
> + ESR_ELx_EC_HVC64, hvc_handler);
> +}
> diff --git a/tools/testing/selftests/kvm/lib/arm64/nested_asm.S
> b/tools/testing/selftests/kvm/lib/arm64/nested_asm.S
> new file mode 100644
> index 000000000000..4ecf2d510a6f
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/lib/arm64/nested_asm.S
> @@ -0,0 +1,35 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * ARM64 Nested virtualization assembly helpers
> + */
> +
> +.globl run_l2
> +.globl after_hvc
> +.globl do_hvc
> +run_l2:
> + /*
> + * At this point TPIDR_EL2 will contain the gpa of do_hvc from
> + * prepare_eret_destination(). gpa of do_hvc have to be passed in
> + * because we want L2 to issue an hvc after it returns from the user
> + * passed function. In order for that to happen the lr must be
> + * controlled, which at this point holds the value of the address of
> + * the next instruction after this run_l2() call, which is not useful
> + * for L2. Additionally, L1 can't translate gva into gpa, so we can't
> + * calculate it here.
> + *
> + * So first save lr, then move TPIDR_EL2 to lr so when the user supplied
> + * L2 function returns, L2 jumps to do_hvc and let the L1 hvc handler
> + * take control. This implies we expect the L2 code to preserve lr and
> + * calls a regular ret in the end, which is true for normal C functions.
> + * The hvc handler will jump back to after_hvc when finished, and lr
> + * will be restored and we can return run_l2().
> + */
> + stp x29, lr, [sp, #-16]!
> + mrs x0, tpidr_el2
> + mov lr, x0
> + eret
> +after_hvc:
> + ldp x29, lr, [sp], #16
> + ret
> +do_hvc:
> + hvc #0
This probably works for a single-instruction L2 guest, but not having
any save/restore of the L2 context makes it hard to build anything on
top of this.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.