On 6/21/19 10:37 AM, Marc Zyngier wrote:
> Extract the direct HW accessors for later reuse.
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++-----------------
> 1 file changed, 139 insertions(+), 108 deletions(-)
>
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 2b8734f75a09..e181359adadf 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const
> struct el2_sysreg_map *map,
> return entry;
> }
>
> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
> +{
> + /*
> + * System registers listed in the switch are not saved on every
> + * exit from the guest but are only saved on vcpu_put.
> + *
> + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> + * should never be listed below, because the guest cannot modify its
> + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
> + * thread when emulating cross-VCPU communication.
> + */
> + switch (reg) {
> + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break;
> + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
> + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break;
> + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
> + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
> + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
> + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
> + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
> + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
> + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
> + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break;
> + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break;
> + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break;
> + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
> + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break;
> + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
> + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
> + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
> + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
> + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break;
> + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
> + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
> + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
> + default: return false;
> + }
> +
> + return true;
> +}
> +
> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
> +{
> + /*
> + * System registers listed in the switch are not restored on every
> + * entry to the guest but are only restored on vcpu_load.
> + *
> + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> + * should never be listed below, because the the MPIDR should only be
> + * set once, before running the VCPU, and never changed later.
> + */
> + switch (reg) {
> + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break;
> + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
> + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break;
> + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
> + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
> + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
> + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
> + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
> + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
> + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
> + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
> + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
> + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
> + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
> + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
> + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
> + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
> + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
> + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
> + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
> + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
> + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
> + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
> + default: return false;
> + }
> +
> + return true;
> +}
> +
> u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
> {
> - u64 val;
> + u64 val = 0x8badf00d8badf00d;
>
> if (!vcpu->arch.sysregs_loaded_on_cpu)
> - goto immediate_read;
> + goto memory_read;
>
> if (unlikely(sysreg_is_el2(reg))) {
> const struct el2_sysreg_map *el2_reg;
>
> if (!is_hyp_ctxt(vcpu))
> - goto immediate_read;
> + goto memory_read;
>
> switch (reg) {
> + case ELR_EL2:
> + return read_sysreg_el1(SYS_ELR);
> case SPSR_EL2:
> val = read_sysreg_el1(SYS_SPSR);
> return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val);
> }
>
> el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
> - if (el2_reg) {
> - /*
> - * If this register does not have an EL1 counterpart,
> - * then read the stored EL2 version.
> - */
> - if (el2_reg->mapping == __INVALID_SYSREG__)
> - goto immediate_read;
> -
> - /* Get the current version of the EL1 counterpart. */
> - reg = el2_reg->mapping;
> - }
> - } else {
> - /* EL1 register can't be on the CPU if the guest is in vEL2. */
> - if (unlikely(is_hyp_ctxt(vcpu)))
> - goto immediate_read;
> + BUG_ON(!el2_reg);
> +
> + /*
> + * If this register does not have an EL1 counterpart,
> + * then read the stored EL2 version.
> + */
> + if (el2_reg->mapping == __INVALID_SYSREG__)
> + goto memory_read;
> +
> + if (!vcpu_el2_e2h_is_set(vcpu) &&
> + el2_reg->translate)
> + goto memory_read;
Nit: the condition can be written on one line.
This condition wasn't present in patch 13 which introduced EL2 register
handling, and I'm struggling to understand what it does. As I understand the
code, this condition basically translates into:
- if the register is one of SCTLR_EL2, TTBR0_EL2, CPTR_EL2 or TCR_EL2, then read
it from memory.
- if the register is an EL2 register whose value is written unmodified to the
corresponding EL1 register, then read the corresponding EL1 register and return
that value.
Looking at vcpu_write_sys_reg, the values for the EL2 registers are always saved
in memory. The guest is a non-vhe guest, so writes to EL1 registers shouldn't be
reflected in the corresponding EL2 register. I think it's safe to always return
the value from memory.
I tried testing this with the following patch:
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1235a88ec575..27d39bb9564d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -290,6 +290,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
BUG_ON(!el2_reg);
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ goto memory_read;
+
/*
* If this register does not have an EL1 counterpart,
* then read the stored EL2 version.
@@ -297,10 +300,6 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
if (el2_reg->mapping == __INVALID_SYSREG__)
goto memory_read;
- if (!vcpu_el2_e2h_is_set(vcpu) &&
- el2_reg->translate)
- goto memory_read;
-
/* Get the current version of the EL1 counterpart. */
reg = el2_reg->mapping;
WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val));
I know it's not conclusive, but I was able to boot a L2 guest under a L1 non-vhe
hypervisor.
> +
> + /* Get the current version of the EL1 counterpart. */
> + reg = el2_reg->mapping;
> + WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val));
> + return val;
> }
>
> - /*
> - * System registers listed in the switch are not saved on every
> - * exit from the guest but are only saved on vcpu_put.
> - *
> - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> - * should never be listed below, because the guest cannot modify its
> - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
> - * thread when emulating cross-VCPU communication.
> - */
> - switch (reg) {
> - case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1);
> - case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12);
> - case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
> - case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12);
> - case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12);
> - case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12);
> - case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12);
> - case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12);
> - case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12);
> - case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12);
> - case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12);
> - case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12);
> - case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12);
> - case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12);
> - case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
> - case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0);
> - case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
> - case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12);
> - case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12);
> - case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1);
> - case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2);
> - case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2);
> - case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2);
> - case SP_EL2: return read_sysreg(sp_el1);
> - case ELR_EL2: return read_sysreg_el1(SYS_ELR);
> - }
> + /* EL1 register can't be on the CPU if the guest is in vEL2. */
> + if (unlikely(is_hyp_ctxt(vcpu)))
> + goto memory_read;
> +
> + if (__vcpu_read_sys_reg_from_cpu(reg, &val))
> + return val;
>
> -immediate_read:
> +memory_read:
> return __vcpu_sys_reg(vcpu, reg);
> }
>
> void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
> {
> if (!vcpu->arch.sysregs_loaded_on_cpu)
> - goto immediate_write;
> + goto memory_write;
>
> if (unlikely(sysreg_is_el2(reg))) {
> const struct el2_sysreg_map *el2_reg;
>
> if (!is_hyp_ctxt(vcpu))
> - goto immediate_write;
> + goto memory_write;
>
> - /* Store the EL2 version in the sysregs array. */
> + /*
> + * Always store a copy of the write to memory to avoid having
> + * to reverse-translate virtual EL2 system registers for a
> + * non-VHE guest hypervisor.
> + */
> __vcpu_sys_reg(vcpu, reg) = val;
>
> switch (reg) {
> + case ELR_EL2:
> + write_sysreg_el1(val, SYS_ELR);
> + return;
> case SPSR_EL2:
> val = __fixup_spsr_el2_write(&vcpu->arch.ctxt, val);
> write_sysreg_el1(val, SYS_SPSR);
> @@ -282,61 +344,30 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val,
> int reg)
> }
>
> el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
> - if (el2_reg) {
> - /* Does this register have an EL1 counterpart? */
> - if (el2_reg->mapping == __INVALID_SYSREG__)
> - return;
> + WARN(!el2_reg, "reg: %d\n", reg);
>
> - if (!vcpu_el2_e2h_is_set(vcpu) &&
> - el2_reg->translate)
> - val = el2_reg->translate(val);
> + /* Does this register have an EL1 counterpart? */
> + if (el2_reg->mapping == __INVALID_SYSREG__)
> + goto memory_write;
>
> - /* Redirect this to the EL1 version of the register. */
> - reg = el2_reg->mapping;
> - }
> - } else {
> - /* EL1 register can't be on the CPU if the guest is in vEL2. */
> - if (unlikely(is_hyp_ctxt(vcpu)))
> - goto immediate_write;
> - }
> + if (!vcpu_el2_e2h_is_set(vcpu) &&
> + el2_reg->translate)
> + val = el2_reg->translate(val);
>
> - /*
> - * System registers listed in the switch are not restored on every
> - * entry to the guest but are only restored on vcpu_load.
> - *
> - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> - * should never be listed below, because the the MPIDR should only be
> - * set once, before running the VCPU, and never changed later.
> - */
> - switch (reg) {
> - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return;
> - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return;
> - case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
> - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return;
> - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return;
> - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return;
> - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return;
> - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return;
> - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return;
> - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return;
> - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return;
> - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return;
> - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return;
> - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);
> return;
> - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
> - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return;
> - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
> - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return;
> - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return;
> - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return;
> - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return;
> - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return;
> - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return;
> - case SP_EL2: write_sysreg(val, sp_el1); return;
> - case ELR_EL2: write_sysreg_el1(val, SYS_ELR); return;
> + /* Redirect this to the EL1 version of the register. */
> + reg = el2_reg->mapping;
> + WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, reg));
> + return;
> }
>
> -immediate_write:
> + /* EL1 register can't be on the CPU if the guest is in vEL2. */
> + if (unlikely(is_hyp_ctxt(vcpu)))
> + goto memory_write;
> +
> + if (__vcpu_write_sys_reg_to_cpu(val, reg))
> + return;
> +
> +memory_write:
> __vcpu_sys_reg(vcpu, reg) = val;
> }
>
_______________________________________________
kvmarm mailing list
[email protected]
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm