On 6/21/19 10:38 AM, Marc Zyngier wrote:
> From: Jintack Lim <[email protected]>
>
> When entering a nested VM, we set up the hypervisor control interface
> based on what the guest hypervisor has set. In particular, we inspect
> each list register written by the guest hypervisor to check whether its
> HW bit is set. If so, we translate the hw irq number from the guest's
> point of view into the real hardware irq number, if a mapping exists.
>
> Signed-off-by: Jintack Lim <[email protected]>
> [Rewritten to support GICv3 instead of GICv2]
> Signed-off-by: Marc Zyngier <[email protected]>
> [Redesigned execution flow around vcpu load/put]
> Signed-off-by: Christoffer Dall <[email protected]>
> ---
> arch/arm/include/asm/kvm_emulate.h | 1 +
> arch/arm/include/asm/kvm_host.h | 6 +-
> arch/arm64/include/asm/kvm_host.h | 5 +-
> arch/arm64/kvm/Makefile | 1 +
> arch/arm64/kvm/nested.c | 10 ++
> arch/arm64/kvm/sys_regs.c | 178 ++++++++++++++++++++++++++++-
> include/kvm/arm_vgic.h | 18 +++
> virt/kvm/arm/arm.c | 7 +-
> virt/kvm/arm/vgic/vgic-v3-nested.c | 177 ++++++++++++++++++++++++++++
> virt/kvm/arm/vgic/vgic-v3.c | 28 +++++
> virt/kvm/arm/vgic/vgic.c | 32 ++++++
> 11 files changed, 456 insertions(+), 7 deletions(-)
> create mode 100644 virt/kvm/arm/vgic/vgic-v3-nested.c
>
> diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
> index 865ce545b465..a53f19041e16 100644
> --- a/arch/arm/include/asm/kvm_emulate.h
> +++ b/arch/arm/include/asm/kvm_emulate.h
> @@ -334,5 +334,6 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
> static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {}
>
> static inline bool is_hyp_ctxt(struct kvm_vcpu *vcpu) { return false; }
> +static inline int kvm_inject_nested_irq(struct kvm_vcpu *vcpu) { BUG(); }
>
> #endif /* __ARM_KVM_EMULATE_H__ */
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index cc761610e41e..d6923ed55796 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -35,10 +35,12 @@
> #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
> #endif
>
> +/* KVM_REQ_GUEST_HYP_IRQ_PENDING is actually unused */
> #define KVM_REQ_SLEEP \
> KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> -#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
> -#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
> +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
> +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
> +#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(3)
>
> DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index e0fe9acb46bf..e2e44cc650bf 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -53,8 +53,9 @@
>
> #define KVM_REQ_SLEEP \
> KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> -#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
> -#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
> +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
> +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
> +#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(3)
>
> DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index f11bd8b0d837..045a8f18f465 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -38,3 +38,4 @@ kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
>
> kvm-$(CONFIG_KVM_ARM_HOST) += nested.o
> kvm-$(CONFIG_KVM_ARM_HOST) += emulate-nested.o
> +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3-nested.o
> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> index 214d59019935..df2db9ab7cfb 100644
> --- a/arch/arm64/kvm/nested.c
> +++ b/arch/arm64/kvm/nested.c
> @@ -539,3 +539,13 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
> kvm->arch.nested_mmus_size = 0;
> kvm_free_stage2_pgd(&kvm->arch.mmu);
> }
> +
> +bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
> +{
> + bool imo = __vcpu_sys_reg(vcpu, HCR_EL2) & HCR_IMO;
> + bool fmo = __vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FMO;
> +
> + WARN(imo != fmo, "Separate virtual IRQ/FIQ settings not supported\n");
> +
> + return nested_virt_in_use(vcpu) && imo && fmo && !is_hyp_ctxt(vcpu);
> +}
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 2031a59fcf49..ba3bcd29c02d 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -26,6 +26,8 @@
> #include <linux/printk.h>
> #include <linux/uaccess.h>
>
> +#include <linux/irqchip/arm-gic-v3.h>
> +
> #include <asm/cacheflush.h>
> #include <asm/cputype.h>
> #include <asm/debug-monitors.h>
> @@ -505,6 +507,18 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
> return true;
> }
>
> +/*
> + * The architecture says that non-secure write accesses to this register from
> + * EL1 are trapped to EL2, if either:
> + * - HCR_EL2.FMO==1, or
> + * - HCR_EL2.IMO==1
> + */
> +static bool sgi_traps_to_vel2(struct kvm_vcpu *vcpu)
> +{
> + return !vcpu_mode_el2(vcpu) &&
> + !!(__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO));
> +}
> +
> /*
> * Trap handler for the GICv3 SGI generation system register.
> * Forward the request to the VGIC emulation.
> @@ -520,6 +534,11 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
> if (!p->is_write)
> return read_from_write_only(vcpu, p, r);
>
> + if (sgi_traps_to_vel2(vcpu)) {
> + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
> + return false;
> + }
> +
> /*
> * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates
> * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group,
> @@ -563,7 +582,13 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
> if (p->is_write)
> return ignore_write(vcpu, p);
>
> - p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
> + if (p->Op1 == 4) { /* ICC_SRE_EL2 */
> + p->regval = (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE |
> + ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB);
> + } else { /* ICC_SRE_EL1 */
> + p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
> + }
> +
> return true;
> }
>
> @@ -1793,6 +1818,122 @@ static bool access_id_aa64pfr0_el1(struct kvm_vcpu *v,
> return true;
> }
>
> +static bool access_gic_apr(struct kvm_vcpu *vcpu,
> + struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> + u32 index, *base;
> +
> + index = r->Op2;
> + if (r->CRm == 8)
> + base = cpu_if->vgic_ap0r;
> + else
> + base = cpu_if->vgic_ap1r;
> +
> + if (p->is_write)
> + base[index] = p->regval;
> + else
> + p->regval = base[index];
> +
> + return true;
> +}
> +
> +static bool access_gic_hcr(struct kvm_vcpu *vcpu,
> + struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +
> + if (p->is_write)
> + cpu_if->vgic_hcr = p->regval;
This is probably OK for now, because there's only enough NV support to run an
L1 KVM hypervisor + L2 guest; but note that the L1 guest's ICH_HCR_EL2 value is
written to the hardware register unmodified in vgic_v3_load, and there's no
support for forwarding the traps that can be configured via ICH_HCR_EL2 (or
even for handling some of them - ICV_CTLR_EL1 accesses can be trapped when
ICH_HCR_EL2.TC == 1).
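Something modelled on sgi_traps_to_vel2() from this patch could presumably
cover at least the TC case - a rough, untested sketch (icv_traps_to_vel2 is a
made-up name, and the exact placement would need thought):

	/*
	 * ICH_HCR_EL2.TC traps non-secure EL1 accesses to ICC_CTLR_EL1
	 * (among others), so a guest hypervisor setting TC expects these
	 * to arrive at virtual EL2.
	 */
	static bool icv_traps_to_vel2(struct kvm_vcpu *vcpu)
	{
		struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;

		return !vcpu_mode_el2(vcpu) && (cpu_if->vgic_hcr & ICH_HCR_TC);
	}

with the ICC_CTLR_EL1 handler then reinjecting, the same way access_gic_sgi()
does above:

	if (icv_traps_to_vel2(vcpu)) {
		kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
		return false;
	}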
> + else
> + p->regval = cpu_if->vgic_hcr;
> +
> + return true;
> +}
> +
> +static bool access_gic_vtr(struct kvm_vcpu *vcpu,
> + struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + if (p->is_write)
> + return write_to_read_only(vcpu, p, r);
> +
> + p->regval = kvm_vgic_global_state.ich_vtr_el2;
> +
> + return true;
> +}
> +
> +static bool access_gic_misr(struct kvm_vcpu *vcpu,
> + struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + if (p->is_write)
> + return write_to_read_only(vcpu, p, r);
> +
> + p->regval = vgic_v3_get_misr(vcpu);
> +
> + return true;
> +}
> +
> +static bool access_gic_eisr(struct kvm_vcpu *vcpu,
> + struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + if (p->is_write)
> + return write_to_read_only(vcpu, p, r);
> +
> + p->regval = vgic_v3_get_eisr(vcpu);
> +
> + return true;
> +}
> +
> +static bool access_gic_elrsr(struct kvm_vcpu *vcpu,
> + struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + if (p->is_write)
> + return write_to_read_only(vcpu, p, r);
> +
> + p->regval = vgic_v3_get_elrsr(vcpu);
> +
> + return true;
> +}
> +
> +static bool access_gic_vmcr(struct kvm_vcpu *vcpu,
> + struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +
> + if (p->is_write)
> + cpu_if->vgic_vmcr = p->regval;
> + else
> + p->regval = cpu_if->vgic_vmcr;
> +
> + return true;
> +}
> +
> +static bool access_gic_lr(struct kvm_vcpu *vcpu,
> + struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> + u32 index;
> +
> + index = p->Op2;
> + if (p->CRm == 13)
> + index += 8;
> +
> + if (p->is_write)
> + cpu_if->vgic_lr[index] = p->regval;
> + else
> + p->regval = cpu_if->vgic_lr[index];
> +
> + return true;
> +}
> +
> /*
> * Architected system registers.
> * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
> @@ -2123,6 +2264,41 @@ static const struct sys_reg_desc sys_reg_descs[] = {
> { SYS_DESC(SYS_RMR_EL2), access_rw, reset_val, RMR_EL2, 0 },
> { SYS_DESC(SYS_VDISR_EL2), trap_undef },
>
> + { SYS_DESC(SYS_ICH_AP0R0_EL2), access_gic_apr },
> + { SYS_DESC(SYS_ICH_AP0R1_EL2), access_gic_apr },
> + { SYS_DESC(SYS_ICH_AP0R2_EL2), access_gic_apr },
> + { SYS_DESC(SYS_ICH_AP0R3_EL2), access_gic_apr },
> + { SYS_DESC(SYS_ICH_AP1R0_EL2), access_gic_apr },
> + { SYS_DESC(SYS_ICH_AP1R1_EL2), access_gic_apr },
> + { SYS_DESC(SYS_ICH_AP1R2_EL2), access_gic_apr },
> + { SYS_DESC(SYS_ICH_AP1R3_EL2), access_gic_apr },
> +
> + { SYS_DESC(SYS_ICC_SRE_EL2), access_gic_sre },
> +
> + { SYS_DESC(SYS_ICH_HCR_EL2), access_gic_hcr },
> + { SYS_DESC(SYS_ICH_VTR_EL2), access_gic_vtr },
> + { SYS_DESC(SYS_ICH_MISR_EL2), access_gic_misr },
> + { SYS_DESC(SYS_ICH_EISR_EL2), access_gic_eisr },
> + { SYS_DESC(SYS_ICH_ELRSR_EL2), access_gic_elrsr },
> + { SYS_DESC(SYS_ICH_VMCR_EL2), access_gic_vmcr },
> +
> + { SYS_DESC(SYS_ICH_LR0_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR1_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR2_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR3_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR4_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR5_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR6_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR7_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR8_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR9_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR10_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR11_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR12_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR13_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR14_EL2), access_gic_lr },
> + { SYS_DESC(SYS_ICH_LR15_EL2), access_gic_lr },
> +
> { SYS_DESC(SYS_CONTEXTIDR_EL2), access_rw, reset_val, CONTEXTIDR_EL2, 0 },
> { SYS_DESC(SYS_TPIDR_EL2), access_rw, reset_val, TPIDR_EL2, 0 },
>
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 163b132e100e..707fbe627155 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -310,6 +310,15 @@ struct vgic_cpu {
>
> struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS];
>
> + /* CPU vif control registers for the virtual GICH interface */
> + struct vgic_v3_cpu_if nested_vgic_v3;
> +
> + /*
> + * The shadow vif control register loaded to the hardware when
> + * running a nested L2 guest with the virtual IMO/FMO bit set.
> + */
> + struct vgic_v3_cpu_if shadow_vgic_v3;
> +
> raw_spinlock_t ap_list_lock; /* Protects the ap_list */
>
> /*
> @@ -366,6 +375,13 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
> void kvm_vgic_load(struct kvm_vcpu *vcpu);
> void kvm_vgic_put(struct kvm_vcpu *vcpu);
>
> +void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
> +void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
> +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
> +u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu);
> +u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu);
> +u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu);
> +
> #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
> #define vgic_initialized(k) ((k)->arch.vgic.initialized)
> #define vgic_ready(k) ((k)->arch.vgic.ready)
> @@ -411,4 +427,6 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
> void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
> void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
>
> +bool vgic_state_is_nested(struct kvm_vcpu *vcpu);
> +
> #endif /* __KVM_ARM_VGIC_H */
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index ca10a11e044e..ddcab58ae440 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -634,6 +634,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
> * that a VCPU sees new virtual interrupts.
> */
> kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
> +
> + if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
> + kvm_inject_nested_irq(vcpu);
> }
> }
>
> @@ -680,10 +683,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
> */
> cond_resched();
>
> - update_vmid(&vcpu->arch.hw_mmu->vmid);
> -
> check_vcpu_requests(vcpu);
>
> + update_vmid(&vcpu->arch.hw_mmu->vmid);
Was this change made to avoid ending up with an mmu that has a valid vmid_gen
but was never actually run? Or something else entirely?
> +
> /*
> * Preparing the interrupts to be injected also
> * involves poking the GIC, which must be done in a
> diff --git a/virt/kvm/arm/vgic/vgic-v3-nested.c b/virt/kvm/arm/vgic/vgic-v3-nested.c
> new file mode 100644
> index 000000000000..6fb81dfbb679
> --- /dev/null
> +++ b/virt/kvm/arm/vgic/vgic-v3-nested.c
> @@ -0,0 +1,177 @@
> +#include <linux/cpu.h>
> +#include <linux/kvm.h>
> +#include <linux/kvm_host.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/uaccess.h>
> +
> +#include <linux/irqchip/arm-gic-v3.h>
> +
> +#include <asm/kvm_emulate.h>
> +#include <asm/kvm_arm.h>
> +#include <kvm/arm_vgic.h>
> +
> +#include "vgic.h"
> +
> +static inline struct vgic_v3_cpu_if *vcpu_nested_if(struct kvm_vcpu *vcpu)
> +{
> + return &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +}
Not especially relevant at this stage, but the nested_vgic_v3 member is
accessed in several other places in sys_regs.c and vgic-v3.c. Perhaps this
helper could be moved to include/kvm/arm_vgic.h in a future revision.
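i.e. hoisting something like:

	static inline struct vgic_v3_cpu_if *vcpu_nested_if(struct kvm_vcpu *vcpu)
	{
		return &vcpu->arch.vgic_cpu.nested_vgic_v3;
	}

so that the open-coded &vcpu->arch.vgic_cpu.nested_vgic_v3 in access_gic_hcr()
and friends could use it (modulo include ordering - arm_vgic.h only has a
forward declaration of struct kvm_vcpu, so the helper may need to live in a
header that sees the full definition).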
> +
> +static inline struct vgic_v3_cpu_if *vcpu_shadow_if(struct kvm_vcpu *vcpu)
> +{
> + return &vcpu->arch.vgic_cpu.shadow_vgic_v3;
> +}
> +
> +static inline bool lr_triggers_eoi(u64 lr)
> +{
> + return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI);
> +}
> +
> +u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> + u16 reg = 0;
> + int i;
> +
> + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
> + if (lr_triggers_eoi(cpu_if->vgic_lr[i]))
> + reg |= BIT(i);
> + }
> +
> + return reg;
> +}
> +
> +u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> + u16 reg = 0;
> + int i;
> +
> + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
> + if (!(cpu_if->vgic_lr[i] & ICH_LR_STATE))
> + reg |= BIT(i);
> + }
> +
> + return reg;
> +}
> +
> +u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> + int nr_lr = kvm_vgic_global_state.nr_lr;
> + u64 reg = 0;
> +
> + if (vgic_v3_get_eisr(vcpu))
> + reg |= ICH_MISR_EOI;
> +
> + if (cpu_if->vgic_hcr & ICH_HCR_UIE) {
> + int used_lrs;
> +
> + used_lrs = nr_lr - hweight16(vgic_v3_get_elrsr(vcpu));
> + if (used_lrs <= 1)
> + reg |= ICH_MISR_U;
> + }
> +
> + /* TODO: Support remaining bits in this register */
> + return reg;
> +}
> +
> +/*
> + * For LRs which have HW bit set such as timer interrupts, we modify them to
> + * have the host hardware interrupt number instead of the virtual one programmed
> + * by the guest hypervisor.
> + */
> +static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> + struct vgic_v3_cpu_if *s_cpu_if = vcpu_shadow_if(vcpu);
> + struct vgic_irq *irq;
> + int i;
> +
> + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
> + u64 lr = cpu_if->vgic_lr[i];
> + int l1_irq;
> +
> + if (!(lr & ICH_LR_HW))
> + goto next;
> +
> + /* We have the HW bit set */
> + l1_irq = (lr & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT;
> + irq = vgic_get_irq(vcpu->kvm, vcpu, l1_irq);
> +
> + if (!irq || !irq->hw) {
> + /* There was no real mapping, so nuke the HW bit */
> + lr &= ~ICH_LR_HW;
> + if (irq)
> + vgic_put_irq(vcpu->kvm, irq);
> + goto next;
> + }
> +
> + /* Translate the virtual mapping to the real one */
> + lr &= ~ICH_LR_EOI; /* Why? */
> + lr &= ~ICH_LR_PHYS_ID_MASK;
> + lr |= (u64)irq->hwintid << ICH_LR_PHYS_ID_SHIFT;
> + vgic_put_irq(vcpu->kvm, irq);
> +
> +next:
> + s_cpu_if->vgic_lr[i] = lr;
> + }
> +
> + s_cpu_if->used_lrs = kvm_vgic_global_state.nr_lr;
> +}
> +
> +/*
> + * Change the shadow HWIRQ field back to the virtual value before copying over
> + * the entire shadow struct to the nested state.
> + */
> +static void vgic_v3_fixup_shadow_lr_state(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> + struct vgic_v3_cpu_if *s_cpu_if = vcpu_shadow_if(vcpu);
> + int lr;
> +
> + for (lr = 0; lr < kvm_vgic_global_state.nr_lr; lr++) {
> + s_cpu_if->vgic_lr[lr] &= ~ICH_LR_PHYS_ID_MASK;
> + s_cpu_if->vgic_lr[lr] |= cpu_if->vgic_lr[lr] & ICH_LR_PHYS_ID_MASK;
> + }
> +}
> +
> +void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +
> + vgic_cpu->shadow_vgic_v3 = vgic_cpu->nested_vgic_v3;
> + vgic_v3_create_shadow_lr(vcpu);
> + __vgic_v3_restore_state(vcpu_shadow_if(vcpu));
> +}
> +
> +void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +
> + __vgic_v3_save_state(vcpu_shadow_if(vcpu));
> +
> + /*
> + * Translate the shadow state HW fields back to the virtual ones
> + * before copying the shadow struct back to the nested one.
> + */
> + vgic_v3_fixup_shadow_lr_state(vcpu);
> + vgic_cpu->nested_vgic_v3 = vgic_cpu->shadow_vgic_v3;
> +}
> +
> +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> +
> + /*
> + * If we exit a nested VM with a pending maintenance interrupt from the
> + * GIC, then we need to forward this to the guest hypervisor so that it
> + * can re-sync the appropriate LRs and sample level triggered interrupts
> + * again.
> + */
> + if (vgic_state_is_nested(vcpu) &&
> + (cpu_if->vgic_hcr & ICH_HCR_EN) &&
> + vgic_v3_get_misr(vcpu))
> + kvm_inject_nested_irq(vcpu);
> +}
I don't see this function called anywhere - shouldn't it be part of patch #53,
"KVM: arm64: nv: Implement maintenance interrupt forwarding"?
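If it does stay in this patch, my rough guess is that the eventual caller sits
somewhere on the exit path, after the nested state has been saved back, along
the lines of:

	/* entirely hypothetical call site - nothing calls this yet */
	vgic_v3_put(vcpu);			/* syncs shadow LRs back */
	vgic_v3_handle_nested_maint_irq(vcpu);	/* forward MI to vEL2 */

but wiring that up as part of the forwarding patch seems cleaner.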
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 77d23e817756..25edf32c28fb 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -18,6 +18,7 @@
> #include <kvm/arm_vgic.h>
> #include <asm/kvm_hyp.h>
> #include <asm/kvm_mmu.h>
> +#include <asm/kvm_nested.h>
> #include <asm/kvm_asm.h>
>
> #include "vgic.h"
> @@ -298,6 +299,12 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
> vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
> ICC_SRE_EL1_DFB |
> ICC_SRE_EL1_SRE);
> + /*
> + * If nesting is allowed, force GICv3 onto the nested
> + * guests as well.
> + */
> + if (nested_virt_in_use(vcpu))
> + vcpu->arch.vgic_cpu.nested_vgic_v3.vgic_sre = vgic_v3->vgic_sre;
> vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
> } else {
> vgic_v3->vgic_sre = 0;
> @@ -660,6 +667,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
> {
> struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
>
> + /*
> + * vgic_v3_load_nested only affects the LRs in the shadow
> + * state, so it is fine to pass the nested state around.
> + */
> + if (vgic_state_is_nested(vcpu))
> + cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +
> /*
> * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
> * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
> @@ -672,12 +686,18 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
>
> if (has_vhe())
> __vgic_v3_activate_traps(cpu_if);
> +
> + if (vgic_state_is_nested(vcpu))
> + vgic_v3_load_nested(vcpu);
> }
>
> void vgic_v3_put(struct kvm_vcpu *vcpu)
> {
> struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
>
> + if (vgic_state_is_nested(vcpu))
> + cpu_if = &vcpu->arch.vgic_cpu.shadow_vgic_v3;
> +
> if (likely(cpu_if->vgic_sre))
> cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
>
> @@ -685,4 +705,12 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
>
> if (has_vhe())
> __vgic_v3_deactivate_traps(cpu_if);
> +
> + if (vgic_state_is_nested(vcpu))
> + vgic_v3_put_nested(vcpu);
> }
> +
> +__weak void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) {}
> +__weak void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu) {}
> +__weak void vgic_v3_load_nested(struct kvm_vcpu *vcpu) {}
> +__weak void vgic_v3_put_nested(struct kvm_vcpu *vcpu) {}
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index 6953aefecbb6..f32f49b0c803 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -872,6 +872,10 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
> {
> int used_lrs;
>
> + /* If nesting, this is a load/put affair, not flush/sync. */
> + if (vgic_state_is_nested(vcpu))
> + return;
> +
> WARN_ON(vgic_v4_sync_hwstate(vcpu));
>
> /* An empty ap_list_head implies used_lrs == 0 */
> @@ -920,6 +924,29 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
> !vgic_supports_direct_msis(vcpu->kvm))
> return;
>
> + /*
> + * If in a nested state, we must return early. Two possibilities:
> + *
> + * - If we have any pending IRQ for the guest and the guest
> + * expects IRQs to be handled in its virtual EL2 mode (the
> + * virtual IMO bit is set) and it is not already running in
> + * virtual EL2 mode, then we have to emulate an IRQ
> + * exception to virtual EL2.
> + *
> + * We do that by placing a request to ourselves which will
> + * abort the entry procedure and inject the exception at the
> + * beginning of the run loop.
> + *
> + * - Otherwise, do exactly *NOTHING*. The guest state is
> + * already loaded, and we can carry on with running it.
> + */
> + if (vgic_state_is_nested(vcpu)) {
> + if (kvm_vgic_vcpu_pending_irq(vcpu))
> + kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
> +
> + return;
> + }
> +
> DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
>
> if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
> @@ -1022,3 +1049,8 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
>
> return map_is_active;
> }
> +
> +__weak bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
> +{
> + return false;
> +}
_______________________________________________
kvmarm mailing list
[email protected]
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm