On 6/21/19 10:38 AM, Marc Zyngier wrote:
> From: Jintack Lim <[email protected]>
>
> When entering a nested VM, we set up the hypervisor control interface
> based on what the guest hypervisor has set. In particular, we examine
> each list register written by the guest hypervisor to see whether its
> HW bit is set. If so, we translate the hw irq number from the guest's
> point of view to the real hardware irq number, if such a mapping exists.
>
> Signed-off-by: Jintack Lim <[email protected]>
> [Rewritten to support GICv3 instead of GICv2]
> Signed-off-by: Marc Zyngier <[email protected]>
> [Redesigned execution flow around vcpu load/put]
> Signed-off-by: Christoffer Dall <[email protected]>
> ---
>  arch/arm/include/asm/kvm_emulate.h |   1 +
>  arch/arm/include/asm/kvm_host.h    |   6 +-
>  arch/arm64/include/asm/kvm_host.h  |   5 +-
>  arch/arm64/kvm/Makefile            |   1 +
>  arch/arm64/kvm/nested.c            |  10 ++
>  arch/arm64/kvm/sys_regs.c          | 178 ++++++++++++++++++++++++++++-
>  include/kvm/arm_vgic.h             |  18 +++
>  virt/kvm/arm/arm.c                 |   7 +-
>  virt/kvm/arm/vgic/vgic-v3-nested.c | 177 ++++++++++++++++++++++++++++
>  virt/kvm/arm/vgic/vgic-v3.c        |  28 +++++
>  virt/kvm/arm/vgic/vgic.c           |  32 ++++++
>  11 files changed, 456 insertions(+), 7 deletions(-)
>  create mode 100644 virt/kvm/arm/vgic/vgic-v3-nested.c
>
> diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
> index 865ce545b465..a53f19041e16 100644
> --- a/arch/arm/include/asm/kvm_emulate.h
> +++ b/arch/arm/include/asm/kvm_emulate.h
> @@ -334,5 +334,6 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
>  static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {}
>  
>  static inline bool is_hyp_ctxt(struct kvm_vcpu *vcpu) { return false; }
> +static inline int kvm_inject_nested_irq(struct kvm_vcpu *vcpu) { BUG(); }
>  
>  #endif /* __ARM_KVM_EMULATE_H__ */
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index cc761610e41e..d6923ed55796 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -35,10 +35,12 @@
>  #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
>  #endif
>  
> +/* KVM_REQ_GUEST_HYP_IRQ_PENDING is actually unused */
>  #define KVM_REQ_SLEEP \
>       KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> -#define KVM_REQ_IRQ_PENDING  KVM_ARCH_REQ(1)
> -#define KVM_REQ_VCPU_RESET   KVM_ARCH_REQ(2)
> +#define KVM_REQ_IRQ_PENDING          KVM_ARCH_REQ(1)
> +#define KVM_REQ_VCPU_RESET           KVM_ARCH_REQ(2)
> +#define KVM_REQ_GUEST_HYP_IRQ_PENDING        KVM_ARCH_REQ(3)
>  
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>  
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index e0fe9acb46bf..e2e44cc650bf 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -53,8 +53,9 @@
>  
>  #define KVM_REQ_SLEEP \
>       KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> -#define KVM_REQ_IRQ_PENDING  KVM_ARCH_REQ(1)
> -#define KVM_REQ_VCPU_RESET   KVM_ARCH_REQ(2)
> +#define KVM_REQ_IRQ_PENDING          KVM_ARCH_REQ(1)
> +#define KVM_REQ_VCPU_RESET           KVM_ARCH_REQ(2)
> +#define KVM_REQ_GUEST_HYP_IRQ_PENDING        KVM_ARCH_REQ(3)
>  
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>  
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index f11bd8b0d837..045a8f18f465 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -38,3 +38,4 @@ kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
>  
>  kvm-$(CONFIG_KVM_ARM_HOST) += nested.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += emulate-nested.o
> +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3-nested.o
> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> index 214d59019935..df2db9ab7cfb 100644
> --- a/arch/arm64/kvm/nested.c
> +++ b/arch/arm64/kvm/nested.c
> @@ -539,3 +539,13 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
>       kvm->arch.nested_mmus_size = 0;
>       kvm_free_stage2_pgd(&kvm->arch.mmu);
>  }
> +
> +bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
> +{
> +     bool imo = __vcpu_sys_reg(vcpu, HCR_EL2) & HCR_IMO;
> +     bool fmo = __vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FMO;
> +
> +     WARN(imo != fmo, "Separate virtual IRQ/FIQ settings not supported\n");
> +
> +     return nested_virt_in_use(vcpu) && imo && fmo && !is_hyp_ctxt(vcpu);
> +}
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 2031a59fcf49..ba3bcd29c02d 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -26,6 +26,8 @@
>  #include <linux/printk.h>
>  #include <linux/uaccess.h>
>  
> +#include <linux/irqchip/arm-gic-v3.h>
> +
>  #include <asm/cacheflush.h>
>  #include <asm/cputype.h>
>  #include <asm/debug-monitors.h>
> @@ -505,6 +507,18 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
>       return true;
>  }
>  
> +/*
> + * The architecture says that non-secure write accesses to this register from
> + * EL1 are trapped to EL2, if either:
> + *  - HCR_EL2.FMO==1, or
> + *  - HCR_EL2.IMO==1
> + */
> +static bool sgi_traps_to_vel2(struct kvm_vcpu *vcpu)
> +{
> +     return !vcpu_mode_el2(vcpu) &&
> +             !!(__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO));
> +}
> +
>  /*
>   * Trap handler for the GICv3 SGI generation system register.
>   * Forward the request to the VGIC emulation.
> @@ -520,6 +534,11 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
>       if (!p->is_write)
>               return read_from_write_only(vcpu, p, r);
>  
> +     if (sgi_traps_to_vel2(vcpu)) {
> +             kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
> +             return false;
> +     }
> +
>       /*
>        * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates
>        * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group,
> @@ -563,7 +582,13 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
>       if (p->is_write)
>               return ignore_write(vcpu, p);
>  
> -     p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
> +     if (p->Op1 == 4) {      /* ICC_SRE_EL2 */
> +             p->regval = (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE |
> +                          ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB);
> +     } else {                /* ICC_SRE_EL1 */
> +             p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
> +     }
> +
>       return true;
>  }
>  
> @@ -1793,6 +1818,122 @@ static bool access_id_aa64pfr0_el1(struct kvm_vcpu *v,
>       return true;
>  }
>  
> +static bool access_gic_apr(struct kvm_vcpu *vcpu,
> +                        struct sys_reg_params *p,
> +                        const struct sys_reg_desc *r)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +     u32 index, *base;
> +
> +     index = r->Op2;
> +     if (r->CRm == 8)
> +             base = cpu_if->vgic_ap0r;
> +     else
> +             base = cpu_if->vgic_ap1r;
> +
> +     if (p->is_write)
> +             base[index] = p->regval;
> +     else
> +             p->regval = base[index];
> +
> +     return true;
> +}
> +
> +static bool access_gic_hcr(struct kvm_vcpu *vcpu,
> +                        struct sys_reg_params *p,
> +                        const struct sys_reg_desc *r)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +
> +     if (p->is_write)
> +             cpu_if->vgic_hcr = p->regval;

Probably because there's only enough NV support to run an L1 KVM hypervisor plus
an L2 guest, but note that the L1 guest's ICH_HCR_EL2 value is written to the
hardware register unmodified in vgic_v3_load, and there's no support for
forwarding the traps that can be configured via ICH_HCR_EL2 (or even for
handling some of them - accesses to ICV_CTLR_EL1, for example, can be trapped
when ICH_HCR_EL2.TC == 1).
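
If/when that gets addressed, I'd imagine something along these lines in the
ICV access path (a sketch only - the helper name is made up, and it simply
reuses the plumbing this patch already adds):

	/* Forward ICV accesses to vEL2 when the guest hypervisor set TC */
	static bool icv_access_traps_to_vel2(struct kvm_vcpu *vcpu)
	{
		struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;

		return !vcpu_mode_el2(vcpu) && (cpu_if->vgic_hcr & ICH_HCR_TC);
	}

with the ICV_CTLR_EL1 handler then doing the same dance as access_gic_sgi:

	if (icv_access_traps_to_vel2(vcpu)) {
		kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
		return false;
	}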

> +     else
> +             p->regval = cpu_if->vgic_hcr;
> +
> +     return true;
> +}
> +
> +static bool access_gic_vtr(struct kvm_vcpu *vcpu,
> +                        struct sys_reg_params *p,
> +                        const struct sys_reg_desc *r)
> +{
> +     if (p->is_write)
> +             return write_to_read_only(vcpu, p, r);
> +
> +     p->regval = kvm_vgic_global_state.ich_vtr_el2;
> +
> +     return true;
> +}
> +
> +static bool access_gic_misr(struct kvm_vcpu *vcpu,
> +                         struct sys_reg_params *p,
> +                         const struct sys_reg_desc *r)
> +{
> +     if (p->is_write)
> +             return write_to_read_only(vcpu, p, r);
> +
> +     p->regval = vgic_v3_get_misr(vcpu);
> +
> +     return true;
> +}
> +
> +static bool access_gic_eisr(struct kvm_vcpu *vcpu,
> +                         struct sys_reg_params *p,
> +                         const struct sys_reg_desc *r)
> +{
> +     if (p->is_write)
> +             return write_to_read_only(vcpu, p, r);
> +
> +     p->regval = vgic_v3_get_eisr(vcpu);
> +
> +     return true;
> +}
> +
> +static bool access_gic_elrsr(struct kvm_vcpu *vcpu,
> +                          struct sys_reg_params *p,
> +                          const struct sys_reg_desc *r)
> +{
> +     if (p->is_write)
> +             return write_to_read_only(vcpu, p, r);
> +
> +     p->regval = vgic_v3_get_elrsr(vcpu);
> +
> +     return true;
> +}
> +
> +static bool access_gic_vmcr(struct kvm_vcpu *vcpu,
> +                         struct sys_reg_params *p,
> +                         const struct sys_reg_desc *r)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +
> +     if (p->is_write)
> +             cpu_if->vgic_vmcr = p->regval;
> +     else
> +             p->regval = cpu_if->vgic_vmcr;
> +
> +     return true;
> +}
> +
> +static bool access_gic_lr(struct kvm_vcpu *vcpu,
> +                       struct sys_reg_params *p,
> +                       const struct sys_reg_desc *r)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +     u32 index;
> +
> +     index = p->Op2;
> +     if (p->CRm == 13)
> +             index += 8;
> +
> +     if (p->is_write)
> +             cpu_if->vgic_lr[index] = p->regval;
> +     else
> +             p->regval = cpu_if->vgic_lr[index];
> +
> +     return true;
> +}
> +
>  /*
>   * Architected system registers.
>   * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
> @@ -2123,6 +2264,41 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>       { SYS_DESC(SYS_RMR_EL2), access_rw, reset_val, RMR_EL2, 0 },
>       { SYS_DESC(SYS_VDISR_EL2), trap_undef },
>  
> +     { SYS_DESC(SYS_ICH_AP0R0_EL2), access_gic_apr },
> +     { SYS_DESC(SYS_ICH_AP0R1_EL2), access_gic_apr },
> +     { SYS_DESC(SYS_ICH_AP0R2_EL2), access_gic_apr },
> +     { SYS_DESC(SYS_ICH_AP0R3_EL2), access_gic_apr },
> +     { SYS_DESC(SYS_ICH_AP1R0_EL2), access_gic_apr },
> +     { SYS_DESC(SYS_ICH_AP1R1_EL2), access_gic_apr },
> +     { SYS_DESC(SYS_ICH_AP1R2_EL2), access_gic_apr },
> +     { SYS_DESC(SYS_ICH_AP1R3_EL2), access_gic_apr },
> +
> +     { SYS_DESC(SYS_ICC_SRE_EL2), access_gic_sre },
> +
> +     { SYS_DESC(SYS_ICH_HCR_EL2), access_gic_hcr },
> +     { SYS_DESC(SYS_ICH_VTR_EL2), access_gic_vtr },
> +     { SYS_DESC(SYS_ICH_MISR_EL2), access_gic_misr },
> +     { SYS_DESC(SYS_ICH_EISR_EL2), access_gic_eisr },
> +     { SYS_DESC(SYS_ICH_ELRSR_EL2), access_gic_elrsr },
> +     { SYS_DESC(SYS_ICH_VMCR_EL2), access_gic_vmcr },
> +
> +     { SYS_DESC(SYS_ICH_LR0_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR1_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR2_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR3_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR4_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR5_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR6_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR7_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR8_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR9_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR10_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR11_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR12_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR13_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR14_EL2), access_gic_lr },
> +     { SYS_DESC(SYS_ICH_LR15_EL2), access_gic_lr },
> +
>       { SYS_DESC(SYS_CONTEXTIDR_EL2), access_rw, reset_val, CONTEXTIDR_EL2, 0 },
>       { SYS_DESC(SYS_TPIDR_EL2), access_rw, reset_val, TPIDR_EL2, 0 },
>  
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 163b132e100e..707fbe627155 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -310,6 +310,15 @@ struct vgic_cpu {
>  
>       struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS];
>  
> +     /* CPU vif control registers for the virtual GICH interface */
> +     struct vgic_v3_cpu_if   nested_vgic_v3;
> +
> +     /*
> +      * The shadow vif control register loaded to the hardware when
> +      * running a nested L2 guest with the virtual IMO/FMO bit set.
> +      */
> +     struct vgic_v3_cpu_if   shadow_vgic_v3;
> +
>       raw_spinlock_t ap_list_lock;    /* Protects the ap_list */
>  
>       /*
> @@ -366,6 +375,13 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>  void kvm_vgic_load(struct kvm_vcpu *vcpu);
>  void kvm_vgic_put(struct kvm_vcpu *vcpu);
>  
> +void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
> +void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
> +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
> +u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu);
> +u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu);
> +u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu);
> +
>  #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
>  #define vgic_initialized(k)  ((k)->arch.vgic.initialized)
>  #define vgic_ready(k)                ((k)->arch.vgic.ready)
> @@ -411,4 +427,6 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
>  void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
>  void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
>  
> +bool vgic_state_is_nested(struct kvm_vcpu *vcpu);
> +
>  #endif /* __KVM_ARM_VGIC_H */
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index ca10a11e044e..ddcab58ae440 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -634,6 +634,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
>                * that a VCPU sees new virtual interrupts.
>                */
>               kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
> +
> +             if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
> +                     kvm_inject_nested_irq(vcpu);
>       }
>  }
>  
> @@ -680,10 +683,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>                */
>               cond_resched();
>  
> -             update_vmid(&vcpu->arch.hw_mmu->vmid);
> -
>               check_vcpu_requests(vcpu);
>  
> +             update_vmid(&vcpu->arch.hw_mmu->vmid);

Was this change made to prevent ending up with an mmu that has a valid
vmid_gen but was never actually run? Or something else entirely?

> +
>               /*
>                * Preparing the interrupts to be injected also
>                * involves poking the GIC, which must be done in a
> diff --git a/virt/kvm/arm/vgic/vgic-v3-nested.c b/virt/kvm/arm/vgic/vgic-v3-nested.c
> new file mode 100644
> index 000000000000..6fb81dfbb679
> --- /dev/null
> +++ b/virt/kvm/arm/vgic/vgic-v3-nested.c
> @@ -0,0 +1,177 @@
> +#include <linux/cpu.h>
> +#include <linux/kvm.h>
> +#include <linux/kvm_host.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/uaccess.h>
> +
> +#include <linux/irqchip/arm-gic-v3.h>
> +
> +#include <asm/kvm_emulate.h>
> +#include <asm/kvm_arm.h>
> +#include <kvm/arm_vgic.h>
> +
> +#include "vgic.h"
> +
> +static inline struct vgic_v3_cpu_if *vcpu_nested_if(struct kvm_vcpu *vcpu)
> +{
> +     return &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +}

Not especially relevant at this stage, but the nested_vgic_v3 member is accessed
in several other places in sys_regs.c and vgic-v3.c. Perhaps this helper could
be moved to include/kvm/arm_vgic.h in a future revision.
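
Something like this, say (just a sketch; it would need the usual header
untangling, since include/kvm/arm_vgic.h doesn't currently know the layout
of struct kvm_vcpu):

	static inline struct vgic_v3_cpu_if *vcpu_nested_if(struct kvm_vcpu *vcpu)
	{
		return &vcpu->arch.vgic_cpu.nested_vgic_v3;
	}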

> +
> +static inline struct vgic_v3_cpu_if *vcpu_shadow_if(struct kvm_vcpu *vcpu)
> +{
> +     return &vcpu->arch.vgic_cpu.shadow_vgic_v3;
> +}
> +
> +static inline bool lr_triggers_eoi(u64 lr)
> +{
> +     return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI);
> +}
> +
> +u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> +     u16 reg = 0;
> +     int i;
> +
> +     for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
> +             if (lr_triggers_eoi(cpu_if->vgic_lr[i]))
> +                     reg |= BIT(i);
> +     }
> +
> +     return reg;
> +}
> +
> +u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> +     u16 reg = 0;
> +     int i;
> +
> +     for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
> +             if (!(cpu_if->vgic_lr[i] & ICH_LR_STATE))
> +                     reg |= BIT(i);
> +     }
> +
> +     return reg;
> +}
> +
> +u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> +     int nr_lr = kvm_vgic_global_state.nr_lr;
> +     u64 reg = 0;
> +
> +     if (vgic_v3_get_eisr(vcpu))
> +             reg |= ICH_MISR_EOI;
> +
> +     if (cpu_if->vgic_hcr & ICH_HCR_UIE) {
> +             int used_lrs;
> +
> +             used_lrs = nr_lr - hweight16(vgic_v3_get_elrsr(vcpu));
> +             if (used_lrs <= 1)
> +                     reg |= ICH_MISR_U;
> +     }
> +
> +     /* TODO: Support remaining bits in this register */
> +     return reg;
> +}
> +
> +/*
> + * For LRs which have HW bit set such as timer interrupts, we modify them to
> + * have the host hardware interrupt number instead of the virtual one programmed
> + * by the guest hypervisor.
> + */
> +static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> +     struct vgic_v3_cpu_if *s_cpu_if = vcpu_shadow_if(vcpu);
> +     struct vgic_irq *irq;
> +     int i;
> +
> +     for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
> +             u64 lr = cpu_if->vgic_lr[i];
> +             int l1_irq;
> +
> +             if (!(lr & ICH_LR_HW))
> +                     goto next;
> +
> +             /* We have the HW bit set */
> +             l1_irq = (lr & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT;
> +             irq = vgic_get_irq(vcpu->kvm, vcpu, l1_irq);
> +
> +             if (!irq || !irq->hw) {
> +                     /* There was no real mapping, so nuke the HW bit */
> +                     lr &= ~ICH_LR_HW;
> +                     if (irq)
> +                             vgic_put_irq(vcpu->kvm, irq);
> +                     goto next;
> +             }
> +
> +             /* Translate the virtual mapping to the real one */
> +             lr &= ~ICH_LR_EOI; /* Why? */
> +             lr &= ~ICH_LR_PHYS_ID_MASK;
> +             lr |= (u64)irq->hwintid << ICH_LR_PHYS_ID_SHIFT;
> +             vgic_put_irq(vcpu->kvm, irq);
> +
> +next:
> +             s_cpu_if->vgic_lr[i] = lr;
> +     }
> +
> +     s_cpu_if->used_lrs = kvm_vgic_global_state.nr_lr;
> +}
> +
> +/*
> + * Change the shadow HWIRQ field back to the virtual value before copying over
> + * the entire shadow struct to the nested state.
> + */
> +static void vgic_v3_fixup_shadow_lr_state(struct kvm_vcpu *vcpu)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> +     struct vgic_v3_cpu_if *s_cpu_if = vcpu_shadow_if(vcpu);
> +     int lr;
> +
> +     for (lr = 0; lr < kvm_vgic_global_state.nr_lr; lr++) {
> +             s_cpu_if->vgic_lr[lr] &= ~ICH_LR_PHYS_ID_MASK;
> +             s_cpu_if->vgic_lr[lr] |= cpu_if->vgic_lr[lr] & ICH_LR_PHYS_ID_MASK;
> +     }
> +}
> +
> +void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
> +{
> +     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +
> +     vgic_cpu->shadow_vgic_v3 = vgic_cpu->nested_vgic_v3;
> +     vgic_v3_create_shadow_lr(vcpu);
> +     __vgic_v3_restore_state(vcpu_shadow_if(vcpu));
> +}
> +
> +void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
> +{
> +     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +
> +     __vgic_v3_save_state(vcpu_shadow_if(vcpu));
> +
> +     /*
> +      * Translate the shadow state HW fields back to the virtual ones
> +      * before copying the shadow struct back to the nested one.
> +      */
> +     vgic_v3_fixup_shadow_lr_state(vcpu);
> +     vgic_cpu->nested_vgic_v3 = vgic_cpu->shadow_vgic_v3;
> +}
> +
> +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu)
> +{
> +     struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> +
> +     /*
> +      * If we exit a nested VM with a pending maintenance interrupt from the
> +      * GIC, then we need to forward this to the guest hypervisor so that it
> +      * can re-sync the appropriate LRs and sample level triggered interrupts
> +      * again.
> +      */
> +     if (vgic_state_is_nested(vcpu) &&
> +         (cpu_if->vgic_hcr & ICH_HCR_EN) &&
> +         vgic_v3_get_misr(vcpu))
> +             kvm_inject_nested_irq(vcpu);
> +}

I don't see this function used anywhere; shouldn't it be part of #53 "KVM:
arm64: nv: Implement maintenance interrupt forwarding"?
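
For what it's worth, I'd have expected the call site to end up on the exit
path, something along these lines (hypothetical placement, building on the
early return this patch adds to kvm_vgic_sync_hwstate):

	/* If nesting, this is a load/put affair, not flush/sync. */
	if (vgic_state_is_nested(vcpu)) {
		vgic_v3_handle_nested_maint_irq(vcpu);
		return;
	}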

> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 77d23e817756..25edf32c28fb 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -18,6 +18,7 @@
>  #include <kvm/arm_vgic.h>
>  #include <asm/kvm_hyp.h>
>  #include <asm/kvm_mmu.h>
> +#include <asm/kvm_nested.h>
>  #include <asm/kvm_asm.h>
>  
>  #include "vgic.h"
> @@ -298,6 +299,12 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
>               vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
>                                    ICC_SRE_EL1_DFB |
>                                    ICC_SRE_EL1_SRE);
> +             /*
> +              * If nesting is allowed, force GICv3 onto the nested
> +              * guests as well.
> +              */
> +             if (nested_virt_in_use(vcpu))
> +                     vcpu->arch.vgic_cpu.nested_vgic_v3.vgic_sre = vgic_v3->vgic_sre;
>               vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
>       } else {
>               vgic_v3->vgic_sre = 0;
> @@ -660,6 +667,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
>  {
>       struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
>  
> +     /*
> +      * vgic_v3_load_nested only affects the LRs in the shadow
> +      * state, so it is fine to pass the nested state around.
> +      */
> +     if (vgic_state_is_nested(vcpu))
> +             cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3;
> +
>       /*
>        * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
>        * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
> @@ -672,12 +686,18 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
>  
>       if (has_vhe())
>               __vgic_v3_activate_traps(cpu_if);
> +
> +     if (vgic_state_is_nested(vcpu))
> +             vgic_v3_load_nested(vcpu);
>  }
>  
>  void vgic_v3_put(struct kvm_vcpu *vcpu)
>  {
>       struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
>  
> +     if (vgic_state_is_nested(vcpu))
> +             cpu_if = &vcpu->arch.vgic_cpu.shadow_vgic_v3;
> +
>       if (likely(cpu_if->vgic_sre))
>               cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
>  
> @@ -685,4 +705,12 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
>  
>       if (has_vhe())
>               __vgic_v3_deactivate_traps(cpu_if);
> +
> +     if (vgic_state_is_nested(vcpu))
> +             vgic_v3_put_nested(vcpu);
>  }
> +
> +__weak void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) {}
> +__weak void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu) {}
> +__weak void vgic_v3_load_nested(struct kvm_vcpu *vcpu) {}
> +__weak void vgic_v3_put_nested(struct kvm_vcpu *vcpu) {}
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index 6953aefecbb6..f32f49b0c803 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -872,6 +872,10 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
>  {
>       int used_lrs;
>  
> +     /* If nesting, this is a load/put affair, not flush/sync. */
> +     if (vgic_state_is_nested(vcpu))
> +             return;
> +
>       WARN_ON(vgic_v4_sync_hwstate(vcpu));
>  
>       /* An empty ap_list_head implies used_lrs == 0 */
> @@ -920,6 +924,29 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
>           !vgic_supports_direct_msis(vcpu->kvm))
>               return;
>  
> +     /*
> +      * If in a nested state, we must return early. Two possibilities:
> +      *
> +      * - If we have any pending IRQ for the guest and the guest
> +      *   expects IRQs to be handled in its virtual EL2 mode (the
> +      *   virtual IMO bit is set) and it is not already running in
> +      *   virtual EL2 mode, then we have to emulate an IRQ
> +      *   exception to virtual EL2.
> +      *
> +      *   We do that by placing a request to ourselves which will
> +      *   abort the entry procedure and inject the exception at the
> +      *   beginning of the run loop.
> +      *
> +      * - Otherwise, do exactly *NOTHING*. The guest state is
> +      *   already loaded, and we can carry on with running it.
> +      */
> +     if (vgic_state_is_nested(vcpu)) {
> +             if (kvm_vgic_vcpu_pending_irq(vcpu))
> +                     kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
> +
> +             return;
> +     }
> +
>       DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
>  
>       if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
> @@ -1022,3 +1049,8 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
>  
>       return map_is_active;
>  }
> +
> +__weak bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
> +{
> +     return false;
> +}