Re: [PATCH v5 2/3] KVM/arm/arm64: enable enhanced armv7 fp/simd lazy switch
On Sun, Dec 06, 2015 at 05:07:13PM -0800, Mario Smarduch wrote:
> This patch tracks armv7 fp/simd hardware state with hcptr register.
> On vcpu_load saves host fpexc, enables FP access, and sets trapping
> on fp/simd access. On first fp/simd access trap to handler to save host and
> restore guest context, clear trapping bits to enable vcpu lazy mode. On
> vcpu_put if trap bits are cleared save guest and restore host context and
> always restore host fpexc.
>
> Signed-off-by: Mario Smarduch
> ---
>  arch/arm/include/asm/kvm_emulate.h   | 50
>  arch/arm/include/asm/kvm_host.h      |  1 +
>  arch/arm/kvm/Makefile                |  2 +-
>  arch/arm/kvm/arm.c                   | 13 ++
>  arch/arm/kvm/fpsimd_switch.S         | 46 +
>  arch/arm/kvm/interrupts.S            | 32 +--
>  arch/arm/kvm/interrupts_head.S       | 33 ++--
>  arch/arm64/include/asm/kvm_emulate.h |  9 +++
>  arch/arm64/include/asm/kvm_host.h    |  1 +
>  9 files changed, 142 insertions(+), 45 deletions(-)
>  create mode 100644 arch/arm/kvm/fpsimd_switch.S
>
> diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
> index a9c80a2..3de11a2 100644
> --- a/arch/arm/include/asm/kvm_emulate.h
> +++ b/arch/arm/include/asm/kvm_emulate.h
> @@ -243,4 +243,54 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
>  	}
>  }
>
> +#ifdef CONFIG_VFPv3
> +/* Called from vcpu_load - save fpexc and enable guest access to fp/simd unit */

are you really enabling guest access here or just fiddling with fpexc to
ensure you trap accesses to hyp ?

> +static inline void kvm_enable_vcpu_fpexc(struct kvm_vcpu *vcpu)
> +{
> +	u32 fpexc;
> +
> +	asm volatile(
> +		"mrc p10, 7, %0, cr8, cr0, 0\n"
> +		"str %0, [%1]\n"
> +		"mov %0, #(1 << 30)\n"
> +		"mcr p10, 7, %0, cr8, cr0, 0\n"
> +		"isb\n"

why do you need an ISB here? won't there be an implicit one from the
HVC call later before you need this to take effect?

> +		: "+r" (fpexc)
> +		: "r" (&vcpu->arch.host_fpexc)
> +	);

this whole bit can be rewritten something like:

fpexc = fmrx(FPEXC);
vcpu->arch.host_fpexc = fpexc;
fpexc |= FPEXC_EN;
fmxr(FPEXC, fpexc);

> +}
> +
> +/* Called from vcpu_put - restore host fpexc */
> +static inline void kvm_restore_host_fpexc(struct kvm_vcpu *vcpu)
> +{
> +	asm volatile(
> +		"mcr p10, 7, %0, cr8, cr0, 0\n"
> +		:
> +		: "r" (vcpu->arch.host_fpexc)
> +	);

similarly here

> +}
> +
> +/* If trap bits are reset then fp/simd registers are dirty */
> +static inline bool kvm_vcpu_vfp_isdirty(struct kvm_vcpu *vcpu)
> +{
> +	return !!(~vcpu->arch.hcptr & (HCPTR_TCP(10) | HCPTR_TCP(11)));

this looks complicated, how about:

return !(vcpu->arch.hcptr & (HCPTR_TCP(10) | HCPTR_TCP(11)));

> +}
> +
> +static inline void vcpu_reset_cptr(struct kvm_vcpu *vcpu)
> +{
> +	vcpu->arch.hcptr |= (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11));
> +}
> +#else
> +static inline void kvm_enable_vcpu_fpexc(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_restore_host_fpexc(struct kvm_vcpu *vcpu) {}
> +static inline bool kvm_vcpu_vfp_isdirty(struct kvm_vcpu *vcpu)
> +{
> +	return false;
> +}
> +static inline void vcpu_reset_cptr(struct kvm_vcpu *vcpu)
> +{
> +	vcpu->arch.hcptr = HCPTR_TTA;
> +}
> +#endif
> +
>  #endif /* __ARM_KVM_EMULATE_H__ */
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 09bb1f2..ecc883a 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -227,6 +227,7 @@ int kvm_perf_teardown(void);
>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>
>  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
> +void kvm_restore_host_vfp_state(struct kvm_vcpu *);
>
>  static inline void kvm_arch_hardware_disable(void) {}
>  static inline void kvm_arch_hardware_unsetup(void) {}
> diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
> index c5eef02c..411b3e4 100644
> --- a/arch/arm/kvm/Makefile
> +++ b/arch/arm/kvm/Makefile
> @@ -19,7 +19,7 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vf
>
>  obj-y += kvm-arm.o init.o interrupts.o
>  obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
> -obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
> +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o fpsimd_switch.o
>  obj-y += $(KVM)/arm/vgic.o
>  obj-y += $(KVM)/arm/vgic-v2.o
>  obj-y += $(KVM)/arm/vgic-v2-emul.o
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index dc017ad..1de07ab 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -291,10 +291,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
>
Re: [PATCH v5 2/3] KVM/arm/arm64: enable enhanced armv7 fp/simd lazy switch
On 12/18/2015 5:49 AM, Christoffer Dall wrote:
> On Sun, Dec 06, 2015 at 05:07:13PM -0800, Mario Smarduch wrote:
>> This patch tracks armv7 fp/simd hardware state with hcptr register.
>> On vcpu_load saves host fpexc, enables FP access, and sets trapping
>> on fp/simd access. On first fp/simd access trap to handler to save host and
>> restore guest context, clear trapping bits to enable vcpu lazy mode. On
>> vcpu_put if trap bits are cleared save guest and restore host context and
>> always restore host fpexc.
>>
>> Signed-off-by: Mario Smarduch
>> ---
>>  arch/arm/include/asm/kvm_emulate.h   | 50
>>  arch/arm/include/asm/kvm_host.h      |  1 +
>>  arch/arm/kvm/Makefile                |  2 +-
>>  arch/arm/kvm/arm.c                   | 13 ++
>>  arch/arm/kvm/fpsimd_switch.S         | 46 +
>>  arch/arm/kvm/interrupts.S            | 32 +--
>>  arch/arm/kvm/interrupts_head.S       | 33 ++--
>>  arch/arm64/include/asm/kvm_emulate.h |  9 +++
>>  arch/arm64/include/asm/kvm_host.h    |  1 +
>>  9 files changed, 142 insertions(+), 45 deletions(-)
>>  create mode 100644 arch/arm/kvm/fpsimd_switch.S
>>
>> diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
>> index a9c80a2..3de11a2 100644
>> --- a/arch/arm/include/asm/kvm_emulate.h
>> +++ b/arch/arm/include/asm/kvm_emulate.h
>> @@ -243,4 +243,54 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
>>  	}
>>  }
>>
>> +#ifdef CONFIG_VFPv3
>> +/* Called from vcpu_load - save fpexc and enable guest access to fp/simd unit */
>
> are you really enabling guest access here or just fiddling with fpexc to
> ensure you trap accesses to hyp ?

That's the end goal, but it is setting the fp enable bit? Your later
comment of combining functions and remove assembler should work.

>
>> +static inline void kvm_enable_vcpu_fpexc(struct kvm_vcpu *vcpu)
>> +{
>> +	u32 fpexc;
>> +
>> +	asm volatile(
>> +		"mrc p10, 7, %0, cr8, cr0, 0\n"
>> +		"str %0, [%1]\n"
>> +		"mov %0, #(1 << 30)\n"
>> +		"mcr p10, 7, %0, cr8, cr0, 0\n"
>> +		"isb\n"
>
> why do you need an ISB here? won't there be an implicit one from the
> HVC call later before you need this to take effect?

I would think so, but besides B.2.7.3 I can't find other references on
visibility of context altering instructions.

>
>> +		: "+r" (fpexc)
>> +		: "r" (&vcpu->arch.host_fpexc)
>> +	);
>
> this whole bit can be rewritten something like:
>
> fpexc = fmrx(FPEXC);
> vcpu->arch.host_fpexc = fpexc;
> fpexc |= FPEXC_EN;
> fmxr(FPEXC, fpexc);

Didn't know about fmrx/fmxr functions - much better.

>
>> +}
>> +
>> +/* Called from vcpu_put - restore host fpexc */
>> +static inline void kvm_restore_host_fpexc(struct kvm_vcpu *vcpu)
>> +{
>> +	asm volatile(
>> +		"mcr p10, 7, %0, cr8, cr0, 0\n"
>> +		:
>> +		: "r" (vcpu->arch.host_fpexc)
>> +	);
>
> similarly here

Ok.

>
>> +}
>> +
>> +/* If trap bits are reset then fp/simd registers are dirty */
>> +static inline bool kvm_vcpu_vfp_isdirty(struct kvm_vcpu *vcpu)
>> +{
>> +	return !!(~vcpu->arch.hcptr & (HCPTR_TCP(10) | HCPTR_TCP(11)));
>
> this looks complicated, how about:
>
> return !(vcpu->arch.hcptr & (HCPTR_TCP(10) | HCPTR_TCP(11)));

Yeah, I twisted the meaning of bool.

>
>> +}
>> +
>> +static inline void vcpu_reset_cptr(struct kvm_vcpu *vcpu)
>> +{
>> +	vcpu->arch.hcptr |= (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11));
>> +}
>> +#else
>> +static inline void kvm_enable_vcpu_fpexc(struct kvm_vcpu *vcpu) {}
>> +static inline void kvm_restore_host_fpexc(struct kvm_vcpu *vcpu) {}
>> +static inline bool kvm_vcpu_vfp_isdirty(struct kvm_vcpu *vcpu)
>> +{
>> +	return false;
>> +}
>> +static inline void vcpu_reset_cptr(struct kvm_vcpu *vcpu)
>> +{
>> +	vcpu->arch.hcptr = HCPTR_TTA;
>> +}
>> +#endif
>> +
>>  #endif /* __ARM_KVM_EMULATE_H__ */
>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>> index 09bb1f2..ecc883a 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -227,6 +227,7 @@ int kvm_perf_teardown(void);
>>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>>
>>  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
>> +void kvm_restore_host_vfp_state(struct kvm_vcpu *);
>>
>>  static inline void kvm_arch_hardware_disable(void) {}
>>  static inline void kvm_arch_hardware_unsetup(void) {}
>> diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
>> index c5eef02c..411b3e4 100644
>> --- a/arch/arm/kvm/Makefile
>> +++ b/arch/arm/kvm/Makefile
>> @@ -19,7 +19,7 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vf
>>
>>  obj-y += kvm-arm.o init.o interrupts.o
>>  obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
>> -obj-y += coproc.o coproc_a15.o
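(To tie the commit message together, here is C-style pseudocode of the
vcpu_put flow it describes: "if trap bits are cleared save guest and restore
host context and always restore host fpexc". The helper name
vcpu_put_vfp_sketch is made up for illustration; in the patch the actual
register save/restore lives in arch/arm/kvm/fpsimd_switch.S and is reached
via kvm_restore_host_vfp_state().)

    /* Illustrative only - not the patch's assembly implementation. */
    static void vcpu_put_vfp_sketch(struct kvm_vcpu *vcpu)	/* hypothetical name */
    {
    	if (kvm_vcpu_vfp_isdirty(vcpu)) {
    		/* The guest ran with the HCPTR traps cleared, so the fp/simd
    		 * registers hold guest state: save it and restore the host's
    		 * registers (fpsimd_switch.S in the patch). */
    		kvm_restore_host_vfp_state(vcpu);
    	}

    	/* Host FPEXC is restored on every vcpu_put. */
    	kvm_restore_host_fpexc(vcpu);
    }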