Re: [PATCH v5 2/3] KVM/arm/arm64: enable enhanced armv7 fp/simd lazy switch

2015-12-18 Thread Christoffer Dall
On Sun, Dec 06, 2015 at 05:07:13PM -0800, Mario Smarduch wrote:
> This patch tracks armv7 fp/simd hardware state with hcptr register.
> On vcpu_load saves host fpexc, enables FP access, and sets trapping
> on fp/simd access. On first fp/simd access trap to handler to save host and 
> restore guest context, clear trapping bits to enable vcpu lazy mode. On 
> vcpu_put if trap bits are cleared save guest and restore host context and 
> always restore host fpexc.
> 
> Signed-off-by: Mario Smarduch 
> ---
>  arch/arm/include/asm/kvm_emulate.h   | 50 
> 
>  arch/arm/include/asm/kvm_host.h  |  1 +
>  arch/arm/kvm/Makefile|  2 +-
>  arch/arm/kvm/arm.c   | 13 ++
>  arch/arm/kvm/fpsimd_switch.S | 46 +
>  arch/arm/kvm/interrupts.S| 32 +--
>  arch/arm/kvm/interrupts_head.S   | 33 ++--
>  arch/arm64/include/asm/kvm_emulate.h |  9 +++
>  arch/arm64/include/asm/kvm_host.h|  1 +
>  9 files changed, 142 insertions(+), 45 deletions(-)
>  create mode 100644 arch/arm/kvm/fpsimd_switch.S
> 
> diff --git a/arch/arm/include/asm/kvm_emulate.h 
> b/arch/arm/include/asm/kvm_emulate.h
> index a9c80a2..3de11a2 100644
> --- a/arch/arm/include/asm/kvm_emulate.h
> +++ b/arch/arm/include/asm/kvm_emulate.h
> @@ -243,4 +243,54 @@ static inline unsigned long 
> vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
>   }
>  }
>  
> +#ifdef CONFIG_VFPv3
> +/* Called from vcpu_load - save fpexc and enable guest access to fp/simd 
> unit */

are you really enabling guest access here or just fiddling with fpexc to
ensure you trap accesses to hyp ?

> +static inline void kvm_enable_vcpu_fpexc(struct kvm_vcpu *vcpu)
> +{
> + u32 fpexc;
> +
> + asm volatile(
> +  "mrc p10, 7, %0, cr8, cr0, 0\n"
> +  "str %0, [%1]\n"
> +  "mov %0, #(1 << 30)\n"
> +  "mcr p10, 7, %0, cr8, cr0, 0\n"
> +  "isb\n"

why do you need an ISB here?  won't there be an implicit one from the
HVC call later before you need this to take effect?

> +  : "+r" (fpexc)
> +  : "r" (>arch.host_fpexc)
> + );

this whole bit can be rewritten something like:

fpexc = fmrx(FPEXC);
vcpu->arch.host_fpexc = fpexc;
fpexc |= FPEXC_EN;
fmxr(FPEXC, fpexc);

> +}
> +
> +/* Called from vcpu_put - restore host fpexc */
> +static inline void kvm_restore_host_fpexc(struct kvm_vcpu *vcpu)
> +{
> + asm volatile(
> +  "mcr p10, 7, %0, cr8, cr0, 0\n"
> +  :
> +  : "r" (vcpu->arch.host_fpexc)
> + );

similarly here

> +}
> +
> +/* If trap bits are reset then fp/simd registers are dirty */
> +static inline bool kvm_vcpu_vfp_isdirty(struct kvm_vcpu *vcpu)
> +{
> + return !!(~vcpu->arch.hcptr & (HCPTR_TCP(10) | HCPTR_TCP(11)));

this looks complicated, how about:

return !(vcpu->arch.hcptr & (HCPTR_TCP(10) | HCPTR_TCP(11)));

> +}
> +
> +static inline void vcpu_reset_cptr(struct kvm_vcpu *vcpu)
> +{
> + vcpu->arch.hcptr |= (HCPTR_TTA | HCPTR_TCP(10)  | HCPTR_TCP(11));
> +}
> +#else
> +static inline void kvm_enable_vcpu_fpexc(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_restore_host_fpexc(struct kvm_vcpu *vcpu) {}
> +static inline bool kvm_vcpu_vfp_isdirty(struct kvm_vcpu *vcpu)
> +{
> + return false;
> +}
> +static inline void vcpu_reset_cptr(struct kvm_vcpu *vcpu)
> +{
> + vcpu->arch.hcptr = HCPTR_TTA;
> +}
> +#endif
> +
>  #endif /* __ARM_KVM_EMULATE_H__ */
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 09bb1f2..ecc883a 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -227,6 +227,7 @@ int kvm_perf_teardown(void);
>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>  
>  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
> +void kvm_restore_host_vfp_state(struct kvm_vcpu *);
>  
>  static inline void kvm_arch_hardware_disable(void) {}
>  static inline void kvm_arch_hardware_unsetup(void) {}
> diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
> index c5eef02c..411b3e4 100644
> --- a/arch/arm/kvm/Makefile
> +++ b/arch/arm/kvm/Makefile
> @@ -19,7 +19,7 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o 
> $(KVM)/eventfd.o $(KVM)/vf
>  
>  obj-y += kvm-arm.o init.o interrupts.o
>  obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
> -obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
> +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o 
> fpsimd_switch.o
>  obj-y += $(KVM)/arm/vgic.o
>  obj-y += $(KVM)/arm/vgic-v2.o
>  obj-y += $(KVM)/arm/vgic-v2-emul.o
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index dc017ad..1de07ab 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -291,10 +291,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>   vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
>  
>   

Re: [PATCH v5 2/3] KVM/arm/arm64: enable enhanced armv7 fp/simd lazy switch

2015-12-18 Thread Mario Smarduch
On 12/18/2015 5:49 AM, Christoffer Dall wrote:
> On Sun, Dec 06, 2015 at 05:07:13PM -0800, Mario Smarduch wrote:
>> This patch tracks armv7 fp/simd hardware state with hcptr register.
>> On vcpu_load saves host fpexc, enables FP access, and sets trapping
>> on fp/simd access. On first fp/simd access trap to handler to save host and 
>> restore guest context, clear trapping bits to enable vcpu lazy mode. On 
>> vcpu_put if trap bits are cleared save guest and restore host context and 
>> always restore host fpexc.
>>
>> Signed-off-by: Mario Smarduch 
>> ---
>>  arch/arm/include/asm/kvm_emulate.h   | 50 
>> 
>>  arch/arm/include/asm/kvm_host.h  |  1 +
>>  arch/arm/kvm/Makefile|  2 +-
>>  arch/arm/kvm/arm.c   | 13 ++
>>  arch/arm/kvm/fpsimd_switch.S | 46 +
>>  arch/arm/kvm/interrupts.S| 32 +--
>>  arch/arm/kvm/interrupts_head.S   | 33 ++--
>>  arch/arm64/include/asm/kvm_emulate.h |  9 +++
>>  arch/arm64/include/asm/kvm_host.h|  1 +
>>  9 files changed, 142 insertions(+), 45 deletions(-)
>>  create mode 100644 arch/arm/kvm/fpsimd_switch.S
>>
>> diff --git a/arch/arm/include/asm/kvm_emulate.h 
>> b/arch/arm/include/asm/kvm_emulate.h
>> index a9c80a2..3de11a2 100644
>> --- a/arch/arm/include/asm/kvm_emulate.h
>> +++ b/arch/arm/include/asm/kvm_emulate.h
>> @@ -243,4 +243,54 @@ static inline unsigned long 
>> vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
>>  }
>>  }
>>  
>> +#ifdef CONFIG_VFPv3
>> +/* Called from vcpu_load - save fpexc and enable guest access to fp/simd 
>> unit */
> 
> are you really enabling guest access here or just fiddling with fpexc to
> ensure you trap accesses to hyp ?

That's the end goal, but it is setting the fp enable bit? Your later comment of
combining functions and remove assembler should work.

> 
>> +static inline void kvm_enable_vcpu_fpexc(struct kvm_vcpu *vcpu)
>> +{
>> +u32 fpexc;
>> +
>> +asm volatile(
>> + "mrc p10, 7, %0, cr8, cr0, 0\n"
>> + "str %0, [%1]\n"
>> + "mov %0, #(1 << 30)\n"
>> + "mcr p10, 7, %0, cr8, cr0, 0\n"
>> + "isb\n"
> 
> why do you need an ISB here?  won't there be an implicit one from the
> HVC call later before you need this to take effect?

I would think so, but besides B.2.7.3  I can't find other references on
visibility of context altering instructions.
> 
>> + : "+r" (fpexc)
>> + : "r" (>arch.host_fpexc)
>> +);
> 
> this whole bit can be rewritten something like:
> 
> fpexc = fmrx(FPEXC);
> vcpu->arch.host_fpexc = fpexc;
> fpexc |= FPEXC_EN;
> fmxr(FPEXC, fpexc);

Didn't know about fmrx/fmxr functions - much better.
> 
>> +}
>> +
>> +/* Called from vcpu_put - restore host fpexc */
>> +static inline void kvm_restore_host_fpexc(struct kvm_vcpu *vcpu)
>> +{
>> +asm volatile(
>> + "mcr p10, 7, %0, cr8, cr0, 0\n"
>> + :
>> + : "r" (vcpu->arch.host_fpexc)
>> +);
> 
> similarly here
Ok.
> 
>> +}
>> +
>> +/* If trap bits are reset then fp/simd registers are dirty */
>> +static inline bool kvm_vcpu_vfp_isdirty(struct kvm_vcpu *vcpu)
>> +{
>> +return !!(~vcpu->arch.hcptr & (HCPTR_TCP(10) | HCPTR_TCP(11)));
> 
> this looks complicated, how about:
> 
> return !(vcpu->arch.hcptr & (HCPTR_TCP(10) | HCPTR_TCP(11)));

Yeah, I twisted the meaning of bool.
> 
>> +}
>> +
>> +static inline void vcpu_reset_cptr(struct kvm_vcpu *vcpu)
>> +{
>> +vcpu->arch.hcptr |= (HCPTR_TTA | HCPTR_TCP(10)  | HCPTR_TCP(11));
>> +}
>> +#else
>> +static inline void kvm_enable_vcpu_fpexc(struct kvm_vcpu *vcpu) {}
>> +static inline void kvm_restore_host_fpexc(struct kvm_vcpu *vcpu) {}
>> +static inline bool kvm_vcpu_vfp_isdirty(struct kvm_vcpu *vcpu)
>> +{
>> +return false;
>> +}
>> +static inline void vcpu_reset_cptr(struct kvm_vcpu *vcpu)
>> +{
>> +vcpu->arch.hcptr = HCPTR_TTA;
>> +}
>> +#endif
>> +
>>  #endif /* __ARM_KVM_EMULATE_H__ */
>> diff --git a/arch/arm/include/asm/kvm_host.h 
>> b/arch/arm/include/asm/kvm_host.h
>> index 09bb1f2..ecc883a 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -227,6 +227,7 @@ int kvm_perf_teardown(void);
>>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>>  
>>  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
>> +void kvm_restore_host_vfp_state(struct kvm_vcpu *);
>>  
>>  static inline void kvm_arch_hardware_disable(void) {}
>>  static inline void kvm_arch_hardware_unsetup(void) {}
>> diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
>> index c5eef02c..411b3e4 100644
>> --- a/arch/arm/kvm/Makefile
>> +++ b/arch/arm/kvm/Makefile
>> @@ -19,7 +19,7 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o 
>> $(KVM)/eventfd.o $(KVM)/vf
>>  
>>  obj-y += kvm-arm.o init.o interrupts.o
>>  obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
>> -obj-y += coproc.o coproc_a15.o