Anthony Liguori wrote:
> Attached patch implements lazy FPU save/restore for SVM. It's much
> more conservative than my previous patch. We can now mark the guest
> FPU as inactive whenever we want which will trigger CR0.TS to be set
> in the guest's shadowed CR0.
>
> Right now, we only mark the FPU as inactive when the guest does (via a
> mov %cr0, clts, etc.) or during a mov %cr3. There may be a better
> heuristic out there but this seemed like the obvious one.
>
> I'm still playing around with the VT version of this patch so I'll
> post that tomorrow.
>
> I've tested on a 32bit and 64bit SVM host using Avi's previously
> posted fpu-test and some others. Everything seems to be fine.
>
> Regards,
>
> Anthony Liguori
> ------------------------------------------------------------------------
>
> Author: Anthony Liguori <[EMAIL PROTECTED]>
> Date: Sun Apr 22 20:34:03 2007 -0500
>
> Lazy FPU support for SVM.
>
> Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
>
> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
> index d1a90c5..4859c32 100644
> --- a/drivers/kvm/kvm.h
> +++ b/drivers/kvm/kvm.h
> @@ -63,6 +63,9 @@
> #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)
>
> #define DE_VECTOR 0
> +#define DB_VECTOR 2
> +#define UD_VECTOR 6
> +#define NM_VECTOR 7
>
This, while a nice cleanup, is unrelated.
> #define DF_VECTOR 8
> #define TS_VECTOR 10
> #define NP_VECTOR 11
> @@ -301,6 +304,7 @@ struct kvm_vcpu {
> char fx_buf[FX_BUF_SIZE];
> char *host_fx_image;
> char *guest_fx_image;
> + int fpu_active;
>
> int mmio_needed;
> int mmio_read_completed;
> diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
> index 644efc5..bbde031 100644
> --- a/drivers/kvm/svm.c
> +++ b/drivers/kvm/svm.c
> @@ -30,10 +30,6 @@ MODULE_LICENSE("GPL");
> #define IOPM_ALLOC_ORDER 2
> #define MSRPM_ALLOC_ORDER 1
>
> -#define DB_VECTOR 1
> -#define UD_VECTOR 6
> -#define GP_VECTOR 13
> -
> #define DR7_GD_MASK (1 << 13)
> #define DR6_BD_MASK (1 << 13)
> #define CR4_DE_MASK (1UL << 3)
> @@ -587,6 +583,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
> init_vmcb(vcpu->svm->vmcb);
>
> fx_init(vcpu);
> + vcpu->fpu_active = 1;
> vcpu->apic_base = 0xfee00000 |
> /*for vcpu 0*/ MSR_IA32_APICBASE_BSP |
> MSR_IA32_APICBASE_ENABLE;
> @@ -756,6 +753,11 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned
> long cr0)
> }
> }
> #endif
> + if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) {
> + vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 <<
> NM_VECTOR);
> + vcpu->fpu_active = 1;
> + }
> +
> vcpu->cr0 = cr0;
> cr0 |= CR0_PG_MASK | CR0_WP_MASK;
> cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK);
> @@ -928,6 +930,20 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct
> kvm_run *kvm_run)
> return 0;
> }
>
> +static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +{
> + spin_lock(&vcpu->kvm->lock);
>
Why is the lock needed? everything below is vcpu-local AFAICS.
> +
> + vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
> + if (!(vcpu->cr0 & CR0_TS_MASK))
> + vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK;
> + vcpu->fpu_active = 1;
> +
> + spin_unlock(&vcpu->kvm->lock);
> +
> + return 1;
> +}
> +
> static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run
> *kvm_run)
> {
> /*
> @@ -1292,6 +1308,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
> [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
> [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
> [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
> + [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
> [SVM_EXIT_INTR] = nop_on_interception,
> [SVM_EXIT_NMI] = nop_on_interception,
> [SVM_EXIT_SMI] = nop_on_interception,
> @@ -1481,8 +1498,10 @@ again:
> load_db_regs(vcpu->svm->db_regs);
> }
>
> - fx_save(vcpu->host_fx_image);
> - fx_restore(vcpu->guest_fx_image);
> + if (vcpu->fpu_active) {
> + fx_save(vcpu->host_fx_image);
> + fx_restore(vcpu->guest_fx_image);
> + }
>
> asm volatile (
> #ifdef CONFIG_X86_64
> @@ -1593,8 +1612,10 @@ again:
> #endif
> : "cc", "memory" );
>
> - fx_save(vcpu->guest_fx_image);
> - fx_restore(vcpu->host_fx_image);
> + if (vcpu->fpu_active) {
> + fx_save(vcpu->guest_fx_image);
> + fx_restore(vcpu->host_fx_image);
> + }
>
> if ((vcpu->svm->vmcb->save.dr7 & 0xff))
> load_db_regs(vcpu->svm->host_db_regs);
> @@ -1664,6 +1685,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu,
> unsigned long root)
> {
> vcpu->svm->vmcb->save.cr3 = root;
> force_new_asid(vcpu);
> +
> + if (vcpu->fpu_active) {
> + vcpu->svm->vmcb->control.intercept_exceptions |= (1 <<
> NM_VECTOR);
> + vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK;
> + vcpu->fpu_active = 0;
> + }
> }
>
> static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
>
Any numbers? I use user/test/vmexit.c to test as it gives the highest
relative speedup.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel