On Wed, Sep 09, 2009 at 04:18:58PM +0200, Alexander Graf wrote:
> X86 CPUs need to have some magic happening to enable the virtualization
> extensions on them. This magic can result in unpleasant results for
> users, like blocking other VMMs from working (vmx) or using invalid TLB
> entries (svm).
> 
> Currently KVM activates virtualization when the respective kernel module
> is loaded. This blocks us from autoloading KVM modules without breaking
> other VMMs.
> 
> To circumvent this problem at least a bit, this patch introduces on
> demand activation of virtualization. This means, that instead
> virtualization is enabled on creation of the first virtual machine
> and disabled on destruction of the last one.
> 
> So using this, KVM can be easily autoloaded, while keeping other
> hypervisors usable.
> 
> Signed-off-by: Alexander Graf <[email protected]>
> 
> --
> 
> I've tested the following:
> 
>   - shutdown
>   - suspend / resume to RAM
>   - running VirtualBox while kvm module is loaded
> ---
>  arch/ia64/kvm/kvm-ia64.c        |    8 ++-
>  arch/powerpc/kvm/powerpc.c      |    3 +-
>  arch/s390/kvm/kvm-s390.c        |    3 +-
>  arch/x86/include/asm/kvm_host.h |    2 +-
>  arch/x86/kvm/svm.c              |   13 ++++--
>  arch/x86/kvm/vmx.c              |    7 +++-
>  arch/x86/kvm/x86.c              |    4 +-
>  include/linux/kvm_host.h        |    2 +-
>  virt/kvm/kvm_main.c             |   82 
> +++++++++++++++++++++++++++++++++------
>  9 files changed, 98 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index f6471c8..5fdeec5 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 
> *opt_handler)
>  
>  static  DEFINE_SPINLOCK(vp_lock);
>  
> -void kvm_arch_hardware_enable(void *garbage)
> +int kvm_arch_hardware_enable(void *garbage)
>  {
>       long  status;
>       long  tmp_base;
> @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
>       slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
>       local_irq_restore(saved_psr);
>       if (slot < 0)
> -             return;
> +             return -EINVAL;
>  
>       spin_lock(&vp_lock);
>       status = ia64_pal_vp_init_env(kvm_vsa_base ?
> @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
>                       __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
>       if (status != 0) {
>               printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
> -             return ;
> +             return -EINVAL;
>       }
>  
>       if (!kvm_vsa_base) {
> @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
>       }
>       spin_unlock(&vp_lock);
>       ia64_ptr_entry(0x3, slot);
> +
> +     return 0;
>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 95af622..5902bbc 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct 
> kvm_vcpu *vcpu)
>       return r;
>  }
>  
> -void kvm_arch_hardware_enable(void *garbage)
> +int kvm_arch_hardware_enable(void *garbage)
>  {
> +     return 0;
>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 00e2ce8..5445058 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>  static unsigned long long *facilities;
>  
>  /* Section: not file related */
> -void kvm_arch_hardware_enable(void *garbage)
> +int kvm_arch_hardware_enable(void *garbage)
>  {
>       /* every s390 is virtualization enabled ;-) */
> +     return 0;
>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6046e6f..b17886f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -462,7 +462,7 @@ struct descriptor_table {
>  struct kvm_x86_ops {
>       int (*cpu_has_kvm_support)(void);          /* __init */
>       int (*disabled_by_bios)(void);             /* __init */
> -     void (*hardware_enable)(void *dummy);      /* __init */
> +     int (*hardware_enable)(void *dummy);
>       void (*hardware_disable)(void *dummy);
>       void (*check_processor_compatibility)(void *rtn);
>       int (*hardware_setup)(void);               /* __init */
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index a5f90c7..2f3a388 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage)
>       cpu_svm_disable();
>  }
>  
> -static void svm_hardware_enable(void *garbage)
> +static int svm_hardware_enable(void *garbage)
>  {
>  
>       struct svm_cpu_data *svm_data;
> @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage)
>       struct desc_struct *gdt;
>       int me = raw_smp_processor_id();
>  
> +     rdmsrl(MSR_EFER, efer);
> +     if (efer & EFER_SVME)
> +             return -EBUSY;
> +
>       if (!has_svm()) {
>               printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
> -             return;
> +             return -EINVAL;
>       }
>       svm_data = per_cpu(svm_data, me);
>  
>       if (!svm_data) {
>               printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
>                      me);
> -             return;
> +             return -EINVAL;
>       }
>  
>       svm_data->asid_generation = 1;
> @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage)
>       gdt = (struct desc_struct *)gdt_descr.base;
>       svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
>  
> -     rdmsrl(MSR_EFER, efer);
>       wrmsrl(MSR_EFER, efer | EFER_SVME);
>  
>       wrmsrl(MSR_VM_HSAVE_PA,
>              page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
> +
> +     return 0;
>  }
>  
>  static void svm_cpu_uninit(int cpu)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index d3213ac..c20a902 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void)
>       /* locked but not enabled */
>  }
>  
> -static void hardware_enable(void *garbage)
> +static int hardware_enable(void *garbage)
>  {
>       int cpu = raw_smp_processor_id();
>       u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
>       u64 old;
>  
> +     if (read_cr4() & X86_CR4_VMXE)
> +             return -EBUSY;
> +
>       INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
>       rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
>       if ((old & (FEATURE_CONTROL_LOCKED |
> @@ -1158,6 +1161,8 @@ static void hardware_enable(void *garbage)
>       asm volatile (ASM_VMX_VMXON_RAX
>                     : : "a"(&phys_addr), "m"(phys_addr)
>                     : "memory", "cc");
> +
> +     return 0;
>  }
>  
>  static void vmclear_local_vcpus(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 891234b..ec16169 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4703,9 +4703,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
>       return kvm_x86_ops->vcpu_reset(vcpu);
>  }
>  
> -void kvm_arch_hardware_enable(void *garbage)
> +int kvm_arch_hardware_enable(void *garbage)
>  {
> -     kvm_x86_ops->hardware_enable(garbage);
> +     return kvm_x86_ops->hardware_enable(garbage);
>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 3e57be4..0bf9ee9 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -346,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
>  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
>  
>  int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
> -void kvm_arch_hardware_enable(void *garbage);
> +int kvm_arch_hardware_enable(void *garbage);
>  void kvm_arch_hardware_disable(void *garbage);
>  int kvm_arch_hardware_setup(void);
>  void kvm_arch_hardware_unsetup(void);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 6ce5ef3..39f0f5e 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock);
>  LIST_HEAD(vm_list);
>  
>  static cpumask_var_t cpus_hardware_enabled;
> +static int kvm_usage_count = 0;
> +static atomic_t hardware_enable_failed;
>  
>  struct kmem_cache *kvm_vcpu_cache;
>  EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
> @@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir;
>  
>  static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
>                          unsigned long arg);
> +static int hardware_enable_all(void);
> +static void hardware_disable_all(void);
>  
>  static bool kvm_rebooting;
>  
> @@ -326,6 +330,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops 
> = {
>  
>  static struct kvm *kvm_create_vm(void)
>  {
> +     int r = 0;
>       struct kvm *kvm = kvm_arch_create_vm();
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>       struct page *page;
> @@ -333,6 +338,11 @@ static struct kvm *kvm_create_vm(void)
>  
>       if (IS_ERR(kvm))
>               goto out;
> +
> +     r = hardware_enable_all();
> +     if (r)
> +             goto out_err;
> +
>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
>       INIT_HLIST_HEAD(&kvm->mask_notifier_list);
>       INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
> @@ -341,8 +351,8 @@ static struct kvm *kvm_create_vm(void)
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>       if (!page) {
> -             kfree(kvm);
> -             return ERR_PTR(-ENOMEM);
> +             r = -ENOMEM;
> +             goto out_err;
>       }
>       kvm->coalesced_mmio_ring =
>                       (struct kvm_coalesced_mmio_ring *)page_address(page);
> @@ -350,15 +360,13 @@ static struct kvm *kvm_create_vm(void)
>  
>  #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>       {
> -             int err;
>               kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
> -             err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
> -             if (err) {
> +             r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
> +             if (r) {
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>                       put_page(page);
>  #endif
> -                     kfree(kvm);
> -                     return ERR_PTR(err);
> +                     goto out_err;
>               }
>       }
>  #endif
> @@ -382,6 +390,11 @@ static struct kvm *kvm_create_vm(void)
>  #endif
>  out:
>       return kvm;
> +
> +out_err:
> +     hardware_disable_all();
> +     kfree(kvm);
> +     return ERR_PTR(r);
>  }
>  
>  /*
> @@ -440,6 +453,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>       kvm_arch_flush_shadow(kvm);
>  #endif
>       kvm_arch_destroy_vm(kvm);
> +     hardware_disable_all();
>       mmdrop(mm);
>  }
>  
> @@ -1631,11 +1645,41 @@ static struct miscdevice kvm_dev = {
>  static void hardware_enable(void *junk)
>  {
>       int cpu = raw_smp_processor_id();
> +     int r;
>  
>       if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
>               return;
> +
>       cpumask_set_cpu(cpu, cpus_hardware_enabled);
> -     kvm_arch_hardware_enable(NULL);
> +
> +     r = kvm_arch_hardware_enable(NULL);
> +
> +     if (r) {
> +             cpumask_clear_cpu(cpu, cpus_hardware_enabled);
> +             atomic_inc(&hardware_enable_failed);
> +             printk(KERN_INFO "kvm: enabling virtualization on "
> +                              "CPU%d failed\n", cpu);
> +     }
> +}
> +
> +static int hardware_enable_all(void)
> +{
> +     int r = 0;
> +
> +     spin_lock(&kvm_lock);
> +
> +     kvm_usage_count++;
> +     if (kvm_usage_count == 1) {
> +             atomic_set(&hardware_enable_failed, 0);
> +             on_each_cpu(hardware_enable, NULL, 1);
> +
> +             if (atomic_read(&hardware_enable_failed))
> +                     r = -EBUSY;
> +     }
> +
> +     spin_unlock(&kvm_lock);
> +
> +     return r;
>  }

I think the kvm_usage_count > 1 path should also test for
hardware_enable_failed (you assume that if kvm_usage_count > 1 
then hardware enablement has succeeded, which is not always true).

Also, better move vmx.c's ept_sync_global from vmx_init to 
hardware_enable.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to