On 28/07/2015 01:17, Steve Rutherford wrote:
> First patch in a series which enables the relocation of the
> PIC/IOAPIC to userspace.
> 
> Adds capability KVM_CAP_SPLIT_IRQCHIP;
> 
> KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
> rest of the irqchip.
> 
> Compile tested for x86.
> 
> Signed-off-by: Steve Rutherford <[email protected]>
> Suggested-by: Andrew Honig <[email protected]>
> ---
>  Documentation/virtual/kvm/api.txt | 15 +++++++++++++++
>  arch/powerpc/kvm/irq.h            |  1 -
>  arch/s390/kvm/irq.h               |  1 -
>  arch/x86/include/asm/kvm_host.h   |  2 ++
>  arch/x86/kvm/i8254.c              |  5 ++++-
>  arch/x86/kvm/ioapic.h             |  9 +++++++++
>  arch/x86/kvm/irq.h                |  6 ++++++
>  arch/x86/kvm/irq_comm.c           |  9 ++++++++-
>  arch/x86/kvm/lapic.c              |  9 ++++++---
>  arch/x86/kvm/vmx.c                |  4 ++--
>  arch/x86/kvm/x86.c                | 23 +++++++++++++++++++++--
>  include/kvm/arm_vgic.h            |  1 +
>  include/linux/kvm_host.h          |  1 +
>  include/uapi/linux/kvm.h          |  1 +
>  14 files changed, 76 insertions(+), 11 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index a4ebcb7..b655024 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3620,6 +3620,21 @@ struct {
>  
>  KVM handlers should exit to userspace with rc = -EREMOTE.
>  
> +7.5 KVM_SPLIT_IRQCHIP
> +
> +Architectures: x86
> +Parameters: None
> +Returns: 0 on success, -1 on error
> +
> +Create a local apic for each processor in the kernel. With this capability
> +enabled, the userspace VMM is expected to emulate the IOAPIC and PIC.
> +
> +This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in 
> kernel
> +IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
> +
> +Fails if VCPU has already been created, or if the irqchip is already in the
> +kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
> +
>  
>  8. Other capabilities.
>  ----------------------
> diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
> index 5a9a10b..772fa8c 100644
> --- a/arch/powerpc/kvm/irq.h
> +++ b/arch/powerpc/kvm/irq.h
> @@ -16,5 +16,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>       smp_rmb();
>       return ret;
>  }
> -
>  #endif
> diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
> index d98e415..9a21a86 100644
> --- a/arch/s390/kvm/irq.h
> +++ b/arch/s390/kvm/irq.h
> @@ -18,5 +18,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  {
>       return 1;
>  }
> -
>  #endif
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index fa32b53..18a110b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -669,6 +669,8 @@ struct kvm_arch {
>       bool boot_vcpu_runs_old_kvmclock;
>  
>       u64 disabled_quirks;
> +
> +     bool irqchip_split;
>  };
>  
>  struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> index f90952f..5708850 100644
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -35,6 +35,7 @@
>  #include <linux/kvm_host.h>
>  #include <linux/slab.h>
>  
> +#include "ioapic.h"
>  #include "irq.h"
>  #include "i8254.h"
>  #include "x86.h"
> @@ -333,7 +334,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, 
> int is_period)
>       struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
>       s64 interval;
>  
> -     if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
> +     if (!irqchip_in_kernel(kvm) ||
> +         !ioapic_in_kernel(kvm) ||

Here the irqchip_in_kernel check is unnecessary.

> +         ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
>               return;
>  
>       interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
> diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
> index ca0b0b4..d8cc54b 100644
> --- a/arch/x86/kvm/ioapic.h
> +++ b/arch/x86/kvm/ioapic.h
> @@ -98,6 +98,15 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
> *kvm)
>       return kvm->arch.vioapic;
>  }
>  
> +static inline int ioapic_in_kernel(struct kvm *kvm)
> +{
> +     int ret;
> +
> +     ret = (ioapic_irqchip(kvm) != NULL);
> +     smp_rmb();
> +     return ret;
> +}
> +
>  static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
>  {
>       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
> diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
> index ad68c73..2f13dd5 100644
> --- a/arch/x86/kvm/irq.h
> +++ b/arch/x86/kvm/irq.h
> @@ -83,11 +83,17 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
>       return kvm->arch.vpic;
>  }
>  
> +static inline int irqchip_split(struct kvm *kvm)
> +{
> +     return kvm->arch.irqchip_split;
> +}
> +
>  static inline int irqchip_in_kernel(struct kvm *kvm)
>  {
>       int ret;
>  
>       ret = (pic_irqchip(kvm) != NULL);
> +     ret |= irqchip_split(kvm);
>       smp_rmb();
>       return ret;
>  }
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 9efff9e..67f6b62 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -208,7 +208,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int 
> irq_source_id)
>               goto unlock;
>       }
>       clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
> -     if (!irqchip_in_kernel(kvm))
> +     if (!ioapic_in_kernel(kvm))
>               goto unlock;
>  
>       kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
> @@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
>       return kvm_set_irq_routing(kvm, default_routing,
>                                  ARRAY_SIZE(default_routing), 0);
>  }
> +
> +static const struct kvm_irq_routing_entry empty_routing[] = {};
> +
> +int kvm_setup_empty_irq_routing(struct kvm *kvm)
> +{
> +     return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
> +}
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 2a5ca97..536b79e 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -209,7 +209,8 @@ out:
>       if (old)
>               kfree_rcu(old, rcu);
>  
> -     kvm_vcpu_request_scan_ioapic(kvm);
> +     if (!irqchip_split(kvm))

Here please check ioapic_in_kernel.

> +             kvm_vcpu_request_scan_ioapic(kvm);
>  }
>  
>  static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
> @@ -1838,7 +1839,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
>               kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>                               apic_find_highest_isr(apic));
>       kvm_make_request(KVM_REQ_EVENT, vcpu);
> -     kvm_rtc_eoi_tracking_restore_one(vcpu);
> +     if (!irqchip_split(vcpu->kvm))

Here please check ioapic_in_kernel.

> +             kvm_rtc_eoi_tracking_restore_one(vcpu);
>  }
>  
>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> @@ -1921,7 +1923,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu 
> *vcpu,
>           /* Cache not set: could be safe but we don't bother. */
>           apic->highest_isr_cache == -1 ||
>           /* Need EOI to update ioapic. */
> -         kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
> +         kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) ||
> +         irqchip_split(vcpu->kvm)) {

This is ugly (and if anything irqchip_split should be done before
kvm_ioapic_handles_vector).  Could this just test the EOI exit bitmap
instead?

Also, who sets TMR in the split irqchip case?  I'll post a patch today
or tomorrow to compute TMR in __apic_accept_irq and to do the
aforementioned EOI exit bitmap test.

>               /*
>                * PV EOI was disabled by apic_sync_pv_eoi_from_guest
>                * so we need not do anything here.
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 4014a82..08203a1 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void)
>  
>  static inline bool vm_need_tpr_shadow(struct kvm *kvm)
>  {
> -     return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
> +     return (cpu_has_vmx_tpr_shadow()) && irqchip_in_kernel(kvm);
>  }
>  
>  static inline bool cpu_has_secondary_exec_ctrls(void)
> @@ -9485,7 +9485,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, 
> struct vmcs12 *vmcs12)
>       /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
>        * emulated by vmx_set_efer(), below.
>        */
> -     vm_entry_controls_init(vmx, 
> +     vm_entry_controls_init(vmx,
>               (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
>                       ~VM_ENTRY_IA32E_MODE) |
>               (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 28076c2..6d4b4dc 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2461,6 +2461,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
> ext)
>       case KVM_CAP_TSC_DEADLINE_TIMER:
>       case KVM_CAP_ENABLE_CAP_VM:
>       case KVM_CAP_DISABLE_QUIRKS:
> +     case KVM_CAP_SPLIT_IRQCHIP:
>  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
>       case KVM_CAP_ASSIGN_DEV_IRQ:
>       case KVM_CAP_PCI_2_3:
> @@ -3568,6 +3569,23 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>               kvm->arch.disabled_quirks = cap->args[0];
>               r = 0;
>               break;
> +     case KVM_CAP_SPLIT_IRQCHIP: {
> +             mutex_lock(&kvm->lock);
> +             r = -EEXIST;
> +             if (irqchip_in_kernel(kvm))
> +                     goto split_irqchip_unlock;
> +             r = -EINVAL;
> +             if (atomic_read(&kvm->online_vcpus))
> +                     goto split_irqchip_unlock;
> +             r = kvm_setup_empty_irq_routing(kvm);
> +             if (r)
> +                     goto split_irqchip_unlock;

Need an smp_wmb() here, pairing with the smp_rmb() in irqchip_in_kernel.

Paolo

> +             kvm->arch.irqchip_split = true;
> +             r = 0;
> +split_irqchip_unlock:
> +             mutex_unlock(&kvm->lock);
> +             break;
> +     }
>       default:
>               r = -EINVAL;
>               break;
> @@ -3686,7 +3704,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>               }
>  
>               r = -ENXIO;
> -             if (!irqchip_in_kernel(kvm))
> +             if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
>                       goto get_irqchip_out;
>               r = kvm_vm_ioctl_get_irqchip(kvm, chip);
>               if (r)
> @@ -3710,7 +3728,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>               }
>  
>               r = -ENXIO;
> -             if (!irqchip_in_kernel(kvm))
> +             if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
>                       goto set_irqchip_out;
>               r = kvm_vm_ioctl_set_irqchip(kvm, chip);
>               if (r)
> @@ -3836,6 +3854,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>               r = kvm_vm_ioctl_enable_cap(kvm, &cap);
>               break;
>       }
> +
>       default:
>               r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
>       }
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 133ea00..ffe1f4e 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -329,6 +329,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>  
>  #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
> +#define lapic_in_kernel(k)      (irqchip_in_kernel(k))
>  #define vgic_initialized(k)  (!!((k)->arch.vgic.nr_cpus))
>  #define vgic_ready(k)                ((k)->arch.vgic.ready)
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 51103f0..f7eab09 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1000,6 +1000,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, 
> unsigned long mmu_seq)
>  #endif
>  
>  int kvm_setup_default_irq_routing(struct kvm *kvm);
> +int kvm_setup_empty_irq_routing(struct kvm *kvm);
>  int kvm_set_irq_routing(struct kvm *kvm,
>                       const struct kvm_irq_routing_entry *entries,
>                       unsigned nr,
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9ef19eb..e4304d0 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -818,6 +818,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_DISABLE_QUIRKS 116
>  #define KVM_CAP_X86_SMM 117
>  #define KVM_CAP_MULTI_ADDRESS_SPACE 118
> +#define KVM_CAP_SPLIT_IRQCHIP 119
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to