Gleb Natapov wrote on 2013-02-04:
> On Mon, Feb 04, 2013 at 05:05:14PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <[email protected]>
>> 
>> Posted Interrupt allows APIC interrupts to inject into guest directly
>> without any vmexit.
>> 
>> - When delivering an interrupt to the guest, if the target vcpu is running,
>>   update the Posted-interrupt requests bitmap and send a notification event
>>   to the vcpu. Then the vcpu will handle this interrupt automatically,
>>   without any software involvement.
>> - If the target vcpu is not running, or there is already a notification event
>>   pending in the vcpu, do nothing. The interrupt will be handled by the
>>   next vm entry.
>> Signed-off-by: Yang Zhang <[email protected]>
>> ---
>>  arch/x86/include/asm/entry_arch.h  |    1 +
>>  arch/x86/include/asm/hw_irq.h      |    1 +
>>  arch/x86/include/asm/irq_vectors.h |    4 +
>>  arch/x86/include/asm/kvm_host.h    |    3 + arch/x86/include/asm/vmx.h
>>          |    4 + arch/x86/kernel/entry_64.S         |    5 +
>>  arch/x86/kernel/irq.c              |   19 ++++
>>  arch/x86/kernel/irqinit.c          |    4 + arch/x86/kvm/lapic.c      
>>          |   15 +++- arch/x86/kvm/lapic.h               |    1 +
>>  arch/x86/kvm/svm.c                 |    6 ++ arch/x86/kvm/vmx.c       
>>           |  164 +++++++++++++++++++++++++++++++-----
>>  arch/x86/kvm/x86.c                 |    4 + include/linux/kvm_host.h  
>>          |    1 + 14 files changed, 208 insertions(+), 24 deletions(-)
>> diff --git a/arch/x86/include/asm/entry_arch.h
>> b/arch/x86/include/asm/entry_arch.h index 40afa00..7b0a29e 100644 ---
>> a/arch/x86/include/asm/entry_arch.h +++
>> b/arch/x86/include/asm/entry_arch.h @@ -18,6 +18,7 @@
>> BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
>>  #endif
>>  
>>  BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
>> +BUILD_INTERRUPT(posted_intr_ipi, POSTED_INTR_VECTOR)
> Missing CONFIG_HAVE_KVM ifdef. Have you verified that this patch
> compiles with KVM support disabled? Also give it a name that will
> associate it with KVM.
Yes, but it seems CONFIG_HAVE_KVM is selected by x86 by default, so it is always
enabled when building the kernel.
I will remove the select in Kconfig and try again.

>> 
>>  /*
>>   * every pentium local APIC has two 'local interrupts', with a
>> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
>> index eb92a6e..ee61af3 100644
>> --- a/arch/x86/include/asm/hw_irq.h
>> +++ b/arch/x86/include/asm/hw_irq.h
>> @@ -28,6 +28,7 @@
>>  /* Interrupt handlers registered during init_IRQ */ extern void
>>  apic_timer_interrupt(void); extern void x86_platform_ipi(void);
>>  +extern void posted_intr_ipi(void); extern void error_interrupt(void);
>>  extern void irq_work_interrupt(void);
>> diff --git a/arch/x86/include/asm/irq_vectors.h
>> b/arch/x86/include/asm/irq_vectors.h index 1508e51..6421a63 100644 ---
>> a/arch/x86/include/asm/irq_vectors.h +++
>> b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,10 @@
>>   */
>>  #define X86_PLATFORM_IPI_VECTOR             0xf7
>> +#ifdef CONFIG_HAVE_KVM
>> +#define POSTED_INTR_VECTOR          0xf2
>> +#endif
>> +
>>  /*
>>   * IRQ work vector:
>>   */
>> diff --git a/arch/x86/include/asm/kvm_host.h
>> b/arch/x86/include/asm/kvm_host.h index b8388e9..bab1c0a 100644 ---
>> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -704,6 +704,9 @@ struct kvm_x86_ops {
>>      void (*hwapic_isr_update)(struct kvm *kvm, int isr);
>>      void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
>>      void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
>> +    bool (*send_notification_event)(struct kvm_vcpu *vcpu,
>> +                                    int vector, int *result);
>> +    bool (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
>>      int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>>      int (*get_tdp_level)(void);
>>      u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>> index 694586c..f5ec72c 100644
>> --- a/arch/x86/include/asm/vmx.h
>> +++ b/arch/x86/include/asm/vmx.h
>> @@ -153,6 +153,7 @@
>>  #define PIN_BASED_EXT_INTR_MASK                 0x00000001
>>  #define PIN_BASED_NMI_EXITING                   0x00000008
>>  #define PIN_BASED_VIRTUAL_NMIS                  0x00000020
>> +#define PIN_BASED_POSTED_INTR                   0x00000080
>> 
>>  #define VM_EXIT_SAVE_DEBUG_CONTROLS             0x00000002 #define
>>  VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200 @@ -175,6 +176,7 @@
>>  /* VMCS Encodings */ enum vmcs_field {      VIRTUAL_PROCESSOR_ID          
>>   = 0x00000000, +    POSTED_INTR_NV                  = 0x00000002,
>>      GUEST_ES_SELECTOR               = 0x00000800,   GUEST_CS_SELECTOR     
>>           = 0x00000802,      GUEST_SS_SELECTOR               = 0x00000804,
>>  @@ -209,6 +211,8 @@ enum vmcs_field {       VIRTUAL_APIC_PAGE_ADDR_HIGH    
>>  = 0x00002013,       APIC_ACCESS_ADDR                = 0x00002014,
>>      APIC_ACCESS_ADDR_HIGH           = 0x00002015,
>> +    POSTED_INTR_DESC_ADDR           = 0x00002016,
>> +    POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
>>      EPT_POINTER                     = 0x0000201a,
>>      EPT_POINTER_HIGH                = 0x0000201b,
>>      EOI_EXIT_BITMAP0                = 0x0000201c,
>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>> index 70641af..c6c47a3 100644
>> --- a/arch/x86/kernel/entry_64.S
>> +++ b/arch/x86/kernel/entry_64.S
>> @@ -1177,6 +1177,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
>>  apicinterrupt X86_PLATFORM_IPI_VECTOR \
>>      x86_platform_ipi smp_x86_platform_ipi
>> +#ifdef CONFIG_HAVE_KVM
>> +apicinterrupt POSTED_INTR_VECTOR \
>> +    posted_intr_ipi smp_posted_intr_ipi
>> +#endif
>> +
>>  apicinterrupt THRESHOLD_APIC_VECTOR \
>>      threshold_interrupt smp_threshold_interrupt
>>  apicinterrupt THERMAL_APIC_VECTOR \
>> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
>> index e4595f1..3551cf2 100644
>> --- a/arch/x86/kernel/irq.c
>> +++ b/arch/x86/kernel/irq.c
>> @@ -228,6 +228,25 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
>>      set_irq_regs(old_regs);
>>  }
>> +/* + * Handler for POSTED_INTERRUPT_VECTOR. + */ #ifdef
>> CONFIG_HAVE_KVM +void smp_posted_intr_ipi(struct pt_regs *regs) +{
>> +    struct pt_regs *old_regs = set_irq_regs(regs); + +      ack_APIC_irq(); 
>> +
>> +    irq_enter(); + +        exit_idle(); + +        irq_exit(); +
>> +    set_irq_regs(old_regs); +} + +
> One blank line is enough.
> 
>>  EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
>>  
>>  #ifdef CONFIG_HOTPLUG_CPU
>> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
>> index 6e03b0d..f90c5ae 100644
>> --- a/arch/x86/kernel/irqinit.c
>> +++ b/arch/x86/kernel/irqinit.c
>> @@ -205,6 +205,10 @@ static void __init apic_intr_init(void)
>> 
>>      /* IPI for X86 platform specific use */
>>      alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
>> +#ifdef CONFIG_HAVE_KVM
>> +    /* IPI for posted interrupt use */
>> +    alloc_intr_gate(POSTED_INTR_VECTOR, posted_intr_ipi);
>> +#endif
>> 
>>      /* IPI vectors for APIC spurious and error interrupts */
>>      alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index 02b51dd..df6b6a3 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -379,6 +379,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic
> *apic)
>>      if (!apic->irr_pending)
>>              return -1;
>> +    kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
>>      result = apic_search_irr(apic);
>>      ASSERT(result == -1 || result >= 16);
>> @@ -685,6 +686,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>>  {
>>      int result = 0;
>>      struct kvm_vcpu *vcpu = apic->vcpu;
>> +    bool send = false;
>> 
>>      switch (delivery_mode) {
>>      case APIC_DM_LOWEST:
>> @@ -700,7 +702,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>>              } else
>>                      apic_clear_vector(vector, apic->regs + APIC_TMR);
>> -            result = !apic_test_and_set_irr(vector, apic);
>> +            if (kvm_x86_ops->vm_has_apicv(vcpu->kvm))
> Just call send_notification_event() and do the check inside. And call it
> deliver_posted_interrupt() or something. It does more than just sends
> notification event. Actually it may not send it at all.
The code logic is different with and without apicv. So even if we put the check
inside the callee, we would still need to check it in the caller. I think the
current solution is clearer.

>> +                    send = kvm_x86_ops->send_notification_event(vcpu,
>> +                                            vector, &result);
>> +            else
>> +                    result = !apic_test_and_set_irr(vector, apic);
>> +
>>              trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>>                                        trig_mode, vector, !result);
>>              if (!result) {
>> @@ -710,8 +717,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>>                      break;
>>              }
>> -            kvm_make_request(KVM_REQ_EVENT, vcpu);
>> -            kvm_vcpu_kick(vcpu);
>> +            if (!send) {
>> +                    kvm_make_request(KVM_REQ_EVENT, vcpu);
>> +                    kvm_vcpu_kick(vcpu);
>> +            }
>>              break;
>>  
>>      case APIC_DM_REMRD:
>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>> index 1676d34..632111f 100644
>> --- a/arch/x86/kvm/lapic.h
>> +++ b/arch/x86/kvm/lapic.h
>> @@ -46,6 +46,7 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
>>  void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
>>  u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
>>  void kvm_apic_set_version(struct kvm_vcpu *vcpu);
>> +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned int *pir);
>> 
>>  int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
>>  int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index a7d60d7..37f961d 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -3591,6 +3591,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm,
> int isr)
>>      return;
>>  }
>> +static bool svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>> +{
>> +    return false;
>> +}
>> +
>>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) {         struct vcpu_svm
>>  *svm = to_svm(vcpu); @@ -4319,6 +4324,7 @@ static struct kvm_x86_ops
>>  svm_x86_ops = {     .vm_has_apicv = svm_vm_has_apicv,       
>> .load_eoi_exitmap
>>  = svm_load_eoi_exitmap,     .hwapic_isr_update = svm_hwapic_isr_update,
>> +    .sync_pir_to_irr = svm_sync_pir_to_irr,
>> 
>>      .set_tss_addr = svm_set_tss_addr,
>>      .get_tdp_level = get_npt_level,
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index e826d29..d2b02f2 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -84,8 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
>>  static bool __read_mostly fasteoi = 1;
>>  module_param(fasteoi, bool, S_IRUGO);
>> -static bool __read_mostly enable_apicv_reg_vid = 1;
>> -module_param(enable_apicv_reg_vid, bool, S_IRUGO);
>> +static bool __read_mostly enable_apicv = 1;
>> +module_param(enable_apicv, bool, S_IRUGO);
>> 
>>  /*
>>   * If nested=1, nested virtualization is supported, i.e., guests may use
>> @@ -370,6 +370,41 @@ struct nested_vmx {
>>      struct page *apic_access_page;
>>  };
>> +#define POSTED_INTR_ON  0
>> +/* Posted-Interrupt Descriptor */
>> +struct pi_desc {
>> +    u32 pir[8];     /* Posted interrupt requested */
>> +    union {
>> +            struct {
>> +                    u8  on:1,
>> +                        rsvd:7;
>> +            } control;
>> +            u32 rsvd[8];
>> +    } u;
>> +} __aligned(64);
>> +
>> +static bool pi_test_on(struct pi_desc *pi_desc)
>> +{
>> +    return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->u.control);
>> +}
>> +
>> +static bool pi_test_and_set_on(struct pi_desc *pi_desc)
>> +{
>> +    return test_and_set_bit(POSTED_INTR_ON,
>> +                    (unsigned long *)&pi_desc->u.control);
>> +}
>> +
>> +static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
>> +{
>> +    return test_and_clear_bit(POSTED_INTR_ON,
>> +                    (unsigned long *)&pi_desc->u.control);
>> +}
>> +
>> +static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
>> +{
>> +    return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
>> +}
>> +
>>  struct vcpu_vmx {
>>      struct kvm_vcpu       vcpu;
>>      unsigned long         host_rsp;
>> @@ -434,6 +469,9 @@ struct vcpu_vmx {
>> 
>>      bool rdtscp_enabled;
>> +    /* Posted interrupt descriptor */
>> +    struct pi_desc *pi;
>> +
> You haven't answered on my previous review why are you trying save 46
> bytes here.
Sorry. I cannot get your point. It's just a pointer and only takes 8 bytes.

>>      /* Support for a guest hypervisor (nested VMX) */
>>      struct nested_vmx nested;
>>  };
>> @@ -788,6 +826,18 @@ static inline bool
> cpu_has_vmx_virtual_intr_delivery(void)
>>              SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>>  }
>> +static inline bool cpu_has_vmx_posted_intr(void)
>> +{
>> +    return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
>> +}
>> +
>> +static inline bool cpu_has_vmx_apicv(void)
>> +{
>> +    return cpu_has_vmx_apic_register_virt() &&
>> +            cpu_has_vmx_virtual_intr_delivery() &&
>> +            cpu_has_vmx_posted_intr();
>> +}
>> +
>>  static inline bool cpu_has_vmx_flexpriority(void)
>>  {
>>      return cpu_has_vmx_tpr_shadow() &&
>> @@ -2535,12 +2585,6 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
>>      u32 _vmexit_control = 0;
>>      u32 _vmentry_control = 0;
>> -    min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
>> -    opt = PIN_BASED_VIRTUAL_NMIS;
>> -    if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
>> -                            &_pin_based_exec_control) < 0)
>> -            return -EIO;
>> -
>>      min = CPU_BASED_HLT_EXITING |
>>  #ifdef CONFIG_X86_64
>>            CPU_BASED_CR8_LOAD_EXITING |
>> @@ -2617,6 +2661,17 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
>>                              &_vmexit_control) < 0)
>>              return -EIO;
>> +    min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
>> +    opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
>> +    if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
>> +                            &_pin_based_exec_control) < 0)
>> +            return -EIO;
>> +
>> +    if (!(_cpu_based_2nd_exec_control &
>> +            SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
>> +            !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
>> +            _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
>> +
>>      min = 0;        opt = VM_ENTRY_LOAD_IA32_PAT;   if 
>> (adjust_vmx_controls(min,
>>  opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2795,11 +2850,10 @@ static __init
>>  int hardware_setup(void)    if (!cpu_has_vmx_ple())                 ple_gap 
>> = 0;
>> -    if (!cpu_has_vmx_apic_register_virt() ||
>> -                            !cpu_has_vmx_virtual_intr_delivery())
>> -            enable_apicv_reg_vid = 0;
>> +    if (!cpu_has_vmx_apicv())
>> +            enable_apicv = 0;
>> 
>> -    if (enable_apicv_reg_vid)
>> +    if (enable_apicv)
>>              kvm_x86_ops->update_cr8_intercept = NULL;
>>      else
>>              kvm_x86_ops->hwapic_irr_update = NULL;
>> @@ -3868,6 +3922,61 @@ static void
> vmx_disable_intercept_msr_write_x2apic(u32 msr)
>>                      msr, MSR_TYPE_W);
>>  }
>> +static int vmx_vm_has_apicv(struct kvm *kvm)
>> +{
>> +    return enable_apicv && irqchip_in_kernel(kvm);
>> +}
>> +
>> +static bool vmx_send_notification_event(struct kvm_vcpu *vcpu,
>> +            int vector, int *result)
>> +{
>> +    struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +
>> +    *result = !pi_test_and_set_pir(vector, vmx->pi);
> The problem here is that interrupt may still be pending in IRR so
> eventually it will be coalesced, but we report it as delivered here. I
> do not see solution for this yet.
Yes, it's true that this may result in the interrupt being lost. But even on real
hardware, an interrupt can also be lost in some cases: for example, if the cpu
doesn't turn on irqs in time, or if there is a high priority interrupt pending in IRR.
And since there is already an interrupt pending in IRR, the interrupt will still
be handled.

>> +    if (!pi_test_and_set_on(vmx->pi) && (vcpu->mode == IN_GUEST_MODE)) {
>> +            kvm_make_request(KVM_REQ_PENDING_PIR, vcpu);
> Why not set KVM_REQ_EVENT here? What this intermediate event is needed
> for?
See the answer below.

>> +            apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>> +                                POSTED_INTR_VECTOR);
>> +            if (!pi_test_on(vmx->pi))
> Isn't it too optimistic of you to expect IPI to be delivered and
> processed by remote CPU by this point?
I have collected some data on my box, and it shows that about 5 percent of posted
interrupts will already have been handled by the time this check is reached. How
about adding an unlikely() here?
It also means 5% of the request checks are unnecessary. And checking KVM_REQ_EVENT
is more costly, so I use a lighter-weight request for this.

>> +                    clear_bit(KVM_REQ_PENDING_PIR, &vcpu->requests) ;
>> +            return true;
>> +    }
>> +    return false;
>> +}
>> +
>> +static bool vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>> +{
>> +    struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +    struct kvm_lapic *apic = vcpu->arch.apic;
>> +    unsigned int i, old, new, ret_val, irr_offset, pir_val;
>> +    bool make_request = false;
>> +
>> +    if (!vmx_vm_has_apicv(vcpu->kvm) || !pi_test_and_clear_on(vmx->pi))
>> +            return false;
>> +
>> +    for (i = 0; i <= 7; i++) {
>> +            pir_val = xchg(&vmx->pi->pir[i], 0);
>> +            if (pir_val) {
>> +                    irr_offset = APIC_IRR + i * 0x10;
>> +                    do {
>> +                            old = kvm_apic_get_reg(apic, irr_offset);
>> +                            new = old | pir_val;
>> +                            ret_val = cmpxchg((u32 *)(apic->regs +
>> +                                            irr_offset), old, new);
>> +                    } while (unlikely(ret_val != old));
>> +                    make_request = true;
>> +            }
>> +    }
>> +
>> +    return make_request;
>> +}
>> +
>> +static void free_pi(struct vcpu_vmx *vmx)
>> +{
>> +    if (vmx_vm_has_apicv(vmx->vcpu.kvm))
>> +            kfree(vmx->pi);
>> +}
>> +
>>  /*
>>   * Set up the vmcs's constant host-state fields, i.e., host-state fields 
>> that
>>   * will not change in the lifetime of the guest.
>> @@ -3928,6 +4037,15 @@ static void set_cr4_guest_host_mask(struct
> vcpu_vmx *vmx)
>>      vmcs_writel(CR4_GUEST_HOST_MASK,
>>  ~vmx->vcpu.arch.cr4_guest_owned_bits); }
>> +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
>> +{
>> +    u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
>> +
>> +    if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
>> +            pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
>> +    return pin_based_exec_ctrl;
>> +}
>> +
>>  static u32 vmx_exec_control(struct vcpu_vmx *vmx) {         u32 exec_control
>>  = vmcs_config.cpu_based_exec_ctrl; @@ -3945,11 +4063,6 @@ static u32
>>  vmx_exec_control(struct vcpu_vmx *vmx)      return exec_control; }
>> -static int vmx_vm_has_apicv(struct kvm *kvm)
>> -{
>> -    return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
>> -}
>> -
>>  static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) {       u32
>>  exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4005,8 +4118,7
>>  @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>      vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
>>  
>>      /* Control */
>> -    vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
>> -            vmcs_config.pin_based_exec_ctrl);
>> +    vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
>> 
>>      vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
> vmx_exec_control(vmx));
>> 
>> @@ -4015,13 +4127,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>                              vmx_secondary_exec_control(vmx));
>>      }
>> -    if (enable_apicv_reg_vid) {
>> +    if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
>>              vmcs_write64(EOI_EXIT_BITMAP0, 0);
>>              vmcs_write64(EOI_EXIT_BITMAP1, 0);
>>              vmcs_write64(EOI_EXIT_BITMAP2, 0);
>>              vmcs_write64(EOI_EXIT_BITMAP3, 0);
>>  
>>              vmcs_write16(GUEST_INTR_STATUS, 0);
>> +
>> +            vmx->pi = kzalloc(sizeof(struct pi_desc), GFP_KERNEL);
>> +            vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
>> +            vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((vmx->pi)));
>>      }
>>  
>>      if (ple_gap) { @@ -4171,6 +4287,9 @@ static int vmx_vcpu_reset(struct
>>  kvm_vcpu *vcpu)             vmcs_write64(APIC_ACCESS_ADDR,                  
>>     
>>  page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
>> +    if (vmx_vm_has_apicv(vcpu->kvm))
>> +            memset(vmx->pi, 0, sizeof(struct pi_desc));
>> +
>>      if (vmx->vpid != 0)
>>              vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
>> @@ -6746,6 +6865,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
>> 
>>      free_vpid(vmx);         free_nested(vmx); +     free_pi(vmx);
>>      free_loaded_vmcs(vmx->loaded_vmcs);     kfree(vmx->guest_msrs);
>>      kvm_vcpu_uninit(vcpu); @@ -7647,6 +7767,8 @@ static struct
>>  kvm_x86_ops vmx_x86_ops = {         .load_eoi_exitmap = 
>> vmx_load_eoi_exitmap,
>>      .hwapic_irr_update = vmx_hwapic_irr_update,     .hwapic_isr_update =
>>  vmx_hwapic_isr_update,
>> +    .sync_pir_to_irr = vmx_sync_pir_to_irr,
>> +    .send_notification_event = vmx_send_notification_event,
>> 
>>      .set_tss_addr = vmx_set_tss_addr,       .get_tdp_level = get_ept_level, 
>> @@
>>  -7750,7 +7872,7 @@ static int __init vmx_init(void)
>>      memcpy(vmx_msr_bitmap_longmode_x2apic,                  
>> vmx_msr_bitmap_longmode,
>>  PAGE_SIZE);
>> -    if (enable_apicv_reg_vid) {
>> +    if (enable_apicv) {
>>              for (msr = 0x800; msr <= 0x8ff; msr++)
>>                      vmx_disable_intercept_msr_read_x2apic(msr);
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 9f25d70..6e1e6e7 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -2681,6 +2681,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,                  
>>            
>>  struct kvm_lapic_state *s) { +      kvm_x86_ops->sync_pir_to_irr(vcpu);
>>      memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>>  
>>      return 0; @@ -5698,6 +5699,9 @@ static int vcpu_enter_guest(struct
>>  kvm_vcpu *vcpu)                     kvm_deliver_pmi(vcpu);          if
>>  (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
>>                      update_eoi_exitmap(vcpu);
>> +            if (kvm_check_request(KVM_REQ_PENDING_PIR, vcpu))
>> +                    if (kvm_x86_ops->sync_pir_to_irr(vcpu))
>> +                            kvm_make_request(KVM_REQ_EVENT, vcpu);
>>      }
>>  
>>      if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 0350e0d..a410819 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -124,6 +124,7 @@ static inline bool is_error_page(struct page *page)
>>  #define KVM_REQ_MCLOCK_INPROGRESS 20
>>  #define KVM_REQ_EPR_EXIT          21
>>  #define KVM_REQ_EOIBITMAP         22
>> +#define KVM_REQ_PENDING_PIR   23
>> 
>>  #define KVM_USERSPACE_IRQ_SOURCE_ID         0
>>  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID    1
>> --
>> 1.7.1
> 
> --
>                       Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to