Marcelo Tosatti wrote on 2013-02-23:
> 
> On Fri, Feb 22, 2013 at 09:42:32PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <[email protected]>
>> 
>> Posted Interrupt allows APIC interrupts to inject into guest directly
>> without any vmexit.
>> 
>> - When delivering an interrupt to the guest, if the target vcpu is running,
>>   update the Posted-interrupt requests bitmap and send a notification event
>>   to the vcpu. Then the vcpu will handle this interrupt automatically,
>>   without any software involvement.
>> - If the target vcpu is not running or there is already a notification event
>>   pending in the vcpu, do nothing. The interrupt will be handled on the
>>   next vm entry.
>> Signed-off-by: Yang Zhang <[email protected]>
>> ---
>> 
>> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
>> index e4595f1..64616cc 100644
>> --- a/arch/x86/kernel/irq.c
>> +++ b/arch/x86/kernel/irq.c
>> @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
>>      set_irq_regs(old_regs);
>>  }
>> +#ifdef CONFIG_HAVE_KVM
>> +/*
>> + * Handler for POSTED_INTERRUPT_VECTOR.
>> + */
>> +void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
>> +{
>> +    struct pt_regs *old_regs = set_irq_regs(regs);
>> +
>> +    ack_APIC_irq();
>> +
>> +    irq_enter();
>> +
>> +    exit_idle();
>> +
>> +    irq_exit();
>> +
>> +    set_irq_regs(old_regs);
>> +}
>> +#endif
>> +
> 
> Add per-cpu counters, similarly to other handlers in the same file.
Sure.

>> @@ -379,6 +398,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic
> *apic)
>>      if (!apic->irr_pending)
>>              return -1;
>> +    kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
>>      result = apic_search_irr(apic);
>>      ASSERT(result == -1 || result >= 16);
> 
> kvm_x86_ops->sync_pir_to_irr() should be called from vcpu_enter_guest,
> before inject_pending_event.
> 
>         if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>               ->
>               inject_pending_event
>               ...
>       }
Some other places will search the irr to do something, like 
kvm_cpu_has_interrupt(). So we must sync pir to irr before touching the irr, 
not just before entering the guest.

> 
>> @@ -685,6 +705,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>>  {
>>      int result = 0;
>>      struct kvm_vcpu *vcpu = apic->vcpu;
>> +    bool delivered = false;
>> 
>>      switch (delivery_mode) {
>>      case APIC_DM_LOWEST:
>> @@ -700,7 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>>              } else
>>                      apic_clear_vector(vector, apic->regs + APIC_TMR);
>> -            result = !apic_test_and_set_irr(vector, apic);
>> +            if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector,
>> +                                            &result, &delivered))
>> +                    result = !apic_test_and_set_irr(vector, apic);
>> +
>>              trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>>                                        trig_mode, vector, !result);
>>              if (!result) {
>> @@ -710,8 +734,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>>                      break;
>>              }
>> -            kvm_make_request(KVM_REQ_EVENT, vcpu);
>> -            kvm_vcpu_kick(vcpu);
>> +            if (!delivered) {
>> +                    kvm_make_request(KVM_REQ_EVENT, vcpu);
>> +                    kvm_vcpu_kick(vcpu);
>> +            }
> 
> This set bit / kick pair should be for non-APICv only (please
> check for it directly).
Sure
 
>> +    return test_bit(vector, (unsigned long *)pi_desc->pir);
>> +}
>> +
>> +static void pi_set_pir(int vector, struct pi_desc *pi_desc)
>> +{
>> +    __set_bit(vector, (unsigned long *)pi_desc->pir);
>> +}
> 
> This must be locked atomic operation (set_bit).
If using a spinlock, pi_set_pir is not called concurrently. It is safe to use 
__set_bit.

 
>> +
>>  struct vcpu_vmx {
>>      struct kvm_vcpu       vcpu;
>>      unsigned long         host_rsp;
>> @@ -429,6 +465,10 @@ struct vcpu_vmx {
>> 
>>      bool rdtscp_enabled;
>> +    /* Posted interrupt descriptor */
>> +    struct pi_desc pi_desc;
>> +    spinlock_t pi_lock;
> 
> Don't see why the lock is necessary. Please use the following
> pseudocode for vmx_deliver_posted_interrupt:
> 
> vmx_deliver_posted_interrupt(), returns 'bool injected'.
> 
> 1. orig_irr = read irr from vapic page
> 2. if (orig_irr != 0)
> 3.    return false;
> 4. kvm_make_request(KVM_REQ_EVENT)
> 5. bool injected = !test_and_set_bit(PIR)
> 6. if (vcpu->guest_mode && injected)
> 7.    if (test_and_set_bit(PIR notification bit))
> 8.            send PIR IPI
> 9. return injected

Consider the following case:
vcpu 0                      |                vcpu1
send intr to vcpu1
check irr
                                            receive a posted intr
                                                                                
pir->irr(pir is cleared, irr is set)
injected=test_and_set_bit=true
pir=set

Then both irr and pir have the interrupt pending; they may merge into one 
later, but injected is reported as true. Wrong.

> On vcpu_enter_guest:
> 
> if (kvm_check_request(KVM_REQ_EVENT))  {
>       pir->virr sync                  (*)
>       ...
> }
> ...
> vcpu->mode = IN_GUEST_MODE;
> local_irq_disable
> clear pir notification bit unconditionally (*)
Why clear the ON bit here? If there is no pir->irr operation in this vmentry, 
clearing ON here is redundant. 
 
> Right after local_irq_disable.
> 
>> +static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
>> +            int vector, int *result, bool *delivered)
>> +{
>> +    struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +    unsigned long flags;
>> +
>> +    if (!vmx_vm_has_apicv(vcpu->kvm))
>> +            return false;
>> +
>> +    spin_lock_irqsave(&vmx->pi_lock, flags);
>> +    if (pi_test_pir(vector, &vmx->pi_desc) ||
>> +            kvm_apic_test_irr(vector, vcpu->arch.apic)) {
>> +            spin_unlock_irqrestore(&vmx->pi_lock, flags);
>> +            return true;
>> +    }
>> +
>> +    pi_set_pir(vector, &vmx->pi_desc);
>> +    *result = 1;
>> +    if (!pi_test_and_set_on(&vmx->pi_desc) &&
>> +                    (vcpu->mode == IN_GUEST_MODE)) {
>> +            kvm_make_request(KVM_REQ_EVENT, vcpu);
>> +            apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>> +                                POSTED_INTR_VECTOR);
>> +            *delivered = true;
>> +    }
>> +    spin_unlock_irqrestore(&vmx->pi_lock, flags);
>> +
>> +    return true;
>> +}
> 
> Please confirm whether a spinlock is necessary with the pseudocode above.
In the current implementation, the spinlock is necessary to avoid a race 
condition between vcpus when delivering a posted interrupt and syncing pir->irr.

>> +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>> +{
>> +    struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +    unsigned long flags;
>> +
>> +    if (!vmx_vm_has_apicv(vcpu->kvm))
>> +            return;
>> +
>> +    spin_lock_irqsave(&vmx->pi_lock, flags);
> 
> 
>> +    if (!pi_test_and_clear_on(&vmx->pi_desc)) {
> 
> This needs to move to vcpu_enter_guest, after local_irq_disabled,
> unconditionally, as noted.
> 
>> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,                  
>>            
>>  struct kvm_lapic_state *s) { +      kvm_x86_ops->sync_pir_to_irr(vcpu);
>>      memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>>  
>>      return 0;
>> --
>> 1.7.1
>


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to