Marcelo Tosatti wrote on 2013-02-23:
>
> On Fri, Feb 22, 2013 at 09:42:32PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <[email protected]>
>>
>> Posted Interrupt allows APIC interrupts to inject into guest directly
>> without any vmexit.
>>
>> - When delivering a interrupt to guest, if target vcpu is running,
>> update Posted-interrupt requests bitmap and send a notification event
>> to the vcpu. Then the vcpu will handle this interrupt automatically,
>> without any software involvemnt.
>> - If target vcpu is not running or there already a notification event
>> pending in the vcpu, do nothing. The interrupt will be handled by
>> next vm entry
>> Signed-off-by: Yang Zhang <[email protected]>
>> ---
>>
>> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
>> index e4595f1..64616cc 100644
>> --- a/arch/x86/kernel/irq.c
>> +++ b/arch/x86/kernel/irq.c
>> @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
>> set_irq_regs(old_regs);
>> }
>> +#ifdef CONFIG_HAVE_KVM
>> +/*
>> + * Handler for POSTED_INTERRUPT_VECTOR.
>> + */
>> +void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
>> +{
>> + struct pt_regs *old_regs = set_irq_regs(regs);
>> +
>> + ack_APIC_irq();
>> +
>> + irq_enter();
>> +
>> + exit_idle();
>> +
>> + irq_exit();
>> +
>> + set_irq_regs(old_regs);
>> +}
>> +#endif
>> +
>
> Add per-cpu counters, similarly to other handlers in the same file.
Sure.
>> @@ -379,6 +398,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic
> *apic)
>> if (!apic->irr_pending)
>> return -1;
>> + kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
>> result = apic_search_irr(apic);
>> ASSERT(result == -1 || result >= 16);
>
> kvm_x86_ops->sync_pir_to_irr() should be called from vcpu_enter_guest,
> before inject_pending_event.
>
> if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> ->
> inject_pending_event
> ...
> }
Some other places will search the irr to do something, like
kvm_cpu_has_interrupt(). So we must sync pir to irr before touching irr, not
just before entering the guest.
>
>> @@ -685,6 +705,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>> {
>> int result = 0;
>> struct kvm_vcpu *vcpu = apic->vcpu;
>> + bool delivered = false;
>>
>> switch (delivery_mode) {
>> case APIC_DM_LOWEST:
>> @@ -700,7 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>> } else
>> apic_clear_vector(vector, apic->regs + APIC_TMR);
>> - result = !apic_test_and_set_irr(vector, apic);
>> + if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector,
>> + &result, &delivered))
>> + result = !apic_test_and_set_irr(vector, apic);
>> +
>> trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>> trig_mode, vector, !result);
>> if (!result) {
>> @@ -710,8 +734,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>> break;
>> }
>> - kvm_make_request(KVM_REQ_EVENT, vcpu);
>> - kvm_vcpu_kick(vcpu);
>> + if (!delivered) {
>> + kvm_make_request(KVM_REQ_EVENT, vcpu);
>> + kvm_vcpu_kick(vcpu);
>> + }
>
> This set bit / kick pair should be for non-APICv only (please
> check for it directly).
Sure
>> + return test_bit(vector, (unsigned long *)pi_desc->pir);
>> +}
>> +
>> +static void pi_set_pir(int vector, struct pi_desc *pi_desc)
>> +{
>> + __set_bit(vector, (unsigned long *)pi_desc->pir);
>> +}
>
> This must be locked atomic operation (set_bit).
If using a spinlock, pi_set_pir is not called concurrently. It is safe to use
__set_bit.
>> +
>> struct vcpu_vmx {
>> struct kvm_vcpu vcpu;
>> unsigned long host_rsp;
>> @@ -429,6 +465,10 @@ struct vcpu_vmx {
>>
>> bool rdtscp_enabled;
>> + /* Posted interrupt descriptor */
>> + struct pi_desc pi_desc;
>> + spinlock_t pi_lock;
>
> Don't see why the lock is necessary. Please use the following
> pseudocode for vmx_deliver_posted_interrupt:
>
> vmx_deliver_posted_interrupt(), returns 'bool injected'.
>
> 1. orig_irr = read irr from vapic page
> 2. if (orig_irr != 0)
> 3. return false;
> 4. kvm_make_request(KVM_REQ_EVENT)
> 5. bool injected = !test_and_set_bit(PIR)
> 6. if (vcpu->guest_mode && injected)
> 7. if (test_and_set_bit(PIR notification bit))
> 8. send PIR IPI
> 9. return injected
Consider the following case:
vcpu 0 | vcpu1
send intr to vcpu1
check irr
receive a posted intr
pir->irr(pir is cleared, irr is set)
injected=test_and_set_bit=true
pir=set
Then both irr and pir have the interrupt pending, and they may merge into one later,
but injected is reported as true. Wrong.
> On vcpu_enter_guest:
>
> if (kvm_check_request(KVM_REQ_EVENT)) {
> pir->virr sync (*)
> ...
> }
> ...
> vcpu->mode = IN_GUEST_MODE;
> local_irq_disable
> clear pir notification bit unconditionally (*)
Why clear the ON bit here? If there is no pir->irr operation in this vmentry, clearing
ON here is redundant.
> Right after local_irq_disable.
>
>> +static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
>> + int vector, int *result, bool *delivered)
>> +{
>> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>> + unsigned long flags;
>> +
>> + if (!vmx_vm_has_apicv(vcpu->kvm))
>> + return false;
>> +
>> + spin_lock_irqsave(&vmx->pi_lock, flags);
>> + if (pi_test_pir(vector, &vmx->pi_desc) ||
>> + kvm_apic_test_irr(vector, vcpu->arch.apic)) {
>> + spin_unlock_irqrestore(&vmx->pi_lock, flags);
>> + return true;
>> + }
>> +
>> + pi_set_pir(vector, &vmx->pi_desc);
>> + *result = 1;
>> + if (!pi_test_and_set_on(&vmx->pi_desc) &&
>> + (vcpu->mode == IN_GUEST_MODE)) {
>> + kvm_make_request(KVM_REQ_EVENT, vcpu);
>> + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>> + POSTED_INTR_VECTOR);
>> + *delivered = true;
>> + }
>> + spin_unlock_irqrestore(&vmx->pi_lock, flags);
>> +
>> + return true;
>> +}
>
> Please confirm whether a spinlock is necessary with the pseudocode above.
In the current implementation, a spinlock is necessary to avoid a race condition
between vcpus when delivering a posted interrupt and syncing pir->irr.
>> +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>> +{
>> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>> + unsigned long flags;
>> +
>> + if (!vmx_vm_has_apicv(vcpu->kvm))
>> + return;
>> +
>> + spin_lock_irqsave(&vmx->pi_lock, flags);
>
>
>> + if (!pi_test_and_clear_on(&vmx->pi_desc)) {
>
> This needs to move to vcpu_enter_guest, after local_irq_disabled,
> unconditionally, as noted.
>
>> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>> static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
>>
>> struct kvm_lapic_state *s) { + kvm_x86_ops->sync_pir_to_irr(vcpu);
>> memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>>
>> return 0;
>> --
>> 1.7.1
>
Best regards,
Yang
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html