On Fri, Feb 22, 2013 at 09:42:32PM +0800, Yang Zhang wrote:
> From: Yang Zhang <[email protected]>
>
> Posted Interrupt allows APIC interrupts to inject into guest directly
> without any vmexit.
>
> - When delivering an interrupt to the guest, if the target vcpu is running,
> update Posted-interrupt requests bitmap and send a notification event
> to the vcpu. Then the vcpu will handle this interrupt automatically,
> without any software involvement.
>
> - If the target vcpu is not running or there is already a notification event
> pending in the vcpu, do nothing. The interrupt will be handled on the
> next vm entry.
>
> Signed-off-by: Yang Zhang <[email protected]>
> ---
> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> index e4595f1..64616cc 100644
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
> set_irq_regs(old_regs);
> }
>
> +#ifdef CONFIG_HAVE_KVM
> +/*
> + * Handler for POSTED_INTERRUPT_VECTOR.
> + */
> +void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
> +{
> + struct pt_regs *old_regs = set_irq_regs(regs);
> +
> + ack_APIC_irq();
> +
> + irq_enter();
> +
> + exit_idle();
> +
> + irq_exit();
> +
> + set_irq_regs(old_regs);
> +}
> +#endif
> +
Add per-cpu counters, similarly to other handlers in the same file.
> @@ -379,6 +398,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic
> *apic)
> if (!apic->irr_pending)
> return -1;
>
> + kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
> result = apic_search_irr(apic);
> ASSERT(result == -1 || result >= 16);
kvm_x86_ops->sync_pir_to_irr() should be called from vcpu_enter_guest,
before inject_pending_event.
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
->
inject_pending_event
...
}
> @@ -685,6 +705,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
> {
> int result = 0;
> struct kvm_vcpu *vcpu = apic->vcpu;
> + bool delivered = false;
>
> switch (delivery_mode) {
> case APIC_DM_LOWEST:
> @@ -700,7 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
> } else
> apic_clear_vector(vector, apic->regs + APIC_TMR);
>
> - result = !apic_test_and_set_irr(vector, apic);
> + if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector,
> + &result, &delivered))
> + result = !apic_test_and_set_irr(vector, apic);
> +
> trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
> trig_mode, vector, !result);
> if (!result) {
> @@ -710,8 +734,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
> break;
> }
>
> - kvm_make_request(KVM_REQ_EVENT, vcpu);
> - kvm_vcpu_kick(vcpu);
> + if (!delivered) {
> + kvm_make_request(KVM_REQ_EVENT, vcpu);
> + kvm_vcpu_kick(vcpu);
> + }
This set bit / kick pair should be for non-APICv only (please
check for it directly).
> + return test_bit(vector, (unsigned long *)pi_desc->pir);
> +}
> +
> +static void pi_set_pir(int vector, struct pi_desc *pi_desc)
> +{
> + __set_bit(vector, (unsigned long *)pi_desc->pir);
> +}
This must be locked atomic operation (set_bit).
> +
> struct vcpu_vmx {
> struct kvm_vcpu vcpu;
> unsigned long host_rsp;
> @@ -429,6 +465,10 @@ struct vcpu_vmx {
>
> bool rdtscp_enabled;
>
> + /* Posted interrupt descriptor */
> + struct pi_desc pi_desc;
> + spinlock_t pi_lock;
Don't see why the lock is necessary. Please use the following
pseudocode for vmx_deliver_posted_interrupt:
vmx_deliver_posted_interrupt(), returns 'bool injected'.
1. orig_irr = read irr from vapic page
2. if (orig_irr != 0)
3. return false;
4. kvm_make_request(KVM_REQ_EVENT)
5. bool injected = !test_and_set_bit(PIR)
6. if (vcpu->guest_mode && injected)
7. if (test_and_set_bit(PIR notification bit))
8. send PIR IPI
9. return injected
On vcpu_enter_guest:
if (kvm_check_request(KVM_REQ_EVENT)) {
pir->virr sync (*)
...
}
...
vcpu->mode = IN_GUEST_MODE;
local_irq_disable
clear pir notification bit unconditionally (*)
Right after local_irq_disable.
> +static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
> + int vector, int *result, bool *delivered)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + unsigned long flags;
> +
> + if (!vmx_vm_has_apicv(vcpu->kvm))
> + return false;
> +
> + spin_lock_irqsave(&vmx->pi_lock, flags);
> + if (pi_test_pir(vector, &vmx->pi_desc) ||
> + kvm_apic_test_irr(vector, vcpu->arch.apic)) {
> + spin_unlock_irqrestore(&vmx->pi_lock, flags);
> + return true;
> + }
> +
> + pi_set_pir(vector, &vmx->pi_desc);
> + *result = 1;
> + if (!pi_test_and_set_on(&vmx->pi_desc) &&
> + (vcpu->mode == IN_GUEST_MODE)) {
> + kvm_make_request(KVM_REQ_EVENT, vcpu);
> + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
> + POSTED_INTR_VECTOR);
> + *delivered = true;
> + }
> + spin_unlock_irqrestore(&vmx->pi_lock, flags);
> +
> + return true;
> +}
Please confirm whether a spinlock is necessary with the pseudocode
above.
> +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + unsigned long flags;
> +
> + if (!vmx_vm_has_apicv(vcpu->kvm))
> + return;
> +
> + spin_lock_irqsave(&vmx->pi_lock, flags);
> + if (!pi_test_and_clear_on(&vmx->pi_desc)) {
This needs to move to vcpu_enter_guest, after local_irq_disable(),
unconditionally, as noted.
> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
> static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
> struct kvm_lapic_state *s)
> {
> + kvm_x86_ops->sync_pir_to_irr(vcpu);
> memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>
> return 0;
> --
> 1.7.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html