On Mon, Jun 14, 2010 at 01:11:20PM -0400, Chris Lalancette wrote:
> We really want to "kvm_set_irq" during the hrtimer callback,
> but that is risky because that is during interrupt context.
> Instead, offload the work to a workqueue, which is a bit safer
> and should provide most of the same functionality.
>
> Signed-off-by: Chris Lalancette <[email protected]>
> ---
> arch/x86/kvm/i8254.c | 125 ++++++++++++++++++++++++++++---------------------
> arch/x86/kvm/i8254.h | 4 +-
> arch/x86/kvm/irq.c | 1 -
> 3 files changed, 74 insertions(+), 56 deletions(-)
>
> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> index 188d827..3bed8ac 100644
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -34,6 +34,7 @@
>
> #include <linux/kvm_host.h>
> #include <linux/slab.h>
> +#include <linux/workqueue.h>
>
> #include "irq.h"
> #include "i8254.h"
> @@ -244,11 +245,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier
> *kian)
> {
> struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
> irq_ack_notifier);
> - raw_spin_lock(&ps->inject_lock);
> + spin_lock(&ps->inject_lock);
> if (atomic_dec_return(&ps->pit_timer.pending) < 0)
> atomic_inc(&ps->pit_timer.pending);
> ps->irq_ack = 1;
> - raw_spin_unlock(&ps->inject_lock);
> + spin_unlock(&ps->inject_lock);
> }
>
> void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
> @@ -267,7 +268,8 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
> static void destroy_pit_timer(struct kvm_timer *pt)
> {
> pr_debug("execute del timer!\n");
> - hrtimer_cancel(&pt->timer);
> + if (hrtimer_cancel(&pt->timer))
> + cancel_work_sync(&pt->kvm->arch.vpit->expired);
> }
>
> static bool kpit_is_periodic(struct kvm_timer *ktimer)
> @@ -281,6 +283,58 @@ static struct kvm_timer_ops kpit_ops = {
> .is_periodic = kpit_is_periodic,
> };
>
> +static void pit_do_work(struct work_struct *work)
> +{
> + struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
> + struct kvm *kvm = pit->kvm;
> + struct kvm_vcpu *vcpu;
> + int i;
> + struct kvm_kpit_state *ps = &pit->pit_state;
> + int inject = 0;
> +
> + /* Try to inject pending interrupts when
> + * last one has been acked.
> + */
> + spin_lock(&ps->inject_lock);
> + if (ps->irq_ack) {
> + ps->irq_ack = 0;
> + inject = 1;
> + }
> + spin_unlock(&ps->inject_lock);
> + if (inject) {
> + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
> + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
> +
> + /*
> + * Provides NMI watchdog support via Virtual Wire mode.
> + * The route is: PIT -> PIC -> LVT0 in NMI mode.
> + *
> + * Note: Our Virtual Wire implementation is simplified, only
> + * propagating PIT interrupts to all VCPUs when they have set
> + * LVT0 to NMI delivery. Other PIC interrupts are just sent to
> + * VCPU0, and only if its LVT0 is in EXTINT mode.
> + */
> + if (kvm->arch.vapics_in_nmi_mode > 0)
> + kvm_for_each_vcpu(i, vcpu, kvm)
> + kvm_apic_nmi_wd_deliver(vcpu);
> + }
> +}
> +
> +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
> +{
> + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
> + struct kvm_pit *pt = ktimer->kvm->arch.vpit;
> +
> + if (ktimer->reinject)
> + queue_work(pt->wq, &pt->expired);
The "problem" is queue_work only queues the work if it was not already
queued. So multiple queue_work() calls can collapse into one executed
job.
You need to maintain a counter here in pit_timer_fn, and reinject at
some point (perhaps on ACK) if there are multiple interrupts pending.
> +
> + if (ktimer->t_ops->is_periodic(ktimer)) {
> + hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
> + return HRTIMER_RESTART;
> + } else
> + return HRTIMER_NORESTART;
> +}
> +
> static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int
> is_period)
> {
> struct kvm_timer *pt = &ps->pit_timer;
> @@ -291,14 +345,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps,
> u32 val, int is_period)
> pr_debug("create pit timer, interval is %llu nsec\n", interval);
>
> /* TODO The new value only affected after the retriggered */
> - hrtimer_cancel(&pt->timer);
> + if (hrtimer_cancel(&pt->timer))
> + cancel_work_sync(&pt->kvm->arch.vpit->expired);
There can be a queued work instance even if the hrtimer is not active,
so cancel_work_sync should be unconditional.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html