On Mon, Jul 27, 2009 at 11:13:50AM +0300, Gleb Natapov wrote:
> On Mon, Jul 27, 2009 at 11:02:06AM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jul 27, 2009 at 10:31:38AM +0300, Gleb Natapov wrote:
> > > On Mon, Jul 27, 2009 at 09:28:34AM +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Jul 27, 2009 at 08:33:12AM +0300, Gleb Natapov wrote:
> > > > > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote:
> > > > > > Here's an untested patch with partial support for level triggered
> > > > > > interrupts in irqfd. What this patch has: support for clearing 
> > > > > > interrupt
> > > > > > on ack. What this patch does not have: support signalling eventfd 
> > > > > > on ack
> > > > > > so that userspace can take action and e.g. reenable interrupt.
> > > > > > 
> > > > > Do we want level triggered irqfd to be used only for device 
> > > > > assignment?
> > > > 
> > > > generally level triggered-not only.
> > > > But irqfd is promarily for device assignment and emulated devices.
> > > > 
> > > If it is for emulated device zeroing irq level on ack is not acceptable.
> > 
> > OK, forget about emulated devices for now. Let's focus on assigned
> > devices.
> > 
> Can't. We should design interface that works for everything.

We can always add the KVM_MAKE_COFFEE ioctl later.

> > > > > Because if not it is not appropriate to zero irq level on ack.
> > > > 
> > > > The bit that is missing in this patch, is that we'll signal eventfd
> > > > after we zero irq. Userspace polls that and re-asserts irq.
> > > That is not needed for device emulation. Emulated device should lower
> > > irq line by itself. It know exactly when to do it. Please don't push
> > > broken device assignment model to emulated devices.
> > > 
> > > > 
> > > > > And you
> > > > > have no other way to zero irq level?!
> > > > 
> > > > The reason is interrupt sharing: device can assert interrupt
> > > > but can not clear it by itself.
> > > For interrupt sharing to work you need to allocate source id for each
> > > irqfd.
> > 
> > BTW, current SET_IRQ_LINE interface uses a common source id for all
> > irqs. Does this mean that interrupt sharing in guest is broken now?
> No. It means that sharing in handled in userspace by chipset emulation.

Aha. Where's that code in qemu-kvm? I only see it forwarding to the
ioctl call directly. And what about assigned devices? These bypass qemu.

> > 
> > > > qemu can still use the SET_IRQ ioctl if it wants full control.
> > > > 
> > > > 
> > > > > Another note is that level
> > > > > triggered irqfd shouldn't use KVM_USERSPACE_IRQ_SOURCE_ID since it may
> > > > > be shared.
> > > > 
> > > > Since we zero on ack, I think this is not a problem.
> > > > 
> > > We will not zero on ack for emulated devices. Please don't abuse ack
> > > notifiers. Even if you zero on ack this is still the problem BTW since
> > > other device may lower your line level.
> > > 
> > > > > > Gleb, Marcelo, I'd like your input on the approach taken wrt 
> > > > > > locking.
> > > > > > Does it look sane?
> > > > > > 
> > > > > > Avi, how's the interface? I intend to also add an eventfd probably 
> > > > > > in
> > > > > > the padding in the irqfd struct.
> > > > > > 
> > > > > > Signed-off-by: Michael S. Tsirkin <[email protected]>
> > > > > > 
> > > > > > ---
> > > > > > 
> > > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> > > > > > index 230a91a..8bf16af 100644
> > > > > > --- a/include/linux/kvm.h
> > > > > > +++ b/include/linux/kvm.h
> > > > > > @@ -488,6 +488,7 @@ struct kvm_x86_mce {
> > > > > >  #endif
> > > > > >  
> > > > > >  #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
> > > > > > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1)
> > > > > >  
> > > > > >  struct kvm_irqfd {
> > > > > >     __u32 fd;
> > > > > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > > > > > index 99017e8..fcbf5b5 100644
> > > > > > --- a/virt/kvm/eventfd.c
> > > > > > +++ b/virt/kvm/eventfd.c
> > > > > > @@ -45,12 +45,14 @@ struct _irqfd {
> > > > > >     struct kvm               *kvm;
> > > > > >     struct eventfd_ctx       *eventfd;
> > > > > >     int                       gsi;
> > > > > > +   int                       is_level;
> > > > > >     struct list_head          list;
> > > > > >     poll_table                pt;
> > > > > >     wait_queue_head_t        *wqh;
> > > > > >     wait_queue_t              wait;
> > > > > >     struct work_struct        inject;
> > > > > >     struct work_struct        shutdown;
> > > > > > +   struct kvm_irq_ack_notifier kian;
> > > > > >  };
> > > > > >  
> > > > > >  static struct workqueue_struct *irqfd_cleanup_wq;
> > > > > > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work)
> > > > > >  
> > > > > >     mutex_lock(&kvm->irq_lock);
> > > > > >     kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
> > > > > > -   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> > > > > > +   if (!irqfd->is_level)
> > > > > > +           kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 
> > > > > > irqfd->gsi, 0);
> > > > > >     mutex_unlock(&kvm->irq_lock);
> > > > > >  }
> > > > > >  
> > > > > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian)
> > > > > > +{
> > > > > > +   kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 
> > > > > > 0);
> > > > > > +}
> > > > > >  /*
> > > > > >   * Race-free decouple logic (ordering is critical)
> > > > > >   */
> > > > > > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work)
> > > > > >      */
> > > > > >     flush_work(&irqfd->inject);
> > > > > >  
> > > > > > +   if (irqfd->is_level)
> > > > > > +           kvm_unregister_irq_ack_notifier(&irqfd->kian);
> > > > > > +
> > > > > >     /*
> > > > > >      * It is now safe to release the object's resources
> > > > > >      */
> > > > > > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, 
> > > > > > wait_queue_head_t *wqh,
> > > > > >  }
> > > > > >  
> > > > > >  static int
> > > > > > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> > > > > > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level)
> > > > > >  {
> > > > > >     struct _irqfd *irqfd;
> > > > > >     struct file *file = NULL;
> > > > > > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int 
> > > > > > gsi)
> > > > > >  
> > > > > >     irqfd->kvm = kvm;
> > > > > >     irqfd->gsi = gsi;
> > > > > > +   irqfd->is_level = is_level;
> > > > > >     INIT_LIST_HEAD(&irqfd->list);
> > > > > >     INIT_WORK(&irqfd->inject, irqfd_inject);
> > > > > >     INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
> > > > > > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int 
> > > > > > gsi)
> > > > > >  
> > > > > >     irqfd->eventfd = eventfd;
> > > > > >  
> > > > > > +   if (is_level) {
> > > > > > +           irqfd->kian.gsi = gsi;
> > > > > > +           irqfd->kian.irq_acked = irqfd_irq_acked;
> > > > > > +           kvm_register_irq_ack_notifier(&irqfd->kian);
> > > > > > +   }
> > > > > > +
> > > > > >     /*
> > > > > >      * Install our own custom wake-up handling so we are notified 
> > > > > > via
> > > > > >      * a callback whenever someone signals the underlying eventfd
> > > > > > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, 
> > > > > > int gsi)
> > > > > >  int
> > > > > >  kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> > > > > >  {
> > > > > > +   if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL))
> > > > > > +           return -EINVAL;
> > > > > > +
> > > > > >     if (flags & KVM_IRQFD_FLAG_DEASSIGN)
> > > > > >             return kvm_irqfd_deassign(kvm, fd, gsi);
> > > > > >  
> > > > > > -   return kvm_irqfd_assign(kvm, fd, gsi);
> > > > > > +   return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & 
> > > > > > KVM_IRQFD_FLAG_LEVEL));
> > > > > >  }
> > > > > >  
> > > > > >  /*
> > > > > 
> > > > > --
> > > > >                       Gleb.
> > > 
> > > --
> > >                   Gleb.
> 
> --
>                       Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to