On Sun, Jul 03, 2011 at 08:04:49PM +0300, Sasha Levin wrote:
> The new flag allows passing a write side of a pipe instead of an
> eventfd to be notified of writes to the specified memory region.
> 
> Instead of signaling an event, the value written to the memory region
> is written to the pipe.
> 
> Using a pipe instead of an eventfd is usefull when any value can be
> written to the memory region but we're interested in recieving the
> actual value instead of just a notification.
> 
> A simple example for practical use is the serial port. we are not
> interested in an exit every time a char is written to the port, but
> we do need to know what was written so we could handle it on the guest.

Looking at this example, how would you handle a pipe full condition?
We can't buffer unlimited amount of data in the host.

> Cc: Avi Kivity <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Marcelo Tosatti <[email protected]>
> Cc: Michael S. Tsirkin <[email protected]>
> Cc: Pekka Enberg <[email protected]>
> Signed-off-by: Sasha Levin <[email protected]>
> ---
>  include/linux/kvm.h |    2 +
>  virt/kvm/eventfd.c  |   65 +++++++++++++++++++++++++++++++++++---------------
>  2 files changed, 47 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 55ef181..548f23a 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -387,12 +387,14 @@ enum {
>       kvm_ioeventfd_flag_nr_datamatch,
>       kvm_ioeventfd_flag_nr_pio,
>       kvm_ioeventfd_flag_nr_deassign,
> +     kvm_ioeventfd_flag_nr_pipe,
>       kvm_ioeventfd_flag_nr_max,
>  };
>  
>  #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
>  #define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
>  #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
> +#define KVM_IOEVENTFD_FLAG_PIPE      (1 << kvm_ioeventfd_flag_nr_pipe)
>  
>  #define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
>  
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 73358d2..434293e 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -413,10 +413,11 @@ module_exit(irqfd_module_exit);
>  
>  /*
>   * --------------------------------------------------------------------
> - * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
> + * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or
> + *            a pipe write.
>   *
> - * userspace can register a PIO/MMIO address with an eventfd for receiving
> - * notification when the memory has been touched.
> + * userspace can register a PIO/MMIO address with an eventfd or a 
> + * pipe for receiving notification when the memory has been touched.
>   * --------------------------------------------------------------------
>   */
>  
> @@ -424,6 +425,7 @@ struct _ioeventfd {
>       struct list_head     list;
>       u64                  addr;
>       int                  length;
> +     struct file         *pipe;
>       struct eventfd_ctx  *eventfd;
>       u64                  datamatch;
>       struct kvm_io_device dev;
> @@ -439,7 +441,11 @@ to_ioeventfd(struct kvm_io_device *dev)
>  static void
>  ioeventfd_release(struct _ioeventfd *p)
>  {
> -     eventfd_ctx_put(p->eventfd);
> +     if (p->eventfd)
> +             eventfd_ctx_put(p->eventfd);
> +     else
> +             fput(p->pipe);
> +
>       list_del(&p->list);
>       kfree(p);
>  }
> @@ -481,6 +487,21 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int 
> len, const void *val)
>       return _val == p->datamatch ? true : false;
>  }
>  
> +static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
> +                         loff_t pos)
> +{
> +     mm_segment_t old_fs;
> +     ssize_t res;
> +
> +     old_fs = get_fs();
> +     set_fs(get_ds());
> +     /* The cast to a user pointer is valid due to the set_fs() */

Interesting. Is buf really always a user pointer?
Why don't we tag it __user then?

> +     res = vfs_write(file, (const char __user *)buf, count, &pos);

If pipe is non-blocking, or if we get a signal,
this might fail or return a value < len.
Data will be lost then, won't it?

> +     set_fs(old_fs);
> +
> +     return res;
> +}
> +
>  /* MMIO/PIO writes trigger an event if the addr/val match */
>  static int
>  ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
> @@ -491,7 +512,11 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, 
> int len,
>       if (!ioeventfd_in_range(p, addr, len, val))
>               return -EOPNOTSUPP;
>  
> -     eventfd_signal(p->eventfd, 1);
> +     if (p->pipe)
> +             kernel_write(p->pipe, val, len, 0);
> +     else
> +             eventfd_signal(p->eventfd, 1);
> +
>       return 0;
>  }
>  
> @@ -533,7 +558,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct 
> kvm_ioeventfd *args)
>       int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
>       enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
>       struct _ioeventfd        *p;
> -     struct eventfd_ctx       *eventfd;
> +     struct eventfd_ctx       *eventfd = NULL;
>       int                       ret;
>  
>       /* must be natural-word sized */
> @@ -555,9 +580,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct 
> kvm_ioeventfd *args)
>       if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
>               return -EINVAL;
>  
> -     eventfd = eventfd_ctx_fdget(args->fd);
> -     if (IS_ERR(eventfd))
> -             return PTR_ERR(eventfd);
> +     if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE)) {
> +             eventfd = eventfd_ctx_fdget(args->fd);
> +             if (IS_ERR(eventfd))
> +                     return PTR_ERR(eventfd);
> +     }
>  
>       p = kzalloc(sizeof(*p), GFP_KERNEL);
>       if (!p) {
> @@ -568,7 +595,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct 
> kvm_ioeventfd *args)
>       INIT_LIST_HEAD(&p->list);
>       p->addr    = args->addr;
>       p->length  = args->len;
> -     p->eventfd = eventfd;
> +
> +     if (args->flags & KVM_IOEVENTFD_FLAG_PIPE)
> +             p->pipe = fget(args->fd);

This really needs to check that the fd is a pipe.
Otherwise you can do weird things like pass in
the kvm device fd itself.


> +     else
> +             p->eventfd = eventfd;
>  
>       /* The datamatch feature is optional, otherwise this is a wildcard */
>       if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
> @@ -601,7 +632,9 @@ unlock_fail:
>  
>  fail:
>       kfree(p);
> -     eventfd_ctx_put(eventfd);
> +
> +     if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE))
> +             eventfd_ctx_put(eventfd);
>  
>       return ret;
>  }
> @@ -612,20 +645,14 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct 
> kvm_ioeventfd *args)
>       int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
>       enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
>       struct _ioeventfd        *p, *tmp;
> -     struct eventfd_ctx       *eventfd;
>       int                       ret = -ENOENT;
>  
> -     eventfd = eventfd_ctx_fdget(args->fd);
> -     if (IS_ERR(eventfd))
> -             return PTR_ERR(eventfd);
> -
>       mutex_lock(&kvm->slots_lock);
>  
>       list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
>               bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
>  
> -             if (p->eventfd != eventfd  ||
> -                 p->addr != args->addr  ||
> +             if (p->addr != args->addr  ||
>                   p->length != args->len ||
>                   p->wildcard != wildcard)
>                       continue;
> @@ -641,8 +668,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct 
> kvm_ioeventfd *args)
>  
>       mutex_unlock(&kvm->slots_lock);
>  
> -     eventfd_ctx_put(eventfd);
> -

Don't we need to put on deassign?

>       return ret;
>  }
>  
> -- 
> 1.7.6
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to