Re: [PATCH v3 3/4] VSOCK: Introduce vhost-vsock.ko

2015-12-15 Thread Stefan Hajnoczi
> On Fri, Dec 11, 2015 at 01:45:29PM +0000, Alex Bennée wrote:
> > +   if (head == vq->num) {
> > +   if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
> > +   vhost_disable_notify(&vsock->dev, vq);
> > +   continue;
> 
> Why are we doing this? If we enable something we then disable it? A
> comment as to what is going on here would be useful.

This is a standard optimization to avoid vmexits that other vhost
devices and QEMU implement too.

When the host begins pulling buffers off a virtqueue it first disables
guest->host notifications.  If the guest adds additional buffers while
the host is processing, the notification (vmexit) is skipped.  The host
re-enables guest->host notifications when it finishes virtqueue
processing.

If the guest added buffers after vhost_get_vq_desc() but before
vhost_enable_notify(), then vhost_enable_notify() returns true and the
host must process the buffers (i.e. restart the loop).  Failure to do so
could result in deadlocks because the guest didn't notify and the host
would be waiting for a notification.

I will add comments to the code.

> > +   vhost_add_used(vq, head, pkt->len); /* TODO should this
> > be sizeof(pkt->hdr) + pkt->len? */
> 
> TODO needs sorting our or removing.

Will fix in the next revision.

> > +   /* Respect global tx buf limitation */
> > +   mutex_lock(&vsock->mutex);
> > +   while (pkt_len + vsock->total_tx_buf >
> > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) {
> 
> I'm curious about the relationship between
> VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE above and VIRTIO_VSOCK_MAX_TX_BUF_SIZE
> just here. Why do we need to limit pkt_len to the smaller when really
> all that matters is pkt_len + vsock->total_tx_buf >
> VIRTIO_VSOCK_MAX_TX_BUF_SIZE?

There are two separate issues:

1. The total amount of pending data.  The idea is to stop queuing
   packets and make the caller wait until resources become available so
   that vhost_vsock.ko memory consumption is bounded.

   total_tx_buf len is an artificial limit that is lower than the actual
   virtqueue maximum data size.  Otherwise we could just rely on the
   virtqueue to limit the size but it can be very large.

2. Splitting data into packets that fit into rx virtqueue buffers.  The
   guest sets up the rx virtqueue with VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE
   buffers.  Here, vhost_vsock.ko is assuming that the rx virtqueue
   buffers are always VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE bytes so it
   splits data along this boundary.

   This is ugly because the guest could choose a different buffer size
   and the host has VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE hardcoded.  I'll
   look into eliminating this assumption.

> > +static void vhost_vsock_handle_ctl_kick(struct vhost_work *work)
> > +{
> > +   struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
> > + poll.work);
> > +   struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
> > +dev);
> > +
> > +   pr_debug("%s vq=%p, vsock=%p\n", __func__, vq, vsock);
> > +}
> 
> This doesn't handle anything, it just prints debug stuff. Should this be
> a NOP function?

The control virtqueue is currently not used.  In the next revision this
function will be dropped.

> > +static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 
> > features)
> > +{
> > +   struct vhost_virtqueue *vq;
> > +   int i;
> > +
> > +   if (features & ~VHOST_VSOCK_FEATURES)
> > +   return -EOPNOTSUPP;
> > +
> > +   mutex_lock(&vsock->dev.mutex);
> > +   if ((features & (1 << VHOST_F_LOG_ALL)) &&
> > +   !vhost_log_access_ok(&vsock->dev)) {
> > +   mutex_unlock(&vsock->dev.mutex);
> > +   return -EFAULT;
> > +   }
> > +
> > +   for (i = 0; i < VSOCK_VQ_MAX; i++) {
> > +   vq = &vsock->vqs[i].vq;
> > +   mutex_lock(>mutex);
> > +   vq->acked_features = features;
> 
> Is this a user supplied flag? Should it be masked to valid values?

That is already done above where VHOST_VSOCK_FEATURES is checked.


signature.asc
Description: PGP signature


Re: [PATCH v3 3/4] VSOCK: Introduce vhost-vsock.ko

2015-12-11 Thread Alex Bennée

Stefan Hajnoczi  writes:

> From: Asias He 
>
> VM sockets vhost transport implementation. This module runs in host
> kernel.

As per previous checkpatch comments.

>
> Signed-off-by: Asias He 
> Signed-off-by: Stefan Hajnoczi 
> ---
> v3:
>  * Remove unneeded variable used to store return value
>(Fengguang Wu  and Julia Lawall
>)
> v2:
>  * Add missing total_tx_buf decrement
>  * Support flexible rx/tx descriptor layout
>  * Refuse to assign reserved CIDs
>  * Refuse guest CID if already in use
>  * Only accept correctly addressed packets
> ---
>  drivers/vhost/vsock.c | 628 
> ++
>  drivers/vhost/vsock.h |   4 +
>  2 files changed, 632 insertions(+)
>  create mode 100644 drivers/vhost/vsock.c
>  create mode 100644 drivers/vhost/vsock.h
>
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> new file mode 100644
> index 000..3c0034a
> --- /dev/null
> +++ b/drivers/vhost/vsock.c
> @@ -0,0 +1,628 @@
> +/*
> + * vhost transport for vsock
> + *
> + * Copyright (C) 2013-2015 Red Hat, Inc.
> + * Author: Asias He 
> + * Stefan Hajnoczi 
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include "vhost.h"
> +#include "vsock.h"
> +
> +#define VHOST_VSOCK_DEFAULT_HOST_CID 2
> +
> +static int vhost_transport_socket_init(struct vsock_sock *vsk,
> +struct vsock_sock *psk);
> +
> +enum {
> + VHOST_VSOCK_FEATURES = VHOST_FEATURES,
> +};
> +
> +/* Used to track all the vhost_vsock instances on the system. */
> +static LIST_HEAD(vhost_vsock_list);
> +static DEFINE_MUTEX(vhost_vsock_mutex);
> +
> +struct vhost_vsock_virtqueue {
> + struct vhost_virtqueue vq;
> +};
> +
> +struct vhost_vsock {
> + /* Vhost device */
> + struct vhost_dev dev;
> + /* Vhost vsock virtqueue*/
> + struct vhost_vsock_virtqueue vqs[VSOCK_VQ_MAX];
> + /* Link to global vhost_vsock_list*/
> + struct list_head list;
> + /* Head for pkt from host to guest */
> + struct list_head send_pkt_list;
> + /* Work item to send pkt */
> + struct vhost_work send_pkt_work;
> + /* Wait queue for send pkt */
> + wait_queue_head_t queue_wait;
> + /* Used for global tx buf limitation */
> + u32 total_tx_buf;
> + /* Guest contex id this vhost_vsock instance handles */
> + u32 guest_cid;
> +};

As with 2/4 there is a fair bit of redundancy in the comments but I
don't see any obvious grouping here that could streamline it.

> +
> +static u32 vhost_transport_get_local_cid(void)
> +{
> + return VHOST_VSOCK_DEFAULT_HOST_CID;
> +}
> +
> +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> +{
> + struct vhost_vsock *vsock;
> +
> > + mutex_lock(&vhost_vsock_mutex);
> > + list_for_each_entry(vsock, &vhost_vsock_list, list) {
> > + if (vsock->guest_cid == guest_cid) {
> > + mutex_unlock(&vhost_vsock_mutex);
> > + return vsock;
> > + }
> > + }
> > + mutex_unlock(&vhost_vsock_mutex);
> +
> + return NULL;
> +}
> +
> +static void
> +vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
> + struct vhost_virtqueue *vq)
> +{
> + bool added = false;
> +
> > + mutex_lock(&vq->mutex);
> > + vhost_disable_notify(&vsock->dev, vq);
> + for (;;) {
> + struct virtio_vsock_pkt *pkt;
> + struct iov_iter iov_iter;
> + unsigned out, in;
> + struct sock *sk;
> + size_t nbytes;
> + size_t len;
> + int head;
> +
> > + if (list_empty(&vsock->send_pkt_list)) {
> > + vhost_enable_notify(&vsock->dev, vq);
> > + break;
> > + }
> > +
> > + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> > +  &out, &in, NULL, NULL);
> + pr_debug("%s: head = %d\n", __func__, head);
> + if (head < 0)
> + break;
> +
> + if (head == vq->num) {
> > + if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
> > + vhost_disable_notify(&vsock->dev, vq);
> + continue;

Why are we doing this? If we enable something we then disable it? A
comment as to what is going on here would be useful.

> + }
> + break;
> + }
> +
> > + pkt = list_first_entry(&vsock->send_pkt_list,
> > +struct virtio_vsock_pkt, list);
> > + list_del_init(&pkt->list);
> +
> + if (out) {
> + virtio_transport_free_pkt(pkt);
> + vq_err(vq, "Expected 0 output buffers, got %u\n", out);
> +   

[PATCH v3 3/4] VSOCK: Introduce vhost-vsock.ko

2015-12-09 Thread Stefan Hajnoczi
From: Asias He 

VM sockets vhost transport implementation. This module runs in host
kernel.

Signed-off-by: Asias He 
Signed-off-by: Stefan Hajnoczi 
---
v3:
 * Remove unneeded variable used to store return value
   (Fengguang Wu  and Julia Lawall
   )
v2:
 * Add missing total_tx_buf decrement
 * Support flexible rx/tx descriptor layout
 * Refuse to assign reserved CIDs
 * Refuse guest CID if already in use
 * Only accept correctly addressed packets
---
 drivers/vhost/vsock.c | 628 ++
 drivers/vhost/vsock.h |   4 +
 2 files changed, 632 insertions(+)
 create mode 100644 drivers/vhost/vsock.c
 create mode 100644 drivers/vhost/vsock.h

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
new file mode 100644
index 000..3c0034a
--- /dev/null
+++ b/drivers/vhost/vsock.c
@@ -0,0 +1,628 @@
+/*
+ * vhost transport for vsock
+ *
+ * Copyright (C) 2013-2015 Red Hat, Inc.
+ * Author: Asias He 
+ * Stefan Hajnoczi 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include "vhost.h"
+#include "vsock.h"
+
+#define VHOST_VSOCK_DEFAULT_HOST_CID   2
+
+static int vhost_transport_socket_init(struct vsock_sock *vsk,
+  struct vsock_sock *psk);
+
+enum {
+   VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+};
+
+/* Used to track all the vhost_vsock instances on the system. */
+static LIST_HEAD(vhost_vsock_list);
+static DEFINE_MUTEX(vhost_vsock_mutex);
+
+struct vhost_vsock_virtqueue {
+   struct vhost_virtqueue vq;
+};
+
+struct vhost_vsock {
+   /* Vhost device */
+   struct vhost_dev dev;
+   /* Vhost vsock virtqueue*/
+   struct vhost_vsock_virtqueue vqs[VSOCK_VQ_MAX];
+   /* Link to global vhost_vsock_list*/
+   struct list_head list;
+   /* Head for pkt from host to guest */
+   struct list_head send_pkt_list;
+   /* Work item to send pkt */
+   struct vhost_work send_pkt_work;
+   /* Wait queue for send pkt */
+   wait_queue_head_t queue_wait;
+   /* Used for global tx buf limitation */
+   u32 total_tx_buf;
+   /* Guest contex id this vhost_vsock instance handles */
+   u32 guest_cid;
+};
+
+static u32 vhost_transport_get_local_cid(void)
+{
+   return VHOST_VSOCK_DEFAULT_HOST_CID;
+}
+
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+{
+   struct vhost_vsock *vsock;
+
+   mutex_lock(&vhost_vsock_mutex);
+   list_for_each_entry(vsock, &vhost_vsock_list, list) {
+   if (vsock->guest_cid == guest_cid) {
+   mutex_unlock(&vhost_vsock_mutex);
+   return vsock;
+   }
+   }
+   mutex_unlock(&vhost_vsock_mutex);
+
+   return NULL;
+}
+
+static void
+vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
+   struct vhost_virtqueue *vq)
+{
+   bool added = false;
+
+   mutex_lock(&vq->mutex);
+   vhost_disable_notify(&vsock->dev, vq);
+   for (;;) {
+   struct virtio_vsock_pkt *pkt;
+   struct iov_iter iov_iter;
+   unsigned out, in;
+   struct sock *sk;
+   size_t nbytes;
+   size_t len;
+   int head;
+
+   if (list_empty(&vsock->send_pkt_list)) {
+   vhost_enable_notify(&vsock->dev, vq);
+   break;
+   }
+
+   head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+&out, &in, NULL, NULL);
+   pr_debug("%s: head = %d\n", __func__, head);
+   if (head < 0)
+   break;
+
+   if (head == vq->num) {
+   if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+   vhost_disable_notify(&vsock->dev, vq);
+   continue;
+   }
+   break;
+   }
+
+   pkt = list_first_entry(&vsock->send_pkt_list,
+  struct virtio_vsock_pkt, list);
+   list_del_init(&pkt->list);
+
+   if (out) {
+   virtio_transport_free_pkt(pkt);
+   vq_err(vq, "Expected 0 output buffers, got %u\n", out);
+   break;
+   }
+
+   len = iov_length(&vq->iov[out], in);
+   iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
+
+   nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
+   if (nbytes != sizeof(pkt->hdr)) {
+   virtio_transport_free_pkt(pkt);
+   vq_err(vq, "Faulted on copying pkt hdr\n");
+   break;
+   }
+
+   nbytes = copy_to_iter(pkt->buf, pkt->len,