Re: [PATCH v3 2/4] VSOCK: Introduce virtio-vsock.ko

2015-12-10 Thread Alex Bennée

Stefan Hajnoczi  writes:

> From: Asias He 
>
> VM sockets virtio transport implementation. This module runs in guest
> kernel.

checkpatch warns on a bunch of whitespace/tab issues.

>
> Signed-off-by: Asias He 
> Signed-off-by: Stefan Hajnoczi 
> ---
> v2:
>  * Fix total_tx_buf accounting
>  * Add virtio_transport global mutex to prevent races
> ---
>  net/vmw_vsock/virtio_transport.c | 466 
> +++
>  1 file changed, 466 insertions(+)
>  create mode 100644 net/vmw_vsock/virtio_transport.c
>
> diff --git a/net/vmw_vsock/virtio_transport.c 
> b/net/vmw_vsock/virtio_transport.c
> new file mode 100644
> index 000..df65dca
> --- /dev/null
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -0,0 +1,466 @@
> +/*
> + * virtio transport for vsock
> + *
> + * Copyright (C) 2013-2015 Red Hat, Inc.
> + * Author: Asias He 
> + * Stefan Hajnoczi 
> + *
> + * Some of the code is take from Gerd Hoffmann 's
> + * early virtio-vsock proof-of-concept bits.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static struct workqueue_struct *virtio_vsock_workqueue;
> +static struct virtio_vsock *the_virtio_vsock;
> +static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
> +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock);
> +
> +struct virtio_vsock {
> + /* Virtio device */
> + struct virtio_device *vdev;
> + /* Virtio virtqueue */
> + struct virtqueue *vqs[VSOCK_VQ_MAX];
> + /* Wait queue for send pkt */
> + wait_queue_head_t queue_wait;
> + /* Work item to send pkt */
> + struct work_struct tx_work;
> + /* Work item to recv pkt */
> + struct work_struct rx_work;
> + /* Mutex to protect send pkt*/
> + struct mutex tx_lock;
> + /* Mutex to protect recv pkt*/
> + struct mutex rx_lock;

Further down I got confused by what lock was what and exactly what was
being protected. If the receive and transmit paths touch separate things
it might be worth re-arranging the structure to make it clearer, eg:

   /* The transmit path is protected by tx_lock */
   struct mutex tx_lock;
   struct work_struct tx_work;
   ..
   ..

   /* The receive path is protected by rx_lock */
   wait_queue_head_t queue_wait;
   ..
   ..

 Which might make things a little clearer. Then all the redundant
 information in the comments can be removed. I don't need to know what
 is a Virtio device, virtqueue or wait_queue etc as they are implicit in
 the structure name.

> + /* Number of recv buffers */
> + int rx_buf_nr;
> + /* Number of max recv buffers */
> + int rx_buf_max_nr;
> + /* Used for global tx buf limitation */
> + u32 total_tx_buf;
> + /* Guest context id, just like guest ip address */
> + u32 guest_cid;
> +};
> +
> +static struct virtio_vsock *virtio_vsock_get(void)
> +{
> + return the_virtio_vsock;
> +}
> +
> +static u32 virtio_transport_get_local_cid(void)
> +{
> + struct virtio_vsock *vsock = virtio_vsock_get();
> +
> + return vsock->guest_cid;
> +}
> +
> +static int
> +virtio_transport_send_pkt(struct vsock_sock *vsk,
> +   struct virtio_vsock_pkt_info *info)
> +{
> + u32 src_cid, src_port, dst_cid, dst_port;
> + int ret, in_sg = 0, out_sg = 0;
> + struct virtio_transport *trans;
> + struct virtio_vsock_pkt *pkt;
> + struct virtio_vsock *vsock;
> + struct scatterlist hdr, buf, *sgs[2];
> + struct virtqueue *vq;
> + u32 pkt_len = info->pkt_len;
> + DEFINE_WAIT(wait);
> +
> + vsock = virtio_vsock_get();
> + if (!vsock)
> + return -ENODEV;
> +
> + src_cid = virtio_transport_get_local_cid();
> + src_port = vsk->local_addr.svm_port;
> + if (!info->remote_cid) {
> + dst_cid = vsk->remote_addr.svm_cid;
> + dst_port = vsk->remote_addr.svm_port;
> + } else {
> + dst_cid = info->remote_cid;
> + dst_port = info->remote_port;
> + }
> +
> + trans = vsk->trans;
> + vq = vsock->vqs[VSOCK_VQ_TX];
> +
> + if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
> + pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
> + pkt_len = virtio_transport_get_credit(trans, pkt_len);
> + /* Do not send zero length OP_RW pkt*/
> + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> + return pkt_len;
> +
> + /* Respect global tx buf limitation */
> + mutex_lock(&vsock->tx_lock);
> + while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) {
> + prepare_to_wait_exclusive(&vsock->queue_wait, &wait,
> +   TASK_UNINTERRUPTIBLE);
> + mutex_unlock(&vsock->tx_lock);
> +

Re: [PATCH v3 2/4] VSOCK: Introduce virtio-vsock.ko

2015-12-10 Thread Stefan Hajnoczi
On Thu, Dec 10, 2015 at 09:23:25PM +, Alex Bennée wrote:
> Stefan Hajnoczi  writes:
> 
> > From: Asias He 
> >
> > VM sockets virtio transport implementation. This module runs in guest
> > kernel.
> 
> checkpatch warns on a bunch of whitespace/tab issues.

Will fix in the next version.

> > +struct virtio_vsock {
> > +   /* Virtio device */
> > +   struct virtio_device *vdev;
> > +   /* Virtio virtqueue */
> > +   struct virtqueue *vqs[VSOCK_VQ_MAX];
> > +   /* Wait queue for send pkt */
> > +   wait_queue_head_t queue_wait;
> > +   /* Work item to send pkt */
> > +   struct work_struct tx_work;
> > +   /* Work item to recv pkt */
> > +   struct work_struct rx_work;
> > +   /* Mutex to protect send pkt*/
> > +   struct mutex tx_lock;
> > +   /* Mutex to protect recv pkt*/
> > +   struct mutex rx_lock;
> 
> Further down I got confused by what lock was what and exactly what was
> being protected. If the receive and transmit paths touch separate things
> it might be worth re-arranging the structure to make it clearer, eg:
> 
>/* The transmit path is protected by tx_lock */
>struct mutex tx_lock;
>struct work_struct tx_work;
>..
>..
> 
>/* The receive path is protected by rx_lock */
>wait_queue_head_t queue_wait;
>..
>..
> 
>  Which might make things a little clearer. Then all the redundant
>  information in the comments can be removed. I don't need to know what
>  is a Virtio device, virtqueue or wait_queue etc as they are implicit in
>  the structure name.

Thanks, that is a nice idea.

> > +   mutex_lock(&vsock->tx_lock);
> > +   while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt,
> > +   GFP_KERNEL)) < 0) {
> > +   prepare_to_wait_exclusive(&vsock->queue_wait, &wait,
> > + TASK_UNINTERRUPTIBLE);
> > +   mutex_unlock(&vsock->tx_lock);
> > +   schedule();
> > +   mutex_lock(&vsock->tx_lock);
> > +   finish_wait(&vsock->queue_wait, &wait);
> > +   }
> > +   virtqueue_kick(vq);
> > +   mutex_unlock(&vsock->tx_lock);
> 
> What are we protecting with tx_lock here? See comments above about
> making the lock usage semantics clearer.

vq (vsock->vqs[VSOCK_VQ_TX]) is being protected.  Concurrent calls to
virtqueue_add_sgs() are not allowed.

> > +
> > +   return pkt_len;
> > +}
> > +
> > +static struct virtio_transport_pkt_ops virtio_ops = {
> > +   .send_pkt = virtio_transport_send_pkt,
> > +};
> > +
> > +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
> > +{
> > +   int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
> > +   struct virtio_vsock_pkt *pkt;
> > +   struct scatterlist hdr, buf, *sgs[2];
> > +   struct virtqueue *vq;
> > +   int ret;
> > +
> > +   vq = vsock->vqs[VSOCK_VQ_RX];
> > +
> > +   do {
> > +   pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
> > +   if (!pkt) {
> > +   pr_debug("%s: fail to allocate pkt\n", __func__);
> > +   goto out;
> > +   }
> > +
> > +   /* TODO: use mergeable rx buffer */
> 
> TODO's should end up in merged code.

Will fix in next revision.

> > +   pkt->buf = kmalloc(buf_len, GFP_KERNEL);
> > +   if (!pkt->buf) {
> > +   pr_debug("%s: fail to allocate pkt->buf\n", __func__);
> > +   goto err;
> > +   }
> > +
> > +   sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
> > +   sgs[0] = &hdr;
> > +
> > +   sg_init_one(&buf, pkt->buf, buf_len);
> > +   sgs[1] = &buf;
> > +   ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL);
> > +   if (ret)
> > +   goto err;
> > +   vsock->rx_buf_nr++;
> > +   } while (vq->num_free);
> > +   if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
> > +   vsock->rx_buf_max_nr = vsock->rx_buf_nr;
> > +out:
> > +   virtqueue_kick(vq);
> > +   return;
> > +err:
> > +   virtqueue_kick(vq);
> > +   virtio_transport_free_pkt(pkt);
> 
> You could free the pkt memory at the fail site and just have one exit path.

Okay, I agree the err label is of marginal use.  Let's get rid of it.

> 
> > +   return;
> > +}
> > +
> > +static void virtio_transport_send_pkt_work(struct work_struct *work)
> > +{
> > +   struct virtio_vsock *vsock =
> > +   container_of(work, struct virtio_vsock, tx_work);
> > +   struct virtio_vsock_pkt *pkt;
> > +   bool added = false;
> > +   struct virtqueue *vq;
> > +   unsigned int len;
> > +   struct sock *sk;
> > +
> > +   vq = vsock->vqs[VSOCK_VQ_TX];
> > +   mutex_lock(>tx_lock);
> > +   do {
> 
> You can move the declarations of pkt/len into the do block.

Okay.

> 
> > +   virtqueue_disable_cb(vq);
> > +   while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) {
> 
> And the sk declaration here

Okay.

> > +static void virtio_transport_recv_pkt_work(struct work_struct *work)
> > +{
> > +   struct virtio_vsock *vsock =
> > +   container_of(work, struct virtio_vsock, rx_work);
> > +   struct 

[PATCH v3 2/4] VSOCK: Introduce virtio-vsock.ko

2015-12-09 Thread Stefan Hajnoczi
From: Asias He 

VM sockets virtio transport implementation. This module runs in guest
kernel.

Signed-off-by: Asias He 
Signed-off-by: Stefan Hajnoczi 
---
v2:
 * Fix total_tx_buf accounting
 * Add virtio_transport global mutex to prevent races
---
 net/vmw_vsock/virtio_transport.c | 466 +++
 1 file changed, 466 insertions(+)
 create mode 100644 net/vmw_vsock/virtio_transport.c

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
new file mode 100644
index 000..df65dca
--- /dev/null
+++ b/net/vmw_vsock/virtio_transport.c
@@ -0,0 +1,466 @@
+/*
+ * virtio transport for vsock
+ *
+ * Copyright (C) 2013-2015 Red Hat, Inc.
+ * Author: Asias He 
+ * Stefan Hajnoczi 
+ *
+ * Some of the code is take from Gerd Hoffmann 's
+ * early virtio-vsock proof-of-concept bits.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static struct workqueue_struct *virtio_vsock_workqueue;
+static struct virtio_vsock *the_virtio_vsock;
+static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+static void virtio_vsock_rx_fill(struct virtio_vsock *vsock);
+
+struct virtio_vsock {
+   /* Virtio device */
+   struct virtio_device *vdev;
+   /* Virtio virtqueue */
+   struct virtqueue *vqs[VSOCK_VQ_MAX];
+   /* Wait queue for send pkt */
+   wait_queue_head_t queue_wait;
+   /* Work item to send pkt */
+   struct work_struct tx_work;
+   /* Work item to recv pkt */
+   struct work_struct rx_work;
+   /* Mutex to protect send pkt*/
+   struct mutex tx_lock;
+   /* Mutex to protect recv pkt*/
+   struct mutex rx_lock;
+   /* Number of recv buffers */
+   int rx_buf_nr;
+   /* Number of max recv buffers */
+   int rx_buf_max_nr;
+   /* Used for global tx buf limitation */
+   u32 total_tx_buf;
+   /* Guest context id, just like guest ip address */
+   u32 guest_cid;
+};
+
+static struct virtio_vsock *virtio_vsock_get(void)
+{
+   return the_virtio_vsock;
+}
+
+static u32 virtio_transport_get_local_cid(void)
+{
+   struct virtio_vsock *vsock = virtio_vsock_get();
+
+   return vsock->guest_cid;
+}
+
+static int
+virtio_transport_send_pkt(struct vsock_sock *vsk,
+ struct virtio_vsock_pkt_info *info)
+{
+   u32 src_cid, src_port, dst_cid, dst_port;
+   int ret, in_sg = 0, out_sg = 0;
+   struct virtio_transport *trans;
+   struct virtio_vsock_pkt *pkt;
+   struct virtio_vsock *vsock;
+   struct scatterlist hdr, buf, *sgs[2];
+   struct virtqueue *vq;
+   u32 pkt_len = info->pkt_len;
+   DEFINE_WAIT(wait);
+
+   vsock = virtio_vsock_get();
+   if (!vsock)
+   return -ENODEV;
+
+   src_cid = virtio_transport_get_local_cid();
+   src_port = vsk->local_addr.svm_port;
+   if (!info->remote_cid) {
+   dst_cid = vsk->remote_addr.svm_cid;
+   dst_port = vsk->remote_addr.svm_port;
+   } else {
+   dst_cid = info->remote_cid;
+   dst_port = info->remote_port;
+   }
+
+   trans = vsk->trans;
+   vq = vsock->vqs[VSOCK_VQ_TX];
+
+   if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
+   pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+   pkt_len = virtio_transport_get_credit(trans, pkt_len);
+   /* Do not send zero length OP_RW pkt*/
+   if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
+   return pkt_len;
+
+   /* Respect global tx buf limitation */
+   mutex_lock(&vsock->tx_lock);
+   while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) {
+   prepare_to_wait_exclusive(&vsock->queue_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+   mutex_unlock(&vsock->tx_lock);
+   schedule();
+   mutex_lock(&vsock->tx_lock);
+   finish_wait(&vsock->queue_wait, &wait);
+   }
+   vsock->total_tx_buf += pkt_len;
+   mutex_unlock(&vsock->tx_lock);
+
+   pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len,
+src_cid, src_port,
+dst_cid, dst_port);
+   if (!pkt) {
+   mutex_lock(&vsock->tx_lock);
+   vsock->total_tx_buf -= pkt_len;
+   mutex_unlock(&vsock->tx_lock);
+   virtio_transport_put_credit(trans, pkt_len);
+   return -ENOMEM;
+   }
+
+   pr_debug("%s:info->pkt_len= %d\n", __func__, info->pkt_len);
+
+   /* Will be released in virtio_transport_send_pkt_work */
+   sock_hold(&trans->vsk->sk);
+   virtio_transport_inc_tx_pkt(pkt);
+
+   /* Put pkt in the virtqueue */
+   sg_init_one(&hdr, &pkt->hdr,