Re: [PATCH v3 2/4] VSOCK: Introduce virtio-vsock.ko
Stefan Hajnocziwrites: > From: Asias He > > VM sockets virtio transport implementation. This module runs in guest > kernel. checkpatch warns on a bunch of whitespace/tab issues. > > Signed-off-by: Asias He > Signed-off-by: Stefan Hajnoczi > --- > v2: > * Fix total_tx_buf accounting > * Add virtio_transport global mutex to prevent races > --- > net/vmw_vsock/virtio_transport.c | 466 > +++ > 1 file changed, 466 insertions(+) > create mode 100644 net/vmw_vsock/virtio_transport.c > > diff --git a/net/vmw_vsock/virtio_transport.c > b/net/vmw_vsock/virtio_transport.c > new file mode 100644 > index 000..df65dca > --- /dev/null > +++ b/net/vmw_vsock/virtio_transport.c > @@ -0,0 +1,466 @@ > +/* > + * virtio transport for vsock > + * > + * Copyright (C) 2013-2015 Red Hat, Inc. > + * Author: Asias He > + * Stefan Hajnoczi > + * > + * Some of the code is take from Gerd Hoffmann 's > + * early virtio-vsock proof-of-concept bits. > + * > + * This work is licensed under the terms of the GNU GPL, version 2. > + */ > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +static struct workqueue_struct *virtio_vsock_workqueue; > +static struct virtio_vsock *the_virtio_vsock; > +static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ > +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock); > + > +struct virtio_vsock { > + /* Virtio device */ > + struct virtio_device *vdev; > + /* Virtio virtqueue */ > + struct virtqueue *vqs[VSOCK_VQ_MAX]; > + /* Wait queue for send pkt */ > + wait_queue_head_t queue_wait; > + /* Work item to send pkt */ > + struct work_struct tx_work; > + /* Work item to recv pkt */ > + struct work_struct rx_work; > + /* Mutex to protect send pkt*/ > + struct mutex tx_lock; > + /* Mutex to protect recv pkt*/ > + struct mutex rx_lock; Further down I got confused by what lock was what and exactly what was being protected. 
If the receive and transmit paths touch separate things it might be worth re-arranging the structure to make it clearer, eg: /* The transmit path is protected by tx_lock */ struct mutex tx_lock; struct work_struct tx_work; .. .. /* The receive path is protected by rx_lock */ wait_queue_head_t queue_wait; .. .. Which might make things a little clearer. Then all the redundant information in the comments can be removed. I don't need to know what is a Virtio device, virtqueue or wait_queue etc as they are implicit in the structure name. > + /* Number of recv buffers */ > + int rx_buf_nr; > + /* Number of max recv buffers */ > + int rx_buf_max_nr; > + /* Used for global tx buf limitation */ > + u32 total_tx_buf; > + /* Guest context id, just like guest ip address */ > + u32 guest_cid; > +}; > + > +static struct virtio_vsock *virtio_vsock_get(void) > +{ > + return the_virtio_vsock; > +} > + > +static u32 virtio_transport_get_local_cid(void) > +{ > + struct virtio_vsock *vsock = virtio_vsock_get(); > + > + return vsock->guest_cid; > +} > + > +static int > +virtio_transport_send_pkt(struct vsock_sock *vsk, > + struct virtio_vsock_pkt_info *info) > +{ > + u32 src_cid, src_port, dst_cid, dst_port; > + int ret, in_sg = 0, out_sg = 0; > + struct virtio_transport *trans; > + struct virtio_vsock_pkt *pkt; > + struct virtio_vsock *vsock; > + struct scatterlist hdr, buf, *sgs[2]; > + struct virtqueue *vq; > + u32 pkt_len = info->pkt_len; > + DEFINE_WAIT(wait); > + > + vsock = virtio_vsock_get(); > + if (!vsock) > + return -ENODEV; > + > + src_cid = virtio_transport_get_local_cid(); > + src_port = vsk->local_addr.svm_port; > + if (!info->remote_cid) { > + dst_cid = vsk->remote_addr.svm_cid; > + dst_port = vsk->remote_addr.svm_port; > + } else { > + dst_cid = info->remote_cid; > + dst_port = info->remote_port; > + } > + > + trans = vsk->trans; > + vq = vsock->vqs[VSOCK_VQ_TX]; > + > + if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) > + pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; > + 
pkt_len = virtio_transport_get_credit(trans, pkt_len); > + /* Do not send zero length OP_RW pkt*/ > + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > + return pkt_len; > + > + /* Respect global tx buf limitation */ > + mutex_lock(&vsock->tx_lock); > + while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { > + prepare_to_wait_exclusive(&vsock->queue_wait, &wait, > + TASK_UNINTERRUPTIBLE); > + mutex_unlock(&vsock->tx_lock); > +
Re: [PATCH v3 2/4] VSOCK: Introduce virtio-vsock.ko
On Thu, Dec 10, 2015 at 09:23:25PM +, Alex Bennée wrote: > Stefan Hajnocziwrites: > > > From: Asias He > > > > VM sockets virtio transport implementation. This module runs in guest > > kernel. > > checkpatch warns on a bunch of whitespace/tab issues. Will fix in the next version. > > +struct virtio_vsock { > > + /* Virtio device */ > > + struct virtio_device *vdev; > > + /* Virtio virtqueue */ > > + struct virtqueue *vqs[VSOCK_VQ_MAX]; > > + /* Wait queue for send pkt */ > > + wait_queue_head_t queue_wait; > > + /* Work item to send pkt */ > > + struct work_struct tx_work; > > + /* Work item to recv pkt */ > > + struct work_struct rx_work; > > + /* Mutex to protect send pkt*/ > > + struct mutex tx_lock; > > + /* Mutex to protect recv pkt*/ > > + struct mutex rx_lock; > > Further down I got confused by what lock was what and exactly what was > being protected. If the receive and transmit paths touch separate things > it might be worth re-arranging the structure to make it clearer, eg: > >/* The transmit path is protected by tx_lock */ >struct mutex tx_lock; >struct work_struct tx_work; >.. >.. > >/* The receive path is protected by rx_lock */ >wait_queue_head_t queue_wait; >.. >.. > > Which might make things a little clearer. Then all the redundant > information in the comments can be removed. I don't need to know what > is a Virtio device, virtqueue or wait_queue etc as they are implicit in > the structure name. Thanks, that is a nice idea. > > + mutex_lock(>tx_lock); > > + while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, > > + GFP_KERNEL)) < 0) { > > + prepare_to_wait_exclusive(>queue_wait, , > > + TASK_UNINTERRUPTIBLE); > > + mutex_unlock(>tx_lock); > > + schedule(); > > + mutex_lock(>tx_lock); > > + finish_wait(>queue_wait, ); > > + } > > + virtqueue_kick(vq); > > + mutex_unlock(>tx_lock); > > What are we protecting with tx_lock here? See comments above about > making the lock usage semantics clearer. vq (vsock->vqs[VSOCK_VQ_TX]) is being protected. 
Concurrent calls to virtqueue_add_sgs() are not allowed. > > + > > + return pkt_len; > > +} > > + > > +static struct virtio_transport_pkt_ops virtio_ops = { > > + .send_pkt = virtio_transport_send_pkt, > > +}; > > + > > +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) > > +{ > > + int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; > > + struct virtio_vsock_pkt *pkt; > > + struct scatterlist hdr, buf, *sgs[2]; > > + struct virtqueue *vq; > > + int ret; > > + > > + vq = vsock->vqs[VSOCK_VQ_RX]; > > + > > + do { > > + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); > > + if (!pkt) { > > + pr_debug("%s: fail to allocate pkt\n", __func__); > > + goto out; > > + } > > + > > + /* TODO: use mergeable rx buffer */ > > TODO's should end up in merged code. Will fix in next revision. > > + pkt->buf = kmalloc(buf_len, GFP_KERNEL); > > + if (!pkt->buf) { > > + pr_debug("%s: fail to allocate pkt->buf\n", __func__); > > + goto err; > > + } > > + > > + sg_init_one(, >hdr, sizeof(pkt->hdr)); > > + sgs[0] = > > + > > + sg_init_one(, pkt->buf, buf_len); > > + sgs[1] = > > + ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); > > + if (ret) > > + goto err; > > + vsock->rx_buf_nr++; > > + } while (vq->num_free); > > + if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) > > + vsock->rx_buf_max_nr = vsock->rx_buf_nr; > > +out: > > + virtqueue_kick(vq); > > + return; > > +err: > > + virtqueue_kick(vq); > > + virtio_transport_free_pkt(pkt); > > You could free the pkt memory at the fail site and just have one exit path. Okay, I agree the err label is of marginal use. Let's get rid of it. 
> > > + return; > > +} > > + > > +static void virtio_transport_send_pkt_work(struct work_struct *work) > > +{ > > + struct virtio_vsock *vsock = > > + container_of(work, struct virtio_vsock, tx_work); > > + struct virtio_vsock_pkt *pkt; > > + bool added = false; > > + struct virtqueue *vq; > > + unsigned int len; > > + struct sock *sk; > > + > > + vq = vsock->vqs[VSOCK_VQ_TX]; > > + mutex_lock(>tx_lock); > > + do { > > You can move the declarations of pkt/len into the do block. Okay. > > > + virtqueue_disable_cb(vq); > > + while ((pkt = virtqueue_get_buf(vq, )) != NULL) { > > And the sk declaration here Okay. > > +static void virtio_transport_recv_pkt_work(struct work_struct *work) > > +{ > > + struct virtio_vsock *vsock = > > + container_of(work, struct virtio_vsock, rx_work); > > + struct
[PATCH v3 2/4] VSOCK: Introduce virtio-vsock.ko
From: Asias HeVM sockets virtio transport implementation. This module runs in guest kernel. Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi --- v2: * Fix total_tx_buf accounting * Add virtio_transport global mutex to prevent races --- net/vmw_vsock/virtio_transport.c | 466 +++ 1 file changed, 466 insertions(+) create mode 100644 net/vmw_vsock/virtio_transport.c diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c new file mode 100644 index 000..df65dca --- /dev/null +++ b/net/vmw_vsock/virtio_transport.c @@ -0,0 +1,466 @@ +/* + * virtio transport for vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He + * Stefan Hajnoczi + * + * Some of the code is take from Gerd Hoffmann 's + * early virtio-vsock proof-of-concept bits. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct workqueue_struct *virtio_vsock_workqueue; +static struct virtio_vsock *the_virtio_vsock; +static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock); + +struct virtio_vsock { + /* Virtio device */ + struct virtio_device *vdev; + /* Virtio virtqueue */ + struct virtqueue *vqs[VSOCK_VQ_MAX]; + /* Wait queue for send pkt */ + wait_queue_head_t queue_wait; + /* Work item to send pkt */ + struct work_struct tx_work; + /* Work item to recv pkt */ + struct work_struct rx_work; + /* Mutex to protect send pkt*/ + struct mutex tx_lock; + /* Mutex to protect recv pkt*/ + struct mutex rx_lock; + /* Number of recv buffers */ + int rx_buf_nr; + /* Number of max recv buffers */ + int rx_buf_max_nr; + /* Used for global tx buf limitation */ + u32 total_tx_buf; + /* Guest context id, just like guest ip address */ + u32 guest_cid; +}; + +static struct virtio_vsock *virtio_vsock_get(void) +{ + return the_virtio_vsock; +} + +static u32 
virtio_transport_get_local_cid(void) +{ + struct virtio_vsock *vsock = virtio_vsock_get(); + + return vsock->guest_cid; +} + +static int +virtio_transport_send_pkt(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info) +{ + u32 src_cid, src_port, dst_cid, dst_port; + int ret, in_sg = 0, out_sg = 0; + struct virtio_transport *trans; + struct virtio_vsock_pkt *pkt; + struct virtio_vsock *vsock; + struct scatterlist hdr, buf, *sgs[2]; + struct virtqueue *vq; + u32 pkt_len = info->pkt_len; + DEFINE_WAIT(wait); + + vsock = virtio_vsock_get(); + if (!vsock) + return -ENODEV; + + src_cid = virtio_transport_get_local_cid(); + src_port = vsk->local_addr.svm_port; + if (!info->remote_cid) { + dst_cid = vsk->remote_addr.svm_cid; + dst_port = vsk->remote_addr.svm_port; + } else { + dst_cid = info->remote_cid; + dst_port = info->remote_port; + } + + trans = vsk->trans; + vq = vsock->vqs[VSOCK_VQ_TX]; + + if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) + pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + pkt_len = virtio_transport_get_credit(trans, pkt_len); + /* Do not send zero length OP_RW pkt*/ + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) + return pkt_len; + + /* Respect global tx buf limitation */ + mutex_lock(>tx_lock); + while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { + prepare_to_wait_exclusive(>queue_wait, , + TASK_UNINTERRUPTIBLE); + mutex_unlock(>tx_lock); + schedule(); + mutex_lock(>tx_lock); + finish_wait(>queue_wait, ); + } + vsock->total_tx_buf += pkt_len; + mutex_unlock(>tx_lock); + + pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, +src_cid, src_port, +dst_cid, dst_port); + if (!pkt) { + mutex_lock(>tx_lock); + vsock->total_tx_buf -= pkt_len; + mutex_unlock(>tx_lock); + virtio_transport_put_credit(trans, pkt_len); + return -ENOMEM; + } + + pr_debug("%s:info->pkt_len= %d\n", __func__, info->pkt_len); + + /* Will be released in virtio_transport_send_pkt_work */ + sock_hold(>vsk->sk); + virtio_transport_inc_tx_pkt(pkt); + + /* 
Put pkt in the virtqueue */ + sg_init_one(&hdr, &pkt->hdr,