Re: [RFC][PATCH v3 2/3] Provides multiple submits and asynchronous notifications.
On Fri, Apr 23, 2010 at 03:08:33PM +0800, xiaohui@intel.com wrote: From: Xin Xiaohui xiaohui@intel.com The vhost-net backend now only supports synchronous send/recv operations. The patch provides multiple submits and asynchronous notifications. This is needed for zero-copy case. Signed-off-by: Xin Xiaohui xiaohui@intel.com --- Michael, Can't vhost supply a kiocb completion callback that will handle the list? Yes, thanks. And with it I also remove the vq-receivr finally. Thanks Xiaohui Nice progress. I commented on some minor issues below. Thanks! The updated patch addressed your comments on the minor issues. Thanks! Thanks Xiaohui drivers/vhost/net.c | 236 +++- drivers/vhost/vhost.c | 120 ++--- drivers/vhost/vhost.h | 14 +++ 3 files changed, 314 insertions(+), 56 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 38989d1..18f6c41 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -23,6 +23,8 @@ #include linux/if_arp.h #include linux/if_tun.h #include linux/if_macvlan.h +#include linux/mpassthru.h +#include linux/aio.h #include net/sock.h @@ -48,6 +50,7 @@ struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; + struct kmem_cache *cache; /* Tells us whether we are polling a socket for TX. * We only do this when socket buffer fills up. * Protected by tx vq lock. 
*/ @@ -92,11 +95,138 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock) net-tx_poll_state = VHOST_NET_POLL_STARTED; } +struct kiocb *notify_dequeue(struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + unsigned long flags; + + spin_lock_irqsave(vq-notify_lock, flags); + if (!list_empty(vq-notifier)) { + iocb = list_first_entry(vq-notifier, + struct kiocb, ki_list); + list_del(iocb-ki_list); + } + spin_unlock_irqrestore(vq-notify_lock, flags); + return iocb; +} + +static void handle_iocb(struct kiocb *iocb) +{ + struct vhost_virtqueue *vq = iocb-private; + unsigned long flags; + + spin_lock_irqsave(vq-notify_lock, flags); + list_add_tail(iocb-ki_list, vq-notifier); + spin_unlock_irqrestore(vq-notify_lock, flags); Don't we need to wake up the wq as well? +} + +static int is_async_vq(struct vhost_virtqueue *vq) +{ + return (vq-link_state == VHOST_VQ_LINK_ASYNC); () not needed +} + +static void handle_async_rx_events_notify(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct socket *sock) +{ + struct kiocb *iocb = NULL; + struct vhost_log *vq_log = NULL; + int rx_total_len = 0; + unsigned int head, log, in, out; + int size; + + if (!is_async_vq(vq)) + return; + + if (sock-sk-sk_data_ready) + sock-sk-sk_data_ready(sock-sk, 0); + + vq_log = unlikely(vhost_has_feature(net-dev, VHOST_F_LOG_ALL)) ? + vq-log : NULL; + + while ((iocb = notify_dequeue(vq)) != NULL) { + vhost_add_used_and_signal(net-dev, vq, + iocb-ki_pos, iocb-ki_nbytes); + size = iocb-ki_nbytes; + head = iocb-ki_pos; + rx_total_len += iocb-ki_nbytes; + + if (iocb-ki_dtor) + iocb-ki_dtor(iocb); I am confused by the above. Isn't ki_dtor handle_iocb? Why is it called here? + kmem_cache_free(net-cache, iocb); + + /* when log is enabled, recomputing the log info is needed, + * since these buffers are in async queue, and may not get + * the log info before. + */ + if (unlikely(vq_log)) { + if (!log) log is uninitialized now? 
+ __vhost_get_vq_desc(net-dev, vq, vq-iov, + ARRAY_SIZE(vq-iov), + out, in, vq_log, + log, head); + vhost_log_write(vq, vq_log, log, size); + } + if (unlikely(rx_total_len = VHOST_NET_WEIGHT)) { + vhost_poll_queue(vq-poll); + break; + } + } +} + +static void handle_async_tx_events_notify(struct vhost_net *net, + struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + int tx_total_len = 0; + + if (!is_async_vq(vq)) + return; + + while ((iocb = notify_dequeue(vq)) != NULL) { Please just write this as while (((iocb = notify_dequeue(vq))) above
Re: [RFC][PATCH v3 2/3] Provides multiple submits and asynchronous notifications.
From: Xin Xiaohui xiaohui@intel.com The vhost-net backend now only supports synchronous send/recv operations. The patch provides multiple submits and asynchronous notifications. This is needed for zero-copy case. Signed-off-by: Xin Xiaohui xiaohui@intel.com --- Michael, Can't vhost supply a kiocb completion callback that will handle the list? Yes, thanks. And with it I also remove the vq-receivr finally. Thanks Xiaohui Nice progress. I commented on some minor issues below. Thanks! The updated patch addressed your comments on the minor issues. Thanks! Thanks Xiaohui drivers/vhost/net.c | 236 +++- drivers/vhost/vhost.c | 120 ++--- drivers/vhost/vhost.h | 14 +++ 3 files changed, 314 insertions(+), 56 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 38989d1..18f6c41 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -23,6 +23,8 @@ #include linux/if_arp.h #include linux/if_tun.h #include linux/if_macvlan.h +#include linux/mpassthru.h +#include linux/aio.h #include net/sock.h @@ -48,6 +50,7 @@ struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; + struct kmem_cache *cache; /* Tells us whether we are polling a socket for TX. * We only do this when socket buffer fills up. * Protected by tx vq lock. 
*/ @@ -92,11 +95,138 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock) net-tx_poll_state = VHOST_NET_POLL_STARTED; } +struct kiocb *notify_dequeue(struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + unsigned long flags; + + spin_lock_irqsave(vq-notify_lock, flags); + if (!list_empty(vq-notifier)) { + iocb = list_first_entry(vq-notifier, + struct kiocb, ki_list); + list_del(iocb-ki_list); + } + spin_unlock_irqrestore(vq-notify_lock, flags); + return iocb; +} + +static void handle_iocb(struct kiocb *iocb) +{ + struct vhost_virtqueue *vq = iocb-private; + unsigned long flags; + + spin_lock_irqsave(vq-notify_lock, flags); + list_add_tail(iocb-ki_list, vq-notifier); + spin_unlock_irqrestore(vq-notify_lock, flags); +} + +static int is_async_vq(struct vhost_virtqueue *vq) +{ + return (vq-link_state == VHOST_VQ_LINK_ASYNC); +} + +static void handle_async_rx_events_notify(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct socket *sock) +{ + struct kiocb *iocb = NULL; + struct vhost_log *vq_log = NULL; + int rx_total_len = 0; + unsigned int head, log, in, out; + int size; + + if (!is_async_vq(vq)) + return; + + if (sock-sk-sk_data_ready) + sock-sk-sk_data_ready(sock-sk, 0); + + vq_log = unlikely(vhost_has_feature(net-dev, VHOST_F_LOG_ALL)) ? + vq-log : NULL; + + while ((iocb = notify_dequeue(vq)) != NULL) { + vhost_add_used_and_signal(net-dev, vq, + iocb-ki_pos, iocb-ki_nbytes); + size = iocb-ki_nbytes; + head = iocb-ki_pos; + rx_total_len += iocb-ki_nbytes; + + if (iocb-ki_dtor) + iocb-ki_dtor(iocb); + kmem_cache_free(net-cache, iocb); + + /* when log is enabled, recomputing the log info is needed, +* since these buffers are in async queue, and may not get +* the log info before. 
+*/ + if (unlikely(vq_log)) { + if (!log) + __vhost_get_vq_desc(net-dev, vq, vq-iov, + ARRAY_SIZE(vq-iov), + out, in, vq_log, + log, head); + vhost_log_write(vq, vq_log, log, size); + } + if (unlikely(rx_total_len = VHOST_NET_WEIGHT)) { + vhost_poll_queue(vq-poll); + break; + } + } +} + +static void handle_async_tx_events_notify(struct vhost_net *net, + struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + int tx_total_len = 0; + + if (!is_async_vq(vq)) + return; + + while ((iocb = notify_dequeue(vq)) != NULL) { + vhost_add_used_and_signal(net-dev, vq, + iocb-ki_pos, 0); + tx_total_len += iocb-ki_nbytes; + + if (iocb-ki_dtor) + iocb-ki_dtor(iocb); + + kmem_cache_free(net-cache, iocb); + if
Re: [RFC][PATCH v3 2/3] Provides multiple submits and asynchronous notifications.
From: Xin Xiaohui xiaohui@intel.com The vhost-net backend now only supports synchronous send/recv operations. The patch provides multiple submits and asynchronous notifications. This is needed for zero-copy case. Signed-off-by: Xin Xiaohui xiaohui@intel.com --- Michael, Can't vhost supply a kiocb completion callback that will handle the list? Yes, thanks. And with it I also remove the vq-receiver finally. Thanks Xiaohui drivers/vhost/net.c | 227 +++-- drivers/vhost/vhost.c | 115 ++--- drivers/vhost/vhost.h | 14 +++ 3 files changed, 301 insertions(+), 55 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 22d5fef..4a70f66 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -17,11 +17,13 @@ #include linux/workqueue.h #include linux/rcupdate.h #include linux/file.h +#include linux/aio.h #include linux/net.h #include linux/if_packet.h #include linux/if_arp.h #include linux/if_tun.h +#include linux/mpassthru.h #include net/sock.h @@ -47,6 +49,7 @@ struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; + struct kmem_cache *cache; /* Tells us whether we are polling a socket for TX. * We only do this when socket buffer fills up. * Protected by tx vq lock. 
*/ @@ -91,11 +94,132 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock) net-tx_poll_state = VHOST_NET_POLL_STARTED; } +struct kiocb *notify_dequeue(struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + unsigned long flags; + + spin_lock_irqsave(vq-notify_lock, flags); + if (!list_empty(vq-notifier)) { + iocb = list_first_entry(vq-notifier, + struct kiocb, ki_list); + list_del(iocb-ki_list); + } + spin_unlock_irqrestore(vq-notify_lock, flags); + return iocb; +} + +static void handle_iocb(struct kiocb *iocb) +{ + struct vhost_virtqueue *vq = iocb-private; + unsigned long flags; + +spin_lock_irqsave(vq-notify_lock, flags); +list_add_tail(iocb-ki_list, vq-notifier); +spin_unlock_irqrestore(vq-notify_lock, flags); +} + +static void handle_async_rx_events_notify(struct vhost_net *net, +struct vhost_virtqueue *vq, +struct socket *sock) +{ + struct kiocb *iocb = NULL; + struct vhost_log *vq_log = NULL; + int rx_total_len = 0; + unsigned int head, log, in, out; + int size; + + if (vq-link_state != VHOST_VQ_LINK_ASYNC) + return; + + if (sock-sk-sk_data_ready) + sock-sk-sk_data_ready(sock-sk, 0); + + vq_log = unlikely(vhost_has_feature( + net-dev, VHOST_F_LOG_ALL)) ? vq-log : NULL; + while ((iocb = notify_dequeue(vq)) != NULL) { + vhost_add_used_and_signal(net-dev, vq, + iocb-ki_pos, iocb-ki_nbytes); + log = (int)(iocb-ki_user_data 32); + size = iocb-ki_nbytes; + head = iocb-ki_pos; + rx_total_len += iocb-ki_nbytes; + + if (iocb-ki_dtor) + iocb-ki_dtor(iocb); + kmem_cache_free(net-cache, iocb); + + /* when log is enabled, recomputing the log info is needed, +* since these buffers are in async queue, and may not get +* the log info before. 
+*/ + if (unlikely(vq_log)) { + if (!log) + __vhost_get_vq_desc(net-dev, vq, vq-iov, + ARRAY_SIZE(vq-iov), + out, in, vq_log, + log, head); + vhost_log_write(vq, vq_log, log, size); + } + if (unlikely(rx_total_len = VHOST_NET_WEIGHT)) { + vhost_poll_queue(vq-poll); + break; + } + } +} + +static void handle_async_tx_events_notify(struct vhost_net *net, + struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + int tx_total_len = 0; + + if (vq-link_state != VHOST_VQ_LINK_ASYNC) + return; + + while ((iocb = notify_dequeue(vq)) != NULL) { + vhost_add_used_and_signal(net-dev, vq, + iocb-ki_pos, 0); + tx_total_len += iocb-ki_nbytes; + + if (iocb-ki_dtor) + iocb-ki_dtor(iocb); + + kmem_cache_free(net-cache, iocb); + if (unlikely(tx_total_len = VHOST_NET_WEIGHT)) { +
Re: [RFC][PATCH v3 2/3] Provides multiple submits and asynchronous notifications.
On Thu, Apr 22, 2010 at 04:37:16PM +0800, xiaohui@intel.com wrote: From: Xin Xiaohui xiaohui@intel.com The vhost-net backend now only supports synchronous send/recv operations. The patch provides multiple submits and asynchronous notifications. This is needed for zero-copy case. Signed-off-by: Xin Xiaohui xiaohui@intel.com --- Michael, Can't vhost supply a kiocb completion callback that will handle the list? Yes, thanks. And with it I also remove the vq-receiver finally. Thanks Xiaohui Nice progress. I commented on some minor issues below. Thanks! drivers/vhost/net.c | 227 +++-- drivers/vhost/vhost.c | 115 ++--- drivers/vhost/vhost.h | 14 +++ 3 files changed, 301 insertions(+), 55 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 22d5fef..4a70f66 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -17,11 +17,13 @@ #include linux/workqueue.h #include linux/rcupdate.h #include linux/file.h +#include linux/aio.h #include linux/net.h #include linux/if_packet.h #include linux/if_arp.h #include linux/if_tun.h +#include linux/mpassthru.h #include net/sock.h @@ -47,6 +49,7 @@ struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; + struct kmem_cache *cache; /* Tells us whether we are polling a socket for TX. * We only do this when socket buffer fills up. * Protected by tx vq lock. 
*/ @@ -91,11 +94,132 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock) net-tx_poll_state = VHOST_NET_POLL_STARTED; } +struct kiocb *notify_dequeue(struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + unsigned long flags; + + spin_lock_irqsave(vq-notify_lock, flags); + if (!list_empty(vq-notifier)) { + iocb = list_first_entry(vq-notifier, + struct kiocb, ki_list); + list_del(iocb-ki_list); + } + spin_unlock_irqrestore(vq-notify_lock, flags); + return iocb; +} + +static void handle_iocb(struct kiocb *iocb) +{ + struct vhost_virtqueue *vq = iocb-private; + unsigned long flags; + +spin_lock_irqsave(vq-notify_lock, flags); +list_add_tail(iocb-ki_list, vq-notifier); +spin_unlock_irqrestore(vq-notify_lock, flags); +} + checkpatch.pl does not complain about the above? +static void handle_async_rx_events_notify(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct socket *sock) continuation lines should start to the right of (. +{ + struct kiocb *iocb = NULL; + struct vhost_log *vq_log = NULL; + int rx_total_len = 0; + unsigned int head, log, in, out; + int size; + + if (vq-link_state != VHOST_VQ_LINK_ASYNC) + return; + + if (sock-sk-sk_data_ready) + sock-sk-sk_data_ready(sock-sk, 0); + + vq_log = unlikely(vhost_has_feature( + net-dev, VHOST_F_LOG_ALL)) ? vq-log : NULL; split the above line at ?, continuation being to the left of ( looks ugly. + while ((iocb = notify_dequeue(vq)) != NULL) { + vhost_add_used_and_signal(net-dev, vq, + iocb-ki_pos, iocb-ki_nbytes); + log = (int)(iocb-ki_user_data 32); how about we always do the recompute step, and not encode the log bit in ki_user_data? + size = iocb-ki_nbytes; + head = iocb-ki_pos; + rx_total_len += iocb-ki_nbytes; + + if (iocb-ki_dtor) + iocb-ki_dtor(iocb); + kmem_cache_free(net-cache, iocb); + + /* when log is enabled, recomputing the log info is needed, + * since these buffers are in async queue, and may not get + * the log info before. 
+ */ + if (unlikely(vq_log)) { + if (!log) + __vhost_get_vq_desc(net-dev, vq, vq-iov, + ARRAY_SIZE(vq-iov), + out, in, vq_log, + log, head); + vhost_log_write(vq, vq_log, log, size); + } + if (unlikely(rx_total_len = VHOST_NET_WEIGHT)) { + vhost_poll_queue(vq-poll); + break; + } + } +} + +static void handle_async_tx_events_notify(struct vhost_net *net, + struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + int tx_total_len = 0; + + if (vq-link_state != VHOST_VQ_LINK_ASYNC) + return; + + while
[RFC][PATCH v3 2/3] Provides multiple submits and asynchronous notifications.
From: Xin Xiaohui xiaohui@intel.com The vhost-net backend now only supports synchronous send/recv operations. The patch provides multiple submits and asynchronous notifications. This is needed for zero-copy case. Signed-off-by: Xin Xiaohui xiaohui@intel.com --- drivers/vhost/net.c | 203 +++-- drivers/vhost/vhost.c | 115 drivers/vhost/vhost.h | 15 3 files changed, 278 insertions(+), 55 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 22d5fef..d3fb3fc 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -17,11 +17,13 @@ #include linux/workqueue.h #include linux/rcupdate.h #include linux/file.h +#include linux/aio.h #include linux/net.h #include linux/if_packet.h #include linux/if_arp.h #include linux/if_tun.h +#include linux/mpassthru.h #include net/sock.h @@ -47,6 +49,7 @@ struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; + struct kmem_cache *cache; /* Tells us whether we are polling a socket for TX. * We only do this when socket buffer fills up. * Protected by tx vq lock. */ @@ -91,11 +94,100 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock) net-tx_poll_state = VHOST_NET_POLL_STARTED; } +struct kiocb *notify_dequeue(struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + unsigned long flags; + + spin_lock_irqsave(vq-notify_lock, flags); + if (!list_empty(vq-notifier)) { + iocb = list_first_entry(vq-notifier, + struct kiocb, ki_list); + list_del(iocb-ki_list); + } + spin_unlock_irqrestore(vq-notify_lock, flags); + return iocb; +} + +static void handle_async_rx_events_notify(struct vhost_net *net, + struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + struct vhost_log *vq_log = NULL; + int rx_total_len = 0; + unsigned int head, log, in, out; + int size; + + if (vq-link_state != VHOST_VQ_LINK_ASYNC) + return; + + if (vq-receiver) + vq-receiver(vq); + + vq_log = unlikely(vhost_has_feature( + net-dev, VHOST_F_LOG_ALL)) ? 
vq-log : NULL; + while ((iocb = notify_dequeue(vq)) != NULL) { + vhost_add_used_and_signal(net-dev, vq, + iocb-ki_pos, iocb-ki_nbytes); + log = (int)iocb-ki_user_data; + size = iocb-ki_nbytes; + head = iocb-ki_pos; + rx_total_len += iocb-ki_nbytes; + + if (iocb-ki_dtor) + iocb-ki_dtor(iocb); + kmem_cache_free(net-cache, iocb); + + /* when log is enabled, recomputing the log info is needed, +* since these buffers are in async queue, and may not get +* the log info before. +*/ + if (unlikely(vq_log)) { + if (!log) + __vhost_get_vq_desc(net-dev, vq, vq-iov, + ARRAY_SIZE(vq-iov), + out, in, vq_log, + log, head); + vhost_log_write(vq, vq_log, log, size); + } + if (unlikely(rx_total_len = VHOST_NET_WEIGHT)) { + vhost_poll_queue(vq-poll); + break; + } + } +} + +static void handle_async_tx_events_notify(struct vhost_net *net, + struct vhost_virtqueue *vq) +{ + struct kiocb *iocb = NULL; + int tx_total_len = 0; + + if (vq-link_state != VHOST_VQ_LINK_ASYNC) + return; + + while ((iocb = notify_dequeue(vq)) != NULL) { + vhost_add_used_and_signal(net-dev, vq, + iocb-ki_pos, 0); + tx_total_len += iocb-ki_nbytes; + + if (iocb-ki_dtor) + iocb-ki_dtor(iocb); + + kmem_cache_free(net-cache, iocb); + if (unlikely(tx_total_len = VHOST_NET_WEIGHT)) { + vhost_poll_queue(vq-poll); + break; + } + } +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; + struct kiocb *iocb = NULL; unsigned head, out, in, s; struct msghdr msg = { .msg_name = NULL, @@ -124,6 +216,8 @@ static void handle_tx(struct vhost_net *net)