Re: [PATCH V2 3/3] vhost_net: basic polling support
On 2016/1/21 13:13, Michael S. Tsirkin wrote: On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote: On 2016/1/20 22:35, Michael S. Tsirkin wrote: On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: This patch tries to poll for new added tx buffer or socket receive queue for a while at the end of tx/rx processing. The maximum time spent on polling were specified through a new kind of vring ioctl. Signed-off-by: Jason Wang --- drivers/vhost/net.c| 72 ++ drivers/vhost/vhost.c | 15 ++ drivers/vhost/vhost.h | 1 + include/uapi/linux/vhost.h | 11 +++ 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e..ce6da77 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev) && + single_task_running(); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + !vhost_vq_more_avail(vq->dev, vq)) + cpu_relax(); + preempt_enable(); + } Isn't there a way to call all this after vhost_get_vq_desc? First, this will reduce the good path overhead as you won't have to play with timers and preemption. Second, this will reduce the chance of a pagefault on avail ring read. + + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), +out_num, in_num, NULL, NULL); +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, -ARRAY_SIZE(vq->iov), -&out, &in, -NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) Need a hint that it's rx related in the name. +{ + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + mutex_lock(&vq->mutex); This appears to be called under vq mutex in handle_rx. So how does this work then? + vhost_disable_notify(&net->dev, vq); This appears to be called after disable notify in handle_rx - so why disable here again? + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(&net->dev, endtime) && + skb_queue_empty(&sk->sk_receive_queue) && + !vhost_vq_more_avail(&net->dev, vq)) + cpu_relax(); This seems to mix in several items. RX queue is normally not empty. I don't think we need to poll for that. I have seen the RX queue is easy to be empty under some extreme conditions like lots of small packet. So maybe the check is useful here. It's not useful *here*. If you have an rx packet but no space in the ring, this will exit immediately. Indeed! It might be useful elsewhere but I doubt it - if rx ring is out of buffers, you are better off backing out and giving guest some breathing space. -- best regards yang -- best regards yang ___ Virtualization mailin
Re: [PATCH V2 3/3] vhost_net: basic polling support
On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote: > On 2016/1/20 22:35, Michael S. Tsirkin wrote: > >On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: > >>This patch tries to poll for new added tx buffer or socket receive > >>queue for a while at the end of tx/rx processing. The maximum time > >>spent on polling were specified through a new kind of vring ioctl. > >> > >>Signed-off-by: Jason Wang > >>--- > >> drivers/vhost/net.c| 72 > >> ++ > >> drivers/vhost/vhost.c | 15 ++ > >> drivers/vhost/vhost.h | 1 + > >> include/uapi/linux/vhost.h | 11 +++ > >> 4 files changed, 94 insertions(+), 5 deletions(-) > >> > >>diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > >>index 9eda69e..ce6da77 100644 > >>--- a/drivers/vhost/net.c > >>+++ b/drivers/vhost/net.c > >>@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info > >>*ubuf, bool success) > >>rcu_read_unlock_bh(); > >> } > >> > >>+static inline unsigned long busy_clock(void) > >>+{ > >>+ return local_clock() >> 10; > >>+} > >>+ > >>+static bool vhost_can_busy_poll(struct vhost_dev *dev, > >>+ unsigned long endtime) > >>+{ > >>+ return likely(!need_resched()) && > >>+ likely(!time_after(busy_clock(), endtime)) && > >>+ likely(!signal_pending(current)) && > >>+ !vhost_has_work(dev) && > >>+ single_task_running(); > >>+} > >>+ > >>+static int vhost_net_tx_get_vq_desc(struct vhost_net *net, > >>+ struct vhost_virtqueue *vq, > >>+ struct iovec iov[], unsigned int iov_size, > >>+ unsigned int *out_num, unsigned int *in_num) > >>+{ > >>+ unsigned long uninitialized_var(endtime); > >>+ > >>+ if (vq->busyloop_timeout) { > >>+ preempt_disable(); > >>+ endtime = busy_clock() + vq->busyloop_timeout; > >>+ while (vhost_can_busy_poll(vq->dev, endtime) && > >>+ !vhost_vq_more_avail(vq->dev, vq)) > >>+ cpu_relax(); > >>+ preempt_enable(); > >>+ } > > > >Isn't there a way to call all this after vhost_get_vq_desc? > >First, this will reduce the good path overhead as you > >won't have to play with timers and preemption. > > > >Second, this will reduce the chance of a pagefault on avail ring read. > > > >>+ > >>+ return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), > >>+out_num, in_num, NULL, NULL); > >>+} > >>+ > >> /* Expects to be always run from workqueue - which acts as > >> * read-size critical section for our kind of RCU. */ > >> static void handle_tx(struct vhost_net *net) > >>@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) > >> % UIO_MAXIOV == nvq->done_idx)) > >>break; > >> > >>- head = vhost_get_vq_desc(vq, vq->iov, > >>-ARRAY_SIZE(vq->iov), > >>-&out, &in, > >>-NULL, NULL); > >>+ head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, > >>+ ARRAY_SIZE(vq->iov), > >>+ &out, &in); > >>/* On error, stop handling until the next kick. */ > >>if (unlikely(head < 0)) > >>break; > >>@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) > >>return len; > >> } > >> > >>+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) > > > >Need a hint that it's rx related in the name. > > > >>+{ > >>+ struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; > >>+ struct vhost_virtqueue *vq = &nvq->vq; > >>+ unsigned long uninitialized_var(endtime); > >>+ > >>+ if (vq->busyloop_timeout) { > >>+ mutex_lock(&vq->mutex); > > > >This appears to be called under vq mutex in handle_rx. > >So how does this work then? > > > > > >>+ vhost_disable_notify(&net->dev, vq); > > > >This appears to be called after disable notify > >in handle_rx - so why disable here again? > > > >>+ > >>+ preempt_disable(); > >>+ endtime = busy_clock() + vq->busyloop_timeout; > >>+ > >>+ while (vhost_can_busy_poll(&net->dev, endtime) && > >>+ skb_queue_empty(&sk->sk_receive_queue) && > >>+ !vhost_vq_more_avail(&net->dev, vq)) > >>+ cpu_relax(); > > > >This seems to mix in several items. > >RX queue is normally not empty. I don't think > >we need to poll for that. > > I have seen the RX queue is easy to be empty under some extreme conditions > like lots of small packet. So maybe the check is useful here. It's not useful *here*. If you have an rx packet but no space in the ring, this will exit immediately. It might be useful elsewhere but I doubt it - if rx ring is ou
Re: [PATCH V2 3/3] vhost_net: basic polling support
On 2016/1/20 22:35, Michael S. Tsirkin wrote: On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: This patch tries to poll for new added tx buffer or socket receive queue for a while at the end of tx/rx processing. The maximum time spent on polling were specified through a new kind of vring ioctl. Signed-off-by: Jason Wang --- drivers/vhost/net.c| 72 ++ drivers/vhost/vhost.c | 15 ++ drivers/vhost/vhost.h | 1 + include/uapi/linux/vhost.h | 11 +++ 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e..ce6da77 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev) && + single_task_running(); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + !vhost_vq_more_avail(vq->dev, vq)) + cpu_relax(); + preempt_enable(); + } Isn't there a way to call all this after vhost_get_vq_desc? First, this will reduce the good path overhead as you won't have to play with timers and preemption. Second, this will reduce the chance of a pagefault on avail ring read. + + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), +out_num, in_num, NULL, NULL); +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, -ARRAY_SIZE(vq->iov), -&out, &in, -NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) Need a hint that it's rx related in the name. +{ + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + mutex_lock(&vq->mutex); This appears to be called under vq mutex in handle_rx. So how does this work then? + vhost_disable_notify(&net->dev, vq); This appears to be called after disable notify in handle_rx - so why disable here again? + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(&net->dev, endtime) && + skb_queue_empty(&sk->sk_receive_queue) && + !vhost_vq_more_avail(&net->dev, vq)) + cpu_relax(); This seems to mix in several items. RX queue is normally not empty. I don't think we need to poll for that. I have seen the RX queue is easy to be empty under some extreme conditions like lots of small packet. So maybe the check is useful here. -- best regards yang ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH] tools/virtio: use virt_xxx barriers
Fix build after API changes. Reported-by: Kamal Mostafa Signed-off-by: Michael S. Tsirkin --- tools/virtio/asm/barrier.h| 22 +- tools/virtio/linux/compiler.h | 9 + tools/virtio/linux/kernel.h | 1 + 3 files changed, 23 insertions(+), 9 deletions(-) create mode 100644 tools/virtio/linux/compiler.h diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index 26b7926..ba34f9e 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -1,15 +1,19 @@ #if defined(__i386__) || defined(__x86_64__) #define barrier() asm volatile("" ::: "memory") -#define mb() __sync_synchronize() - -#define smp_mb() mb() -# define dma_rmb() barrier() -# define dma_wmb() barrier() -# define smp_rmb() barrier() -# define smp_wmb() barrier() +#define virt_mb() __sync_synchronize() +#define virt_rmb() barrier() +#define virt_wmb() barrier() +/* Atomic store should be enough, but gcc generates worse code in that case. */ +#define virt_store_mb(var, value) do { \ + typeof(var) virt_store_mb_value = (value); \ + __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \ + __ATOMIC_SEQ_CST); \ + barrier(); \ +} while (0); /* Weak barriers should be used. If not - it's a bug */ -# define rmb() abort() -# define wmb() abort() +# define mb() abort() +# define rmb() abort() +# define wmb() abort() #else #error Please fill in barrier macros #endif diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h new file mode 100644 index 000..845960e --- /dev/null +++ b/tools/virtio/linux/compiler.h @@ -0,0 +1,9 @@ +#ifndef LINUX_COMPILER_H +#define LINUX_COMPILER_H + +#define WRITE_ONCE(var, val) \ + (*((volatile typeof(val) *)(&(var))) = (val)) + +#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var + +#endif diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 4db7d56..0338499 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
CISTI'2016 - 11th Iberian Conference on Information Systems and Technologies
- --- CISTI'2016 - 11th Iberian Conference on Information Systems and Technologies June 15 18, 2016, Gran Canaria, Canary Islands, Spain http://www.aisti.eu/cisti2016/index.php/en We are pleased to invite the academic and business community to submit their papers to CISTI'2016 - 11th Iberian Conference on Information Systems and Technologies, to be held in Gran Canaria, Canary Islands, Spain, between the 15th and 18th of June 2016. Authors are encouraged to submit original scientific contributions such as state-of-art reviews and new research perspectives, groundbreaking ideas and/or architectures, solutions and/or applications for real problems, empirical and/or evaluation works, case studies, etc., in conformity with the themes of this Conference. Four types of papers can be submitted: Full paper: Finished or consolidated R&D works, to be included in one of the Conference themes. These papers are assigned a 6-page limit. Short paper: Ongoing works with relevant preliminary results, opened to discussion. These papers are assigned a 4-page limit. Poster paper: Initial work with relevant ideas, opened to discussion. These papers are assigned a 2-page limit. Company paper: Companies' papers that show practical experience, R & D, tools, etc., focused in some topics of the conference. These articles are abstracts with a maximum of 2 pages. Papers submitted for the Scientific Committees evaluation must not include any information leading to the authors identification. Therefore, the authors names, affiliations and bibliographic references should not be included in the early version. This information should only be included in the final version. Submitted papers must not have been published and must not be under review for any other conference and national or international publication. Papers must comply with the format standard and be written in Portuguese, Spanish or English. All papers will be subjected to a blind review by at least two members of the Scientific Committee. Full papers can be accepted as short papers or poster papers only. Similarly, short papers can be accepted as poster papers only. In these two cases, the authors will be allowed to maintain the original number of pages in the proceedings publication. The authors of accepted poster papers must also build and print a poster to be exhibited during the Conference. This poster must follow an A1 or A2 vertical format. The Conference includes Work Sessions where these posters are presented and orally discussed, with a 5-minute limit per poster. The authors of accepted full papers will dispose of a 15-minute presentation in the Conference Work Session, and approximately 5 minutes of discussion will follow each presentation. The authors of accepted short papers and company papers will dispose of an 11-minute presentation in the Conference Work Session, and approximately 4 minutes of discussion will follow each presentation. THEMES Submitted papers must follow the main themes proposed for the Conference (the topics proposed in each theme constitute a mere framework reference; they are not intended as restrictive): A) OMIS - Organizational Models and Information Systems B) KMDSS - Knowledge Management and Decision Support Systems C) SSAAT - Software Systems, Architectures, Applications and Tools D) CNMPS - Computer Networks, Mobility and Pervasive Systems E) HCC - Human Centered Computing F) HIS - Health Informatics G) ITE - Information Technologies in Education H) AEC Architecture and Engineering of Construction PUBLICATION & INDEXING To ensure that the contribution (full paper, short paper, poster paper or company paper) is published in the Proceedings, at least one of the authors must be fully registered by the 11th of April, and the paper must comply with the suggested layout and page-limit. Additionally, all recommended modifications must be addressed by the authors before they submit the final version. No more than one paper per registration will be published in the Conference Proceedings. An extra fee must be paid for publication of additional papers, with a maximum of one additional paper per registration. Published full and short papers will be sent to EI, IEEE XPlore, INSPEC, ISI, SCOPUS and Google Scholar. Poster papers and company papers will be sent to EBSCO and EI. IMPORTANT DATES Paper submission: February 14, 2016 Notification of acceptance: March 27, 2016 Submission of accepted papers: April 10, 2016 Payment of registration, to ensure the inclusion of an accepted paper in the conference proceedings: April 8, 2016 Best regards, CISTI'2016 Team http://www.aisti.eu/cisti2016/index.php/en ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://l
Re: [PATCH V2 2/3] vhost: introduce vhost_vq_more_avail()
On Tue, Dec 01, 2015 at 02:39:44PM +0800, Jason Wang wrote: > Signed-off-by: Jason Wang Wow new API with no comments anywhere, and no commit log to say what it's good for. Want to know what it does and whether it's correct? You have to read the next patch. So what is the point of splitting it out? It's confusing, and in fact it made you miss a bug. > --- > drivers/vhost/vhost.c | 13 + > drivers/vhost/vhost.h | 1 + > 2 files changed, 14 insertions(+) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 163b365..4f45a03 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -1633,6 +1633,19 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, > } > EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); > > +bool vhost_vq_more_avail(struct vhost_dev *dev, struct vhost_virtqueue *vq) > +{ > + __virtio16 avail_idx; > + int r; > + > + r = __get_user(avail_idx, &vq->avail->idx); > + if (r) > + return false; So the result is that if the page is not present, you return false (empty ring) and the caller will busy wait with preempt disabled. Nasty. So it should return something that breaks the loop, and this means it should have a different name for the return code to make sense. Maybe reverse the polarity: vhost_vq_avail_empty? And add a comment saying we can't be sure ring is empty so return false. > + > + return vhost16_to_cpu(vq, avail_idx) != vq->avail_idx; > +} > +EXPORT_SYMBOL_GPL(vhost_vq_more_avail); > + > /* OK, now we need to know about added descriptors. */ > bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) > { > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index 43284ad..2f3c57c 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -159,6 +159,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, > struct vhost_virtqueue *, > struct vring_used_elem *heads, unsigned count); > void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); > void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); > +bool vhost_vq_more_avail(struct vhost_dev *, struct vhost_virtqueue *); > bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); > > int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, > -- > 2.5.0 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH V2 3/3] vhost_net: basic polling support
On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: > This patch tries to poll for new added tx buffer or socket receive > queue for a while at the end of tx/rx processing. The maximum time > spent on polling were specified through a new kind of vring ioctl. > > Signed-off-by: Jason Wang > --- > drivers/vhost/net.c| 72 > ++ > drivers/vhost/vhost.c | 15 ++ > drivers/vhost/vhost.h | 1 + > include/uapi/linux/vhost.h | 11 +++ > 4 files changed, 94 insertions(+), 5 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 9eda69e..ce6da77 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info > *ubuf, bool success) > rcu_read_unlock_bh(); > } > > +static inline unsigned long busy_clock(void) > +{ > + return local_clock() >> 10; > +} > + > +static bool vhost_can_busy_poll(struct vhost_dev *dev, > + unsigned long endtime) > +{ > + return likely(!need_resched()) && > +likely(!time_after(busy_clock(), endtime)) && > +likely(!signal_pending(current)) && > +!vhost_has_work(dev) && > +single_task_running(); > +} > + > +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, > + struct vhost_virtqueue *vq, > + struct iovec iov[], unsigned int iov_size, > + unsigned int *out_num, unsigned int *in_num) > +{ > + unsigned long uninitialized_var(endtime); > + > + if (vq->busyloop_timeout) { > + preempt_disable(); > + endtime = busy_clock() + vq->busyloop_timeout; > + while (vhost_can_busy_poll(vq->dev, endtime) && > +!vhost_vq_more_avail(vq->dev, vq)) > + cpu_relax(); > + preempt_enable(); > + } Isn't there a way to call all this after vhost_get_vq_desc? First, this will reduce the good path overhead as you won't have to play with timers and preemption. Second, this will reduce the chance of a pagefault on avail ring read. > + > + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), > + out_num, in_num, NULL, NULL); > +} > + > /* Expects to be always run from workqueue - which acts as > * read-size critical section for our kind of RCU. */ > static void handle_tx(struct vhost_net *net) > @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) > % UIO_MAXIOV == nvq->done_idx)) > break; > > - head = vhost_get_vq_desc(vq, vq->iov, > - ARRAY_SIZE(vq->iov), > - &out, &in, > - NULL, NULL); > + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, > + ARRAY_SIZE(vq->iov), > + &out, &in); > /* On error, stop handling until the next kick. */ > if (unlikely(head < 0)) > break; > @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) > return len; > } > > +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) Need a hint that it's rx related in the name. > +{ > + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; > + struct vhost_virtqueue *vq = &nvq->vq; > + unsigned long uninitialized_var(endtime); > + > + if (vq->busyloop_timeout) { > + mutex_lock(&vq->mutex); This appears to be called under vq mutex in handle_rx. So how does this work then? > + vhost_disable_notify(&net->dev, vq); This appears to be called after disable notify in handle_rx - so why disable here again? > + > + preempt_disable(); > + endtime = busy_clock() + vq->busyloop_timeout; > + > + while (vhost_can_busy_poll(&net->dev, endtime) && > +skb_queue_empty(&sk->sk_receive_queue) && > +!vhost_vq_more_avail(&net->dev, vq)) > + cpu_relax(); This seems to mix in several items. RX queue is normally not empty. I don't think we need to poll for that. So IMHO we only need to poll for sk_receive_queue really. > + > + preempt_enable(); > + > + if (vhost_enable_notify(&net->dev, vq)) > + vhost_poll_queue(&vq->poll); But vhost_enable_notify returns true on queue not empty. So in fact this will requeue on good path - does not make sense to me. > + mutex_unlock(&vq->mutex); > + } > + Same comment as for get vq desc here: don't slow down the good path. > + return peek_head_len(sk); > +} > + > /* This is a multi-buffer version of vhost_get_desc, that works if > * vq has