Re: [PATCH V2 3/3] vhost_net: basic polling support

2016-01-20 Thread Yang Zhang

On 2016/1/21 13:13, Michael S. Tsirkin wrote:

On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote:

On 2016/1/20 22:35, Michael S. Tsirkin wrote:

On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote:

This patch tries to poll for new added tx buffer or socket receive
queue for a while at the end of tx/rx processing. The maximum time
spent on polling were specified through a new kind of vring ioctl.

Signed-off-by: Jason Wang 
---
  drivers/vhost/net.c| 72 ++
  drivers/vhost/vhost.c  | 15 ++
  drivers/vhost/vhost.h  |  1 +
  include/uapi/linux/vhost.h | 11 +++
  4 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9eda69e..ce6da77 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info 
*ubuf, bool success)
rcu_read_unlock_bh();
  }

+static inline unsigned long busy_clock(void)
+{
+   return local_clock() >> 10;
+}
+
+static bool vhost_can_busy_poll(struct vhost_dev *dev,
+   unsigned long endtime)
+{
+   return likely(!need_resched()) &&
+  likely(!time_after(busy_clock(), endtime)) &&
+  likely(!signal_pending(current)) &&
+  !vhost_has_work(dev) &&
+  single_task_running();
+}
+
+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
+   struct vhost_virtqueue *vq,
+   struct iovec iov[], unsigned int iov_size,
+   unsigned int *out_num, unsigned int *in_num)
+{
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+   while (vhost_can_busy_poll(vq->dev, endtime) &&
+  !vhost_vq_more_avail(vq->dev, vq))
+   cpu_relax();
+   preempt_enable();
+   }


Isn't there a way to call all this after vhost_get_vq_desc?
First, this will reduce the good path overhead as you
won't have to play with timers and preemption.

Second, this will reduce the chance of a pagefault on avail ring read.


+
+   return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+out_num, in_num, NULL, NULL);
+}
+
  /* Expects to be always run from workqueue - which acts as
   * read-size critical section for our kind of RCU. */
  static void handle_tx(struct vhost_net *net)
@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net)
  % UIO_MAXIOV == nvq->done_idx))
break;

-   head = vhost_get_vq_desc(vq, vq->iov,
-ARRAY_SIZE(vq->iov),
-&out, &in,
-NULL, NULL);
+   head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
+   ARRAY_SIZE(vq->iov),
+   &out, &in);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk)
return len;
  }

+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk)


Need a hint that it's rx related in the name.


+{
+   struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+   struct vhost_virtqueue *vq = &nvq->vq;
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   mutex_lock(&vq->mutex);


This appears to be called under vq mutex in handle_rx.
So how does this work then?



+   vhost_disable_notify(&net->dev, vq);


This appears to be called after disable notify
in handle_rx - so why disable here again?


+
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+
+   while (vhost_can_busy_poll(&net->dev, endtime) &&
+  skb_queue_empty(&sk->sk_receive_queue) &&
+  !vhost_vq_more_avail(&net->dev, vq))
+   cpu_relax();


This seems to mix in several items.
RX queue is normally not empty. I don't think
we need to poll for that.


I have seen the RX queue is easy to be empty under some extreme conditions
like lots of small packet. So maybe the check is useful here.


It's not useful *here*.
If you have an rx packet but no space in the ring,
this will exit immediately.


Indeed!



It might be useful elsewhere but I doubt it -
if rx ring is out of buffers, you are better off
backing out and giving guest some breathing space.


--
best regards
yang



--
best regards
yang
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH V2 3/3] vhost_net: basic polling support

2016-01-20 Thread Michael S. Tsirkin
On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote:
> On 2016/1/20 22:35, Michael S. Tsirkin wrote:
> >On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote:
> >>This patch tries to poll for new added tx buffer or socket receive
> >>queue for a while at the end of tx/rx processing. The maximum time
> >>spent on polling were specified through a new kind of vring ioctl.
> >>
> >>Signed-off-by: Jason Wang 
> >>---
> >>  drivers/vhost/net.c| 72 
> >> ++
> >>  drivers/vhost/vhost.c  | 15 ++
> >>  drivers/vhost/vhost.h  |  1 +
> >>  include/uapi/linux/vhost.h | 11 +++
> >>  4 files changed, 94 insertions(+), 5 deletions(-)
> >>
> >>diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> >>index 9eda69e..ce6da77 100644
> >>--- a/drivers/vhost/net.c
> >>+++ b/drivers/vhost/net.c
> >>@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info 
> >>*ubuf, bool success)
> >>rcu_read_unlock_bh();
> >>  }
> >>
> >>+static inline unsigned long busy_clock(void)
> >>+{
> >>+   return local_clock() >> 10;
> >>+}
> >>+
> >>+static bool vhost_can_busy_poll(struct vhost_dev *dev,
> >>+   unsigned long endtime)
> >>+{
> >>+   return likely(!need_resched()) &&
> >>+  likely(!time_after(busy_clock(), endtime)) &&
> >>+  likely(!signal_pending(current)) &&
> >>+  !vhost_has_work(dev) &&
> >>+  single_task_running();
> >>+}
> >>+
> >>+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
> >>+   struct vhost_virtqueue *vq,
> >>+   struct iovec iov[], unsigned int iov_size,
> >>+   unsigned int *out_num, unsigned int *in_num)
> >>+{
> >>+   unsigned long uninitialized_var(endtime);
> >>+
> >>+   if (vq->busyloop_timeout) {
> >>+   preempt_disable();
> >>+   endtime = busy_clock() + vq->busyloop_timeout;
> >>+   while (vhost_can_busy_poll(vq->dev, endtime) &&
> >>+  !vhost_vq_more_avail(vq->dev, vq))
> >>+   cpu_relax();
> >>+   preempt_enable();
> >>+   }
> >
> >Isn't there a way to call all this after vhost_get_vq_desc?
> >First, this will reduce the good path overhead as you
> >won't have to play with timers and preemption.
> >
> >Second, this will reduce the chance of a pagefault on avail ring read.
> >
> >>+
> >>+   return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> >>+out_num, in_num, NULL, NULL);
> >>+}
> >>+
> >>  /* Expects to be always run from workqueue - which acts as
> >>   * read-size critical section for our kind of RCU. */
> >>  static void handle_tx(struct vhost_net *net)
> >>@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net)
> >>  % UIO_MAXIOV == nvq->done_idx))
> >>break;
> >>
> >>-   head = vhost_get_vq_desc(vq, vq->iov,
> >>-ARRAY_SIZE(vq->iov),
> >>-&out, &in,
> >>-NULL, NULL);
> >>+   head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
> >>+   ARRAY_SIZE(vq->iov),
> >>+   &out, &in);
> >>/* On error, stop handling until the next kick. */
> >>if (unlikely(head < 0))
> >>break;
> >>@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk)
> >>return len;
> >>  }
> >>
> >>+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk)
> >
> >Need a hint that it's rx related in the name.
> >
> >>+{
> >>+   struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> >>+   struct vhost_virtqueue *vq = &nvq->vq;
> >>+   unsigned long uninitialized_var(endtime);
> >>+
> >>+   if (vq->busyloop_timeout) {
> >>+   mutex_lock(&vq->mutex);
> >
> >This appears to be called under vq mutex in handle_rx.
> >So how does this work then?
> >
> >
> >>+   vhost_disable_notify(&net->dev, vq);
> >
> >This appears to be called after disable notify
> >in handle_rx - so why disable here again?
> >
> >>+
> >>+   preempt_disable();
> >>+   endtime = busy_clock() + vq->busyloop_timeout;
> >>+
> >>+   while (vhost_can_busy_poll(&net->dev, endtime) &&
> >>+  skb_queue_empty(&sk->sk_receive_queue) &&
> >>+  !vhost_vq_more_avail(&net->dev, vq))
> >>+   cpu_relax();
> >
> >This seems to mix in several items.
> >RX queue is normally not empty. I don't think
> >we need to poll for that.
> 
> I have seen the RX queue is easy to be empty under some extreme conditions
> like lots of small packet. So maybe the check is useful here.

It's not useful *here*.
If you have an rx packet but no space in the ring,
this will exit immediately.

It might be useful elsewhere but I doubt it -
if rx ring is out of buffers, you are better off
backing out and giving guest some breathing space.

Re: [PATCH V2 3/3] vhost_net: basic polling support

2016-01-20 Thread Yang Zhang

On 2016/1/20 22:35, Michael S. Tsirkin wrote:

On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote:

This patch tries to poll for new added tx buffer or socket receive
queue for a while at the end of tx/rx processing. The maximum time
spent on polling were specified through a new kind of vring ioctl.

Signed-off-by: Jason Wang 
---
  drivers/vhost/net.c| 72 ++
  drivers/vhost/vhost.c  | 15 ++
  drivers/vhost/vhost.h  |  1 +
  include/uapi/linux/vhost.h | 11 +++
  4 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9eda69e..ce6da77 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info 
*ubuf, bool success)
rcu_read_unlock_bh();
  }

+static inline unsigned long busy_clock(void)
+{
+   return local_clock() >> 10;
+}
+
+static bool vhost_can_busy_poll(struct vhost_dev *dev,
+   unsigned long endtime)
+{
+   return likely(!need_resched()) &&
+  likely(!time_after(busy_clock(), endtime)) &&
+  likely(!signal_pending(current)) &&
+  !vhost_has_work(dev) &&
+  single_task_running();
+}
+
+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
+   struct vhost_virtqueue *vq,
+   struct iovec iov[], unsigned int iov_size,
+   unsigned int *out_num, unsigned int *in_num)
+{
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+   while (vhost_can_busy_poll(vq->dev, endtime) &&
+  !vhost_vq_more_avail(vq->dev, vq))
+   cpu_relax();
+   preempt_enable();
+   }


Isn't there a way to call all this after vhost_get_vq_desc?
First, this will reduce the good path overhead as you
won't have to play with timers and preemption.

Second, this will reduce the chance of a pagefault on avail ring read.


+
+   return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+out_num, in_num, NULL, NULL);
+}
+
  /* Expects to be always run from workqueue - which acts as
   * read-size critical section for our kind of RCU. */
  static void handle_tx(struct vhost_net *net)
@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net)
  % UIO_MAXIOV == nvq->done_idx))
break;

-   head = vhost_get_vq_desc(vq, vq->iov,
-ARRAY_SIZE(vq->iov),
-&out, &in,
-NULL, NULL);
+   head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
+   ARRAY_SIZE(vq->iov),
+   &out, &in);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk)
return len;
  }

+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk)


Need a hint that it's rx related in the name.


+{
+   struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+   struct vhost_virtqueue *vq = &nvq->vq;
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   mutex_lock(&vq->mutex);


This appears to be called under vq mutex in handle_rx.
So how does this work then?



+   vhost_disable_notify(&net->dev, vq);


This appears to be called after disable notify
in handle_rx - so why disable here again?


+
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+
+   while (vhost_can_busy_poll(&net->dev, endtime) &&
+  skb_queue_empty(&sk->sk_receive_queue) &&
+  !vhost_vq_more_avail(&net->dev, vq))
+   cpu_relax();


This seems to mix in several items.
RX queue is normally not empty. I don't think
we need to poll for that.


I have seen the RX queue is easy to be empty under some extreme 
conditions like lots of small packet. So maybe the check is useful here.


--
best regards
yang
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] tools/virtio: use virt_xxx barriers

2016-01-20 Thread Michael S. Tsirkin
Fix build after API changes.

Reported-by: Kamal Mostafa 
Signed-off-by: Michael S. Tsirkin 
---
 tools/virtio/asm/barrier.h| 22 +-
 tools/virtio/linux/compiler.h |  9 +
 tools/virtio/linux/kernel.h   |  1 +
 3 files changed, 23 insertions(+), 9 deletions(-)
 create mode 100644 tools/virtio/linux/compiler.h

diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 26b7926..ba34f9e 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb()   mb()
-# define dma_rmb() barrier()
-# define dma_wmb() barrier()
-# define smp_rmb() barrier()
-# define smp_wmb() barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value)  do { \
+   typeof(var) virt_store_mb_value = (value); \
+   __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+ __ATOMIC_SEQ_CST); \
+   barrier(); \
+} while (0);
 /* Weak barriers should be used. If not - it's a bug */
-# define rmb() abort()
-# define wmb() abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 000..845960e
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+   (*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var
+
+#endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 4db7d56..0338499 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -8,6 +8,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
-- 
MST
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


CISTI'2016 - 11th Iberian Conference on Information Systems and Technologies

2016-01-20 Thread Maria Lemos
-
--- 
CISTI'2016 - 11th Iberian Conference on Information Systems and Technologies
   June 15-18, 2016, Gran Canaria, Canary Islands, Spain
   http://www.aisti.eu/cisti2016/index.php/en


We are pleased to invite the academic and business community to submit their 
papers to CISTI'2016 - 11th Iberian Conference on Information Systems and 
Technologies, to be held in Gran Canaria, Canary Islands, Spain, between the 
15th and 18th of June 2016. Authors are encouraged to submit original 
scientific contributions such as state-of-art reviews and new research 
perspectives, groundbreaking ideas and/or architectures, solutions and/or 
applications for real problems, empirical and/or evaluation works, case 
studies, etc., in conformity with the themes of this Conference.

Four types of papers can be submitted:

Full paper: Finished or consolidated R&D works, to be included in one of the 
Conference themes. These papers are assigned  a 6-page limit.

Short paper: Ongoing works with relevant preliminary results, opened to 
discussion. These papers are assigned a 4-page limit.

Poster paper: Initial work with relevant ideas, opened to discussion. These 
papers are assigned a 2-page limit.

Company paper: Companies' papers  that show practical experience, R & D, tools, 
etc., focused in some topics of the conference. These articles are abstracts 
with a maximum of 2 pages.

Papers submitted for the Scientific Committee’s evaluation must not include any 
information leading to the authors’ identification. Therefore, the authors’ 
names, affiliations and bibliographic references should not be included in the 
early version. This information should only be included in the final version.

Submitted papers must not have been published and must not be under review for 
any other conference and national or international publication.

Papers must comply with the format standard and be written in Portuguese, 
Spanish or English.

All papers will be subjected to a “blind review” by at least two members of the 
Scientific Committee.

Full papers can be accepted as short papers or poster papers only. Similarly, 
short papers can be accepted as poster papers only. In these two cases, the 
authors will be allowed to maintain the original number of pages in the 
proceedings publication.

The authors of accepted poster papers must also build and print a poster to be 
exhibited during the Conference. This poster must follow an A1 or A2 vertical 
format. The Conference includes Work Sessions where these posters are presented 
and orally discussed, with a 5-minute limit per poster.

The authors of accepted full papers will dispose of a 15-minute presentation in 
the Conference Work Session, and approximately 5 minutes of discussion will 
follow each presentation. The authors of accepted short papers and company 
papers will dispose of an 11-minute presentation in the Conference Work 
Session, and approximately 4 minutes of discussion will follow each 
presentation.


THEMES

Submitted papers must follow the main themes proposed for the Conference (the 
topics proposed in each theme constitute a mere framework reference; they are 
not intended as restrictive):

A) OMIS - Organizational Models and Information Systems

B) KMDSS - Knowledge Management and Decision Support Systems

C) SSAAT - Software Systems, Architectures, Applications and Tools

D) CNMPS - Computer Networks, Mobility and Pervasive Systems

E) HCC - Human Centered Computing

F) HIS - Health Informatics

G) ITE - Information Technologies in Education

H) AEC – Architecture and Engineering of Construction


PUBLICATION & INDEXING

To ensure that the contribution (full paper, short paper, poster paper or 
company paper) is published in the Proceedings, at least one of the authors 
must be fully registered by the 11th of April, and the paper must comply with 
the suggested layout and page-limit. Additionally, all recommended 
modifications must be addressed by the authors before they submit the final 
version.

No more than one paper per registration will be published in the Conference 
Proceedings. An extra fee must be paid for publication of additional papers, 
with a maximum of one additional paper per registration.

Published full and short papers will be sent to EI, IEEE XPlore, INSPEC, ISI, 
SCOPUS and Google Scholar. Poster papers and company papers will be sent to 
EBSCO and EI.


IMPORTANT DATES

Paper submission: February 14, 2016

Notification of acceptance: March 27, 2016

Submission of accepted papers: April 10, 2016

Payment of registration, to ensure the inclusion of an accepted paper in the 
conference proceedings: April 8, 2016


Best regards,

CISTI'2016 Team
http://www.aisti.eu/cisti2016/index.php/en

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH V2 2/3] vhost: introduce vhost_vq_more_avail()

2016-01-20 Thread Michael S. Tsirkin
On Tue, Dec 01, 2015 at 02:39:44PM +0800, Jason Wang wrote:
> Signed-off-by: Jason Wang 

Wow new API with no comments anywhere, and no
commit log to say what it's good for.
Want to know what it does and whether
it's correct? You have to read the next patch.

So what is the point of splitting it out?
It's confusing, and in fact it made you
miss a bug.

> ---
>  drivers/vhost/vhost.c | 13 +
>  drivers/vhost/vhost.h |  1 +
>  2 files changed, 14 insertions(+)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 163b365..4f45a03 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -1633,6 +1633,19 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
>  }
>  EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
>  
> +bool vhost_vq_more_avail(struct vhost_dev *dev, struct vhost_virtqueue *vq)
> +{
> + __virtio16 avail_idx;
> + int r;
> +
> + r = __get_user(avail_idx, &vq->avail->idx);
> + if (r)
> + return false;

So the result is that if the page is not present,
you return false (empty ring) and the
caller will busy wait with preempt disabled.
Nasty.

So it should return something that breaks
the loop, and this means it should have
a different name for the return code
to make sense.

Maybe reverse the polarity: vhost_vq_avail_empty?
And add a comment saying we can't be sure ring
is empty so return false.

> +
> + return vhost16_to_cpu(vq, avail_idx) != vq->avail_idx;
> +}
> +EXPORT_SYMBOL_GPL(vhost_vq_more_avail);
> +
>  /* OK, now we need to know about added descriptors. */
>  bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
>  {
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 43284ad..2f3c57c 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -159,6 +159,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, 
> struct vhost_virtqueue *,
>  struct vring_used_elem *heads, unsigned count);
>  void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
>  void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
> +bool vhost_vq_more_avail(struct vhost_dev *, struct vhost_virtqueue *);
>  bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
>  
>  int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
> -- 
> 2.5.0
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 3/3] vhost_net: basic polling support

2016-01-20 Thread Michael S. Tsirkin
On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote:
> This patch tries to poll for new added tx buffer or socket receive
> queue for a while at the end of tx/rx processing. The maximum time
> spent on polling were specified through a new kind of vring ioctl.
> 
> Signed-off-by: Jason Wang 
> ---
>  drivers/vhost/net.c| 72 
> ++
>  drivers/vhost/vhost.c  | 15 ++
>  drivers/vhost/vhost.h  |  1 +
>  include/uapi/linux/vhost.h | 11 +++
>  4 files changed, 94 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 9eda69e..ce6da77 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info 
> *ubuf, bool success)
>   rcu_read_unlock_bh();
>  }
>  
> +static inline unsigned long busy_clock(void)
> +{
> + return local_clock() >> 10;
> +}
> +
> +static bool vhost_can_busy_poll(struct vhost_dev *dev,
> + unsigned long endtime)
> +{
> + return likely(!need_resched()) &&
> +likely(!time_after(busy_clock(), endtime)) &&
> +likely(!signal_pending(current)) &&
> +!vhost_has_work(dev) &&
> +single_task_running();
> +}
> +
> +static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
> + struct vhost_virtqueue *vq,
> + struct iovec iov[], unsigned int iov_size,
> + unsigned int *out_num, unsigned int *in_num)
> +{
> + unsigned long uninitialized_var(endtime);
> +
> + if (vq->busyloop_timeout) {
> + preempt_disable();
> + endtime = busy_clock() + vq->busyloop_timeout;
> + while (vhost_can_busy_poll(vq->dev, endtime) &&
> +!vhost_vq_more_avail(vq->dev, vq))
> + cpu_relax();
> + preempt_enable();
> + }

Isn't there a way to call all this after vhost_get_vq_desc?
First, this will reduce the good path overhead as you
won't have to play with timers and preemption.

Second, this will reduce the chance of a pagefault on avail ring read.

> +
> + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> +  out_num, in_num, NULL, NULL);
> +}
> +
>  /* Expects to be always run from workqueue - which acts as
>   * read-size critical section for our kind of RCU. */
>  static void handle_tx(struct vhost_net *net)
> @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net)
> % UIO_MAXIOV == nvq->done_idx))
>   break;
>  
> - head = vhost_get_vq_desc(vq, vq->iov,
> -  ARRAY_SIZE(vq->iov),
> -  &out, &in,
> -  NULL, NULL);
> + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
> + ARRAY_SIZE(vq->iov),
> + &out, &in);
>   /* On error, stop handling until the next kick. */
>   if (unlikely(head < 0))
>   break;
> @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk)
>   return len;
>  }
>  
> +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk)

Need a hint that it's rx related in the name.

> +{
> + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> + struct vhost_virtqueue *vq = &nvq->vq;
> + unsigned long uninitialized_var(endtime);
> +
> + if (vq->busyloop_timeout) {
> + mutex_lock(&vq->mutex);

This appears to be called under vq mutex in handle_rx.
So how does this work then?


> + vhost_disable_notify(&net->dev, vq);

This appears to be called after disable notify
in handle_rx - so why disable here again?

> +
> + preempt_disable();
> + endtime = busy_clock() + vq->busyloop_timeout;
> +
> + while (vhost_can_busy_poll(&net->dev, endtime) &&
> +skb_queue_empty(&sk->sk_receive_queue) &&
> +!vhost_vq_more_avail(&net->dev, vq))
> + cpu_relax();

This seems to mix in several items.
RX queue is normally not empty. I don't think
we need to poll for that.
So IMHO we only need to poll for sk_receive_queue really.

> +
> + preempt_enable();
> +
> + if (vhost_enable_notify(&net->dev, vq))
> + vhost_poll_queue(&vq->poll);

But vhost_enable_notify returns true on queue not empty.
So in fact this will requeue on good path -
does not make sense to me.

> + mutex_unlock(&vq->mutex);
> + }
> +

Same comment as for get vq desc here: don't slow
down the good path.

> + return peek_head_len(sk);
> +}
> +
>  /* This is a multi-buffer version of vhost_get_desc, that works if
>   *   vq has