[PATCH net-next RFC 2/2] vhost_net: basic polling support

2015-10-21 Thread Jason Wang
This patch tries to poll for newly added tx buffers for a while at the
end of tx processing. The maximum time spent on polling is limited
through a module parameter. To avoid blocking rx, the loop ends when
there is other work queued on vhost, so in fact the socket receive
queue is also polled.

busyloop_timeout = 50 gives us the following improvement on a TCP_RR test:

size/session/+thu%/+normalize%
1/ 1/   +5%/  -20%
1/50/  +17%/   +3%

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9eda69e..bbb522a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -31,7 +31,9 @@
 #include "vhost.h"
 
 static int experimental_zcopytx = 1;
+static int busyloop_timeout = 50;
 module_param(experimental_zcopytx, int, 0444);
+module_param(busyloop_timeout, int, 0444);
 MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
   " 1 -Enable; 0 - Disable");
 
@@ -287,12 +289,23 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_unlock_bh();
 }
 
+static bool tx_can_busy_poll(struct vhost_dev *dev,
+unsigned long endtime)
+{
+   unsigned long now = local_clock() >> 10;
+
+   return busyloop_timeout && !need_resched() &&
+  !time_after(now, endtime) && !vhost_has_work(dev) &&
+  single_task_running();
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_tx(struct vhost_net *net)
 {
	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
	struct vhost_virtqueue *vq = &nvq->vq;
+   unsigned long endtime;
unsigned out, in;
int head;
struct msghdr msg = {
@@ -331,6 +344,8 @@ static void handle_tx(struct vhost_net *net)
  % UIO_MAXIOV == nvq->done_idx))
break;
 
	endtime = (local_clock() >> 10) + busyloop_timeout;
+again:
head = vhost_get_vq_desc(vq, vq->iov,
 ARRAY_SIZE(vq->iov),
 , ,
@@ -340,6 +355,10 @@ static void handle_tx(struct vhost_net *net)
break;
/* Nothing new?  Wait for eventfd to tell us they refilled. */
if (head == vq->num) {
+   if (tx_can_busy_poll(vq->dev, endtime)) {
+   cpu_relax();
+   goto again;
+   }
		if (unlikely(vhost_enable_notify(&net->dev, vq))) {
			vhost_disable_notify(&net->dev, vq);
continue;
-- 
1.8.3.1
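
A note for readers skimming the diff: local_clock() returns nanoseconds,
so the ">> 10" scales it to roughly microseconds, making busyloop_timeout
a deadline of about 50us by default. Hand-simplified, the new tx path has
the following shape (a sketch of the hunks above with the goto rewritten
as a loop; it is not the literal patch text):

	/* Busy-poll the avail ring for up to busyloop_timeout "ticks"
	 * (~1us each) instead of immediately re-enabling guest kicks. */
	endtime = (local_clock() >> 10) + busyloop_timeout;
	for (;;) {
		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head != vq->num)
			break;		/* got a buffer (or an error) */
		if (!tx_can_busy_poll(vq->dev, endtime))
			break;		/* deadline hit, other work queued,
					 * or a reschedule is needed */
		cpu_relax();		/* polite pause between polls */
	}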

[PATCH net-next RFC 1/2] vhost: introduce vhost_has_work()

2015-10-21 Thread Jason Wang
This patch introduces a helper which can give a hint about whether or not
there's work queued in the work list.

Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 6 ++++++
 drivers/vhost/vhost.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index eec2f11..d42d11e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -245,6 +245,12 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 }
 EXPORT_SYMBOL_GPL(vhost_work_queue);
 
+bool vhost_has_work(struct vhost_dev *dev)
+{
	return !list_empty(&dev->work_list);
+}
+EXPORT_SYMBOL_GPL(vhost_has_work);
+
 void vhost_poll_queue(struct vhost_poll *poll)
 {
	vhost_work_queue(poll->dev, &poll->work);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 4772862..ea0327d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -37,6 +37,7 @@ struct vhost_poll {
 
 void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
+bool vhost_has_work(struct vhost_dev *dev);
 
 void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
 unsigned long mask, struct vhost_dev *dev);
-- 
1.8.3.1
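
The helper is intentionally lockless: it only reads the emptiness of the
work list as a hint, so the result can be stale by the time the caller
acts on it. That is fine for the consumer in patch 2/2, which uses it as
one of several exit conditions that are re-checked on every iteration of
the busy-poll loop:

	/* From patch 2/2: stop polling as soon as queued vhost work
	 * (e.g. rx) needs the worker thread; staleness only costs one
	 * extra spin. */
	static bool tx_can_busy_poll(struct vhost_dev *dev,
				     unsigned long endtime)
	{
		unsigned long now = local_clock() >> 10;

		return busyloop_timeout && !need_resched() &&
		       !time_after(now, endtime) && !vhost_has_work(dev) &&
		       single_task_running();
	}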

[PATCH] VSOCK: sock_put wasn't safe to call in interrupt context

2015-10-21 Thread Jorgen Hansen
In the vsock vmci_transport driver, sock_put wasn't safe to call
in interrupt context, since it may call the vsock destructor
which in turn calls several functions that should only be called
from process context. This change defers the calling of these
functions to a worker thread. All these functions deal with
deallocation of resources related to the transport itself.

Furthermore, an unused callback was removed to simplify the
cleanup.

Multiple customers have been hitting this issue when using
VMware tools on vSphere 2015.

Also added a version to the vmci transport module (starting from
1.0.2.0-k, since up until now this module appears to have been sharing
its version with vsock, which is currently at 1.0.1.0-k).

Reviewed-by: Aditya Asarwade 
Reviewed-by: Thomas Hellstrom 
Signed-off-by: Jorgen Hansen 
---
 net/vmw_vsock/vmci_transport.c |  173 +++-
 net/vmw_vsock/vmci_transport.h |4 +-
 2 files changed, 86 insertions(+), 91 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 1f63daf..5243ce2 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -40,13 +40,11 @@
 
 static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
 static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
-static void vmci_transport_peer_attach_cb(u32 sub_id,
- const struct vmci_event_data *ed,
- void *client_data);
 static void vmci_transport_peer_detach_cb(u32 sub_id,
  const struct vmci_event_data *ed,
  void *client_data);
 static void vmci_transport_recv_pkt_work(struct work_struct *work);
+static void vmci_transport_cleanup(struct work_struct *work);
 static int vmci_transport_recv_listen(struct sock *sk,
  struct vmci_transport_packet *pkt);
 static int vmci_transport_recv_connecting_server(
@@ -75,6 +73,10 @@ struct vmci_transport_recv_pkt_info {
struct vmci_transport_packet pkt;
 };
 
+static LIST_HEAD(vmci_transport_cleanup_list);
+static DEFINE_SPINLOCK(vmci_transport_cleanup_lock);
+static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup);
+
 static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
   VMCI_INVALID_ID };
 static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
@@ -791,44 +793,6 @@ out:
return err;
 }
 
-static void vmci_transport_peer_attach_cb(u32 sub_id,
- const struct vmci_event_data *e_data,
- void *client_data)
-{
-   struct sock *sk = client_data;
-   const struct vmci_event_payload_qp *e_payload;
-   struct vsock_sock *vsk;
-
-   e_payload = vmci_event_data_const_payload(e_data);
-
-   vsk = vsock_sk(sk);
-
-   /* We don't ask for delayed CBs when we subscribe to this event (we
-* pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
-* guarantees in that case about what context we might be running in,
-* so it could be BH or process, blockable or non-blockable.  So we
-* need to account for all possible contexts here.
-*/
-   local_bh_disable();
-   bh_lock_sock(sk);
-
-   /* XXX This is lame, we should provide a way to lookup sockets by
-* qp_handle.
-*/
-   if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
-e_payload->handle)) {
-   /* XXX This doesn't do anything, but in the future we may want
-* to set a flag here to verify the attach really did occur and
-* we weren't just sent a datagram claiming it was.
-*/
-   goto out;
-   }
-
-out:
-   bh_unlock_sock(sk);
-   local_bh_enable();
-}
-
 static void vmci_transport_handle_detach(struct sock *sk)
 {
struct vsock_sock *vsk;
@@ -871,28 +835,38 @@ static void vmci_transport_peer_detach_cb(u32 sub_id,
  const struct vmci_event_data *e_data,
  void *client_data)
 {
-   struct sock *sk = client_data;
+   struct vmci_transport *trans = client_data;
const struct vmci_event_payload_qp *e_payload;
-   struct vsock_sock *vsk;
 
e_payload = vmci_event_data_const_payload(e_data);
-   vsk = vsock_sk(sk);
-   if (vmci_handle_is_invalid(e_payload->handle))
-   return;
-
-   /* Same rules for locking as for peer_attach_cb(). */
-   local_bh_disable();
-   bh_lock_sock(sk);
 
/* XXX This is lame, we should provide a way to lookup sockets by
 * qp_handle.
 */
-   if
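
(The archived diff is truncated at this point.) The cleanup machinery
declared near the top of the diff follows the usual list + spinlock +
deferred-work pattern: atomic-context callers park the transport on
vmci_transport_cleanup_list and schedule vmci_transport_cleanup_work,
and the worker releases everything from process context. A sketch of
that shape follows; the list field "elem" and the use of
vmci_transport_destruct() as the release helper are assumptions standing
in for the elided part of the patch:

	static void vmci_transport_cleanup(struct work_struct *work)
	{
		struct vmci_transport *trans, *tmp;
		LIST_HEAD(pending);

		/* Detach the whole pending list under the lock, then do
		 * the potentially sleeping destruction outside of it. */
		spin_lock_bh(&vmci_transport_cleanup_lock);
		list_replace_init(&vmci_transport_cleanup_list, &pending);
		spin_unlock_bh(&vmci_transport_cleanup_lock);

		list_for_each_entry_safe(trans, tmp, &pending, elem) {
			list_del(&trans->elem);
			vmci_transport_destruct(trans); /* process context */
		}
	}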