Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-07-17 Thread Thiago Jung Bauermann


Hello,

Just going back to this question which I wasn't able to answer.

Thiago Jung Bauermann  writes:

> Michael S. Tsirkin  writes:
>
>> So far so good, but now a question:
>>
>> how are we handling guest address width limitations?
>> Is VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS subject to
>> guest address width limitations?
>> I am guessing we can make them so ...
>> This needs to be documented.
>
> I'm not sure. I will get back to you on this.

We don't have address width limitations between host and guest.

--
Thiago Jung Bauermann
IBM Linux Technology Center



Re: [PULL] virtio, vhost: fixes, features, performance

2019-07-17 Thread pr-tracker-bot
The pull request you sent on Tue, 16 Jul 2019 11:31:51 -0400:

> git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/3a1d5384b7decbff6519daa9c65a35665e227323

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


Re: [PATCH v4 5/5] vsock/virtio: change the maximum packet size allowed

2019-07-17 Thread Michael S. Tsirkin
On Wed, Jul 17, 2019 at 01:30:30PM +0200, Stefano Garzarella wrote:
> Now that we are able to split packets, we no longer need to limit
> their size to VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE.
> Instead, we can use VIRTIO_VSOCK_MAX_PKT_BUF_SIZE as the max
> packet size.
> 
> Signed-off-by: Stefano Garzarella 


OK, so this is kind of like GSO, where we pass
64K packets to the vsock layer and then split them at the
low level.


> ---
>  net/vmw_vsock/virtio_transport_common.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index 56fab3f03d0e..94cc0fa3e848 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -181,8 +181,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>   vvs = vsk->trans;
>  
>   /* we can send less than pkt_len bytes */
> - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
> - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>  
>   /* virtio_transport_get_credit might return less than pkt_len credit */
>   pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> -- 
> 2.20.1


Re: [PATCH v4 4/5] vhost/vsock: split packets to send using multiple buffers

2019-07-17 Thread Michael S. Tsirkin
On Wed, Jul 17, 2019 at 01:30:29PM +0200, Stefano Garzarella wrote:
> If the packets to be sent to the guest are bigger than the buffer
> available, we can split them across multiple buffers, fixing
> the length in the packet header.
> This is safe since virtio-vsock supports only stream sockets.
> 
> Signed-off-by: Stefano Garzarella 

So how does it work right now? If an app
does sendmsg with a 64K buffer and the other
side publishes 4K buffers - does it just stall?


> ---
>  drivers/vhost/vsock.c   | 66 ++---
>  net/vmw_vsock/virtio_transport_common.c | 15 --
>  2 files changed, 60 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 6c8390a2af52..9f57736fe15e 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -102,7 +102,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>   struct iov_iter iov_iter;
>   unsigned out, in;
>   size_t nbytes;
> - size_t len;
> + size_t iov_len, payload_len;
>   int head;
>  
>   spin_lock_bh(&vsock->send_pkt_list_lock);
> @@ -147,8 +147,24 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>   break;
>   }
>  
> - len = iov_length(&vq->iov[out], in);
> - iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
> + iov_len = iov_length(&vq->iov[out], in);
> + if (iov_len < sizeof(pkt->hdr)) {
> + virtio_transport_free_pkt(pkt);
> + vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
> + break;
> + }
> +
> + iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
> + payload_len = pkt->len - pkt->off;
> +
> + /* If the packet is greater than the space available in the
> +  * buffer, we split it using multiple buffers.
> +  */
> + if (payload_len > iov_len - sizeof(pkt->hdr))
> + payload_len = iov_len - sizeof(pkt->hdr);
> +
> + /* Set the correct length in the header */
> + pkt->hdr.len = cpu_to_le32(payload_len);
>  
>   nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
>   if (nbytes != sizeof(pkt->hdr)) {
> @@ -157,33 +173,47 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>   break;
>   }
>  
> - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
> - if (nbytes != pkt->len) {
> + nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
> +   &iov_iter);
> + if (nbytes != payload_len) {
>   virtio_transport_free_pkt(pkt);
>   vq_err(vq, "Faulted on copying pkt buf\n");
>   break;
>   }
>  
> - vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
> + vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
>   added = true;
>  
> - if (pkt->reply) {
> - int val;
> -
> - val = atomic_dec_return(&vsock->queued_replies);
> -
> - /* Do we have resources to resume tx processing? */
> - if (val + 1 == tx_vq->num)
> - restart_tx = true;
> - }
> -
>   /* Deliver to monitoring devices all correctly transmitted
>* packets.
>*/
>   virtio_transport_deliver_tap_pkt(pkt);
>  
> - total_len += pkt->len;
> - virtio_transport_free_pkt(pkt);
> + pkt->off += payload_len;
> + total_len += payload_len;
> +
> + /* If we didn't send all the payload we can requeue the packet
> +  * to send it with the next available buffer.
> +  */
> + if (pkt->off < pkt->len) {
> + spin_lock_bh(&vsock->send_pkt_list_lock);
> + list_add(&pkt->list, &vsock->send_pkt_list);
> + spin_unlock_bh(&vsock->send_pkt_list_lock);
> + } else {
> + if (pkt->reply) {
> + int val;
> +
> + val = atomic_dec_return(&vsock->queued_replies);
> +
> + /* Do we have resources to resume tx
> +  * processing?
> +  */
> + if (val + 1 == tx_vq->num)
> + restart_tx = true;
> + }
> +
> + virtio_transport_free_pkt(pkt);
> + }
>   } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
>   if (added)
>   vhost_signal(&vsock->dev, vq);
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index 

Re: [PATCH v4 3/5] vsock/virtio: fix locking in virtio_transport_inc_tx_pkt()

2019-07-17 Thread Michael S. Tsirkin
On Wed, Jul 17, 2019 at 01:30:28PM +0200, Stefano Garzarella wrote:
> fwd_cnt and last_fwd_cnt are protected by rx_lock, so we should use
> the same spinlock also in the TX path.
> 
> Also move buf_alloc under the same lock.
> 
> Signed-off-by: Stefano Garzarella 

Wait a second, is this a bugfix?
If it's used under the wrong lock, won't values get corrupted?
Won't traffic then stall, or more data get sent than there are
credits?

> ---
>  include/linux/virtio_vsock.h| 2 +-
>  net/vmw_vsock/virtio_transport_common.c | 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index 49fc9d20bc43..4c7781f4b29b 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -35,7 +35,6 @@ struct virtio_vsock_sock {
>  
>   /* Protected by tx_lock */
>   u32 tx_cnt;
> - u32 buf_alloc;
>   u32 peer_fwd_cnt;
>   u32 peer_buf_alloc;
>  
> @@ -43,6 +42,7 @@ struct virtio_vsock_sock {
>   u32 fwd_cnt;
>   u32 last_fwd_cnt;
>   u32 rx_bytes;
> + u32 buf_alloc;
>   struct list_head rx_queue;
>  };
>  
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index a85559d4d974..34a2b42313b7 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -210,11 +210,11 @@ static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
>  
>  void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
>  {
> - spin_lock_bh(&vvs->tx_lock);
> + spin_lock_bh(&vvs->rx_lock);
>   vvs->last_fwd_cnt = vvs->fwd_cnt;
>   pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
>   pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
> - spin_unlock_bh(&vvs->tx_lock);
> + spin_unlock_bh(&vvs->rx_lock);
>  }
>  EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
>  
> -- 
> 2.20.1


Re: [PATCH V3 00/15] Packed virtqueue support for vhost

2019-07-17 Thread Michael S. Tsirkin
On Wed, Jul 17, 2019 at 08:27:28PM +0800, Jason Wang wrote:
> 
> > On 2019/7/17 7:02 PM, Michael S. Tsirkin wrote:
> > On Wed, Jul 17, 2019 at 06:52:40AM -0400, Jason Wang wrote:
> > > Hi all:
> > > 
> > > This series implements packed virtqueues, which were described
> > > at [1]. In this version we try to address the performance regression
> > > seen with V2. The root cause is that packed virtqueues need more
> > > userspace memory accesses, which turn out to be very
> > > expensive. Thanks to the help of 7f466032dc9e ("vhost: access vq
> > > metadata through kernel virtual address"), such overhead could be
> > > eliminated. So in this version, we can see about 2% improvement for
> > > packed virtqueue on PPS.
> > Great job, thanks!
> > Pls allow a bit more review time than usual as this is a big patchset.
> > Should be done by Tuesday.
> > -next material anyway.
> 
> 
> Sure. Just to confirm, I think this should go through your vhost tree?
> 
> Thanks

I think this makes sense, yes.

Re: [PATCH V3 00/15] Packed virtqueue support for vhost

2019-07-17 Thread Jason Wang


On 2019/7/17 7:02 PM, Michael S. Tsirkin wrote:

On Wed, Jul 17, 2019 at 06:52:40AM -0400, Jason Wang wrote:

Hi all:

This series implements packed virtqueues, which were described
at [1]. In this version we try to address the performance regression
seen with V2. The root cause is that packed virtqueues need more
userspace memory accesses, which turn out to be very
expensive. Thanks to the help of 7f466032dc9e ("vhost: access vq
metadata through kernel virtual address"), such overhead could be
eliminated. So in this version, we can see about 2% improvement for
packed virtqueue on PPS.

Great job, thanks!
Pls allow a bit more review time than usual as this is a big patchset.
Should be done by Tuesday.
-next material anyway.



Sure. Just to confirm, I think this should go through your vhost tree?

Thanks


[PATCH v4 5/5] vsock/virtio: change the maximum packet size allowed

2019-07-17 Thread Stefano Garzarella
Now that we are able to split packets, we no longer need to limit
their size to VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE.
Instead, we can use VIRTIO_VSOCK_MAX_PKT_BUF_SIZE as the max
packet size.
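
For reference, the two constants are defined in include/linux/virtio_vsock.h
roughly as follows (values restated from the kernel headers of this era, not
part of this patch):

#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE	(1024 * 4)
#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE		(1024 * 64)

so the cap on a single packet grows from 4 KB to 64 KB.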

Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 56fab3f03d0e..94cc0fa3e848 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -181,8 +181,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
vvs = vsk->trans;
 
/* we can send less than pkt_len bytes */
-   if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
-   pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+   if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
+   pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
 
/* virtio_transport_get_credit might return less than pkt_len credit */
pkt_len = virtio_transport_get_credit(vvs, pkt_len);
-- 
2.20.1



[PATCH v4 1/5] vsock/virtio: limit the memory used per-socket

2019-07-17 Thread Stefano Garzarella
Since virtio-vsock was introduced, the buffers filled by the host
and pushed to the guest using the vring are directly queued in
a per-socket list. These buffers are preallocated by the guest
with a fixed size (4 KB).

The maximum amount of memory used by each socket should be
controlled by the credit mechanism.
The default credit available per socket is 256 KB, but if we use
only 1 byte per packet, the guest can queue up to 262144 4 KB
buffers, using up to 1 GB of memory per socket. In addition, the
guest will continue to fill the vring with new 4 KB free buffers
to avoid starvation of other sockets.

This patch mitigates this issue by copying the payload of small
packets (< 128 bytes) into the buffer of the last packet queued,
in order to avoid wasting memory.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Stefano Garzarella 
---
 drivers/vhost/vsock.c   |  2 +
 include/linux/virtio_vsock.h|  1 +
 net/vmw_vsock/virtio_transport.c|  1 +
 net/vmw_vsock/virtio_transport_common.c | 60 +
 4 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 6a50e1d0529c..6c8390a2af52 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -329,6 +329,8 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
return NULL;
}
 
+   pkt->buf_len = pkt->len;
+
	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
if (nbytes != pkt->len) {
vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index e223e2632edd..7d973903f52e 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -52,6 +52,7 @@ struct virtio_vsock_pkt {
/* socket refcnt not held, only use for cancellation */
struct vsock_sock *vsk;
void *buf;
+   u32 buf_len;
u32 len;
u32 off;
bool reply;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 0815d1357861..082a30936690 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -307,6 +307,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
break;
}
 
+   pkt->buf_len = buf_len;
pkt->len = buf_len;
 
	sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 6f1a8aff65c5..095221f94786 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -26,6 +26,9 @@
 /* How long to wait for graceful shutdown of a connection */
 #define VSOCK_CLOSE_TIMEOUT (8 * HZ)
 
+/* Threshold for detecting small packets to copy */
+#define GOOD_COPY_LEN  128
+
 static const struct virtio_transport *virtio_transport_get_ops(void)
 {
const struct vsock_transport *t = vsock_core_get_transport();
@@ -64,6 +67,9 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
pkt->buf = kmalloc(len, GFP_KERNEL);
if (!pkt->buf)
goto out_pkt;
+
+   pkt->buf_len = len;
+
err = memcpy_from_msg(pkt->buf, info->msg, len);
if (err)
goto out;
@@ -841,24 +847,60 @@ virtio_transport_recv_connecting(struct sock *sk,
return err;
 }
 
+static void
+virtio_transport_recv_enqueue(struct vsock_sock *vsk,
+ struct virtio_vsock_pkt *pkt)
+{
+   struct virtio_vsock_sock *vvs = vsk->trans;
+   bool free_pkt = false;
+
+   pkt->len = le32_to_cpu(pkt->hdr.len);
+   pkt->off = 0;
+
+   spin_lock_bh(&vvs->rx_lock);
+
+   virtio_transport_inc_rx_pkt(vvs, pkt);
+
+   /* Try to copy small packets into the buffer of last packet queued,
+* to avoid wasting memory queueing the entire buffer with a small
+* payload.
+*/
+   if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
+   struct virtio_vsock_pkt *last_pkt;
+
+   last_pkt = list_last_entry(&vvs->rx_queue,
+  struct virtio_vsock_pkt, list);
+
+   /* If there is space in the last packet queued, we copy the
+* new packet in its buffer.
+*/
+   if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
+   memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
+  pkt->len);
+   last_pkt->len += pkt->len;
+   free_pkt = true;
+   goto out;
+   }
+   }
+
+   list_add_tail(&pkt->list, &vvs->rx_queue);
+
+out:
+   spin_unlock_bh(&vvs->rx_lock);
+   if (free_pkt)
+   virtio_transport_free_pkt(pkt);
+}
+
 static int
 virtio_transport_recv_connected(struct sock *sk,
  

[PATCH v4 4/5] vhost/vsock: split packets to send using multiple buffers

2019-07-17 Thread Stefano Garzarella
If the packets to be sent to the guest are bigger than the buffer
available, we can split them across multiple buffers, fixing
the length in the packet header.
This is safe since virtio-vsock supports only stream sockets.

Signed-off-by: Stefano Garzarella 
---
 drivers/vhost/vsock.c   | 66 ++---
 net/vmw_vsock/virtio_transport_common.c | 15 --
 2 files changed, 60 insertions(+), 21 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 6c8390a2af52..9f57736fe15e 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -102,7 +102,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
struct iov_iter iov_iter;
unsigned out, in;
size_t nbytes;
-   size_t len;
+   size_t iov_len, payload_len;
int head;
 
	spin_lock_bh(&vsock->send_pkt_list_lock);
@@ -147,8 +147,24 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
break;
}
 
-   len = iov_length(&vq->iov[out], in);
-   iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
+   iov_len = iov_length(&vq->iov[out], in);
+   if (iov_len < sizeof(pkt->hdr)) {
+   virtio_transport_free_pkt(pkt);
+   vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
+   break;
+   }
+
+   iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
+   payload_len = pkt->len - pkt->off;
+
+   /* If the packet is greater than the space available in the
+* buffer, we split it using multiple buffers.
+*/
+   if (payload_len > iov_len - sizeof(pkt->hdr))
+   payload_len = iov_len - sizeof(pkt->hdr);
+
+   /* Set the correct length in the header */
+   pkt->hdr.len = cpu_to_le32(payload_len);
 
	nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
if (nbytes != sizeof(pkt->hdr)) {
@@ -157,33 +173,47 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
break;
}
 
-   nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
-   if (nbytes != pkt->len) {
+   nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
+     &iov_iter);
+   if (nbytes != payload_len) {
virtio_transport_free_pkt(pkt);
vq_err(vq, "Faulted on copying pkt buf\n");
break;
}
 
-   vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+   vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
added = true;
 
-   if (pkt->reply) {
-   int val;
-
-   val = atomic_dec_return(&vsock->queued_replies);
-
-   /* Do we have resources to resume tx processing? */
-   if (val + 1 == tx_vq->num)
-   restart_tx = true;
-   }
-
/* Deliver to monitoring devices all correctly transmitted
 * packets.
 */
virtio_transport_deliver_tap_pkt(pkt);
 
-   total_len += pkt->len;
-   virtio_transport_free_pkt(pkt);
+   pkt->off += payload_len;
+   total_len += payload_len;
+
+   /* If we didn't send all the payload we can requeue the packet
+* to send it with the next available buffer.
+*/
+   if (pkt->off < pkt->len) {
+   spin_lock_bh(&vsock->send_pkt_list_lock);
+   list_add(&pkt->list, &vsock->send_pkt_list);
+   spin_unlock_bh(&vsock->send_pkt_list_lock);
+   } else {
+   if (pkt->reply) {
+   int val;
+
+   val = atomic_dec_return(&vsock->queued_replies);
+
+   /* Do we have resources to resume tx
+* processing?
+*/
+   if (val + 1 == tx_vq->num)
+   restart_tx = true;
+   }
+
+   virtio_transport_free_pkt(pkt);
+   }
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
if (added)
	vhost_signal(&vsock->dev, vq);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 34a2b42313b7..56fab3f03d0e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -97,8 +97,17 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
struct virtio_vsock_pkt *pkt = opaque;
struct 

[PATCH v4 2/5] vsock/virtio: reduce credit update messages

2019-07-17 Thread Stefano Garzarella
In order to reduce the number of credit update messages,
we send them only when the space available seen by the
transmitter is less than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE.
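
A minimal standalone sketch of the heuristic, assuming the counter semantics
described above (struct and helper names here are illustrative, not the
kernel's):

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for the relevant virtio_vsock_sock fields. */
struct vsock_credit {
	uint32_t buf_alloc;    /* rx buffer space advertised to the peer */
	uint32_t fwd_cnt;      /* bytes consumed from the rx queue so far */
	uint32_t last_fwd_cnt; /* fwd_cnt value last advertised to the peer */
};

#define VSOCK_MAX_PKT_BUF_SIZE (1024 * 64)

/* Approximate the space the transmitter still believes is available
 * (it only knows the counters from the last credit message), and send
 * an update only once that drops below one maximum-sized packet.
 * Unsigned wraparound keeps the subtraction safe.
 */
static bool need_credit_update(const struct vsock_credit *c)
{
	uint32_t free_space = c->buf_alloc - (c->fwd_cnt - c->last_fwd_cnt);

	return free_space < VSOCK_MAX_PKT_BUF_SIZE;
}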

Signed-off-by: Stefano Garzarella 
---
 include/linux/virtio_vsock.h|  1 +
 net/vmw_vsock/virtio_transport_common.c | 16 +---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 7d973903f52e..49fc9d20bc43 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -41,6 +41,7 @@ struct virtio_vsock_sock {
 
/* Protected by rx_lock */
u32 fwd_cnt;
+   u32 last_fwd_cnt;
u32 rx_bytes;
struct list_head rx_queue;
 };
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 095221f94786..a85559d4d974 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -211,6 +211,7 @@ static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
 void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
 {
	spin_lock_bh(&vvs->tx_lock);
+   vvs->last_fwd_cnt = vvs->fwd_cnt;
pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
	spin_unlock_bh(&vvs->tx_lock);
@@ -261,6 +262,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
struct virtio_vsock_sock *vvs = vsk->trans;
struct virtio_vsock_pkt *pkt;
size_t bytes, total = 0;
+   u32 free_space;
int err = -EFAULT;
 
	spin_lock_bh(&vvs->rx_lock);
@@ -291,11 +293,19 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
virtio_transport_free_pkt(pkt);
}
}
+
+   free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
+
	spin_unlock_bh(&vvs->rx_lock);
 
-   /* Send a credit pkt to peer */
-   virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
-   NULL);
+   /* We send a credit update only when the space available seen
+* by the transmitter is less than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE
+*/
+   if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+   virtio_transport_send_credit_update(vsk,
+   VIRTIO_VSOCK_TYPE_STREAM,
+   NULL);
+   }
 
return total;
 
-- 
2.20.1



[PATCH v4 3/5] vsock/virtio: fix locking in virtio_transport_inc_tx_pkt()

2019-07-17 Thread Stefano Garzarella
fwd_cnt and last_fwd_cnt are protected by rx_lock, so we should use
the same spinlock also in the TX path.

Also move buf_alloc under the same lock.

Signed-off-by: Stefano Garzarella 
---
 include/linux/virtio_vsock.h| 2 +-
 net/vmw_vsock/virtio_transport_common.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 49fc9d20bc43..4c7781f4b29b 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -35,7 +35,6 @@ struct virtio_vsock_sock {
 
/* Protected by tx_lock */
u32 tx_cnt;
-   u32 buf_alloc;
u32 peer_fwd_cnt;
u32 peer_buf_alloc;
 
@@ -43,6 +42,7 @@ struct virtio_vsock_sock {
u32 fwd_cnt;
u32 last_fwd_cnt;
u32 rx_bytes;
+   u32 buf_alloc;
struct list_head rx_queue;
 };
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index a85559d4d974..34a2b42313b7 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -210,11 +210,11 @@ static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
 
 void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
 {
-   spin_lock_bh(&vvs->tx_lock);
+   spin_lock_bh(&vvs->rx_lock);
vvs->last_fwd_cnt = vvs->fwd_cnt;
pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
-   spin_unlock_bh(&vvs->tx_lock);
+   spin_unlock_bh(&vvs->rx_lock);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
 
-- 
2.20.1



[PATCH v4 0/5] vsock/virtio: optimizations to increase the throughput

2019-07-17 Thread Stefano Garzarella
This series tries to increase the throughput of virtio-vsock with slight
changes.
While I was testing the v2 of this series, I discovered a huge use of memory,
so I added patch 1 to mitigate this issue. I put it in this series in order
to better track the performance trends.

v4:
- rebased all patches on current master (conflicts in Patch 4)
- Patch 1: added Stefan's R-b
- Patch 3: removed lock when buf_alloc is written [David];
   moved this patch after "vsock/virtio: reduce credit update messages"
   to make it clearer
- Patch 4: vhost_exceeds_weight() was recently introduced, so I've solved some
   conflicts

v3: https://patchwork.kernel.org/cover/10970145

v2: https://patchwork.kernel.org/cover/10938743

v1: https://patchwork.kernel.org/cover/10885431

Below are the benchmarks step by step. I used iperf3 [1] modified with VSOCK
support. As Michael suggested in v1, I booted host and guest with 'nosmap'.

A brief description of patches:
- Patch 1:     limit the memory usage with an extra copy for small packets
- Patches 2+3: reduce the number of credit update messages sent to the
   transmitter
- Patches 4+5: allow the host to split packets on multiple buffers and use
   VIRTIO_VSOCK_MAX_PKT_BUF_SIZE as the max packet size allowed

host -> guest [Gbps]
pkt_size   before opt   p 1      p 2+3    p 4+5

32         0.032        0.030    0.048    0.051
64         0.061        0.059    0.108    0.117
128        0.122        0.112    0.227    0.234
256        0.244        0.241    0.418    0.415
512        0.459        0.466    0.847    0.865
1K         0.927        0.919    1.657    1.641
2K         1.884        1.813    3.262    3.269
4K         3.378        3.326    6.044    6.195
8K         5.637        5.676    10.141   11.287
16K        8.250        8.402    15.976   16.736
32K        13.327       13.204   19.013   20.515
64K        21.241       21.341   20.973   21.879
128K       21.851       22.354   21.816   23.203
256K       21.408       21.693   21.846   24.088
512K       21.600       21.899   21.921   24.106

guest -> host [Gbps]
pkt_size   before opt   p 1      p 2+3    p 4+5

32         0.045        0.046    0.057    0.057
64         0.089        0.091    0.103    0.104
128        0.170        0.179    0.192    0.200
256        0.364        0.351    0.361    0.379
512        0.709        0.699    0.731    0.790
1K         1.399        1.407    1.395    1.427
2K         2.670        2.684    2.745    2.835
4K         5.171        5.199    5.305    5.451
8K         8.442        8.500    10.083   9.941
16K        12.305       12.259   13.519   15.385
32K        11.418       11.150   11.988   24.680
64K        10.778       10.659   11.589   35.273
128K       10.421       10.339   10.939   40.338
256K       10.300       9.719    10.508   36.562
512K       9.833        9.808    10.612   35.979

As Stefan suggested in v1, I also measured the efficiency in this way:
efficiency = Mbps / (%CPU_Host + %CPU_Guest)

The '%CPU_Guest' is taken inside the VM. I know that it is not the best way,
but it's provided for free by iperf3 and could be an indication.

host -> guest efficiency [Mbps / (%CPU_Host + %CPU_Guest)]
pkt_size   before opt   p 1      p 2+3    p 4+5

32         0.35         0.45     0.79     1.02
64         0.56         0.80     1.41     1.54
128        1.11         1.52     3.03     3.12
256        2.20         2.16     5.44     5.58
512        4.17         4.18     10.96    11.46
1K         8.30         8.26     20.99    20.89
2K         16.82        16.31    39.76    39.73
4K         30.89        30.79    74.07    75.73
8K         53.74        54.49    124.24   148.91
16K        80.68        83.63    200.21   232.79
32K        132.27       132.52   260.81   357.07
64K        229.82       230.40   300.19   444.18
128K       332.60       329.78   331.51   492.28
256K       331.06       337.22   339.59   511.59
512K       335.58       328.50   331.56   504.56

guest -> host efficiency [Mbps / (%CPU_Host + %CPU_Guest)]
pkt_size   before opt   p 1      p 2+3    p 4+5

32         0.43         0.43     0.53     0.56
64         0.85         0.86     1.04     1.10
128        1.63         1.71     2.07     2.13
256        3.48         3.35     4.02     4.22
512        6.80         6.67     7.97     8.63
1K         13.32        13.31    15.72    15.94
2K         25.79        25.92    30.84    30.98
4K         50.37        50.48    58.79    59.69
8K         95.90        96.15    107.04   110.33
16K        145.80       145.43   143.97   174.70
32K        147.06       144.74   146.02   282.48
64K        145.25       143.99   141.62   406.40
128K       149.34       146.96   147.49   489.34
256K       156.35       149.81   152.21   536.37
512K       151.65       150.74   151.52   519.93

[1] https://github.com/stefano-garzarella/iperf/

Stefano Garzarella (5):
  vsock/virtio: limit the memory used per-socket
  vsock/virtio: reduce credit update messages
  vsock/virtio: fix locking in virtio_transport_inc_tx_pkt()
  vhost/vsock: split packets to send using multiple buffers
  vsock/virtio: change the maximum packet size allowed

 

Re: [PATCH V3 00/15] Packed virtqueue support for vhost

2019-07-17 Thread Michael S. Tsirkin
On Wed, Jul 17, 2019 at 06:52:40AM -0400, Jason Wang wrote:
> Hi all:
> 
> This series implements packed virtqueues, which were described
> at [1]. In this version we try to address the performance regression
> seen with V2. The root cause is that packed virtqueues need more
> userspace memory accesses, which turn out to be very
> expensive. Thanks to the help of 7f466032dc9e ("vhost: access vq
> metadata through kernel virtual address"), such overhead could be
> eliminated. So in this version, we can see about 2% improvement for
> packed virtqueue on PPS.


Great job, thanks!
Pls allow a bit more review time than usual as this is a big patchset.
Should be done by Tuesday.
-next material anyway.

> More optimizations (e.g. IN_ORDER) are on the road.
> 
> Please review.
> 
> [1] https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-610007
> 
> This version was tested with:
> - zerocopy/datacopy
> - mergeable buffer on/off
> - TCP stream & virtio-user
> 
> Changes from V2:
> - rebase on top of the vhost metadata acceleration series
> - introduce shadow used ring API
> - new SET_VRING_BASE/GET_VRING_BASE that take care of the wrap counter
>   and index for both avail and used
> - various tweaks
> 
> Changes from V1:
> - drop uapi patch and use Tiwei's
> - split the enablement of packed virtqueue into a separate patch
> 
> Changes from RFC V5:
> - save unnecessary barriers during vhost_add_used_packed_n()
> - more compact math for event idx
> - fix failure of SET_VRING_BASE when avail_wrap_counter is true
> - fix not copy avail_wrap_counter during GET_VRING_BASE
> - introduce SET_VRING_USED_BASE/GET_VRING_USED_BASE for syncing
> - last_used_idx
> - rename used_wrap_counter to last_used_wrap_counter
> - rebase to net-next
> 
> Changes from RFC V4:
> - fix signalled_used index recording
> - track avail index correctly
> - various minor fixes
> 
> Changes from RFC V3:
> - Fix math on event idx checking
> - Sync last avail wrap counter through GET/SET_VRING_BASE
> - remove desc_event prefix in the driver/device structure
> 
> Changes from RFC V2:
> - do not use & in checking desc_event_flags
> - off should be most significant bit
> - remove the workaround of mergeable buffer for dpdk prototype
> - id should be in the last descriptor in the chain
> - keep _F_WRITE for write descriptor when adding used
> - device flags updating should use ADDR_USED type
> - return error on unexpected unavail descriptor in a chain
> - return false in vhost_vq_avail_empty if a descriptor is available
> - track last seen avail_wrap_counter
> - correctly examine available descriptor in get_indirect_packed()
> - vhost_idx_diff should return u16 instead of bool
> 
> Changes from RFC V1:
> - Refactor vhost used elem code to avoid open coding on used elem
> - Event suppression support (compile test only).
> - Indirect descriptor support (compile test only).
> - Zerocopy support.
> - vIOMMU support.
> - SCSI/VSOCK support (compile test only).
> - Fix several bugs
> 
> Jason Wang (15):
>   vhost: simplify meta data pointer accessing
>   vhost: remove the unnecessary parameter of vhost_vq_avail_empty()
>   vhost: remove unnecessary parameter of
> vhost_enable_notify()/vhost_disable_notify
>   vhost-net: don't use vhost_add_used_n() for zerocopy
>   vhost: introduce helpers to manipulate shadow used ring
>   vhost_net: switch TX to use shadow used ring API
>   vhost_net: calculate last used length once for mergeable buffer
>   vhost_net: switch to use shadow used ring API for RX
>   vhost: do not export vhost_add_used_n() and
> vhost_add_used_and_signal_n()
>   vhost: hide used ring layout from device
>   vhost: do not use vring_used_elem
>   vhost: vhost_put_user() can accept metadata type
>   vhost: packed ring support
>   vhost: event suppression for packed ring
>   vhost: enable packed virtqueues
> 
>  drivers/vhost/net.c   |  200 +++---
>  drivers/vhost/scsi.c  |   72 +-
>  drivers/vhost/test.c  |6 +-
>  drivers/vhost/vhost.c | 1508 +++--
>  drivers/vhost/vhost.h |   78 ++-
>  drivers/vhost/vsock.c |   57 +-
>  6 files changed, 1513 insertions(+), 408 deletions(-)
> 
> -- 
> 2.18.1


[PATCH V3 14/15] vhost: event suppression for packed ring

2019-07-17 Thread Jason Wang
This patch introduces support for event suppression. This is done by
having two areas: a device area and a driver area. One side can then try
to disable or enable (delayed) notifications from the other side by using a
boolean hint or the event index interface in those areas.

For more information, please refer to the Virtio spec.
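
For reference, each area holds one small structure; the layout below is
restated from the virtio 1.1 spec / include/uapi/linux/virtio_ring.h, not
from this patch:

struct vring_packed_desc_event {
	__le16 off_wrap;	/* event offset (bits 0-14) plus the expected
				 * ring wrap counter in bit 15 */
	__le16 flags;		/* one of the values below */
};

#define VRING_PACKED_EVENT_FLAG_ENABLE	0x0	/* always notify */
#define VRING_PACKED_EVENT_FLAG_DISABLE	0x1	/* never notify */
#define VRING_PACKED_EVENT_FLAG_DESC	0x2	/* notify when the ring reaches
						 * off_wrap (event index mode) */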

Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 265 +++---
 drivers/vhost/vhost.h |  11 +-
 2 files changed, 255 insertions(+), 21 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index a7d24b9d5204..a188e9af3b35 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1524,6 +1524,76 @@ static inline int vhost_put_desc_flags(struct vhost_virtqueue *vq,
 	return vhost_put_user(vq, *flags, &desc->flags, VHOST_ADDR_DESC);
 }
 
+static int vhost_get_driver_off_wrap(struct vhost_virtqueue *vq,
+__virtio16 *off_wrap)
+{
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+   struct vring_packed_desc_event *event =
+  vhost_get_meta_ptr(vq, VHOST_ADDR_AVAIL);
+   if (likely(event)) {
+   *off_wrap = event->off_wrap;
+   vhost_put_meta_ptr();
+   return 0;
+   }
+#endif
+   return vhost_get_user(vq, *off_wrap,
+ &vq->driver_event->off_wrap,
+ VHOST_ADDR_AVAIL);
+}
+
+static int vhost_get_driver_flags(struct vhost_virtqueue *vq,
+ __virtio16 *driver_flags)
+{
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+   struct vring_packed_desc_event *event =
+  vhost_get_meta_ptr(vq, VHOST_ADDR_AVAIL);
+
+   if (likely(event)) {
+   *driver_flags = event->flags;
+   vhost_put_meta_ptr();
+   return 0;
+   }
+#endif
+   return vhost_get_user(vq, *driver_flags, &vq->driver_event->flags,
+ VHOST_ADDR_AVAIL);
+}
+
+static int vhost_put_device_off_wrap(struct vhost_virtqueue *vq,
+__virtio16 *off_wrap)
+{
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+   struct vring_packed_desc_event *event =
+  vhost_get_meta_ptr(vq, VHOST_ADDR_USED);
+
+   if (likely(event)) {
+   event->off_wrap = *off_wrap;
+   vhost_put_meta_ptr();
+   return 0;
+   }
+#endif
+   return vhost_put_user(vq, *off_wrap,
+ &vq->device_event->off_wrap,
+ VHOST_ADDR_USED);
+}
+
+static int vhost_put_device_flags(struct vhost_virtqueue *vq,
+ __virtio16 *device_flags)
+{
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+   struct vring_packed_desc_event *event =
+  vhost_get_meta_ptr(vq, VHOST_ADDR_USED);
+
+   if (likely(event)) {
+   event->flags = *device_flags;
+   vhost_put_meta_ptr();
+   return 0;
+   }
+#endif
+   return vhost_put_user(vq, *device_flags,
+ &vq->device_event->flags,
+ VHOST_ADDR_USED);
+}
+
 static int vhost_new_umem_range(struct vhost_umem *umem,
u64 start, u64 size, u64 end,
u64 userspace_addr, int perm)
@@ -1809,10 +1879,15 @@ static int vq_access_ok_packed(struct vhost_virtqueue *vq, unsigned int num,
   struct vring_used __user *used)
 {
struct vring_packed_desc *packed = (struct vring_packed_desc *)desc;
+   struct vring_packed_desc_event *driver_event =
+   (struct vring_packed_desc_event *)avail;
+   struct vring_packed_desc_event *device_event =
+   (struct vring_packed_desc_event *)used;
 
-   /* TODO: check device area and driver area */
return access_ok(packed, num * sizeof(*packed)) &&
-  access_ok(packed, num * sizeof(*packed));
+  access_ok(packed, num * sizeof(*packed)) &&
+  access_ok(driver_event, sizeof(*driver_event)) &&
+  access_ok(device_event, sizeof(*device_event));
 }
 
 static int vq_access_ok_split(struct vhost_virtqueue *vq, unsigned int num,
@@ -1904,16 +1979,25 @@ static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
 }
 #endif
 
-int vq_meta_prefetch(struct vhost_virtqueue *vq)
+static int vq_iotlb_prefetch_packed(struct vhost_virtqueue *vq)
 {
-   unsigned int num = vq->num;
+   int num = vq->num;
 
-   if (!vq->iotlb) {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-   vhost_vq_map_prefetch(vq);
-#endif
-   return 1;
-   }
+   return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
+  num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
+  iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->desc,
+  num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
+  iotlb_access_ok(vq, VHOST_ACCESS_RO,
+

[PATCH V3 05/15] vhost: introduce helpers to manipulate shadow used ring

2019-07-17 Thread Jason Wang
We open-code vq->heads[] in net.c for:

1) implementing batching, which is in fact a shadow used ring
   implementation.
2) maintaining pending heads in order, which is in fact another kind of
   shadow used ring.

But this exposes the used ring layout to the device, which makes it hard to
introduce new kinds of rings like the packed virtqueue. So this patch
introduces two types of shadow used ring API:

1) a shadow used ring API for batch updating of used heads
2) a zerocopy shadow used API for maintaining pending heads and batch
   updating of used heads

This can help to hide the used ring layout from the device. A device should
not mix the two kinds of APIs; a usage sketch follows below.
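
A rough sketch of how a device-side TX loop would use the first kind of API
(illustrative only; example_get_buf() is a hypothetical placeholder and error
handling is trimmed):

static void example_tx_loop(struct vhost_virtqueue *vq)
{
	int head, len;

	for (;;) {
		head = example_get_buf(vq, &len);	/* hypothetical */
		if (head < 0)
			break;

		/* Record the used head locally instead of touching
		 * vq->heads[] (i.e. the ring layout) directly.
		 */
		vhost_add_shadow_used(vq, head, len);

		/* Publish a batch at a time, kicking the guest once. */
		if (vhost_get_shadow_used_count(vq) >= VHOST_NET_BATCH)
			vhost_flush_shadow_used_and_signal(vq);
	}

	/* Publish whatever is left. */
	vhost_flush_shadow_used_and_signal(vq);
}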

Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 95 +--
 drivers/vhost/vhost.h | 18 
 2 files changed, 100 insertions(+), 13 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e781db88dfca..5bfca5b76b05 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -486,6 +486,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 #if VHOST_ARCH_CAN_ACCEL_UACCESS
vhost_reset_vq_maps(vq);
 #endif
+   vq->nheads = 0;
 }
 
 static int vhost_worker(void *data)
@@ -2790,25 +2791,28 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
 
-/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
+static void vhost_withdraw_shadow_used(struct vhost_virtqueue *vq, int count)
 {
-   vq->last_avail_idx -= n;
+   BUG_ON(count > vq->nheads);
+   vq->nheads -= count;
 }
-EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
 
-/* After we've used one of their buffers, we tell them about it.  We'll then
- * want to notify the guest, using eventfd. */
-int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
+/* Reverse the effect of vhost_get_vq_desc and
+ * vhost_add_shadow_used. Useful for error handling
+ */
+void vhost_discard_shadow_used(struct vhost_virtqueue *vq, int n)
 {
-   struct vring_used_elem heads = {
-   cpu_to_vhost32(vq, head),
-   cpu_to_vhost32(vq, len)
-   };
+   vhost_withdraw_shadow_used(vq, n);
+   vhost_discard_vq_desc(vq, n);
+}
+EXPORT_SYMBOL_GPL(vhost_discard_shadow_used);
 
-   return vhost_add_used_n(vq, &heads, 1);
+/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
+{
+   vq->last_avail_idx -= n;
 }
-EXPORT_SYMBOL_GPL(vhost_add_used);
+EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
 
 static int __vhost_add_used_n(struct vhost_virtqueue *vq,
struct vring_used_elem *heads,
@@ -2842,6 +2846,41 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
return 0;
 }
 
+void vhost_set_zc_used_len(struct vhost_virtqueue *vq,
+  int idx, int len)
+{
+   vq->heads[idx].len = len;
+}
+EXPORT_SYMBOL_GPL(vhost_set_zc_used_len);
+
+int vhost_get_zc_used_len(struct vhost_virtqueue *vq, int idx)
+{
+   return vq->heads[idx].len;
+}
+EXPORT_SYMBOL_GPL(vhost_get_zc_used_len);
+
+void vhost_set_zc_used(struct vhost_virtqueue *vq,
+  int idx, unsigned int head, int len)
+{
+   vq->heads[idx].id = head;
+   vq->heads[idx].len = len;
+}
+EXPORT_SYMBOL_GPL(vhost_set_zc_used);
+
+void vhost_add_shadow_used(struct vhost_virtqueue *vq,
+  unsigned int head, int len)
+{
+   vhost_set_zc_used(vq, vq->nheads, head, len);
+   ++vq->nheads;
+}
+EXPORT_SYMBOL_GPL(vhost_add_shadow_used);
+
+int vhost_get_shadow_used_count(struct vhost_virtqueue *vq)
+{
+   return vq->nheads;
+}
+EXPORT_SYMBOL_GPL(vhost_get_shadow_used_count);
+
 /* After we've used one of their buffers, we tell them about it.  We'll then
  * want to notify the guest, using eventfd. */
 int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
@@ -2879,6 +2918,19 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 }
 EXPORT_SYMBOL_GPL(vhost_add_used_n);
 
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
+{
+   struct vring_used_elem heads = {
+   cpu_to_vhost32(vq, head),
+   cpu_to_vhost32(vq, len)
+   };
+
+   return vhost_add_used_n(vq, &heads, 1);
+}
+EXPORT_SYMBOL_GPL(vhost_add_used);
+
 static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
__u16 old, new;
@@ -2945,6 +2997,23 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
 }
 EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
 
+void vhost_flush_shadow_used_and_signal(struct vhost_virtqueue *vq)
+
+{
+   if (!vq->nheads)
+   return;
+
+   vhost_add_used_and_signal_n(vq->dev, 

[PATCH V3 02/15] vhost: remove the unnecessary parameter of vhost_vq_avail_empty()

2019-07-17 Thread Jason Wang
Its dev parameter is not even used, so remove it.

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c   | 8 
 drivers/vhost/vhost.c | 2 +-
 drivers/vhost/vhost.h | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 3beb401235c0..7d34e8cbc89b 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -498,7 +498,7 @@ static int sock_has_rx_data(struct socket *sock)
 static void vhost_net_busy_poll_try_queue(struct vhost_net *net,
  struct vhost_virtqueue *vq)
 {
-   if (!vhost_vq_avail_empty(&net->dev, vq)) {
+   if (!vhost_vq_avail_empty(vq)) {
 		vhost_poll_queue(&vq->poll);
 	} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 		vhost_disable_notify(&net->dev, vq);
@@ -540,8 +540,8 @@ static void vhost_net_busy_poll(struct vhost_net *net,
}
 
if ((sock_has_rx_data(sock) &&
-    !vhost_vq_avail_empty(&net->dev, rvq)) ||
-   !vhost_vq_avail_empty(&net->dev, tvq))
+!vhost_vq_avail_empty(rvq)) ||
+   !vhost_vq_avail_empty(tvq))
break;
 
cpu_relax();
@@ -638,7 +638,7 @@ static int get_tx_bufs(struct vhost_net *net,
 static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len)
 {
return total_len < VHOST_NET_WEIGHT &&
-  !vhost_vq_avail_empty(vq->dev, vq);
+  !vhost_vq_avail_empty(vq);
 }
 
 #define SKB_FRAG_PAGE_ORDER get_order(32768)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 7f51c74d9aee..ec3534bcd51b 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2946,7 +2946,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
 EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
 
 /* return true if we're sure that avaiable ring is empty */
-bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+bool vhost_vq_avail_empty(struct vhost_virtqueue *vq)
 {
__virtio16 avail_idx;
int r;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 819296332913..e0451c900177 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -247,7 +247,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
   struct vring_used_elem *heads, unsigned count);
 void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
 void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
-bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
+bool vhost_vq_avail_empty(struct vhost_virtqueue *vq);
 bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
 
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
-- 
2.18.1



[PATCH V3 03/15] vhost: remove unnecessary parameter of vhost_enable_notify()/vhost_disable_notify

2019-07-17 Thread Jason Wang
Its dev parameter is not even used, so remove it.

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c   | 25 -
 drivers/vhost/scsi.c  | 12 ++--
 drivers/vhost/test.c  |  6 +++---
 drivers/vhost/vhost.c |  4 ++--
 drivers/vhost/vhost.h |  4 ++--
 drivers/vhost/vsock.c | 14 +++---
 6 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 7d34e8cbc89b..78d248574f8e 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -500,8 +500,8 @@ static void vhost_net_busy_poll_try_queue(struct vhost_net *net,
 {
if (!vhost_vq_avail_empty(vq)) {
	vhost_poll_queue(&vq->poll);
-   } else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
-   vhost_disable_notify(&net->dev, vq);
+   } else if (unlikely(vhost_enable_notify(vq))) {
+   vhost_disable_notify(vq);
	vhost_poll_queue(&vq->poll);
}
 }
@@ -524,7 +524,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
	if (!mutex_trylock(&vq->mutex))
return;
 
-   vhost_disable_notify(&net->dev, vq);
+   vhost_disable_notify(vq);
sock = rvq->private_data;
 
	busyloop_timeout = poll_rx ? rvq->busyloop_timeout:
				     tvq->busyloop_timeout;
@@ -552,7 +552,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
if (poll_rx || sock_has_rx_data(sock))
vhost_net_busy_poll_try_queue(net, vq);
else if (!poll_rx) /* On tx here, sock has no rx data. */
-   vhost_enable_notify(&net->dev, rvq);
+   vhost_enable_notify(rvq);
 
	mutex_unlock(&vq->mutex);
 }
@@ -788,9 +788,8 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
if (head == vq->num) {
if (unlikely(busyloop_intr)) {
	vhost_poll_queue(&vq->poll);
-   } else if (unlikely(vhost_enable_notify(&net->dev,
-   vq))) {
-   vhost_disable_notify(&net->dev, vq);
+   } else if (unlikely(vhost_enable_notify(vq))) {
+   vhost_disable_notify(vq);
continue;
}
break;
@@ -880,8 +879,8 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
if (head == vq->num) {
if (unlikely(busyloop_intr)) {
	vhost_poll_queue(&vq->poll);
-   } else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
-   vhost_disable_notify(&net->dev, vq);
-   vhost_disable_notify(>dev, vq);
+   } else if (unlikely(vhost_enable_notify(vq))) {
+   vhost_disable_notify(vq);
continue;
}
break;
@@ -960,7 +959,7 @@ static void handle_tx(struct vhost_net *net)
if (!vq_meta_prefetch(vq))
goto out;
 
-   vhost_disable_notify(&net->dev, vq);
+   vhost_disable_notify(vq);
vhost_net_disable_vq(net, vq);
 
if (vhost_sock_zcopy(sock))
@@ -1129,7 +1128,7 @@ static void handle_rx(struct vhost_net *net)
if (!vq_meta_prefetch(vq))
goto out;
 
-   vhost_disable_notify(&net->dev, vq);
+   vhost_disable_notify(vq);
vhost_net_disable_vq(net, vq);
 
vhost_hlen = nvq->vhost_hlen;
@@ -1156,10 +1155,10 @@ static void handle_rx(struct vhost_net *net)
if (!headcount) {
if (unlikely(busyloop_intr)) {
	vhost_poll_queue(&vq->poll);
-   } else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+   } else if (unlikely(vhost_enable_notify(vq))) {
/* They have slipped one in as we were
 * doing that: check again. */
-   vhost_disable_notify(&net->dev, vq);
+   vhost_disable_notify(vq);
continue;
}
/* Nothing new?  Wait for eventfd to tell us
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index a9caf1bc3c3e..8d4e87007a8d 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -458,7 +458,7 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
}
 
 again:
-   vhost_disable_notify(&vs->dev, vq);
+   vhost_disable_notify(vq);
head = vhost_get_vq_desc(vq, vq->iov,
			ARRAY_SIZE(vq->iov), &out, &in,
NULL, NULL);
@@ -467,7 +467,7 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
return;
}
if (head == vq->num) {
-   if (vhost_enable_notify(&vs->dev, vq))
+   if (vhost_enable_notify(vq))
   

[PATCH V3 10/15] vhost: hide used ring layout from device

2019-07-17 Thread Jason Wang
We used to return the descriptor head from vhost_get_vq_desc() to the device
and pass it back to vhost_add_used() and its friends. This exposes the
internal used ring layout to the device, which makes it hard to extend for
e.g. the packed ring layout.

So this patch tries to hide the used ring layout by

- letting vhost_get_vq_desc() return a pointer to struct vring_used_elem
- accepting a pointer to struct vring_used_elem in vhost_add_used() and
  vhost_add_used_and_signal()

This helps to hide the used ring layout and makes it easier to
implement the packed ring on top.
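
In other words, the calling convention changes roughly like this (a sketch of
the shape of the change only; the exact signatures are in the diff below):

	/* Before: the raw head leaks out, and "ring empty" is the magic
	 * value head == vq->num.
	 */
	head = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
				 &out, &in, NULL, NULL);
	if (head == tvq->num)
		/* nothing available */;

	/* After: the device holds an opaque used element, and "ring empty"
	 * becomes an error code instead.
	 */
	struct vring_used_elem used;

	r = vhost_get_vq_desc(tvq, &used, tvq->iov, ARRAY_SIZE(tvq->iov),
			      &out, &in, NULL, NULL);
	if (r == -ENOSPC)
		/* nothing available */;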

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c   | 88 ++-
 drivers/vhost/scsi.c  | 62 --
 drivers/vhost/vhost.c | 38 +++
 drivers/vhost/vhost.h | 11 +++---
 drivers/vhost/vsock.c | 43 +++--
 5 files changed, 129 insertions(+), 113 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9e087d08b199..572d80c8c36e 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -546,25 +546,28 @@ static void vhost_net_busy_poll(struct vhost_net *net,
 }
 
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
-   struct vhost_net_virtqueue *tnvq,
+   struct vring_used_elem *used_elem,
unsigned int *out_num, unsigned int *in_num,
struct msghdr *msghdr, bool *busyloop_intr)
 {
+   struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
	struct vhost_virtqueue *rvq = &rnvq->vq;
	struct vhost_virtqueue *tvq = &tnvq->vq;
 
-   int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+   int r = vhost_get_vq_desc(tvq, used_elem, tvq->iov,
+ ARRAY_SIZE(tvq->iov),
  out_num, in_num, NULL, NULL);
 
-   if (r == tvq->num && tvq->busyloop_timeout) {
+   if (r == -ENOSPC && tvq->busyloop_timeout) {
/* Flush batched packets first */
if (!vhost_sock_zcopy(tvq->private_data))
vhost_tx_batch(net, tnvq, tvq->private_data, msghdr);
 
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false);
 
-   r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+   r = vhost_get_vq_desc(tvq, used_elem, tvq->iov,
+ ARRAY_SIZE(tvq->iov),
  out_num, in_num, NULL, NULL);
}
 
@@ -593,6 +596,7 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
 }
 
 static int get_tx_bufs(struct vhost_net *net,
+  struct vring_used_elem *used_elem,
   struct vhost_net_virtqueue *nvq,
   struct msghdr *msg,
   unsigned int *out, unsigned int *in,
@@ -601,9 +605,10 @@ static int get_tx_bufs(struct vhost_net *net,
	struct vhost_virtqueue *vq = &nvq->vq;
int ret;
 
-   ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr);
+   ret = vhost_net_tx_get_vq_desc(net, used_elem, out, in,
+  msg, busyloop_intr);
 
-   if (ret < 0 || ret == vq->num)
+   if (ret < 0 || ret == -ENOSPC)
return ret;
 
if (*in) {
@@ -747,8 +752,8 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 {
	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
	struct vhost_virtqueue *vq = &nvq->vq;
+   struct vring_used_elem used;
unsigned out, in;
-   int head;
struct msghdr msg = {
.msg_name = NULL,
.msg_namelen = 0,
@@ -767,13 +772,11 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
if (vhost_get_shadow_used_count(vq) == VHOST_NET_BATCH)
	vhost_tx_batch(net, nvq, sock, &msg);
 
-		head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
-				   &busyloop_intr);
-		/* On error, stop handling until the next kick. */
-		if (unlikely(head < 0))
-			break;
+		err = get_tx_bufs(net, &used,
+				  nvq, &msg, &out, &in, &len,
+				  &busyloop_intr);
/* Nothing new?  Wait for eventfd to tell us they refilled. */
-   if (head == vq->num) {
+   if (err == -ENOSPC) {
if (unlikely(busyloop_intr)) {
	vhost_poll_queue(&vq->poll);
} else if (unlikely(vhost_enable_notify(vq))) {
@@ -782,7 +785,9 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
}
break;
}
-
+   /* On error, stop handling until the next kick. 

[PATCH V3 08/15] vhost_net: switch to use shadow used ring API for RX

2019-07-17 Thread Jason Wang
This patch switches to using the shadow used ring API for RX. This will help
to hide the used ring layout from the device.

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c | 37 +++--
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1a67f889cbc1..9e087d08b199 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -445,18 +445,6 @@ static int vhost_net_enable_vq(struct vhost_net *n,
return vhost_poll_start(poll, sock->file);
 }
 
-static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
-{
-   struct vhost_virtqueue *vq = &nvq->vq;
-   struct vhost_dev *dev = vq->dev;
-
-   if (!nvq->done_idx)
-   return;
-
-   vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
-   nvq->done_idx = 0;
-}
-
 static void vhost_tx_batch(struct vhost_net *net,
   struct vhost_net_virtqueue *nvq,
   struct socket *sock,
@@ -999,7 +987,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
 
if (!len && rvq->busyloop_timeout) {
/* Flush batched heads first */
-   vhost_net_signal_used(rnvq);
+   vhost_flush_shadow_used_and_signal(rvq);
/* Both tx vq and rx socket were polled here */
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
 
@@ -1020,7 +1008,6 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
  * returns number of buffer heads allocated, negative on error
  */
 static int get_rx_bufs(struct vhost_virtqueue *vq,
-  struct vring_used_elem *heads,
   int datalen,
   unsigned *iovcount,
   struct vhost_log *log,
@@ -1063,11 +1050,11 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
nlogs += *log_num;
log += *log_num;
}
-   heads[headcount].id = cpu_to_vhost32(vq, d);
len = iov_length(vq->iov + seg, in);
datalen -= len;
-   heads[headcount].len = cpu_to_vhost32(vq,
-  datalen >= 0 ? len : len + datalen);
+   vhost_add_shadow_used(vq, cpu_to_vhost32(vq, d),
+ cpu_to_vhost32(vq, datalen >= 0 ? len
+: len + datalen));
++headcount;
seg += in;
}
@@ -1082,7 +1069,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
}
return headcount;
 err:
-   vhost_discard_vq_desc(vq, headcount);
+   vhost_discard_shadow_used(vq, headcount);
return r;
 }
 
@@ -1141,8 +1128,7 @@ static void handle_rx(struct vhost_net *net)
break;
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
-		headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
-					vhost_len, &in, vq_log, &log,
+		headcount = get_rx_bufs(vq, vhost_len, &in, vq_log, &log,
likely(mergeable) ? UIO_MAXIOV : 1);
/* On error, stop handling until the next kick. */
if (unlikely(headcount < 0))
@@ -1189,7 +1175,7 @@ static void handle_rx(struct vhost_net *net)
if (unlikely(err != sock_len)) {
pr_debug("Discarded rx packet: "
 " len %d, expected %zd\n", err, sock_len);
-   vhost_discard_vq_desc(vq, headcount);
+   vhost_discard_shadow_used(vq, headcount);
continue;
}
/* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
@@ -1213,12 +1199,11 @@ static void handle_rx(struct vhost_net *net)
		    copy_to_iter(&num_buffers, sizeof num_buffers,
				 &fixup) != sizeof num_buffers) {
vq_err(vq, "Failed num_buffers write");
-   vhost_discard_vq_desc(vq, headcount);
+   vhost_discard_shadow_used(vq, headcount);
goto out;
}
-   nvq->done_idx += headcount;
-   if (nvq->done_idx > VHOST_NET_BATCH)
-   vhost_net_signal_used(nvq);
+   if (vhost_get_shadow_used_count(vq) > VHOST_NET_BATCH)
+   vhost_flush_shadow_used_and_signal(vq);
if (unlikely(vq_log))
vhost_log_write(vq, vq_log, log, vhost_len,
vq->iov, in);
@@ -1230,7 +1215,7 @@ static void handle_rx(struct vhost_net *net)
else if (!sock_len)
vhost_net_enable_vq(net, vq);
 out:
-   vhost_net_signal_used(nvq);
+   

[PATCH V3 11/15] vhost: do not use vring_used_elem

2019-07-17 Thread Jason Wang
Instead of depending on the exported vring_used_elem, this patch
switches to using a new internal structure, vhost_used_elem, which embeds
vring_used_elem in itself. This can be used to let vhost record
extra metadata for the incoming packed ring layout.
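
Presumably the wrapper looks something like this (a sketch; the actual
definition is in the vhost.h hunk of this patch, which the archive
truncated):

struct vhost_used_elem {
	struct vring_used_elem elem;	/* id/len as exposed to the guest */
	/* plus vhost-private metadata needed for the packed ring, e.g.
	 * how many descriptors the buffer occupied
	 */
};

Callers then reach the guest-visible fields through .elem, as the
vc->head.id -> vc->head.elem.id change in the scsi hunk below shows.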

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c   | 10 +-
 drivers/vhost/scsi.c  |  8 
 drivers/vhost/vhost.c | 38 +++---
 drivers/vhost/vhost.h | 21 +++--
 drivers/vhost/vsock.c |  4 ++--
 5 files changed, 49 insertions(+), 32 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 572d80c8c36e..7c2f320930c7 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -546,7 +546,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
 }
 
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
-   struct vring_used_elem *used_elem,
+   struct vhost_used_elem *used_elem,
unsigned int *out_num, unsigned int *in_num,
struct msghdr *msghdr, bool *busyloop_intr)
 {
@@ -596,7 +596,7 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
 }
 
 static int get_tx_bufs(struct vhost_net *net,
-  struct vring_used_elem *used_elem,
+  struct vhost_used_elem *used_elem,
   struct vhost_net_virtqueue *nvq,
   struct msghdr *msg,
   unsigned int *out, unsigned int *in,
@@ -752,7 +752,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 {
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
-   struct vring_used_elem used;
+   struct vhost_used_elem used;
unsigned out, in;
struct msghdr msg = {
.msg_name = NULL,
@@ -847,7 +847,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
.msg_flags = MSG_DONTWAIT,
};
struct tun_msg_ctl ctl;
-   struct vring_used_elem used;
+   struct vhost_used_elem used;
size_t len, total_len = 0;
int err;
struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
@@ -1027,7 +1027,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 * datalen > 0.
 */
u32 uninitialized_var(len);
-   struct vring_used_elem uninitialized_var(used);
+   struct vhost_used_elem uninitialized_var(used);
 
while (datalen > 0 && headcount < quota) {
if (unlikely(seg >= UIO_MAXIOV)) {
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 4a5a75ab25ad..42c32612dc32 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -72,7 +72,7 @@ struct vhost_scsi_inflight {
 
 struct vhost_scsi_cmd {
/* Descriptor from vhost_get_vq_desc() for virt_queue segment */
-   struct vring_used_elem tvc_vq_used;
+   struct vhost_used_elem tvc_vq_used;
/* virtio-scsi initiator task attribute */
int tvc_task_attr;
/* virtio-scsi response incoming iovecs */
@@ -213,7 +213,7 @@ struct vhost_scsi {
  * Context for processing request and control queue operations.
  */
 struct vhost_scsi_ctx {
-   struct vring_used_elem head;
+   struct vhost_used_elem head;
unsigned int out, in;
size_t req_size, rsp_size;
size_t out_size, in_size;
@@ -449,7 +449,7 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
struct virtio_scsi_event *event = &evt->event;
struct virtio_scsi_event __user *eventp;
-   struct vring_used_elem used;
+   struct vhost_used_elem used;
unsigned out, in;
int ret;
 
@@ -821,7 +821,7 @@ vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
NULL, NULL);
 
pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
-vc->head.id, vc->out, vc->in);
+vc->head.elem.id, vc->out, vc->in);
 
/* Nothing new?  Wait for eventfd to tell us they refilled. */
if (ret == -ENOSPC) {
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index dbe4db0179a5..6044cdea124f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2651,7 +2651,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
  * never a valid descriptor number) if none was found.  A negative code is
 * returned on error. */
 int vhost_get_vq_desc(struct vhost_virtqueue *vq,
- struct vring_used_elem *used,
+ struct vhost_used_elem *used,
  struct iovec iov[], unsigned int iov_size,
  unsigned int *out_num, unsigned int *in_num,
  struct vhost_log *log, unsigned int *log_num)
@@ -2701,7 +2701,7 

[PATCH V3 04/15] vhost-net: don't use vhost_add_used_n() for zerocopy

2019-07-17 Thread Jason Wang
We tried to use vhost_add_used_n() for packets that are not
zero-copied. This can help to mitigate the head-of-line blocking issue
but is not a total solution. What's more, it may lead to out-of-order
completion and cause extra complexity for the packed virtqueue
implementation, which needs to maintain wrap counters.

So this patch switches to consistently using vq->heads[] to maintain
heads. This will ease the introduction of the zerocopy shadow used
ring API and reduce the complexity for packed virtqueues.

After this, vhost_net becomes an in-order device.
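
In concrete terms (summarizing the diff below): every submitted packet
now claims the next slot in vq->heads[] at upend_idx; copied packets
are marked VHOST_DMA_DONE_LEN right away, while zerocopy ones stay
VHOST_DMA_IN_PROGRESS until the DMA callback fires, so
vhost_zerocopy_signal_used() can flush completions strictly from
done_idx upward in submission order.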

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 78d248574f8e..ac31983d2d77 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -894,9 +894,6 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
if (zcopy_used) {
struct ubuf_info *ubuf;
ubuf = nvq->ubuf_info + nvq->upend_idx;
-
-   vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
-   vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx;
@@ -907,11 +904,14 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
msg.msg_controllen = sizeof(ctl);
ubufs = nvq->ubufs;
atomic_inc(&ubufs->refcount);
-   nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
} else {
msg.msg_control = NULL;
ubufs = NULL;
}
+   vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
+   vq->heads[nvq->upend_idx].len = zcopy_used ?
+VHOST_DMA_IN_PROGRESS : VHOST_DMA_DONE_LEN;
+   nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
total_len += len;
if (tx_can_batch(vq, total_len) &&
likely(!vhost_exceeds_maxpend(net))) {
@@ -923,11 +923,10 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
/* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(sock, &msg, len);
if (unlikely(err < 0)) {
-   if (zcopy_used) {
+   if (zcopy_used)
vhost_net_ubuf_put(ubufs);
-   nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
-   % UIO_MAXIOV;
-   }
+   nvq->upend_idx = ((unsigned int)nvq->upend_idx - 1)
+% UIO_MAXIOV;
vhost_discard_vq_desc(vq, 1);
vhost_net_enable_vq(net, vq);
break;
@@ -935,10 +934,8 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
if (err != len)
pr_debug("Truncated TX packet: "
 " len %d != %zd\n", err, len);
-   if (!zcopy_used)
-   vhost_add_used_and_signal(&net->dev, vq, head, 0);
-   else
-   vhost_zerocopy_signal_used(net, vq);
+
+   vhost_zerocopy_signal_used(net, vq);
vhost_net_tx_packet(net);
} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
 }
-- 
2.18.1



[PATCH V3 01/15] vhost: simplify meta data pointer accessing

2019-07-17 Thread Jason Wang
Instead of open coding meta data pointer accesses through the caches,
this patch introduces a vhost_get_meta_ptr()/vhost_put_meta_ptr() pair
to reduce code duplication and simplify the callers' implementation.
This also helps to reduce the LOC for packed virtqueues.
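
One detail worth spelling out (a summary of the hunks below, not new
code): on success, vhost_get_meta_ptr() returns the kernel mapping with
the RCU read lock still held, and vhost_put_meta_ptr() drops it. A
caller therefore looks like:

	struct vring_used *used = vhost_get_meta_ptr(vq, VHOST_ADDR_USED);

	if (likely(used)) {
		/* direct access through the kernel VA mapping, under RCU */
		used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
		vhost_put_meta_ptr();	/* drops the RCU read lock */
		return 0;
	}
	/* otherwise fall back to the uaccess/IOTLB path */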

Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 207 ++
 1 file changed, 70 insertions(+), 137 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index dc9301d31f12..7f51c74d9aee 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1194,25 +1194,37 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
ret; \
 })
 
-static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
-{
 #if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void *vhost_get_meta_ptr(struct vhost_virtqueue *vq, int type)
+{
struct vhost_map *map;
-   struct vring_used *used;
 
if (!vq->iotlb) {
rcu_read_lock();
+   map = rcu_dereference(vq->maps[type]);
+   if (likely(map))
+   return map->addr;
+   rcu_read_unlock();
+   }
+   return NULL;
+}
 
-   map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-   if (likely(map)) {
-   used = map->addr;
-   *((__virtio16 *)&used->ring[vq->num]) =
-   cpu_to_vhost16(vq, vq->avail_idx);
-   rcu_read_unlock();
-   return 0;
-   }
+static void vhost_put_meta_ptr(void)
+{
+   rcu_read_unlock();
+}
+#endif
 
-   rcu_read_unlock();
+static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
+{
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+   struct vring_used *used = vhost_get_meta_ptr(vq, VHOST_ADDR_USED);
+
+   if (likely(used)) {
+   *((__virtio16 *)&used->ring[vq->num]) =
+   cpu_to_vhost16(vq, vq->avail_idx);
+   vhost_put_meta_ptr();
+   return 0;
}
 #endif
 
@@ -1225,23 +1237,14 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 int count)
 {
 #if VHOST_ARCH_CAN_ACCEL_UACCESS
-   struct vhost_map *map;
-   struct vring_used *used;
+   struct vring_used *used = vhost_get_meta_ptr(vq, VHOST_ADDR_USED);
size_t size;
 
-   if (!vq->iotlb) {
-   rcu_read_lock();
-
-   map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-   if (likely(map)) {
-   used = map->addr;
-   size = count * sizeof(*head);
-   memcpy(used->ring + idx, head, size);
-   rcu_read_unlock();
-   return 0;
-   }
-
-   rcu_read_unlock();
+   if (likely(used)) {
+   size = count * sizeof(*head);
+   memcpy(used->ring + idx, head, size);
+   vhost_put_meta_ptr();
+   return 0;
}
 #endif
 
@@ -1253,21 +1256,12 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 
 {
 #if VHOST_ARCH_CAN_ACCEL_UACCESS
-   struct vhost_map *map;
-   struct vring_used *used;
-
-   if (!vq->iotlb) {
-   rcu_read_lock();
+   struct vring_used *used = vhost_get_meta_ptr(vq, VHOST_ADDR_USED);
 
-   map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-   if (likely(map)) {
-   used = map->addr;
-   used->flags = cpu_to_vhost16(vq, vq->used_flags);
-   rcu_read_unlock();
-   return 0;
-   }
-
-   rcu_read_unlock();
+   if (likely(used)) {
+   used->flags = cpu_to_vhost16(vq, vq->used_flags);
+   vhost_put_meta_ptr();
+   return 0;
}
 #endif
 
@@ -1279,21 +1273,12 @@ static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
 
 {
 #if VHOST_ARCH_CAN_ACCEL_UACCESS
-   struct vhost_map *map;
-   struct vring_used *used;
+   struct vring_used *used = vhost_get_meta_ptr(vq, VHOST_ADDR_USED);
 
-   if (!vq->iotlb) {
-   rcu_read_lock();
-
-   map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-   if (likely(map)) {
-   used = map->addr;
-   used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
-   rcu_read_unlock();
-   return 0;
-   }
-
-   rcu_read_unlock();
+   if (likely(used)) {
+   used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
+   vhost_put_meta_ptr();
+   return 0;
}
 #endif
 
@@ -1343,21 +1328,12 @@ static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
  __virtio16 *idx)
 {
 #if VHOST_ARCH_CAN_ACCEL_UACCESS
-   struct vhost_map *map;

[PATCH V3 15/15] vhost: enable packed virtqueues

2019-07-17 Thread Jason Wang
This patch enables packed virtqueue support for vhost.

Testpmd (virtio-user) + vhost_net shows about a 2.6% improvement in
TX PPS:

Before: 5.75Mpps
After : 5.90Mpps

Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index bb3f8bb763b9..5483eea84a5c 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -326,7 +326,8 @@ enum {
 (1ULL << VIRTIO_RING_F_EVENT_IDX) |
 (1ULL << VHOST_F_LOG_ALL) |
 (1ULL << VIRTIO_F_ANY_LAYOUT) |
-(1ULL << VIRTIO_F_VERSION_1)
+(1ULL << VIRTIO_F_VERSION_1) |
+(1ULL << VIRTIO_F_RING_PACKED)
 };
 
 static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
-- 
2.18.1



[PATCH V3 13/15] vhost: packed ring support

2019-07-17 Thread Jason Wang
This patch introduces basic support for the packed ring. The idea
behind the packed ring is to use a single descriptor ring instead of
three different rings (avail, used and descriptor). This helps to
reduce cache contention and the number of PCI transactions, so it was
designed to improve performance for both software and hardware
implementations.

The implementation is straightforward: packed versions of the vhost
core helpers (whose names carry a packed suffix) are introduced, and
the previous helpers are renamed with a split suffix. The exported
helpers then simply dispatch to the correct internal helpers.

The event suppression (device area and driver area) is not implemented
here. It will be done on top with another patch.

For more information on the packed ring, please refer to the Virtio
spec.
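
As background for review (a summary taken from the virtio 1.1 spec and
include/uapi/linux/virtio_ring.h, not part of the patch), the packed
layout keeps everything in one array of 16-byte descriptors, and
availability is tracked with per-ring wrap counters instead of an
avail index:

	struct vring_packed_desc {
		__le64 addr;	/* buffer address */
		__le32 len;	/* buffer length */
		__le16 id;	/* buffer id */
		__le16 flags;	/* AVAIL/USED plus NEXT/WRITE/INDIRECT bits */
	};

	/* Sketch of the check the device side performs: a descriptor is
	 * available when its AVAIL bit matches the driver's wrap counter
	 * and its USED bit does not (flags must be read before the other
	 * fields). */
	static bool desc_is_avail(const struct vring_packed_desc *desc,
				  bool wrap_counter)
	{
		__le16 flags = desc->flags;
		bool avail = flags & cpu_to_le16(1 << VRING_PACKED_DESC_F_AVAIL);
		bool used = flags & cpu_to_le16(1 << VRING_PACKED_DESC_F_USED);

		return avail == wrap_counter && used != wrap_counter;
	}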

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c   |   6 +-
 drivers/vhost/vhost.c | 980 ++
 drivers/vhost/vhost.h |  24 +-
 3 files changed, 925 insertions(+), 85 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 7c2f320930c7..ef79446b42f1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -799,7 +799,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
goto done;
} else if (unlikely(err != -ENOSPC)) {
vhost_tx_batch(net, nvq, sock, &msg);
-   vhost_discard_vq_desc(vq, 1);
+   vhost_discard_vq_desc(vq, &used);
vhost_net_enable_vq(net, vq);
break;
}
@@ -820,7 +820,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
/* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(sock, , len);
if (unlikely(err < 0)) {
-   vhost_discard_vq_desc(vq, 1);
+   vhost_discard_vq_desc(vq, &used);
vhost_net_enable_vq(net, vq);
break;
}
@@ -919,7 +919,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
vhost_net_ubuf_put(ubufs);
nvq->upend_idx = ((unsigned int)nvq->upend_idx - 1)
 % UIO_MAXIOV;
-   vhost_discard_vq_desc(vq, 1);
+   vhost_discard_vq_desc(vq, &used);
vhost_net_enable_vq(net, vq);
break;
}
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 3fa1adf2cb90..a7d24b9d5204 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -479,6 +479,9 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vhost_reset_is_le(vq);
vhost_disable_cross_endian(vq);
vq->busyloop_timeout = 0;
+   vq->last_used_wrap_counter = true;
+   vq->last_avail_wrap_counter = true;
+   vq->avail_wrap_counter = true;
vq->umem = NULL;
vq->iotlb = NULL;
vq->invalidate_count = 0;
@@ -551,7 +554,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 GFP_KERNEL);
vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log),
GFP_KERNEL);
-   vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
+   vq->heads = kmalloc_array(dev->iov_limit,
+ sizeof(struct vhost_used_elem),
  GFP_KERNEL);
if (!vq->indirect || !vq->log || !vq->heads)
goto err_nomem;
@@ -1406,8 +1410,8 @@ static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
return vhost_get_used(vq, *idx, &vq->used->idx);
 }
 
-static inline int vhost_get_desc(struct vhost_virtqueue *vq,
-struct vring_desc *desc, int idx)
+static inline int vhost_get_desc_split(struct vhost_virtqueue *vq,
+  struct vring_desc *desc, int idx)
 {
 #if VHOST_ARCH_CAN_ACCEL_UACCESS
struct vring_desc *d = vhost_get_meta_ptr(vq, VHOST_ADDR_DESC);
@@ -1422,6 +1426,104 @@ static inline int vhost_get_desc(struct vhost_virtqueue *vq,
return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
 }
 
+static inline int vhost_get_desc_packed(struct vhost_virtqueue *vq,
+   struct vring_packed_desc *desc, int idx)
+{
+   int ret;
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+   struct vring_packed_desc *d = vhost_get_meta_ptr(vq, VHOST_ADDR_DESC);
+
+   if (likely(d)) {
+   d += idx;
+
+   desc->flags = d->flags;
+
+   /* Make sure flags is seen before the rest fields of
+* the descriptor. */

[PATCH V3 12/15] vhost: vhost_put_user() can accept metadata type

2019-07-17 Thread Jason Wang
We assumed in the past that the used ring update is the only user of
vhost_put_user(). This may not be the case for the incoming packed
ring, which may update the descriptor ring for used descriptors as
well. So introduce a new type parameter.
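
For example (an illustration of the intent only; the packed callers
arrive in a later patch of this series, and desc/flags here are
illustrative locals):

	/* split ring: the write targets the used ring */
	vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
		       &vq->used->idx, VHOST_ADDR_USED);

	/* packed ring: used entries are written into the descriptor
	 * ring itself, so the access is classified as VHOST_ADDR_DESC */
	vhost_put_user(vq, cpu_to_vhost16(vq, flags),
		       &desc->flags, VHOST_ADDR_DESC);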

Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 6044cdea124f..3fa1adf2cb90 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1178,7 +1178,7 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
return __vhost_get_user_slow(vq, addr, size, type);
 }
 
-#define vhost_put_user(vq, x, ptr) \
+#define vhost_put_user(vq, x, ptr, type)   \
 ({ \
int ret = -EFAULT; \
if (!vq->iotlb) { \
@@ -1186,7 +1186,7 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
} else { \
__typeof__(ptr) to = \
(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
- sizeof(*ptr), VHOST_ADDR_USED); \
+ sizeof(*ptr), type); \
if (to != NULL) \
ret = __put_user(x, to); \
else \
@@ -1230,7 +1230,7 @@ static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
 #endif
 
return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
- vhost_avail_event(vq));
+ vhost_avail_event(vq), VHOST_ADDR_USED);
 }
 
 static inline int vhost_put_used(struct vhost_virtqueue *vq,
@@ -1267,7 +1267,7 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 #endif
 
return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
- &vq->used->flags);
+ &vq->used->flags, VHOST_ADDR_USED);
 }
 
 static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
@@ -1284,7 +1284,7 @@ static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
 #endif
 
return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
- &vq->used->idx);
+ &vq->used->idx, VHOST_ADDR_USED);
 }
 
 #define vhost_get_user(vq, x, ptr, type)   \
-- 
2.18.1



[PATCH V3 09/15] vhost: do not export vhost_add_used_n() and vhost_add_used_and_signal_n()

2019-07-17 Thread Jason Wang
We now require devices to use the shadow used ring API, so there is no
need to expose these helpers to devices.

Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 13 +++--
 drivers/vhost/vhost.h |  4 
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 5bfca5b76b05..50ba382f0981 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2883,8 +2883,9 @@ EXPORT_SYMBOL_GPL(vhost_get_shadow_used_count);
 
 /* After we've used one of their buffers, we tell them about it.  We'll then
  * want to notify the guest, using eventfd. */
-int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
-unsigned count)
+static int vhost_add_used_n(struct vhost_virtqueue *vq,
+   struct vring_used_elem *heads,
+   unsigned count)
 {
int start, n, r;
 
@@ -2988,14 +2989,14 @@ void vhost_add_used_and_signal(struct vhost_dev *dev,
 EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
 
 /* multi-buffer version of vhost_add_used_and_signal */
-void vhost_add_used_and_signal_n(struct vhost_dev *dev,
-struct vhost_virtqueue *vq,
-struct vring_used_elem *heads, unsigned count)
+static void vhost_add_used_and_signal_n(struct vhost_dev *dev,
+   struct vhost_virtqueue *vq,
+   struct vring_used_elem *heads,
+   unsigned count)
 {
vhost_add_used_n(vq, heads, count);
vhost_signal(dev, vq);
 }
-EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
 
 void vhost_flush_shadow_used_and_signal(struct vhost_virtqueue *vq)
 
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 175eb5ebf954..481baba20c3d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -240,13 +240,9 @@ void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
 
 int vhost_vq_init_access(struct vhost_virtqueue *);
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
-int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
-unsigned count);
 
 void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
   unsigned int id, int len);
-void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
-  struct vring_used_elem *heads, unsigned count);
 
 /* Zerocopy shadow used ring API */
 void vhost_set_zc_used_len(struct vhost_virtqueue *vq,
-- 
2.18.1



[PATCH V3 07/15] vhost_net: calculate last used length once for mergeable buffer

2019-07-17 Thread Jason Wang
This patch calculates the last used length once instead of twice.
This helps the conversion of the RX path to the shadow used ring API.
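
As a worked example (not from the patch): if 5000 bytes of payload
remain and the next buffer holds 4096 bytes, datalen becomes
5000 - 4096 = 904 > 0 and the full len = 4096 is recorded; on the
final buffer, with 900 bytes left, datalen becomes 900 - 4096 = -3196
and len + datalen = 900 is recorded, which is exactly what the old
code patched up afterwards via heads[headcount - 1].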

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index cf47e6e348f4..1a67f889cbc1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1065,12 +1065,12 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
}
heads[headcount].id = cpu_to_vhost32(vq, d);
len = iov_length(vq->iov + seg, in);
-   heads[headcount].len = cpu_to_vhost32(vq, len);
datalen -= len;
+   heads[headcount].len = cpu_to_vhost32(vq,
+  datalen >= 0 ? len : len + datalen);
++headcount;
seg += in;
}
-   heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
*iovcount = seg;
if (unlikely(log))
*log_num = nlogs;
-- 
2.18.1



[PATCH V3 06/15] vhost_net: switch TX to use shadow used ring API

2019-07-17 Thread Jason Wang
This patch switches the transmission path to the shadow used ring API.
This helps to hide the used ring layout from the device.
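
In device code the conversion is mechanical (before/after, condensed
from the hunks below):

	/* before: the device writes the used ring layout directly */
	vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
	vq->heads[nvq->done_idx].len = 0;
	++nvq->done_idx;

	/* after: the vhost core owns the layout and the batch counter */
	vhost_add_shadow_used(vq, cpu_to_vhost32(vq, head), 0);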

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c | 31 +++
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ac31983d2d77..cf47e6e348f4 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -361,22 +361,22 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
 {
struct vhost_net_virtqueue *nvq =
container_of(vq, struct vhost_net_virtqueue, vq);
-   int i, add;
+   int i, add, len;
int j = 0;
 
for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
-   if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
+   len = vhost_get_zc_used_len(vq, i);
+   if (len == VHOST_DMA_FAILED_LEN)
vhost_net_tx_err(net);
-   if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
-   vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
+   if (VHOST_DMA_IS_DONE(len)) {
+   vhost_set_zc_used_len(vq, i, VHOST_DMA_CLEAR_LEN);
++j;
} else
break;
}
while (j) {
add = min(UIO_MAXIOV - nvq->done_idx, j);
-   vhost_add_used_and_signal_n(vq->dev, vq,
-   &vq->heads[nvq->done_idx], add);
+   vhost_flush_zc_used_and_signal(vq, nvq->done_idx, add);
nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
j -= add;
}
@@ -391,8 +391,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_lock_bh();
 
/* set len to mark this desc buffers done DMA */
-   vq->heads[ubuf->desc].len = success ?
-   VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
+   vhost_set_zc_used_len(vq, ubuf->desc, success ?
+ VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN);
cnt = vhost_net_ubuf_put(ubufs);
 
/*
@@ -480,7 +480,7 @@ static void vhost_tx_batch(struct vhost_net *net,
}
 
 signal_used:
-   vhost_net_signal_used(nvq);
+   vhost_flush_shadow_used_and_signal(&nvq->vq);
nvq->batched_xdp = 0;
 }
 
@@ -776,7 +776,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
do {
bool busyloop_intr = false;
 
-   if (nvq->done_idx == VHOST_NET_BATCH)
+   if (vhost_get_shadow_used_count(vq) == VHOST_NET_BATCH)
vhost_tx_batch(net, nvq, sock, &msg);
 
head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
   &busyloop_intr);
@@ -835,9 +835,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
pr_debug("Truncated TX packet: len %d != %zd\n",
 err, len);
 done:
-   vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
-   vq->heads[nvq->done_idx].len = 0;
-   ++nvq->done_idx;
+   vhost_add_shadow_used(vq, cpu_to_vhost32(vq, head), 0);
} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
 
vhost_tx_batch(net, nvq, sock, );
@@ -908,9 +906,10 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
msg.msg_control = NULL;
ubufs = NULL;
}
-   vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
-   vq->heads[nvq->upend_idx].len = zcopy_used ?
-VHOST_DMA_IN_PROGRESS : VHOST_DMA_DONE_LEN;
+   vhost_set_zc_used(vq, nvq->upend_idx,
+ cpu_to_vhost32(vq, head),
+ zcopy_used ? VHOST_DMA_IN_PROGRESS :
+ VHOST_DMA_DONE_LEN);
nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
total_len += len;
if (tx_can_batch(vq, total_len) &&
-- 
2.18.1



[PATCH V3 00/15] Packed virtqueue support for vhost

2019-07-17 Thread Jason Wang
Hi all:

This series implements packed virtqueues as described at [1]. In this
version we try to address the performance regression seen with V2. The
root cause is that packed virtqueues need more userspace memory
accesses, which turn out to be very expensive. Thanks to the help of
7f466032dc9e ("vhost: access vq metadata through kernel virtual
address"), such overhead could be eliminated. So in this version, we
see about a 2% improvement in PPS for packed virtqueues.

More optimizations (e.g. IN_ORDER) are on the way.

Please review.

[1] https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-610007

This version was tested with:
- zercopy/datacopy
- mergeable buffer on/off
- TCP stream & virtio-user

Changes from V2:
- rebase on top of the vhost metadata acceleration series
- introduce shadow used ring API (see the sketch after the changelogs
  below)
- new SET_VRING_BASE/GET_VRING_BASE that take care of the wrap counter
  and index for both avail and used
- various tweaks

Changes from V1:
- drop uapi patch and use Tiwei's
- split the enablement of packed virtqueue into a separate patch

Changes from RFC V5:
- save unnecessary barriers during vhost_add_used_packed_n()
- more compact math for event idx
- fix failure of SET_VRING_BASE when avail_wrap_counter is true
- fix not copy avail_wrap_counter during GET_VRING_BASE
- introduce SET_VRING_USED_BASE/GET_VRING_USED_BASE for syncing
  last_used_idx
- rename used_wrap_counter to last_used_wrap_counter
- rebase to net-next

Changes from RFC V4:
- fix signalled_used index recording
- track avail index correctly
- various minor fixes

Changes from RFC V3:
- Fix math on event idx checking
- Sync last avail wrap counter through GET/SET_VRING_BASE
- remove desc_event prefix in the driver/device structure

Changes from RFC V2:
- do not use & in checking desc_event_flags
- off should be most significant bit
- remove the workaround of mergeable buffer for dpdk prototype
- id should be in the last descriptor in the chain
- keep _F_WRITE for write descriptor when adding used
- device flags updating should use ADDR_USED type
- return error on unexpected unavail descriptor in a chain
- return false in vhost_vq_avail_empty() if a descriptor is available
- track last seen avail_wrap_counter
- correctly examine available descriptor in get_indirect_packed()
- vhost_idx_diff should return u16 instead of bool

Changes from RFC V1:
- Refactor vhost used elem code to avoid open coding on used elem
- Event suppression support (compile test only).
- Indirect descriptor support (compile test only).
- Zerocopy support.
- vIOMMU support.
- SCSI/VSOCK support (compile test only).
- Fix several bugs
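
For reviewers new to the shadow used ring API (patches 5-9), here is a
condensed sketch of how a device consumes it, modelled on the
vhost_net RX conversion in this series (error handling trimmed;
recvmsg() stands in for the real socket call):

	for (;;) {
		headcount = get_rx_bufs(vq, vhost_len, &in, vq_log,
					&log, quota);
		if (headcount <= 0)
			break;		/* error, or ring empty */
		if (recvmsg() != sock_len) {
			/* roll back the heads shadowed for this packet */
			vhost_discard_shadow_used(vq, headcount);
			continue;
		}
		if (vhost_get_shadow_used_count(vq) > VHOST_NET_BATCH)
			vhost_flush_shadow_used_and_signal(vq);
	}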

Jason Wang (15):
  vhost: simplify meta data pointer accessing
  vhost: remove the unnecessary parameter of vhost_vq_avail_empty()
  vhost: remove unnecessary parameter of
vhost_enable_notify()/vhost_disable_notify
  vhost-net: don't use vhost_add_used_n() for zerocopy
  vhost: introduce helpers to manipulate shadow used ring
  vhost_net: switch TX to use shadow used ring API
  vhost_net: calculate last used length once for mergeable buffer
  vhost_net: switch to use shadow used ring API for RX
  vhost: do not export vhost_add_used_n() and
vhost_add_used_and_signal_n()
  vhost: hide used ring layout from device
  vhost: do not use vring_used_elem
  vhost: vhost_put_user() can accept metadata type
  vhost: packed ring support
  vhost: event suppression for packed ring
  vhost: enable packed virtqueues

 drivers/vhost/net.c   |  200 +++---
 drivers/vhost/scsi.c  |   72 +-
 drivers/vhost/test.c  |6 +-
 drivers/vhost/vhost.c | 1508 +++--
 drivers/vhost/vhost.h |   78 ++-
 drivers/vhost/vsock.c |   57 +-
 6 files changed, 1513 insertions(+), 408 deletions(-)

-- 
2.18.1



Re: [PATCH v3 2/3] drm: plumb attaching dev thru to prime_pin/unpin

2019-07-17 Thread Koenig, Christian
On 16.07.19 at 23:37, Rob Clark wrote:
> From: Rob Clark 
>
> Needed in the following patch for cache operations.

Well have you seen that those callbacks are deprecated?
>* Deprecated hook in favour of &drm_gem_object_funcs.pin.

>* Deprecated hook in favour of &drm_gem_object_funcs.unpin.
>

I would rather say if you want to extend something it would be better to 
switch over to the per GEM object functions first.
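
For reference, that route means populating struct drm_gem_object_funcs
and pointing obj->funcs at it when the BO is created, roughly like
this (a sketch only; the msm names are illustrative):

	static const struct drm_gem_object_funcs msm_gem_object_funcs = {
		.pin	= msm_gem_prime_pin,
		.unpin	= msm_gem_prime_unpin,
		/* ... vmap, get_sg_table, etc. ... */
	};

	/* at object init time */
	obj->funcs = &msm_gem_object_funcs;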

Regards,
Christian.

>
> Signed-off-by: Rob Clark 
> ---
> v3: rebased on drm-tip
>
>   drivers/gpu/drm/drm_gem.c   | 8 
>   drivers/gpu/drm/drm_internal.h  | 4 ++--
>   drivers/gpu/drm/drm_prime.c | 4 ++--
>   drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 4 ++--
>   drivers/gpu/drm/msm/msm_drv.h   | 4 ++--
>   drivers/gpu/drm/msm/msm_gem_prime.c | 4 ++--
>   drivers/gpu/drm/nouveau/nouveau_gem.h   | 4 ++--
>   drivers/gpu/drm/nouveau/nouveau_prime.c | 4 ++--
>   drivers/gpu/drm/qxl/qxl_prime.c | 4 ++--
>   drivers/gpu/drm/radeon/radeon_prime.c   | 4 ++--
>   drivers/gpu/drm/vgem/vgem_drv.c | 4 ++--
>   include/drm/drm_drv.h   | 5 ++---
>   12 files changed, 26 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
> index 84689ccae885..af2549c45027 100644
> --- a/drivers/gpu/drm/drm_gem.c
> +++ b/drivers/gpu/drm/drm_gem.c
> @@ -1215,22 +1215,22 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent,
>   obj->dev->driver->gem_print_info(p, indent, obj);
>   }
>   
> -int drm_gem_pin(struct drm_gem_object *obj)
> +int drm_gem_pin(struct drm_gem_object *obj, struct device *dev)
>   {
>   if (obj->funcs && obj->funcs->pin)
>   return obj->funcs->pin(obj);
>   else if (obj->dev->driver->gem_prime_pin)
> - return obj->dev->driver->gem_prime_pin(obj);
> + return obj->dev->driver->gem_prime_pin(obj, dev);
>   else
>   return 0;
>   }
>   
> -void drm_gem_unpin(struct drm_gem_object *obj)
> +void drm_gem_unpin(struct drm_gem_object *obj, struct device *dev)
>   {
>   if (obj->funcs && obj->funcs->unpin)
>   obj->funcs->unpin(obj);
>   else if (obj->dev->driver->gem_prime_unpin)
> - obj->dev->driver->gem_prime_unpin(obj);
> + obj->dev->driver->gem_prime_unpin(obj, dev);
>   }
>   
>   void *drm_gem_vmap(struct drm_gem_object *obj)
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index 51a2055c8f18..e64090373e3a 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -133,8 +133,8 @@ void drm_gem_release(struct drm_device *dev, struct drm_file *file_private);
>   void drm_gem_print_info(struct drm_printer *p, unsigned int indent,
>   const struct drm_gem_object *obj);
>   
> -int drm_gem_pin(struct drm_gem_object *obj);
> -void drm_gem_unpin(struct drm_gem_object *obj);
> +int drm_gem_pin(struct drm_gem_object *obj, struct device *dev);
> +void drm_gem_unpin(struct drm_gem_object *obj, struct device *dev);
>   void *drm_gem_vmap(struct drm_gem_object *obj);
>   void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr);
>   
> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
> index 189d980402ad..126860432ff9 100644
> --- a/drivers/gpu/drm/drm_prime.c
> +++ b/drivers/gpu/drm/drm_prime.c
> @@ -575,7 +575,7 @@ int drm_gem_map_attach(struct dma_buf *dma_buf,
>   {
>   struct drm_gem_object *obj = dma_buf->priv;
>   
> - return drm_gem_pin(obj);
> + return drm_gem_pin(obj, attach->dev);
>   }
>   EXPORT_SYMBOL(drm_gem_map_attach);
>   
> @@ -593,7 +593,7 @@ void drm_gem_map_detach(struct dma_buf *dma_buf,
>   {
>   struct drm_gem_object *obj = dma_buf->priv;
>   
> - drm_gem_unpin(obj);
> + drm_gem_unpin(obj, attach->dev);
>   }
>   EXPORT_SYMBOL(drm_gem_map_detach);
>   
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
> index a05292e8ed6f..67e69a5f00f2 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
> @@ -43,7 +43,7 @@ int etnaviv_gem_prime_mmap(struct drm_gem_object *obj,
>   return etnaviv_obj->ops->mmap(etnaviv_obj, vma);
>   }
>   
> -int etnaviv_gem_prime_pin(struct drm_gem_object *obj)
> +int etnaviv_gem_prime_pin(struct drm_gem_object *obj, struct device *dev)
>   {
>   if (!obj->import_attach) {
>   struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
> @@ -55,7 +55,7 @@ int etnaviv_gem_prime_pin(struct drm_gem_object *obj)
>   return 0;
>   }
>   
> -void etnaviv_gem_prime_unpin(struct drm_gem_object *obj)
> +void etnaviv_gem_prime_unpin(struct drm_gem_object *obj, struct device *dev)
>   {
>   if (!obj->import_attach) {
>   struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);

Re: [PATCH v6 11/18] drm/virtio: switch from ttm to gem shmem helpers

2019-07-17 Thread Chia-I Wu
On Tue, Jul 2, 2019 at 7:19 AM Gerd Hoffmann  wrote:
>
> virtio-gpu basically needs a sg_table for the bo, to tell the host where
> the backing pages for the object are.  So the gem shmem helpers are a
> perfect fit.  Some drm_gem_object_funcs need thin wrappers to update the
> host state, but otherwise the helpers handle everything just fine.
>
> Once the fencing was sorted the switch was surprisingly easy and for the
> most part just removing the ttm code.
>
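
For context, the fit is that the shmem helpers can already hand out
the sg_table the host needs. A sketch, assuming the 5.3-era helper API
(virtio_gpu_attach_pages() is a made-up name):

	static int virtio_gpu_attach_pages(struct virtio_gpu_object *bo)
	{
		struct sg_table *sgt;

		/* backing pages of the shmem BO, as an sg_table */
		sgt = drm_gem_shmem_get_sg_table(&bo->base.base);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		bo->pages = sgt; /* later sent via RESOURCE_ATTACH_BACKING */
		return 0;
	}
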
> v4: fix drm_gem_object_funcs name.
>
> Signed-off-by: Gerd Hoffmann 
> Acked-by: Daniel Vetter 
> ---
>  drivers/gpu/drm/virtio/virtgpu_drv.h|  52 +---
>  drivers/gpu/drm/virtio/virtgpu_drv.c|  20 +-
>  drivers/gpu/drm/virtio/virtgpu_gem.c|  16 +-
>  drivers/gpu/drm/virtio/virtgpu_ioctl.c  |  19 +-
>  drivers/gpu/drm/virtio/virtgpu_kms.c|   9 -
>  drivers/gpu/drm/virtio/virtgpu_object.c | 146 
>  drivers/gpu/drm/virtio/virtgpu_prime.c  |  37 ---
>  drivers/gpu/drm/virtio/virtgpu_ttm.c| 304 
>  drivers/gpu/drm/virtio/virtgpu_vq.c |  24 +-
>  drivers/gpu/drm/virtio/Kconfig  |   2 +-
>  drivers/gpu/drm/virtio/Makefile |   2 +-
>  11 files changed, 82 insertions(+), 549 deletions(-)
>  delete mode 100644 drivers/gpu/drm/virtio/virtgpu_ttm.c
>
> diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
> index 12168067a874..f8a586029400 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_drv.h
> +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
> @@ -33,14 +33,11 @@
>
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
>  #include 
> -#include 
> -#include 
> -#include 
> -#include 
>
>  #define DRIVER_NAME "virtio_gpu"
>  #define DRIVER_DESC "virtio GPU"
> @@ -68,21 +65,16 @@ struct virtio_gpu_object_params {
>  };
>
>  struct virtio_gpu_object {
> -   struct drm_gem_object gem_base;
> +   struct drm_gem_shmem_object base;
> uint32_t hw_res_handle;
>
> struct sg_table *pages;
> uint32_t mapped;
> -   void *vmap;
> bool dumb;
> -   struct ttm_placeplacement_code;
> -   struct ttm_placementplacement;
> -   struct ttm_buffer_objecttbo;
> -   struct ttm_bo_kmap_obj  kmap;
> bool created;
>  };
>  #define gem_to_virtio_gpu_obj(gobj) \
> -   container_of((gobj), struct virtio_gpu_object, gem_base)
> +   container_of((gobj), struct virtio_gpu_object, base.base)
>
>  struct virtio_gpu_object_array {
> struct ww_acquire_ctx ticket;
> @@ -153,10 +145,6 @@ struct virtio_gpu_framebuffer {
>  #define to_virtio_gpu_framebuffer(x) \
> container_of(x, struct virtio_gpu_framebuffer, base)
>
> -struct virtio_gpu_mman {
> -   struct ttm_bo_devicebdev;
> -};
> -
>  struct virtio_gpu_queue {
> struct virtqueue *vq;
> spinlock_t qlock;
> @@ -185,8 +173,6 @@ struct virtio_gpu_device {
>
> struct virtio_device *vdev;
>
> -   struct virtio_gpu_mman mman;
> -
> struct virtio_gpu_output outputs[VIRTIO_GPU_MAX_SCANOUTS];
> uint32_t num_scanouts;
>
> @@ -357,11 +343,6 @@ struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev,
> enum drm_plane_type type,
> int index);
>
> -/* virtio_gpu_ttm.c */
> -int virtio_gpu_ttm_init(struct virtio_gpu_device *vgdev);
> -void virtio_gpu_ttm_fini(struct virtio_gpu_device *vgdev);
> -int virtio_gpu_mmap(struct file *filp, struct vm_area_struct *vma);
> -
>  /* virtio_gpu_fence.c */
>  bool virtio_fence_signaled(struct dma_fence *f);
>  struct virtio_gpu_fence *virtio_gpu_fence_alloc(
> @@ -373,58 +354,47 @@ void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev,
> u64 last_seq);
>
>  /* virtio_gpu_object */
> +struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev,
> +   size_t size);
>  int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
>  struct virtio_gpu_object_params *params,
>  struct virtio_gpu_object **bo_ptr,
>  struct virtio_gpu_fence *fence);
> -void virtio_gpu_object_kunmap(struct virtio_gpu_object *bo);
> -int virtio_gpu_object_kmap(struct virtio_gpu_object *bo);
> -int virtio_gpu_object_get_sg_table(struct virtio_gpu_device *qdev,
> -  struct virtio_gpu_object *bo);
> -void virtio_gpu_object_free_sg_table(struct virtio_gpu_object *bo);
>
>  /* virtgpu_prime.c */
> -struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
>  struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
> struct drm_device *dev, struct dma_buf_attachment *attach,
> struct sg_table *sgt);
> -void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj);
> -void