Re: [PATCH net V4] vhost: log dirty page correctly

2019-01-18 Thread Michael S. Tsirkin
On Wed, Jan 16, 2019 at 04:54:42PM +0800, Jason Wang wrote:
> Vhost dirty page logging API is designed to sync through GPA. But we
> try to log GIOVA when device IOTLB is enabled. This is wrong and may
> lead to missing data after migration.
> 
> To solve this issue, when logging with device IOTLB enabled, we will:
> 
> 1) reuse the device IOTLB translation result of GIOVA->HVA mapping to
>get HVA, for writable descriptor, get HVA through iovec. For used
>ring update, translate its GIOVA to HVA
> 2) traverse the GPA->HVA mapping to get the possible GPA and log
>through GPA. Pay attention this reverse mapping is not guaranteed
>to be unique, so we should log each possible GPA in this case.
> 
> This fix the failure of scp to guest during migration. In -next, we
> will probably support passing GIOVA->GPA instead of GIOVA->HVA.
> 
> Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
> Reported-by: Jintack Lim 
> Cc: Jintack Lim 
> Signed-off-by: Jason Wang 

This one looks good to me

Acked-by: Michael S. Tsirkin 

> ---
> Changes from V3:
> - make sure each part of the hva was logged when crossing the boundary
>   of memory regions
> Changes from V2:
> - check and log the case of range overlap
> - remove unnecessary u64 cast
> - use smp_wmb() for the case of device IOTLB as well
> Changes from V1:
> - return error instead of warn
> ---
>  drivers/vhost/net.c   |  3 +-
>  drivers/vhost/vhost.c | 97 ---
>  drivers/vhost/vhost.h |  3 +-
>  3 files changed, 87 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 36f3d0f49e60..bca86bf7189f 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -1236,7 +1236,8 @@ static void handle_rx(struct vhost_net *net)
>   if (nvq->done_idx > VHOST_NET_BATCH)
>   vhost_net_signal_used(nvq);
>   if (unlikely(vq_log))
> - vhost_log_write(vq, vq_log, log, vhost_len);
> + vhost_log_write(vq, vq_log, log, vhost_len,
> + vq->iov, in);
>   total_len += vhost_len;
>   if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
>   vhost_poll_queue(&vq->poll);
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 9f7942cbcbb2..babbb32b9bf0 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -1733,13 +1733,87 @@ static int log_write(void __user *log_base,
>   return r;
>  }
>  
> +static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
> +{
> + struct vhost_umem *umem = vq->umem;
> + struct vhost_umem_node *u;
> + u64 start, end, l, min;
> + int r;
> + bool hit = false;
> +
> + while (len) {
> + min = len;
> + /* More than one GPAs can be mapped into a single HVA. So
> +  * iterate all possible umems here to be safe.
> +  */
> + list_for_each_entry(u, &umem->umem_list, link) {
> + if (u->userspace_addr > hva - 1 + len ||
> + u->userspace_addr - 1 + u->size < hva)
> + continue;
> + start = max(u->userspace_addr, hva);
> + end = min(u->userspace_addr - 1 + u->size,
> +   hva - 1 + len);
> + l = end - start + 1;
> + r = log_write(vq->log_base,
> +   u->start + start - u->userspace_addr,
> +   l);
> + if (r < 0)
> + return r;
> + hit = true;
> + min = min(l, min);
> + }
> +
> + if (!hit)
> + return -EFAULT;
> +
> + len -= min;
> + hva += min;
> + }
> +
> + return 0;
> +}
> +
> +static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
> +{
> + struct iovec iov[64];
> + int i, ret;
> +
> + if (!vq->iotlb)
> + return log_write(vq->log_base, vq->log_addr + used_offset, len);
> +
> + ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
> +  len, iov, 64, VHOST_ACCESS_WO);
> + if (ret)
> + return ret;
> +
> + for (i = 0; i < ret; i++) {
> + ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
> + iov[i].iov_len);
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
>  int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
> - unsigned int log_num, u64 len)
> + unsigned int log_num, u64 len, struct iovec *iov, int count)
>  {
>   int i, r;
>  
>   /* Make sure data written is seen before log. */
>   smp_wmb();
> +
> + if (vq->iotlb) {
> + 

Re: [PATCH net V4] vhost: log dirty page correctly

2019-01-18 Thread David Miller
From: Jason Wang 
Date: Wed, 16 Jan 2019 16:54:42 +0800

> Vhost dirty page logging API is designed to sync through GPA. But we
> try to log GIOVA when device IOTLB is enabled. This is wrong and may
> lead to missing data after migration.
> 
> To solve this issue, when logging with device IOTLB enabled, we will:
> 
> 1) reuse the device IOTLB translation result of GIOVA->HVA mapping to
>get HVA, for writable descriptor, get HVA through iovec. For used
>ring update, translate its GIOVA to HVA
> 2) traverse the GPA->HVA mapping to get the possible GPA and log
>through GPA. Pay attention this reverse mapping is not guaranteed
>to be unique, so we should log each possible GPA in this case.
> 
> This fix the failure of scp to guest during migration. In -next, we
> will probably support passing GIOVA->GPA instead of GIOVA->HVA.
> 
> Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
> Reported-by: Jintack Lim 
> Cc: Jintack Lim 
> Signed-off-by: Jason Wang 

Applied and queued up for -stable.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net V4] vhost: log dirty page correctly

2019-01-17 Thread David Miller
From: Jason Wang 
Date: Wed, 16 Jan 2019 16:54:42 +0800

> Vhost dirty page logging API is designed to sync through GPA. But we
> try to log GIOVA when device IOTLB is enabled. This is wrong and may
> lead to missing data after migration.
> 
> To solve this issue, when logging with device IOTLB enabled, we will:
> 
> 1) reuse the device IOTLB translation result of GIOVA->HVA mapping to
>get HVA, for writable descriptor, get HVA through iovec. For used
>ring update, translate its GIOVA to HVA
> 2) traverse the GPA->HVA mapping to get the possible GPA and log
>through GPA. Pay attention this reverse mapping is not guaranteed
>to be unique, so we should log each possible GPA in this case.
> 
> This fix the failure of scp to guest during migration. In -next, we
> will probably support passing GIOVA->GPA instead of GIOVA->HVA.
> 
> Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
> Reported-by: Jintack Lim 
> Cc: Jintack Lim 
> Signed-off-by: Jason Wang 

Michael, can I get a review for this please?

Thank you.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net V4] vhost: log dirty page correctly

2019-01-16 Thread Jason Wang
Vhost dirty page logging API is designed to sync through GPA. But we
try to log GIOVA when device IOTLB is enabled. This is wrong and may
lead to missing data after migration.

To solve this issue, when logging with device IOTLB enabled, we will:

1) reuse the device IOTLB translation result of GIOVA->HVA mapping to
   get HVA, for writable descriptor, get HVA through iovec. For used
   ring update, translate its GIOVA to HVA
2) traverse the GPA->HVA mapping to get the possible GPA and log
   through GPA. Pay attention this reverse mapping is not guaranteed
   to be unique, so we should log each possible GPA in this case.

This fix the failure of scp to guest during migration. In -next, we
will probably support passing GIOVA->GPA instead of GIOVA->HVA.

Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
Reported-by: Jintack Lim 
Cc: Jintack Lim 
Signed-off-by: Jason Wang 
---
Changes from V3:
- make sure each part of the hva was logged when crossing the boundary
  of memory regions
Changes from V2:
- check and log the case of range overlap
- remove unnecessary u64 cast
- use smp_wmb() for the case of device IOTLB as well
Changes from V1:
- return error instead of warn
---
 drivers/vhost/net.c   |  3 +-
 drivers/vhost/vhost.c | 97 ---
 drivers/vhost/vhost.h |  3 +-
 3 files changed, 87 insertions(+), 16 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 36f3d0f49e60..bca86bf7189f 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1236,7 +1236,8 @@ static void handle_rx(struct vhost_net *net)
if (nvq->done_idx > VHOST_NET_BATCH)
vhost_net_signal_used(nvq);
if (unlikely(vq_log))
-   vhost_log_write(vq, vq_log, log, vhost_len);
+   vhost_log_write(vq, vq_log, log, vhost_len,
+   vq->iov, in);
total_len += vhost_len;
if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
vhost_poll_queue(&vq->poll);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9f7942cbcbb2..babbb32b9bf0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1733,13 +1733,87 @@ static int log_write(void __user *log_base,
return r;
 }
 
+static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
+{
+   struct vhost_umem *umem = vq->umem;
+   struct vhost_umem_node *u;
+   u64 start, end, l, min;
+   int r;
+   bool hit = false;
+
+   while (len) {
+   min = len;
+   /* More than one GPAs can be mapped into a single HVA. So
+* iterate all possible umems here to be safe.
+*/
+   list_for_each_entry(u, &umem->umem_list, link) {
+   if (u->userspace_addr > hva - 1 + len ||
+   u->userspace_addr - 1 + u->size < hva)
+   continue;
+   start = max(u->userspace_addr, hva);
+   end = min(u->userspace_addr - 1 + u->size,
+ hva - 1 + len);
+   l = end - start + 1;
+   r = log_write(vq->log_base,
+ u->start + start - u->userspace_addr,
+ l);
+   if (r < 0)
+   return r;
+   hit = true;
+   min = min(l, min);
+   }
+
+   if (!hit)
+   return -EFAULT;
+
+   len -= min;
+   hva += min;
+   }
+
+   return 0;
+}
+
+static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
+{
+   struct iovec iov[64];
+   int i, ret;
+
+   if (!vq->iotlb)
+   return log_write(vq->log_base, vq->log_addr + used_offset, len);
+
+   ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
+len, iov, 64, VHOST_ACCESS_WO);
+   if (ret)
+   return ret;
+
+   for (i = 0; i < ret; i++) {
+   ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
+   iov[i].iov_len);
+   if (ret)
+   return ret;
+   }
+
+   return 0;
+}
+
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
-   unsigned int log_num, u64 len)
+   unsigned int log_num, u64 len, struct iovec *iov, int count)
 {
int i, r;
 
/* Make sure data written is seen before log. */
smp_wmb();
+
+   if (vq->iotlb) {
+   for (i = 0; i < count; i++) {
+   r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
+ iov[i].iov_len);
+   if (r < 0)
+   return r;
+