Re: [PATCH RFC 4/5] vhost-vdpa: support IOTLB batching hints

2020-06-29 Thread Jason Wang


On 2020/6/29 下午11:49, Michael S. Tsirkin wrote:

On Mon, Jun 29, 2020 at 05:26:03PM +0800, Jason Wang wrote:

On 2020/6/28 下午5:58, Michael S. Tsirkin wrote:

On Thu, Jun 18, 2020 at 01:56:25PM +0800, Jason Wang wrote:

This patches extend the vhost IOTLB API to accept batch updating hints
form userspace. When userspace wants update the device IOTLB in a
batch, it may do:

1) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_BEGIN flag
2) Perform a batch of IOTLB updating via VHOST_IOTLB_UPDATE/INVALIDATE
3) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_END flag

As long as we are extending the interface,
is there some way we could cut down the number of system calls needed
here?


I'm not sure it's worth to do that since usually we only have less than 10
regions.


Well with a guest iommu I'm guessing it can go up significantly, right?



The batching flag is not mandatory, so userspace can still choose to 
update a single IOTLB mapping through a single system call without batch 
flag.






A possible method is to carry multiple vhost_iotlb_message in one system
call.

That's an option.
Also, can kernel handle a batch that is too large by applying
parts of it iteratively?



I think so, we can iterate a vhost iotlb message one by one through iov 
iterator.




Or must all changes take place atomically after BATCH_END?



For changes:

- if you mean the changes in vhost IOTLB, it's not atomically
- if you mean the mapping seen by vDPA device driver, it could be 
atomically for the device driver that implements set_map() method, since 
we pass a interval tree to the device.




If atomically, we might need to limit batch size to make
sure kernel can keep a copy in memory.



It's the responsibility of guest driver to make sure there won't be a 
race condition between DMA and map updating. So it looks to me we don't 
need to care about that.









Vhost-vdpa may decide to batch the IOMMU/IOTLB updating in step 3 when
vDPA device support set_map() ops. This is useful for the vDPA device
that want to know all the mappings to tweak their own DMA translation
logic.

For vDPA device that doesn't require set_map(), no behavior changes.

This capability is advertised via VHOST_BACKEND_F_IOTLB_BATCH capability.

Signed-off-by: Jason Wang 
---
   drivers/vhost/vdpa.c | 30 +++---
   include/uapi/linux/vhost.h   |  2 ++
   include/uapi/linux/vhost_types.h |  7 +++
   3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 453057421f80..8f624bbafee7 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -56,7 +56,9 @@ enum {
   };
   enum {
-   VHOST_VDPA_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
+   VHOST_VDPA_BACKEND_FEATURES =
+   (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
+   (1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
   };
   /* Currently, only network backend w/o multiqueue is supported. */
@@ -77,6 +79,7 @@ struct vhost_vdpa {
int virtio_id;
int minor;
struct eventfd_ctx *config_ctx;
+   int in_batch;
   };
   static DEFINE_IDA(vhost_vdpa_ida);
@@ -125,6 +128,7 @@ static void vhost_vdpa_reset(struct vhost_vdpa *v)
const struct vdpa_config_ops *ops = vdpa->config;
ops->set_status(vdpa, 0);
+   v->in_batch = 0;
   }
   static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
@@ -540,9 +544,10 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
if (ops->dma_map)
r = ops->dma_map(vdpa, iova, size, pa, perm);
-   else if (ops->set_map)
-   r = ops->set_map(vdpa, dev->iotlb);
-   else
+   else if (ops->set_map) {
+   if (!v->in_batch)
+   r = ops->set_map(vdpa, dev->iotlb);
+   } else
r = iommu_map(v->domain, iova, pa, size,
  perm_to_iommu_flags(perm));
@@ -559,9 +564,10 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 
iova, u64 size)
if (ops->dma_map)
ops->dma_unmap(vdpa, iova, size);
-   else if (ops->set_map)
-   ops->set_map(vdpa, dev->iotlb);
-   else
+   else if (ops->set_map) {
+   if (!v->in_batch)
+   ops->set_map(vdpa, dev->iotlb);
+   } else
iommu_unmap(v->domain, iova, size);
   }
@@ -655,6 +661,8 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev 
*dev,
struct vhost_iotlb_msg *msg)
   {
struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
+   struct vdpa_device *vdpa = v->vdpa;
+   const struct vdpa_config_ops *ops = vdpa->config;
int r = 0;
r = vhost_dev_check_owner(dev);
@@ -668,6 +676,14 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev 
*dev,
case VHOST_IOTLB_INVALIDATE:
vhost_vdpa_unmap(v, msg->iova, msg->size);
break;
+  

Re: [PATCH RFC 4/5] vhost-vdpa: support IOTLB batching hints

2020-06-29 Thread Michael S. Tsirkin
On Mon, Jun 29, 2020 at 05:26:03PM +0800, Jason Wang wrote:
> 
> On 2020/6/28 下午5:58, Michael S. Tsirkin wrote:
> > On Thu, Jun 18, 2020 at 01:56:25PM +0800, Jason Wang wrote:
> > > This patches extend the vhost IOTLB API to accept batch updating hints
> > > form userspace. When userspace wants update the device IOTLB in a
> > > batch, it may do:
> > > 
> > > 1) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_BEGIN flag
> > > 2) Perform a batch of IOTLB updating via VHOST_IOTLB_UPDATE/INVALIDATE
> > > 3) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_END flag
> > As long as we are extending the interface,
> > is there some way we could cut down the number of system calls needed
> > here?
> 
> 
> I'm not sure it's worth to do that since usually we only have less than 10
> regions.


Well with a guest iommu I'm guessing it can go up significantly, right?

> A possible method is to carry multiple vhost_iotlb_message in one system
> call.

That's an option.
Also, can kernel handle a batch that is too large by applying
parts of it iteratively?
Or must all changes take place atomically after BATCH_END?
If atomically, we might need to limit batch size to make
sure kernel can keep a copy in memory.


> 
> > 
> > 
> > > Vhost-vdpa may decide to batch the IOMMU/IOTLB updating in step 3 when
> > > vDPA device support set_map() ops. This is useful for the vDPA device
> > > that want to know all the mappings to tweak their own DMA translation
> > > logic.
> > > 
> > > For vDPA device that doesn't require set_map(), no behavior changes.
> > > 
> > > This capability is advertised via VHOST_BACKEND_F_IOTLB_BATCH capability.
> > > 
> > > Signed-off-by: Jason Wang 
> > > ---
> > >   drivers/vhost/vdpa.c | 30 +++---
> > >   include/uapi/linux/vhost.h   |  2 ++
> > >   include/uapi/linux/vhost_types.h |  7 +++
> > >   3 files changed, 32 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > > index 453057421f80..8f624bbafee7 100644
> > > --- a/drivers/vhost/vdpa.c
> > > +++ b/drivers/vhost/vdpa.c
> > > @@ -56,7 +56,9 @@ enum {
> > >   };
> > >   enum {
> > > - VHOST_VDPA_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
> > > + VHOST_VDPA_BACKEND_FEATURES =
> > > + (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
> > > + (1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
> > >   };
> > >   /* Currently, only network backend w/o multiqueue is supported. */
> > > @@ -77,6 +79,7 @@ struct vhost_vdpa {
> > >   int virtio_id;
> > >   int minor;
> > >   struct eventfd_ctx *config_ctx;
> > > + int in_batch;
> > >   };
> > >   static DEFINE_IDA(vhost_vdpa_ida);
> > > @@ -125,6 +128,7 @@ static void vhost_vdpa_reset(struct vhost_vdpa *v)
> > >   const struct vdpa_config_ops *ops = vdpa->config;
> > >   ops->set_status(vdpa, 0);
> > > + v->in_batch = 0;
> > >   }
> > >   static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user 
> > > *argp)
> > > @@ -540,9 +544,10 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> > >   if (ops->dma_map)
> > >   r = ops->dma_map(vdpa, iova, size, pa, perm);
> > > - else if (ops->set_map)
> > > - r = ops->set_map(vdpa, dev->iotlb);
> > > - else
> > > + else if (ops->set_map) {
> > > + if (!v->in_batch)
> > > + r = ops->set_map(vdpa, dev->iotlb);
> > > + } else
> > >   r = iommu_map(v->domain, iova, pa, size,
> > > perm_to_iommu_flags(perm));
> > > @@ -559,9 +564,10 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, 
> > > u64 iova, u64 size)
> > >   if (ops->dma_map)
> > >   ops->dma_unmap(vdpa, iova, size);
> > > - else if (ops->set_map)
> > > - ops->set_map(vdpa, dev->iotlb);
> > > - else
> > > + else if (ops->set_map) {
> > > + if (!v->in_batch)
> > > + ops->set_map(vdpa, dev->iotlb);
> > > + } else
> > >   iommu_unmap(v->domain, iova, size);
> > >   }
> > > @@ -655,6 +661,8 @@ static int vhost_vdpa_process_iotlb_msg(struct 
> > > vhost_dev *dev,
> > >   struct vhost_iotlb_msg *msg)
> > >   {
> > >   struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, 
> > > vdev);
> > > + struct vdpa_device *vdpa = v->vdpa;
> > > + const struct vdpa_config_ops *ops = vdpa->config;
> > >   int r = 0;
> > >   r = vhost_dev_check_owner(dev);
> > > @@ -668,6 +676,14 @@ static int vhost_vdpa_process_iotlb_msg(struct 
> > > vhost_dev *dev,
> > >   case VHOST_IOTLB_INVALIDATE:
> > >   vhost_vdpa_unmap(v, msg->iova, msg->size);
> > >   break;
> > > + case VHOST_IOTLB_BATCH_BEGIN:
> > > + v->in_batch = true;
> > > + break;
> > > + case VHOST_IOTLB_BATCH_END:
> > > + if (v->in_batch && ops->set_map)
> > > + ops->set_map(vdpa, dev->iotlb);
> > > + v->in_batch = 

Re: [PATCH RFC 4/5] vhost-vdpa: support IOTLB batching hints

2020-06-29 Thread Jason Wang


On 2020/6/28 下午5:58, Michael S. Tsirkin wrote:

On Thu, Jun 18, 2020 at 01:56:25PM +0800, Jason Wang wrote:

This patches extend the vhost IOTLB API to accept batch updating hints
form userspace. When userspace wants update the device IOTLB in a
batch, it may do:

1) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_BEGIN flag
2) Perform a batch of IOTLB updating via VHOST_IOTLB_UPDATE/INVALIDATE
3) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_END flag

As long as we are extending the interface,
is there some way we could cut down the number of system calls needed
here?



I'm not sure it's worth to do that since usually we only have less than 
10 regions.


A possible method is to carry multiple vhost_iotlb_message in one system 
call.







Vhost-vdpa may decide to batch the IOMMU/IOTLB updating in step 3 when
vDPA device support set_map() ops. This is useful for the vDPA device
that want to know all the mappings to tweak their own DMA translation
logic.

For vDPA device that doesn't require set_map(), no behavior changes.

This capability is advertised via VHOST_BACKEND_F_IOTLB_BATCH capability.

Signed-off-by: Jason Wang 
---
  drivers/vhost/vdpa.c | 30 +++---
  include/uapi/linux/vhost.h   |  2 ++
  include/uapi/linux/vhost_types.h |  7 +++
  3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 453057421f80..8f624bbafee7 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -56,7 +56,9 @@ enum {
  };
  
  enum {

-   VHOST_VDPA_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
+   VHOST_VDPA_BACKEND_FEATURES =
+   (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
+   (1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
  };
  
  /* Currently, only network backend w/o multiqueue is supported. */

@@ -77,6 +79,7 @@ struct vhost_vdpa {
int virtio_id;
int minor;
struct eventfd_ctx *config_ctx;
+   int in_batch;
  };
  
  static DEFINE_IDA(vhost_vdpa_ida);

@@ -125,6 +128,7 @@ static void vhost_vdpa_reset(struct vhost_vdpa *v)
const struct vdpa_config_ops *ops = vdpa->config;
  
  	ops->set_status(vdpa, 0);

+   v->in_batch = 0;
  }
  
  static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)

@@ -540,9 +544,10 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
  
  	if (ops->dma_map)

r = ops->dma_map(vdpa, iova, size, pa, perm);
-   else if (ops->set_map)
-   r = ops->set_map(vdpa, dev->iotlb);
-   else
+   else if (ops->set_map) {
+   if (!v->in_batch)
+   r = ops->set_map(vdpa, dev->iotlb);
+   } else
r = iommu_map(v->domain, iova, pa, size,
  perm_to_iommu_flags(perm));
  
@@ -559,9 +564,10 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
  
  	if (ops->dma_map)

ops->dma_unmap(vdpa, iova, size);
-   else if (ops->set_map)
-   ops->set_map(vdpa, dev->iotlb);
-   else
+   else if (ops->set_map) {
+   if (!v->in_batch)
+   ops->set_map(vdpa, dev->iotlb);
+   } else
iommu_unmap(v->domain, iova, size);
  }
  
@@ -655,6 +661,8 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,

struct vhost_iotlb_msg *msg)
  {
struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
+   struct vdpa_device *vdpa = v->vdpa;
+   const struct vdpa_config_ops *ops = vdpa->config;
int r = 0;
  
  	r = vhost_dev_check_owner(dev);

@@ -668,6 +676,14 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev 
*dev,
case VHOST_IOTLB_INVALIDATE:
vhost_vdpa_unmap(v, msg->iova, msg->size);
break;
+   case VHOST_IOTLB_BATCH_BEGIN:
+   v->in_batch = true;
+   break;
+   case VHOST_IOTLB_BATCH_END:
+   if (v->in_batch && ops->set_map)
+   ops->set_map(vdpa, dev->iotlb);
+   v->in_batch = false;
+   break;
default:
r = -EINVAL;
break;
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 0c2349612e77..565da96f55d5 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -91,6 +91,8 @@
  
  /* Use message type V2 */

  #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+/* IOTLB can accpet batching hints */

typo



Will fix.





+#define VHOST_BACKEND_F_IOTLB_BATCH  0x2
  
  #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)

  #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
index 669457ce5c48..5c12faffdde9 100644
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -60,6 +60,13 @@ struct vhost_iotlb_msg {
  

Re: [PATCH RFC 4/5] vhost-vdpa: support IOTLB batching hints

2020-06-28 Thread Michael S. Tsirkin
On Thu, Jun 18, 2020 at 01:56:25PM +0800, Jason Wang wrote:
> This patches extend the vhost IOTLB API to accept batch updating hints
> form userspace. When userspace wants update the device IOTLB in a
> batch, it may do:
> 
> 1) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_BEGIN flag
> 2) Perform a batch of IOTLB updating via VHOST_IOTLB_UPDATE/INVALIDATE
> 3) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_END flag

As long as we are extending the interface,
is there some way we could cut down the number of system calls needed
here?


> 
> Vhost-vdpa may decide to batch the IOMMU/IOTLB updating in step 3 when
> vDPA device support set_map() ops. This is useful for the vDPA device
> that want to know all the mappings to tweak their own DMA translation
> logic.
> 
> For vDPA device that doesn't require set_map(), no behavior changes.
> 
> This capability is advertised via VHOST_BACKEND_F_IOTLB_BATCH capability.
> 
> Signed-off-by: Jason Wang 
> ---
>  drivers/vhost/vdpa.c | 30 +++---
>  include/uapi/linux/vhost.h   |  2 ++
>  include/uapi/linux/vhost_types.h |  7 +++
>  3 files changed, 32 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index 453057421f80..8f624bbafee7 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -56,7 +56,9 @@ enum {
>  };
>  
>  enum {
> - VHOST_VDPA_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
> + VHOST_VDPA_BACKEND_FEATURES =
> + (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
> + (1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
>  };
>  
>  /* Currently, only network backend w/o multiqueue is supported. */
> @@ -77,6 +79,7 @@ struct vhost_vdpa {
>   int virtio_id;
>   int minor;
>   struct eventfd_ctx *config_ctx;
> + int in_batch;
>  };
>  
>  static DEFINE_IDA(vhost_vdpa_ida);
> @@ -125,6 +128,7 @@ static void vhost_vdpa_reset(struct vhost_vdpa *v)
>   const struct vdpa_config_ops *ops = vdpa->config;
>  
>   ops->set_status(vdpa, 0);
> + v->in_batch = 0;
>  }
>  
>  static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
> @@ -540,9 +544,10 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
>  
>   if (ops->dma_map)
>   r = ops->dma_map(vdpa, iova, size, pa, perm);
> - else if (ops->set_map)
> - r = ops->set_map(vdpa, dev->iotlb);
> - else
> + else if (ops->set_map) {
> + if (!v->in_batch)
> + r = ops->set_map(vdpa, dev->iotlb);
> + } else
>   r = iommu_map(v->domain, iova, pa, size,
> perm_to_iommu_flags(perm));
>  
> @@ -559,9 +564,10 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 
> iova, u64 size)
>  
>   if (ops->dma_map)
>   ops->dma_unmap(vdpa, iova, size);
> - else if (ops->set_map)
> - ops->set_map(vdpa, dev->iotlb);
> - else
> + else if (ops->set_map) {
> + if (!v->in_batch)
> + ops->set_map(vdpa, dev->iotlb);
> + } else
>   iommu_unmap(v->domain, iova, size);
>  }
>  
> @@ -655,6 +661,8 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev 
> *dev,
>   struct vhost_iotlb_msg *msg)
>  {
>   struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
> + struct vdpa_device *vdpa = v->vdpa;
> + const struct vdpa_config_ops *ops = vdpa->config;
>   int r = 0;
>  
>   r = vhost_dev_check_owner(dev);
> @@ -668,6 +676,14 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev 
> *dev,
>   case VHOST_IOTLB_INVALIDATE:
>   vhost_vdpa_unmap(v, msg->iova, msg->size);
>   break;
> + case VHOST_IOTLB_BATCH_BEGIN:
> + v->in_batch = true;
> + break;
> + case VHOST_IOTLB_BATCH_END:
> + if (v->in_batch && ops->set_map)
> + ops->set_map(vdpa, dev->iotlb);
> + v->in_batch = false;
> + break;
>   default:
>   r = -EINVAL;
>   break;
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index 0c2349612e77..565da96f55d5 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -91,6 +91,8 @@
>  
>  /* Use message type V2 */
>  #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
> +/* IOTLB can accpet batching hints */

typo

> +#define VHOST_BACKEND_F_IOTLB_BATCH  0x2
>  
>  #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
>  #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
> diff --git a/include/uapi/linux/vhost_types.h 
> b/include/uapi/linux/vhost_types.h
> index 669457ce5c48..5c12faffdde9 100644
> --- a/include/uapi/linux/vhost_types.h
> +++ b/include/uapi/linux/vhost_types.h
> @@ -60,6 +60,13 @@ struct vhost_iotlb_msg {
>  #define VHOST_IOTLB_UPDATE 2
>  #define VHOST_IOTLB_INVALIDATE 3
>  #define