Re: [virtio-dev] Re: [PATCH v4 5/7] iommu: Add virtio-iommu driver

2018-11-21 Thread Auger Eric
Hi jean,

On 11/20/18 6:30 PM, Jean-Philippe Brucker wrote:
> On 16/11/2018 18:46, Jean-Philippe Brucker wrote:
 +/*
 + * __viommu_sync_req - Complete all in-flight requests
 + *
 + * Wait for all added requests to complete. When this function returns, 
 all
 + * requests that were in-flight at the time of the call have completed.
 + */
 +static int __viommu_sync_req(struct viommu_dev *viommu)
 +{
 +  int ret = 0;
 +  unsigned int len;
 +  size_t write_len;
 +  struct viommu_request *req;
 +  struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];
 +
 +  assert_spin_locked(>request_lock);
 +
 +  virtqueue_kick(vq);
 +
 +  while (!list_empty(>requests)) {
 +  len = 0;
 +  req = virtqueue_get_buf(vq, );
 +  if (!req)
 +  continue;
 +
 +  if (!len)
 +  viommu_set_req_status(req->buf, req->len,
 +VIRTIO_IOMMU_S_IOERR);
 +
 +  write_len = req->len - req->write_offset;
 +  if (req->writeback && len >= write_len)
>>> I don't get "len >= write_len". Is it valid for the device to write more
>>> than write_len? If it writes less than write_len, the status is not set,
>>> is it?
> 
> Actually, len could well be three bytes smaller than write_len. The spec
> doesn't require the device to write the three padding bytes in
> virtio_iommu_req_tail, after the status byte.
> 
> Here the driver just assumes that the device wrote the reserved field.
> The QEMU device seems to write uninitialized data in there...

Indeed that's incorrect and I should fix it. tail struct should be
properly initialized to 0. Only probe request implementation is correct.
> 
> Any objection to changing the spec to require the device to initialize
> those bytes to zero? I think it makes things nicer overall and shouldn't
> have any performance impact.
No objection from me.
> 
> [...]
 +static struct iommu_domain *viommu_domain_alloc(unsigned type)
 +{
 +  struct viommu_domain *vdomain;
 +
 +  if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
>>> smmu drivers also support the IDENTITY type. Don't we want to support it
>>> as well?
>>
>> Eventually yes. The IDENTITY domain is useful when an IOMMU has been
>> forced upon you and gets in the way of performance, in which case you
>> get rid of it with CONFIG_IOMMU_DEFAULT_PASSTHROUGH=y or
>> iommu.passthrough=y. For virtio-iommu though, you could simply not
>> instantiate the device.
>>
>> I don't think it's difficult to add: if the device supports
>> VIRTIO_IOMMU_F_BYPASS, then we simply don't send an ATTACH request.
>> Otherwise after ATTACH we send a MAP for the whole input range. If the
>> change isn't bigger than this, I'll add it to v5.
> 
> Not as straightforward as I hoped, when the device doesn't support
> VIRTIO_IOMMU_F_BYPASS:
> 
> * We can't simply create an ID map for the whole input range, we need to
> carve out the resv_mem regions.
> 
> * For a VFIO device, the host has no way of forwarding the identity
> mapping. For example the guest will attempt to map [0; 0x]
> -> [0; 0x], but VFIO only allows to map RAM and cannot take
> such request. One solution is to only create ID mapping for RAM, and
> register a notifier for memory hotplug, like intel-iommu does for IOMMUs
> that don't support HW pass-through.
> 
> Since it's not completely trivial and - I think - not urgent, I'll leave
> this for later.
OK makes sense to me too. It was just a head up.

Thanks

Eric
> 
> Thanks,
> Jean
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [virtio-dev] Re: [PATCH v4 5/7] iommu: Add virtio-iommu driver

2018-11-20 Thread Jean-Philippe Brucker
On 16/11/2018 18:46, Jean-Philippe Brucker wrote:
>>> +/*
>>> + * __viommu_sync_req - Complete all in-flight requests
>>> + *
>>> + * Wait for all added requests to complete. When this function returns, all
>>> + * requests that were in-flight at the time of the call have completed.
>>> + */
>>> +static int __viommu_sync_req(struct viommu_dev *viommu)
>>> +{
>>> +   int ret = 0;
>>> +   unsigned int len;
>>> +   size_t write_len;
>>> +   struct viommu_request *req;
>>> +   struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];
>>> +
>>> +   assert_spin_locked(>request_lock);
>>> +
>>> +   virtqueue_kick(vq);
>>> +
>>> +   while (!list_empty(>requests)) {
>>> +   len = 0;
>>> +   req = virtqueue_get_buf(vq, );
>>> +   if (!req)
>>> +   continue;
>>> +
>>> +   if (!len)
>>> +   viommu_set_req_status(req->buf, req->len,
>>> + VIRTIO_IOMMU_S_IOERR);
>>> +
>>> +   write_len = req->len - req->write_offset;
>>> +   if (req->writeback && len >= write_len)
>> I don't get "len >= write_len". Is it valid for the device to write more
>> than write_len? If it writes less than write_len, the status is not set,
>> is it?

Actually, len could well be three bytes smaller than write_len. The spec
doesn't require the device to write the three padding bytes in
virtio_iommu_req_tail, after the status byte.

Here the driver just assumes that the device wrote the reserved field.
The QEMU device seems to write uninitialized data in there...

Any objection to changing the spec to require the device to initialize
those bytes to zero? I think it makes things nicer overall and shouldn't
have any performance impact.

[...]
>>> +static struct iommu_domain *viommu_domain_alloc(unsigned type)
>>> +{
>>> +   struct viommu_domain *vdomain;
>>> +
>>> +   if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
>> smmu drivers also support the IDENTITY type. Don't we want to support it
>> as well?
> 
> Eventually yes. The IDENTITY domain is useful when an IOMMU has been
> forced upon you and gets in the way of performance, in which case you
> get rid of it with CONFIG_IOMMU_DEFAULT_PASSTHROUGH=y or
> iommu.passthrough=y. For virtio-iommu though, you could simply not
> instantiate the device.
> 
> I don't think it's difficult to add: if the device supports
> VIRTIO_IOMMU_F_BYPASS, then we simply don't send an ATTACH request.
> Otherwise after ATTACH we send a MAP for the whole input range. If the
> change isn't bigger than this, I'll add it to v5.

Not as straightforward as I hoped, when the device doesn't support
VIRTIO_IOMMU_F_BYPASS:

* We can't simply create an ID map for the whole input range, we need to
carve out the resv_mem regions.

* For a VFIO device, the host has no way of forwarding the identity
mapping. For example the guest will attempt to map [0; 0x]
-> [0; 0x], but VFIO only allows to map RAM and cannot take
such request. One solution is to only create ID mapping for RAM, and
register a notifier for memory hotplug, like intel-iommu does for IOMMUs
that don't support HW pass-through.

Since it's not completely trivial and - I think - not urgent, I'll leave
this for later.

Thanks,
Jean
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 5/7] iommu: Add virtio-iommu driver

2018-11-16 Thread Jean-Philippe Brucker
Hi Eric,

On 16/11/2018 06:08, Auger Eric wrote:
>> +struct viommu_domain {
>> +struct iommu_domain domain;
>> +struct viommu_dev   *viommu;
>> +struct mutexmutex;
> same naming/comment as in smmu driver may help here
> struct mutex init_mutex; /* Protects viommu pointer */

ok

>> +unsigned intid;
>> +
>> +spinlock_t  mappings_lock;
>> +struct rb_root_cached   mappings;
>> +
>> +unsigned long   nr_endpoints;
>> +};
>> +
>> +struct viommu_endpoint {
>> +struct viommu_dev   *viommu;
>> +struct viommu_domain*vdomain;
>> +};
>> +
>> +struct viommu_request {
>> +struct list_headlist;
>> +void*writeback;
>> +unsigned intwrite_offset;
>> +unsigned intlen;
>> +charbuf[];
>> +};
>> +
>> +#define to_viommu_domain(domain)\
>> +container_of(domain, struct viommu_domain, domain)
>> +
>> +static int viommu_get_req_errno(void *buf, size_t len)
>> +{
>> +struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);
>> +
>> +switch (tail->status) {
>> +case VIRTIO_IOMMU_S_OK:
>> +return 0;
>> +case VIRTIO_IOMMU_S_UNSUPP:
>> +return -ENOSYS;
>> +case VIRTIO_IOMMU_S_INVAL:
>> +return -EINVAL;
>> +case VIRTIO_IOMMU_S_RANGE:
>> +return -ERANGE;
>> +case VIRTIO_IOMMU_S_NOENT:
>> +return -ENOENT;
>> +case VIRTIO_IOMMU_S_FAULT:
>> +return -EFAULT;
>> +case VIRTIO_IOMMU_S_IOERR:
>> +case VIRTIO_IOMMU_S_DEVERR:
>> +default:
>> +return -EIO;
>> +}
>> +}
>> +
>> +static void viommu_set_req_status(void *buf, size_t len, int status)
>> +{
>> +struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);
>> +
>> +tail->status = status;
>> +}
>> +
>> +static off_t viommu_get_req_offset(struct viommu_dev *viommu,
>> +   struct virtio_iommu_req_head *req,
>> +   size_t len)
> nit: viommu_get_write_desc_offset would be more self-explanatory?

ok

>> +{
>> +size_t tail_size = sizeof(struct virtio_iommu_req_tail);
>> +
>> +return len - tail_size;
>> +}
>> +
>> +/*
>> + * __viommu_sync_req - Complete all in-flight requests
>> + *
>> + * Wait for all added requests to complete. When this function returns, all
>> + * requests that were in-flight at the time of the call have completed.
>> + */
>> +static int __viommu_sync_req(struct viommu_dev *viommu)
>> +{
>> +int ret = 0;
>> +unsigned int len;
>> +size_t write_len;
>> +struct viommu_request *req;
>> +struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];
>> +
>> +assert_spin_locked(>request_lock);
>> +
>> +virtqueue_kick(vq);
>> +
>> +while (!list_empty(>requests)) {
>> +len = 0;
>> +req = virtqueue_get_buf(vq, );
>> +if (!req)
>> +continue;
>> +
>> +if (!len)
>> +viommu_set_req_status(req->buf, req->len,
>> +  VIRTIO_IOMMU_S_IOERR);
>> +
>> +write_len = req->len - req->write_offset;
>> +if (req->writeback && len >= write_len)
> I don't get "len >= write_len". Is it valid for the device to write more
> than write_len? If it writes less than write_len, the status is not set,
> is it?

No on both counts, I'll change this to ==

There is a problem in the spec regarding this: to get the status, the
driver expects the device to set len to the size of the whole writeable
part of the buffer, even if it only filled a few properties. But the
device doesn't have to do it at the moment. I need to change this sentence:

The device MAY fill the remaining bytes of properties, if any,
with zeroes.

Into somthing like:

The device SHOULD fill the remaining bytes of properties, if
any, with zeroes.

The QEMU and kvmtool devices already do this.

>> +memcpy(req->writeback, req->buf + req->write_offset,
>> +   write_len);
>> +
>> +list_del(>list);
>> +kfree(req);
>> +}
>> +
>> +return ret;
>> +}
>> +
>> +static int viommu_sync_req(struct viommu_dev *viommu)
>> +{
>> +int ret;
>> +unsigned long flags;
>> +
>> +spin_lock_irqsave(>request_lock, flags);
>> +ret = __viommu_sync_req(viommu);
>> +if (ret)
>> +dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret);
>> +spin_unlock_irqrestore(>request_lock, flags);
>> +
>> +return ret;
>> +}
>> +
>> +/*
>> + * __viommu_add_request - Add one request to the queue
>> + * @buf: pointer to the request buffer
>> + * @len: length of the request buffer
>> + * @writeback: copy data back to the buffer when the request 

Re: [PATCH v4 5/7] iommu: Add virtio-iommu driver

2018-11-15 Thread Auger Eric
Hi Jean,

On 11/15/18 5:52 PM, Jean-Philippe Brucker wrote:
> The virtio IOMMU is a para-virtualized device, allowing to send IOMMU
> requests such as map/unmap over virtio transport without emulating page
> tables. This implementation handles ATTACH, DETACH, MAP and UNMAP
> requests.
> 
> The bulk of the code transforms calls coming from the IOMMU API into
> corresponding virtio requests. Mappings are kept in an interval tree
> instead of page tables.

Some few comments/questions below.
> 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  MAINTAINERS   |   7 +
>  drivers/iommu/Kconfig |  11 +
>  drivers/iommu/Makefile|   1 +
>  drivers/iommu/virtio-iommu.c  | 918 ++
>  include/uapi/linux/virtio_ids.h   |   1 +
>  include/uapi/linux/virtio_iommu.h | 104 
>  6 files changed, 1042 insertions(+)
>  create mode 100644 drivers/iommu/virtio-iommu.c
>  create mode 100644 include/uapi/linux/virtio_iommu.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 0abecc528dac..0c7bdce57719 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15948,6 +15948,13 @@ S:   Maintained
>  F:   drivers/virtio/virtio_input.c
>  F:   include/uapi/linux/virtio_input.h
>  
> +VIRTIO IOMMU DRIVER
> +M:   Jean-Philippe Brucker 
> +L:   virtualizat...@lists.linux-foundation.org
> +S:   Maintained
> +F:   drivers/iommu/virtio-iommu.c
> +F:   include/uapi/linux/virtio_iommu.h
> +
>  VIRTUAL BOX GUEST DEVICE DRIVER
>  M:   Hans de Goede 
>  M:   Arnd Bergmann 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index d9a25715650e..efdeaaeee0e0 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -435,4 +435,15 @@ config QCOM_IOMMU
>   help
> Support for IOMMU on certain Qualcomm SoCs.
>  
> +config VIRTIO_IOMMU
> + bool "Virtio IOMMU driver"
> + depends on VIRTIO=y
> + select IOMMU_API
> + select INTERVAL_TREE
> + select ARM_DMA_USE_IOMMU if ARM
> + help
> +   Para-virtualised IOMMU driver with virtio.
> +
> +   Say Y here if you intend to run this kernel as a guest.
> +
>  endif # IOMMU_SUPPORT
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index a158a68c8ea8..48d831a39281 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -32,3 +32,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
>  obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
>  obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
>  obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
> +obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> new file mode 100644
> index ..2a9cb6285a1e
> --- /dev/null
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -0,0 +1,918 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Virtio driver for the paravirtualized IOMMU
> + *
> + * Copyright (C) 2018 Arm Limited
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +#define MSI_IOVA_BASE0x800
> +#define MSI_IOVA_LENGTH  0x10
> +
> +#define VIOMMU_REQUEST_VQ0
> +#define VIOMMU_NR_VQS1
> +
> +struct viommu_dev {
> + struct iommu_device iommu;
> + struct device   *dev;
> + struct virtio_device*vdev;
> +
> + struct ida  domain_ids;
> +
> + struct virtqueue*vqs[VIOMMU_NR_VQS];
> + spinlock_t  request_lock;
> + struct list_headrequests;
> +
> + /* Device configuration */
> + struct iommu_domain_geometrygeometry;
> + u64 pgsize_bitmap;
> + u8  domain_bits;
> +};
> +
> +struct viommu_mapping {
> + phys_addr_t paddr;
> + struct interval_tree_node   iova;
> + u32 flags;
> +};
> +
> +struct viommu_domain {
> + struct iommu_domain domain;
> + struct viommu_dev   *viommu;
> + struct mutexmutex;
same naming/comment as in smmu driver may help here
struct mutex init_mutex; /* Protects viommu pointer */
> + unsigned intid;
> +
> + spinlock_t  mappings_lock;
> + struct rb_root_cached   mappings;
> +
> + unsigned long   nr_endpoints;
> +};
> +
> +struct viommu_endpoint {
> + struct viommu_dev   *viommu;
> + struct viommu_domain*vdomain;
> +};
> +
> +struct viommu_request {
> + struct list_headlist;
> + void*writeback;
> + unsigned int  

[PATCH v4 5/7] iommu: Add virtio-iommu driver

2018-11-15 Thread Jean-Philippe Brucker
The virtio IOMMU is a para-virtualized device, allowing to send IOMMU
requests such as map/unmap over virtio transport without emulating page
tables. This implementation handles ATTACH, DETACH, MAP and UNMAP
requests.

The bulk of the code transforms calls coming from the IOMMU API into
corresponding virtio requests. Mappings are kept in an interval tree
instead of page tables.

Signed-off-by: Jean-Philippe Brucker 
---
 MAINTAINERS   |   7 +
 drivers/iommu/Kconfig |  11 +
 drivers/iommu/Makefile|   1 +
 drivers/iommu/virtio-iommu.c  | 918 ++
 include/uapi/linux/virtio_ids.h   |   1 +
 include/uapi/linux/virtio_iommu.h | 104 
 6 files changed, 1042 insertions(+)
 create mode 100644 drivers/iommu/virtio-iommu.c
 create mode 100644 include/uapi/linux/virtio_iommu.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 0abecc528dac..0c7bdce57719 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15948,6 +15948,13 @@ S: Maintained
 F: drivers/virtio/virtio_input.c
 F: include/uapi/linux/virtio_input.h
 
+VIRTIO IOMMU DRIVER
+M: Jean-Philippe Brucker 
+L: virtualizat...@lists.linux-foundation.org
+S: Maintained
+F: drivers/iommu/virtio-iommu.c
+F: include/uapi/linux/virtio_iommu.h
+
 VIRTUAL BOX GUEST DEVICE DRIVER
 M: Hans de Goede 
 M: Arnd Bergmann 
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d9a25715650e..efdeaaeee0e0 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -435,4 +435,15 @@ config QCOM_IOMMU
help
  Support for IOMMU on certain Qualcomm SoCs.
 
+config VIRTIO_IOMMU
+   bool "Virtio IOMMU driver"
+   depends on VIRTIO=y
+   select IOMMU_API
+   select INTERVAL_TREE
+   select ARM_DMA_USE_IOMMU if ARM
+   help
+ Para-virtualised IOMMU driver with virtio.
+
+ Say Y here if you intend to run this kernel as a guest.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index a158a68c8ea8..48d831a39281 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -32,3 +32,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
 obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
+obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
new file mode 100644
index ..2a9cb6285a1e
--- /dev/null
+++ b/drivers/iommu/virtio-iommu.c
@@ -0,0 +1,918 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Virtio driver for the paravirtualized IOMMU
+ *
+ * Copyright (C) 2018 Arm Limited
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#define MSI_IOVA_BASE  0x800
+#define MSI_IOVA_LENGTH0x10
+
+#define VIOMMU_REQUEST_VQ  0
+#define VIOMMU_NR_VQS  1
+
+struct viommu_dev {
+   struct iommu_device iommu;
+   struct device   *dev;
+   struct virtio_device*vdev;
+
+   struct ida  domain_ids;
+
+   struct virtqueue*vqs[VIOMMU_NR_VQS];
+   spinlock_t  request_lock;
+   struct list_headrequests;
+
+   /* Device configuration */
+   struct iommu_domain_geometrygeometry;
+   u64 pgsize_bitmap;
+   u8  domain_bits;
+};
+
+struct viommu_mapping {
+   phys_addr_t paddr;
+   struct interval_tree_node   iova;
+   u32 flags;
+};
+
+struct viommu_domain {
+   struct iommu_domain domain;
+   struct viommu_dev   *viommu;
+   struct mutexmutex;
+   unsigned intid;
+
+   spinlock_t  mappings_lock;
+   struct rb_root_cached   mappings;
+
+   unsigned long   nr_endpoints;
+};
+
+struct viommu_endpoint {
+   struct viommu_dev   *viommu;
+   struct viommu_domain*vdomain;
+};
+
+struct viommu_request {
+   struct list_headlist;
+   void*writeback;
+   unsigned intwrite_offset;
+   unsigned intlen;
+   charbuf[];
+};
+
+#define to_viommu_domain(domain)   \
+   container_of(domain, struct viommu_domain, domain)
+
+static int viommu_get_req_errno(void *buf, size_t len)
+{
+   struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);
+
+   switch (tail->status) {
+   case VIRTIO_IOMMU_S_OK:
+   return