RE: [PATCH v6 2/5] iommu: Use bus iommu ops for aux related callback

2020-10-29 Thread Tian, Kevin
> From: Lu Baolu 
> Sent: Friday, October 30, 2020 12:58 PM
> 
> The aux-domain apis were designed for macro driver where the subdevices
> are created and used inside a device driver. Use the device's bus iommu
> ops instead of that in iommu domain for various callbacks.

IIRC there are only two users on these apis. One is VFIO, and the other
is on the ARM side (not checked in yet). Jean, can you help confirm 
whether ARM-side usage still relies on aux apis even with this change?
If no, possibly they can be removed completely?

Thanks
Kevin

> 
> Signed-off-by: Lu Baolu 
> ---
>  drivers/iommu/iommu.c | 16 ++--
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 6bbdd959f9f3..17f2686664db 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2913,10 +2913,11 @@
> EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
>   */
>  int iommu_aux_attach_device(struct iommu_domain *domain, struct device
> *dev)
>  {
> + const struct iommu_ops *ops = dev->bus->iommu_ops;
>   int ret = -ENODEV;
> 
> - if (domain->ops->aux_attach_dev)
> - ret = domain->ops->aux_attach_dev(domain, dev);
> + if (ops && ops->aux_attach_dev)
> + ret = ops->aux_attach_dev(domain, dev);
> 
>   if (!ret)
>   trace_attach_device_to_domain(dev);
> @@ -2927,8 +2928,10 @@
> EXPORT_SYMBOL_GPL(iommu_aux_attach_device);
> 
>  void iommu_aux_detach_device(struct iommu_domain *domain, struct
> device *dev)
>  {
> - if (domain->ops->aux_detach_dev) {
> - domain->ops->aux_detach_dev(domain, dev);
> + const struct iommu_ops *ops = dev->bus->iommu_ops;
> +
> + if (ops && ops->aux_detach_dev) {
> + ops->aux_detach_dev(domain, dev);
>   trace_detach_device_from_domain(dev);
>   }
>  }
> @@ -2936,10 +2939,11 @@
> EXPORT_SYMBOL_GPL(iommu_aux_detach_device);
> 
>  int iommu_aux_get_pasid(struct iommu_domain *domain, struct device
> *dev)
>  {
> + const struct iommu_ops *ops = dev->bus->iommu_ops;
>   int ret = -ENODEV;
> 
> - if (domain->ops->aux_get_pasid)
> - ret = domain->ops->aux_get_pasid(domain, dev);
> + if (ops && ops->aux_get_pasid)
> + ret = ops->aux_get_pasid(domain, dev);
> 
>   return ret;
>  }
> --
> 2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 5/5] vfio/type1: Use mdev bus iommu_ops for IOMMU callbacks

2020-10-29 Thread Lu Baolu
With the IOMMU driver registering iommu_ops for the mdev_bus, the IOMMU
operations on an mdev could be done in the same way as any normal device
(for example, PCI/PCIe). There's no need to distinguish an mdev from
others for iommu operations. Remove the unnecessary code.

Signed-off-by: Lu Baolu 
---
 drivers/vfio/mdev/mdev_core.c|  18 -
 drivers/vfio/mdev/mdev_driver.c  |   6 ++
 drivers/vfio/mdev/mdev_private.h |   1 -
 drivers/vfio/vfio_iommu_type1.c  | 128 +++
 include/linux/mdev.h |  14 
 5 files changed, 18 insertions(+), 149 deletions(-)

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 6b9ab71f89e7..f4fd5f237c49 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -386,24 +386,6 @@ int mdev_device_remove(struct device *dev)
return 0;
 }
 
-int mdev_set_iommu_device(struct device *dev, struct device *iommu_device)
-{
-   struct mdev_device *mdev = to_mdev_device(dev);
-
-   mdev->iommu_device = iommu_device;
-
-   return 0;
-}
-EXPORT_SYMBOL(mdev_set_iommu_device);
-
-struct device *mdev_get_iommu_device(struct device *dev)
-{
-   struct mdev_device *mdev = to_mdev_device(dev);
-
-   return mdev->iommu_device;
-}
-EXPORT_SYMBOL(mdev_get_iommu_device);
-
 static int __init mdev_init(void)
 {
return mdev_bus_register();
diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
index 0d3223aee20b..487402f16355 100644
--- a/drivers/vfio/mdev/mdev_driver.c
+++ b/drivers/vfio/mdev/mdev_driver.c
@@ -18,6 +18,9 @@ static int mdev_attach_iommu(struct mdev_device *mdev)
int ret;
struct iommu_group *group;
 
+   if (iommu_present(_bus_type))
+   return 0;
+
group = iommu_group_alloc();
if (IS_ERR(group))
return PTR_ERR(group);
@@ -33,6 +36,9 @@ static int mdev_attach_iommu(struct mdev_device *mdev)
 
 static void mdev_detach_iommu(struct mdev_device *mdev)
 {
+   if (iommu_present(_bus_type))
+   return;
+
iommu_group_remove_device(>dev);
dev_info(>dev, "MDEV: detaching iommu\n");
 }
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index 7d922950caaf..efe0aefdb52f 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -31,7 +31,6 @@ struct mdev_device {
void *driver_data;
struct list_head next;
struct kobject *type_kobj;
-   struct device *iommu_device;
bool active;
 };
 
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index bb2684cc245e..e231b7070ca5 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -100,7 +100,6 @@ struct vfio_dma {
 struct vfio_group {
struct iommu_group  *iommu_group;
struct list_headnext;
-   boolmdev_group; /* An mdev group */
boolpinned_page_dirty_scope;
 };
 
@@ -1675,102 +1674,6 @@ static bool vfio_iommu_has_sw_msi(struct list_head 
*group_resv_regions,
return ret;
 }
 
-static struct device *vfio_mdev_get_iommu_device(struct device *dev)
-{
-   struct device *(*fn)(struct device *dev);
-   struct device *iommu_device;
-
-   fn = symbol_get(mdev_get_iommu_device);
-   if (fn) {
-   iommu_device = fn(dev);
-   symbol_put(mdev_get_iommu_device);
-
-   return iommu_device;
-   }
-
-   return NULL;
-}
-
-static int vfio_mdev_attach_domain(struct device *dev, void *data)
-{
-   struct iommu_domain *domain = data;
-   struct device *iommu_device;
-
-   iommu_device = vfio_mdev_get_iommu_device(dev);
-   if (iommu_device) {
-   if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-   return iommu_aux_attach_device(domain, iommu_device);
-   else
-   return iommu_attach_device(domain, iommu_device);
-   }
-
-   return -EINVAL;
-}
-
-static int vfio_mdev_detach_domain(struct device *dev, void *data)
-{
-   struct iommu_domain *domain = data;
-   struct device *iommu_device;
-
-   iommu_device = vfio_mdev_get_iommu_device(dev);
-   if (iommu_device) {
-   if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-   iommu_aux_detach_device(domain, iommu_device);
-   else
-   iommu_detach_device(domain, iommu_device);
-   }
-
-   return 0;
-}
-
-static int vfio_iommu_attach_group(struct vfio_domain *domain,
-  struct vfio_group *group)
-{
-   if (group->mdev_group)
-   return iommu_group_for_each_dev(group->iommu_group,
-   domain->domain,
-   vfio_mdev_attach_domain);
-   else
- 

[PATCH v6 4/5] iommu/vt-d: Add iommu_ops support for subdevice bus

2020-10-29 Thread Lu Baolu
The iommu_ops will only take effect when INTEL_IOMMU_SCALABLE_IOV kernel
option is selected. It applies to any device passthrough framework which
implements an underlying bus for the subdevices.

- Subdevice probe:
  When a subdevice is created and added to the bus, iommu_probe_device()
  will be called, where the device will be probed by the iommu core. An
  iommu group will be allocated and the device will be added to it. The
  default domain won't be allocated since there's no use case of using a
  subdevice in the host kernel at this time being. However, it's pretty
  easy to add this support later.

- Domain alloc/free/map/unmap/iova_to_phys operations:
  For such ops, we just reuse those for PCI/PCIe devices.

- Domain attach/detach operations:
  It depends on whether the parent device supports IOMMU_DEV_FEAT_AUX
  feature. If so, the domain will be attached to the parent device as an
  aux-domain; Otherwise, it will be attached to the parent as a primary
  domain.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/Kconfig  |  13 
 drivers/iommu/intel/Makefile |   1 +
 drivers/iommu/intel/iommu.c  |   5 ++
 drivers/iommu/intel/siov.c   | 119 +++
 include/linux/intel-iommu.h  |   4 ++
 5 files changed, 142 insertions(+)
 create mode 100644 drivers/iommu/intel/siov.c

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 28a3d1596c76..94edc332f558 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -86,3 +86,16 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
  is not selected, scalable mode support could also be enabled by
  passing intel_iommu=sm_on to the kernel. If not sure, please use
  the default value.
+
+config INTEL_IOMMU_SCALABLE_IOV
+   bool "Support for Intel Scalable I/O Virtualization"
+   depends on INTEL_IOMMU
+   select VFIO
+   select VFIO_MDEV
+   select VFIO_MDEV_DEVICE
+   help
+ Intel Scalable I/O virtualization (SIOV) is a hardware-assisted
+ PCIe subdevices virtualization. With each subdevice tagged with
+ an unique ID(PCI/PASID) the VT-d hardware could identify, hence
+ isolate DMA transactions from different subdevices on a same PCIe
+ device. Selecting this option will enable the support.
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index fb8e1e8c8029..f216385d5d59 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -4,4 +4,5 @@ obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
 obj-$(CONFIG_INTEL_IOMMU) += trace.o
 obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
 obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
+obj-$(CONFIG_INTEL_IOMMU_SCALABLE_IOV) += siov.o
 obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 1454fe74f3ba..dafd8069c2af 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4298,6 +4298,11 @@ int __init intel_iommu_init(void)
up_read(_global_lock);
 
bus_set_iommu(_bus_type, _iommu_ops);
+
+#ifdef CONFIG_INTEL_IOMMU_SCALABLE_IOV
+   intel_siov_init();
+#endif /* CONFIG_INTEL_IOMMU_SCALABLE_IOV */
+
if (si_domain && !hw_pass_through)
register_memory_notifier(_iommu_memory_nb);
cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
diff --git a/drivers/iommu/intel/siov.c b/drivers/iommu/intel/siov.c
new file mode 100644
index ..b9470e7ab3d6
--- /dev/null
+++ b/drivers/iommu/intel/siov.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * siov.c - Intel Scalable I/O virtualization support
+ *
+ * Copyright (C) 2020 Intel Corporation
+ *
+ * Author: Lu Baolu 
+ */
+
+#define pr_fmt(fmt)"DMAR: " fmt
+
+#include 
+#include 
+#include 
+#include 
+
+static struct device *subdev_lookup_parent(struct device *dev)
+{
+   if (dev->bus == _bus_type)
+   return mdev_parent_dev(mdev_from_dev(dev));
+
+   return NULL;
+}
+
+static struct iommu_domain *siov_iommu_domain_alloc(unsigned int type)
+{
+   if (type != IOMMU_DOMAIN_UNMANAGED)
+   return NULL;
+
+   return intel_iommu_domain_alloc(type);
+}
+
+static int siov_iommu_attach_device(struct iommu_domain *domain,
+   struct device *dev)
+{
+   struct device *parent;
+
+   parent = subdev_lookup_parent(dev);
+   if (!parent || !dev_is_pci(parent))
+   return -ENODEV;
+
+   if (iommu_dev_feature_enabled(parent, IOMMU_DEV_FEAT_AUX))
+   return intel_iommu_aux_attach_device(domain, parent);
+   else
+   return intel_iommu_attach_device(domain, parent);
+}
+
+static void siov_iommu_detach_device(struct iommu_domain *domain,
+struct device *dev)
+{
+   struct device *parent;
+
+   parent = subdev_lookup_parent(dev);
+   if (WARN_ON_ONCE(!parent || 

[PATCH v6 1/5] vfio/mdev: Register mdev bus earlier during boot

2020-10-29 Thread Lu Baolu
Move mdev bus registration earlier than IOMMU probe processing so that
the IOMMU drivers could be able to set iommu_ops for the mdev bus. This
only applies when vfio-mdev module is setected to be built-in.

Signed-off-by: Lu Baolu 
---
 drivers/vfio/mdev/mdev_core.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index b558d4cfd082..6b9ab71f89e7 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -417,8 +417,12 @@ static void __exit mdev_exit(void)
mdev_bus_unregister();
 }
 
+#if IS_BUILTIN(CONFIG_VFIO_MDEV)
+postcore_initcall(mdev_init)
+#else
 module_init(mdev_init)
 module_exit(mdev_exit)
+#endif /* IS_BUILTIN(CONFIG_VFIO_MDEV) */
 
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 2/5] iommu: Use bus iommu ops for aux related callback

2020-10-29 Thread Lu Baolu
The aux-domain apis were designed for macro driver where the subdevices
are created and used inside a device driver. Use the device's bus iommu
ops instead of that in iommu domain for various callbacks.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/iommu.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 6bbdd959f9f3..17f2686664db 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2913,10 +2913,11 @@ EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
  */
 int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev)
 {
+   const struct iommu_ops *ops = dev->bus->iommu_ops;
int ret = -ENODEV;
 
-   if (domain->ops->aux_attach_dev)
-   ret = domain->ops->aux_attach_dev(domain, dev);
+   if (ops && ops->aux_attach_dev)
+   ret = ops->aux_attach_dev(domain, dev);
 
if (!ret)
trace_attach_device_to_domain(dev);
@@ -2927,8 +2928,10 @@ EXPORT_SYMBOL_GPL(iommu_aux_attach_device);
 
 void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
 {
-   if (domain->ops->aux_detach_dev) {
-   domain->ops->aux_detach_dev(domain, dev);
+   const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+   if (ops && ops->aux_detach_dev) {
+   ops->aux_detach_dev(domain, dev);
trace_detach_device_from_domain(dev);
}
 }
@@ -2936,10 +2939,11 @@ EXPORT_SYMBOL_GPL(iommu_aux_detach_device);
 
 int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
 {
+   const struct iommu_ops *ops = dev->bus->iommu_ops;
int ret = -ENODEV;
 
-   if (domain->ops->aux_get_pasid)
-   ret = domain->ops->aux_get_pasid(domain, dev);
+   if (ops && ops->aux_get_pasid)
+   ret = ops->aux_get_pasid(domain, dev);
 
return ret;
 }
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 0/5] iommu aux-domain APIs extensions

2020-10-29 Thread Lu Baolu
Hi Joerg and Alex,

A description of purpose for this series could be found here.

https://lore.kernel.org/linux-iommu/20200901033422.22249-1-baolu...@linux.intel.com/

The previous version was posted here.

https://lore.kernel.org/linux-iommu/20200922061042.31633-1-baolu...@linux.intel.com/

This version is evolved according to Joerg's comments posted here.

https://lore.kernel.org/linux-iommu/20200924095532.gk27...@8bytes.org/

This basic idea is that IOMMU registers an iommu_ops for subdevice
bus (for example, the vfio/mdev bus), so that the upper layer device
passthrough framework could use the standard iommu-core code to setup
the IOMMU logistics.

This series was tested by Dave Jiang  with his
idxd driver posted here. Very appreciated!

https://lore.kernel.org/lkml/160021250454.67751.3119489448651243709.st...@djiang5-desk3.ch.intel.com/

Please help to review and comment.

Best regards,
baolu

Lu Baolu (5):
  vfio/mdev: Register mdev bus earlier during boot
  iommu: Use bus iommu ops for aux related callback
  iommu/vt-d: Make some static functions global
  iommu/vt-d: Add iommu_ops support for subdevice bus
  vfio/type1: Use mdev bus iommu_ops for IOMMU callbacks

 drivers/iommu/intel/Kconfig  |  13 
 drivers/iommu/intel/Makefile |   1 +
 drivers/iommu/intel/iommu.c  |  79 +--
 drivers/iommu/intel/siov.c   | 119 
 drivers/iommu/iommu.c|  16 ++--
 drivers/vfio/mdev/mdev_core.c|  22 +-
 drivers/vfio/mdev/mdev_driver.c  |   6 ++
 drivers/vfio/mdev/mdev_private.h |   1 -
 drivers/vfio/vfio_iommu_type1.c  | 128 +++
 include/linux/intel-iommu.h  |  53 +
 include/linux/mdev.h |  14 
 11 files changed, 236 insertions(+), 216 deletions(-)
 create mode 100644 drivers/iommu/intel/siov.c

-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 3/5] iommu/vt-d: Make some static functions global

2020-10-29 Thread Lu Baolu
So that they could be used in other files as well. No functional changes.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/iommu.c | 74 +++--
 include/linux/intel-iommu.h | 49 
 2 files changed, 62 insertions(+), 61 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 8f5e7b31b3fb..1454fe74f3ba 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -330,10 +330,6 @@ static void domain_exit(struct dmar_domain *domain);
 static void domain_remove_dev_info(struct dmar_domain *domain);
 static void dmar_remove_one_dev_info(struct device *dev);
 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
-static int intel_iommu_attach_device(struct iommu_domain *domain,
-struct device *dev);
-static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
-   dma_addr_t iova);
 
 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
 int dmar_disabled = 0;
@@ -4423,7 +4419,7 @@ static int md_domain_init(struct dmar_domain *domain, int 
guest_width)
return 0;
 }
 
-static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
+struct iommu_domain *intel_iommu_domain_alloc(unsigned int type)
 {
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
@@ -4462,7 +4458,7 @@ static struct iommu_domain 
*intel_iommu_domain_alloc(unsigned type)
return NULL;
 }
 
-static void intel_iommu_domain_free(struct iommu_domain *domain)
+void intel_iommu_domain_free(struct iommu_domain *domain)
 {
if (domain != _domain->domain)
domain_exit(to_dmar_domain(domain));
@@ -4639,8 +4635,7 @@ static int prepare_domain_attach_device(struct 
iommu_domain *domain,
return 0;
 }
 
-static int intel_iommu_attach_device(struct iommu_domain *domain,
-struct device *dev)
+int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev)
 {
int ret;
 
@@ -4669,8 +4664,7 @@ static int intel_iommu_attach_device(struct iommu_domain 
*domain,
return domain_add_dev_info(to_dmar_domain(domain), dev);
 }
 
-static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
-struct device *dev)
+int intel_iommu_aux_attach_device(struct iommu_domain *domain, struct device 
*dev)
 {
int ret;
 
@@ -4684,14 +4678,12 @@ static int intel_iommu_aux_attach_device(struct 
iommu_domain *domain,
return aux_domain_add_dev(to_dmar_domain(domain), dev);
 }
 
-static void intel_iommu_detach_device(struct iommu_domain *domain,
- struct device *dev)
+void intel_iommu_detach_device(struct iommu_domain *domain, struct device *dev)
 {
dmar_remove_one_dev_info(dev);
 }
 
-static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
- struct device *dev)
+void intel_iommu_aux_detach_device(struct iommu_domain *domain, struct device 
*dev)
 {
aux_domain_remove_dev(to_dmar_domain(domain), dev);
 }
@@ -4875,9 +4867,8 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, 
struct device *dev,
 }
 #endif
 
-static int intel_iommu_map(struct iommu_domain *domain,
-  unsigned long iova, phys_addr_t hpa,
-  size_t size, int iommu_prot, gfp_t gfp)
+int intel_iommu_map(struct iommu_domain *domain, unsigned long iova,
+   phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp)
 {
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
u64 max_addr;
@@ -4913,9 +4904,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
return ret;
 }
 
-static size_t intel_iommu_unmap(struct iommu_domain *domain,
-   unsigned long iova, size_t size,
-   struct iommu_iotlb_gather *gather)
+size_t intel_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+size_t size, struct iommu_iotlb_gather *gather)
 {
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long start_pfn, last_pfn;
@@ -4963,8 +4953,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain 
*domain,
dma_free_pagelist(gather->freelist);
 }
 
-static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
-   dma_addr_t iova)
+phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t 
iova)
 {
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct dma_pte *pte;
@@ -4980,42 +4969,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct 
iommu_domain *domain,
return phys;
 }
 
-static inline bool scalable_mode_support(void)
-{
-   struct dmar_drhd_unit *drhd;
-   struct intel_iommu *iommu;
-   bool ret = true;
-

[PATCH v2 1/2] iommu/vt-d: Fix sid not set issue in intel_svm_bind_gpasid()

2020-10-29 Thread Yi Sun
From: Liu Yi L 

Should get correct sid and set it into sdev. Because we execute
'sdev->sid != req->rid' in the loop of prq_event_thread().

Fixes: eb8d93ea3c1d ("iommu/vt-d: Report page request faults for guest SVA")
Signed-off-by: Liu Yi L 
Signed-off-by: Yi Sun 
---
 drivers/iommu/intel/svm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index f1861fa..7584669 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -279,6 +279,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, 
struct device *dev,
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct intel_svm_dev *sdev = NULL;
struct dmar_domain *dmar_domain;
+   struct device_domain_info *info;
struct intel_svm *svm = NULL;
int ret = 0;
 
@@ -310,6 +311,10 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, 
struct device *dev,
if (data->hpasid <= 0 || data->hpasid >= PASID_MAX)
return -EINVAL;
 
+   info = get_domain_info(dev);
+   if (!info)
+   return -EINVAL;
+
dmar_domain = to_dmar_domain(domain);
 
mutex_lock(_mutex);
@@ -357,6 +362,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, 
struct device *dev,
goto out;
}
sdev->dev = dev;
+   sdev->sid = PCI_DEVID(info->bus, info->devfn);
 
/* Only count users if device has aux domains */
if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 2/2] iommu/vt-d: Fix a bug for PDP check in prq_event_thread

2020-10-29 Thread Yi Sun
From: "Liu, Yi L" 

In prq_event_thread(), the QI_PGRP_PDP is wrongly set by
'req->pasid_present' which should be replaced to
'req->priv_data_present'.

Fixes: 5b438f4ba315 ("iommu/vt-d: Support page request in scalable mode")
Signed-off-by: Liu, Yi L 
Signed-off-by: Yi Sun 
---
 drivers/iommu/intel/svm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 7584669..3242ebd 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -1035,7 +1035,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
resp.qw0 = QI_PGRP_PASID(req->pasid) |
QI_PGRP_DID(req->rid) |
QI_PGRP_PASID_P(req->pasid_present) |
-   QI_PGRP_PDP(req->pasid_present) |
+   QI_PGRP_PDP(req->priv_data_present) |
QI_PGRP_RESP_CODE(result) |
QI_PGRP_RESP_TYPE;
resp.qw1 = QI_PGRP_IDX(req->prg_index) |
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 0/2] iommu: Fix a few issues related to PRQ

2020-10-29 Thread Yi Sun
We found a few issues about PRQ. So, two patches are cooked to
fix them. Please have a review. Thanks!

Changes from v1:

- Modify subject of patch 1 to make it more accurate.
- Move get_domain_info() up to the sanity check part in patch 1.
- Remove v1 patch 2 which is not suitable.
- Add description for current patch 2.
- Add 'Fixes:' tags for all patches.

Liu Yi L (1):
  iommu/vt-d: Fix sid not set issue in in intel_svm_bind_gpasid()

Liu, Yi L (1):
  iommu/vt-d: Fix a bug for PDP check in prq_event_thread

 drivers/iommu/intel/svm.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/rockchip: check return value of of_find_device_by_node() in rk_iommu_of_xlate()

2020-10-29 Thread yukuai (C)



On 2020/10/29 21:51, Robin Murphy wrote:

On 2020-10-29 13:19, yukuai (C) wrote:


On 2020/10/29 18:08, Robin Murphy wrote:

On 2020-10-29 09:22, Yu Kuai wrote:

If of_find_device_by_node() failed in rk_iommu_of_xlate(), null pointer
dereference will be triggered. Thus return error code if
of_find_device_by_node() failed.


How can that happen? (Given that ".suppress_bind_attrs = true")

Robin.


I'm not sure if that could happen...

My thought is that it's better to do such checking to aviod any possible
problem.


->of_xlate() is only invoked on the specific set of ops returned by 
iommu_ops_from_fwnode(). In turn, iommu_ops_from_fwnode() will only 
return those ops if the driver has successfully probed and called 
iommu_register_device() with the relevant DT node. For the driver to 
have been able to probe at all, a platform device associated with that 
DT node must have been created, and therefore of_find_device_by_node() 
cannot fail.


If there ever were some problem serious enough to break that fundamental 
assumption, then I *want* these drivers to crash right here, with a nice 
clear stack trace to start debugging from. So no, I firmly disagree that 
adding redundant code, which will never do anything except attempt to 
paper over catastrophic memory corruption, is "better". Sorry :)




Sounds reasonable, thanks for your explanation

Yu Kuai
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/5] firmware: QCOM_SCM: Allow qcom_scm driver to be loadable as a permenent module

2020-10-29 Thread John Stultz
On Wed, Oct 28, 2020 at 7:51 AM Robin Murphy  wrote:
> Hmm, perhaps I'm missing something here, but even if the config options
> *do* line up, what prevents arm-smmu probing before qcom-scm and
> dereferencing NULL in qcom_scm_qsmmu500_wait_safe_toggle() before __scm
> is initialised?

Oh man, this spun me on a "wait, but how does it all work!" trip. :)

So in the non-module case, the qcom_scm driver is a subsys_initcall
and the arm-smmu is a module_platform_driver, so the ordering works
out.

In the module case, the arm-smmu code isn't loaded until the qcom_scm
driver finishes probing due to the symbol dependency handling.

To double check this, I added a big msleep at the top of the
qcom_scm_probe to try to open the race window you described, but the
arm_smmu_device_probe() doesn't run until after qcom_scm_probe
completes.

So at least as a built in / built in, or a module/module case its ok.
And in the case where arm-smmu is a module and qcom_scm is built in
that's ok too.

Its just the case my patch is trying to prevent is where arm-smmu is
built in, but qcom_scm is a module that it can't work (due to build
errors in missing symbols,  or if we tried to use function pointers to
plug in the qcom_scm - the lack of initialization ordering).

Hopefully that addresses your concern? Let me know if I'm still
missing something.

thanks
-john
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread David Laight
From: Arvind Sankar
> Sent: 29 October 2020 21:35
> 
> On Thu, Oct 29, 2020 at 09:41:13PM +0100, Thomas Gleixner wrote:
> > On Thu, Oct 29 2020 at 17:59, Paolo Bonzini wrote:
> > > On 29/10/20 17:56, Arvind Sankar wrote:
> > >>> For those two just add:
> > >>> struct apic *apic = x86_system_apic;
> > >>> before all the assignments.
> > >>> Less churn and much better code.
> > >>>
> > >> Why would it be better code?
> > >>
> > >
> > > I think he means the compiler produces better code, because it won't
> > > read the global variable repeatedly.  Not sure if that's true,(*) but I
> > > think I do prefer that version if Arnd wants to do that tweak.
> >
> > It's not true.
> >
> >  foo *p = bar;
> >
> >  p->a = 1;
> >  p->b = 2;
> >
> > The compiler is free to reload bar after accessing p->a and with
> >
> > bar->a = 1;
> > bar->b = 1;
> >
> > it can either cache bar in a register or reread it after bar->a
> >
> > The generated code is the same as long as there is no reason to reload,
> > e.g. register pressure.
> >
> > Thanks,
> >
> > tglx
> 
> It's not quite the same.
> 
> https://godbolt.org/z/4dzPbM
> 
> With -fno-strict-aliasing, the compiler reloads the pointer if you write
> to the start of what it points to, but not if you write to later
> elements.

I guess it assumes that global data doesn't overlap.

But in general they are sort of opposites:

With the local variable it can reload if it knows the write
cannot have affected the global - but is unlikely to do so.

Using the global it must reload if it is possible the write
might have affected the global.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH 1/2] dma-mapping: add benchmark support for streaming DMA APIs

2020-10-29 Thread Song Bao Hua (Barry Song)



> -Original Message-
> From: Robin Murphy [mailto:robin.mur...@arm.com]
> Sent: Friday, October 30, 2020 8:38 AM
> To: Song Bao Hua (Barry Song) ;
> iommu@lists.linux-foundation.org; h...@lst.de; m.szyprow...@samsung.com
> Cc: j...@8bytes.org; w...@kernel.org; sh...@kernel.org; Linuxarm
> ; linux-kselft...@vger.kernel.org
> Subject: Re: [PATCH 1/2] dma-mapping: add benchmark support for streaming
> DMA APIs
> 
> On 2020-10-27 03:53, Barry Song wrote:
> > Nowadays, there are increasing requirements to benchmark the performance
> > of dma_map and dma_unmap particually while the device is attached to an
> > IOMMU.
> >
> > This patch enables the support. Users can run specified number of threads
> > to do dma_map_page and dma_unmap_page on a specific NUMA node with
> the
> > specified duration. Then dma_map_benchmark will calculate the average
> > latency for map and unmap.
> >
> > A difficulity for this benchmark is that dma_map/unmap APIs must run on
> > a particular device. Each device might have different backend of IOMMU or
> > non-IOMMU.
> >
> > So we use the driver_override to bind dma_map_benchmark to a particual
> > device by:
> > echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override
> > echo xxx > /sys/bus/platform/drivers/xxx/unbind
> > echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind
> >
> > For this moment, it supports platform device only, PCI device will also
> > be supported afterwards.
> 
> Neat! This is something I've thought about many times, but never got
> round to attempting :)

I am happy you have the same needs. When I came to IOMMU area a half year ago,
the first thing I've done was writing a rough benchmark. At that time, I hacked 
kernel
to get a device behind an IOMMU.

Recently, I got some time to think about how to get "device" without ugly 
hacking and
then clean up code for sending patches out to provide a common benchmark in 
order
that everybody can use.

> 
> I think the basic latency measurement for mapping and unmapping pages is
> enough to start with, but there are definitely some more things that
> would be interesting to look into for future enhancements:
> 
>   - a choice of mapping sizes, both smaller and larger than one page, to
> help characterise stuff like cache maintenance overhead and bounce
> buffer/IOVA fragmentation.
>   - alternative allocation patterns like doing lots of maps first, then
> all their corresponding unmaps (to provoke things like the worst-case
> IOVA rcache behaviour).
>   - ways to exercise a range of those parameters at once across
> different threads in a single test.
> 

Yes, sure. Once we have a basic framework, we can add more benchmark patterns
by using different parameters in the userspace tool:
testing/selftests/dma/dma_map_benchmark.c

Similar function extensions have been carried out in GUP_BENCHMARK.

> But let's get a basic framework nailed down first...

Sure.

> 
> > Cc: Joerg Roedel 
> > Cc: Will Deacon 
> > Cc: Shuah Khan 
> > Cc: Christoph Hellwig 
> > Cc: Marek Szyprowski 
> > Cc: Robin Murphy 
> > Signed-off-by: Barry Song 
> > ---
> >   kernel/dma/Kconfig |   8 ++
> >   kernel/dma/Makefile|   1 +
> >   kernel/dma/map_benchmark.c | 202
> +
> >   3 files changed, 211 insertions(+)
> >   create mode 100644 kernel/dma/map_benchmark.c
> >
> > diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> > index c99de4a21458..949c53da5991 100644
> > --- a/kernel/dma/Kconfig
> > +++ b/kernel/dma/Kconfig
> > @@ -225,3 +225,11 @@ config DMA_API_DEBUG_SG
> >   is technically out-of-spec.
> >
> >   If unsure, say N.
> > +
> > +config DMA_MAP_BENCHMARK
> > +   bool "Enable benchmarking of streaming DMA mapping"
> > +   help
> > + Provides /sys/kernel/debug/dma_map_benchmark that helps with
> testing
> > + performance of dma_(un)map_page.
> > +
> > + See tools/testing/selftests/dma/dma_map_benchmark.c
> > diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
> > index dc755ab68aab..7aa6b26b1348 100644
> > --- a/kernel/dma/Makefile
> > +++ b/kernel/dma/Makefile
> > @@ -10,3 +10,4 @@ obj-$(CONFIG_DMA_API_DEBUG)   += debug.o
> >   obj-$(CONFIG_SWIOTLB) += swiotlb.o
> >   obj-$(CONFIG_DMA_COHERENT_POOL)   += pool.o
> >   obj-$(CONFIG_DMA_REMAP)   += remap.o
> > +obj-$(CONFIG_DMA_MAP_BENCHMARK)+= map_benchmark.o
> > diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
> > new file mode 100644
> > index ..16a5d7779d67
> > --- /dev/null
> > +++ b/kernel/dma/map_benchmark.c
> > @@ -0,0 +1,202 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2020 Hisilicon Limited.
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> 
> Nit: alphabetical order is always nice, when there's not an existing
> precedent of a 

Re: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread Arvind Sankar
On Thu, Oct 29, 2020 at 09:41:13PM +0100, Thomas Gleixner wrote:
> On Thu, Oct 29 2020 at 17:59, Paolo Bonzini wrote:
> > On 29/10/20 17:56, Arvind Sankar wrote:
> >>> For those two just add:
> >>>   struct apic *apic = x86_system_apic;
> >>> before all the assignments.
> >>> Less churn and much better code.
> >>>
> >> Why would it be better code?
> >> 
> >
> > I think he means the compiler produces better code, because it won't
> > read the global variable repeatedly.  Not sure if that's true,(*) but I
> > think I do prefer that version if Arnd wants to do that tweak.
> 
> It's not true.
> 
>  foo *p = bar;
> 
>  p->a = 1;
>  p->b = 2;
> 
> The compiler is free to reload bar after accessing p->a and with
> 
> bar->a = 1;
> bar->b = 1;
> 
> it can either cache bar in a register or reread it after bar->a
> 
> The generated code is the same as long as there is no reason to reload,
> e.g. register pressure.
> 
> Thanks,
> 
> tglx

It's not quite the same.

https://godbolt.org/z/4dzPbM

With -fno-strict-aliasing, the compiler reloads the pointer if you write
to the start of what it points to, but not if you write to later
elements.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread Thomas Gleixner
On Thu, Oct 29 2020 at 17:59, Paolo Bonzini wrote:
> On 29/10/20 17:56, Arvind Sankar wrote:
>>> For those two just add:
>>> struct apic *apic = x86_system_apic;
>>> before all the assignments.
>>> Less churn and much better code.
>>>
>> Why would it be better code?
>> 
>
> I think he means the compiler produces better code, because it won't
> read the global variable repeatedly.  Not sure if that's true,(*) but I
> think I do prefer that version if Arnd wants to do that tweak.

It's not true.

 foo *p = bar;

 p->a = 1;
 p->b = 2;

The compiler is free to reload bar after accessing p->a and with

bar->a = 1;
bar->b = 1;

it can either cache bar in a register or reread it after bar->a

The generated code is the same as long as there is no reason to reload,
e.g. register pressure.

Thanks,

tglx
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/2] dma-mapping: add benchmark support for streaming DMA APIs

2020-10-29 Thread Robin Murphy

On 2020-10-27 03:53, Barry Song wrote:

Nowadays, there are increasing requirements to benchmark the performance
of dma_map and dma_unmap particually while the device is attached to an
IOMMU.

This patch enables the support. Users can run specified number of threads
to do dma_map_page and dma_unmap_page on a specific NUMA node with the
specified duration. Then dma_map_benchmark will calculate the average
latency for map and unmap.

A difficulity for this benchmark is that dma_map/unmap APIs must run on
a particular device. Each device might have different backend of IOMMU or
non-IOMMU.

So we use the driver_override to bind dma_map_benchmark to a particual
device by:
echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override
echo xxx > /sys/bus/platform/drivers/xxx/unbind
echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind

For this moment, it supports platform device only, PCI device will also
be supported afterwards.


Neat! This is something I've thought about many times, but never got 
round to attempting :)


I think the basic latency measurement for mapping and unmapping pages is 
enough to start with, but there are definitely some more things that 
would be interesting to look into for future enhancements:


 - a choice of mapping sizes, both smaller and larger than one page, to 
help characterise stuff like cache maintenance overhead and bounce 
buffer/IOVA fragmentation.
 - alternative allocation patterns like doing lots of maps first, then 
all their corresponding unmaps (to provoke things like the worst-case 
IOVA rcache behaviour).
 - ways to exercise a range of those parameters at once across 
different threads in a single test.


But let's get a basic framework nailed down first...


Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Shuah Khan 
Cc: Christoph Hellwig 
Cc: Marek Szyprowski 
Cc: Robin Murphy 
Signed-off-by: Barry Song 
---
  kernel/dma/Kconfig |   8 ++
  kernel/dma/Makefile|   1 +
  kernel/dma/map_benchmark.c | 202 +
  3 files changed, 211 insertions(+)
  create mode 100644 kernel/dma/map_benchmark.c

diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index c99de4a21458..949c53da5991 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -225,3 +225,11 @@ config DMA_API_DEBUG_SG
  is technically out-of-spec.
  
  	  If unsure, say N.

+
+config DMA_MAP_BENCHMARK
+   bool "Enable benchmarking of streaming DMA mapping"
+   help
+ Provides /sys/kernel/debug/dma_map_benchmark that helps with testing
+ performance of dma_(un)map_page.
+
+ See tools/testing/selftests/dma/dma_map_benchmark.c
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index dc755ab68aab..7aa6b26b1348 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_DMA_API_DEBUG)   += debug.o
  obj-$(CONFIG_SWIOTLB) += swiotlb.o
  obj-$(CONFIG_DMA_COHERENT_POOL)   += pool.o
  obj-$(CONFIG_DMA_REMAP)   += remap.o
+obj-$(CONFIG_DMA_MAP_BENCHMARK)+= map_benchmark.o
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
new file mode 100644
index ..16a5d7779d67
--- /dev/null
+++ b/kernel/dma/map_benchmark.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Hisilicon Limited.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 


Nit: alphabetical order is always nice, when there's not an existing 
precedent of a complete mess...



+
+#define DMA_MAP_BENCHMARK  _IOWR('d', 1, struct map_benchmark)
+
+struct map_benchmark {
+   __u64 map_nsec;
+   __u64 unmap_nsec;
+   __u32 threads; /* how many threads will do map/unmap in parallel */
+   __u32 seconds; /* how long the test will last */
+   int node; /* which numa node this benchmark will run on */
+   __u64 expansion[10];/* For future use */
+};


I'm no expert on userspace ABIs (and what little experience I do have is 
mostly of Win32...), so hopefully someone else will comment if there's 
anything of concern here. One thing I wonder is that there's a fair 
likelihood of functionality evolving here over time, so might it be 
appropriate to have some sort of explicit versioning parameter for 
robustness?



+struct map_benchmark_data {
+   struct map_benchmark bparam;
+   struct device *dev;
+   struct dentry  *debugfs;
+   atomic64_t total_map_nsecs;
+   atomic64_t total_map_loops;
+   atomic64_t total_unmap_nsecs;
+   atomic64_t total_unmap_loops;
+};
+
+static int map_benchmark_thread(void *data)
+{
+   struct page *page;
+   dma_addr_t dma_addr;
+   struct map_benchmark_data *map = data;
+   int ret = 0;
+
+   page = alloc_page(GFP_KERNEL);
+   if (!page)
+   return -ENOMEM;
+
+   while (!kthread_should_stop())  {
+   

Re: [PATCH v5 0/3] iommu/arm-smmu-qcom: Support maintaining bootloader mappings

2020-10-29 Thread Will Deacon
On Mon, 19 Oct 2020 11:23:20 -0700, Bjorn Andersson wrote:
> This is the revised fourth attempt of inheriting the stream mapping for
> the framebuffer on many Qualcomm platforms, in order to not hit
> catastrophic faults during arm-smmu initialization.
> 
> The new approach does, based on Robin's suggestion, take a much more
> direct approach with the allocation of a context bank for bypass
> emulation and use of this context bank pretty much isolated to the
> Qualcomm specific implementation.
> 
> [...]

Applied to will (for-joerg/arm-smmu/updates), thanks!

[1/3] iommu/arm-smmu: Allow implementation specific write_s2cr
  https://git.kernel.org/will/c/56b75b51ed6d
[2/3] iommu/arm-smmu-qcom: Read back stream mappings
  https://git.kernel.org/will/c/07a7f2caaa5a
[3/3] iommu/arm-smmu-qcom: Implement S2CR quirk
  https://git.kernel.org/will/c/f9081b8ff593

Cheers,
-- 
Will

https://fixes.arm64.dev
https://next.arm64.dev
https://will.arm64.dev
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 7/7] mm: Remove examples from enum zone_type comment

2020-10-29 Thread Nicolas Saenz Julienne
We can't really list every setup in common code. On top of that they are
unlikely to stay true for long as things change in the arch trees
independently of this comment.

Suggested-by: Christoph Hellwig 
Signed-off-by: Nicolas Saenz Julienne 
Reviewed-by: Christoph Hellwig 
---
 include/linux/mmzone.h | 20 
 1 file changed, 20 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fb3bf696c05e..9d0c454d23cd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -354,26 +354,6 @@ enum zone_type {
 * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
 * platforms may need both zones as they support peripherals with
 * different DMA addressing limitations.
-*
-* Some examples:
-*
-*  - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the
-*rest of the lower 4G.
-*
-*  - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on
-*the specific device.
-*
-*  - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the
-*lower 4G.
-*
-*  - powerpc only uses ZONE_DMA, the size, up to 2G, may vary
-*depending on the specific device.
-*
-*  - s390 uses ZONE_DMA fixed to the lower 2G.
-*
-*  - ia64 and riscv only use ZONE_DMA32.
-*
-*  - parisc uses neither.
 */
 #ifdef CONFIG_ZONE_DMA
ZONE_DMA,
-- 
2.29.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 4/7] of: unittest: Add test for of_dma_get_max_cpu_address()

2020-10-29 Thread Nicolas Saenz Julienne
Introduce a test for of_dma_get_max_cup_address(), it uses the same DT
data as the rest of dma-ranges unit tests.

Signed-off-by: Nicolas Saenz Julienne 
Reviewed-by: Rob Herring 

---
Changes since v3:
 - Remove HAS_DMA guards

 drivers/of/unittest.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 06cc988faf78..b9a4d047a95e 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -869,6 +869,23 @@ static void __init of_unittest_changeset(void)
 #endif
 }
 
+static void __init of_unittest_dma_get_max_cpu_address(void)
+{
+   struct device_node *np;
+   phys_addr_t cpu_addr;
+
+   np = of_find_node_by_path("/testcase-data/address-tests");
+   if (!np) {
+   pr_err("missing testcase data\n");
+   return;
+   }
+
+   cpu_addr = of_dma_get_max_cpu_address(np);
+   unittest(cpu_addr == 0x5000,
+"of_dma_get_max_cpu_address: wrong CPU addr %pad (expecting 
%x)\n",
+_addr, 0x5000);
+}
+
 static void __init of_unittest_dma_ranges_one(const char *path,
u64 expect_dma_addr, u64 expect_paddr)
 {
@@ -3266,6 +3283,7 @@ static int __init of_unittest(void)
of_unittest_changeset();
of_unittest_parse_interrupts();
of_unittest_parse_interrupts_extended();
+   of_unittest_dma_get_max_cpu_address();
of_unittest_parse_dma_ranges();
of_unittest_pci_dma_ranges();
of_unittest_match_node();
-- 
2.29.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 0/7] arm64: Default to 32-bit wide ZONE_DMA

2020-10-29 Thread Nicolas Saenz Julienne
Using two distinct DMA zones turned out to be problematic. Here's an
attempt go back to a saner default.

I tested this on both a RPi4 and QEMU.

---

Changes since v4:
 - Fix of_dma_get_max_cpu_address() so it returns the last addressable
   addres, not the limit

Changes since v3:
 - Drop patch adding define in dma-mapping
 - Address small review changes
 - Update Ard's patch
 - Add new patch removing examples from mmzone.h

Changes since v2:
 - Introduce Ard's patch
 - Improve OF dma-ranges parsing function
 - Add unit test for OF function
 - Address small changes
 - Move crashkernel reservation later in boot process

Changes since v1:
 - Parse dma-ranges instead of using machine compatible string

Ard Biesheuvel (1):
  arm64: mm: Set ZONE_DMA size based on early IORT scan

Nicolas Saenz Julienne (6):
  arm64: mm: Move reserve_crashkernel() into mem_init()
  arm64: mm: Move zone_dma_bits initialization into zone_sizes_init()
  of/address: Introduce of_dma_get_max_cpu_address()
  of: unittest: Add test for of_dma_get_max_cpu_address()
  arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges
  mm: Remove examples from enum zone_type comment

 arch/arm64/mm/init.c  | 16 ++--
 drivers/acpi/arm64/iort.c | 52 +++
 drivers/of/address.c  | 42 +++
 drivers/of/unittest.c | 18 ++
 include/linux/acpi_iort.h |  4 +++
 include/linux/mmzone.h| 20 ---
 include/linux/of.h|  7 ++
 7 files changed, 130 insertions(+), 29 deletions(-)

-- 
2.29.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 6/7] arm64: mm: Set ZONE_DMA size based on early IORT scan

2020-10-29 Thread Nicolas Saenz Julienne
From: Ard Biesheuvel 

We recently introduced a 1 GB sized ZONE_DMA to cater for platforms
incorporating masters that can address less than 32 bits of DMA, in
particular the Raspberry Pi 4, which has 4 or 8 GB of DRAM, but has
peripherals that can only address up to 1 GB (and its PCIe host
bridge can only access the bottom 3 GB)

Instructing the DMA layer about these limitations is straight-forward,
even though we had to fix some issues regarding memory limits set in
the IORT for named components, and regarding the handling of ACPI _DMA
methods. However, the DMA layer also needs to be able to allocate
memory that is guaranteed to meet those DMA constraints, for bounce
buffering as well as allocating the backing for consistent mappings.

This is why the 1 GB ZONE_DMA was introduced recently. Unfortunately,
it turns out the having a 1 GB ZONE_DMA as well as a ZONE_DMA32 causes
problems with kdump, and potentially in other places where allocations
cannot cross zone boundaries. Therefore, we should avoid having two
separate DMA zones when possible.

So let's do an early scan of the IORT, and only create the ZONE_DMA
if we encounter any devices that need it. This puts the burden on
the firmware to describe such limitations in the IORT, which may be
redundant (and less precise) if _DMA methods are also being provided.
However, it should be noted that this situation is highly unusual for
arm64 ACPI machines. Also, the DMA subsystem still gives precedence to
the _DMA method if implemented, and so we will not lose the ability to
perform streaming DMA outside the ZONE_DMA if the _DMA method permits
it.

Cc: Jeremy Linton 
Cc: Lorenzo Pieralisi 
Cc: Nicolas Saenz Julienne 
Cc: Rob Herring 
Cc: Christoph Hellwig 
Cc: Robin Murphy 
Cc: Hanjun Guo 
Cc: Sudeep Holla 
Cc: Anshuman Khandual 
Signed-off-by: Ard Biesheuvel 
[nsaenz: Rebased, removed documentation change and add declaration in 
acpi_iort.h]
Signed-off-by: Nicolas Saenz Julienne 
Tested-by: Jeremy Linton 
Acked-by: Lorenzo Pieralisi 
Acked-by: Hanjun Guo 

---

Changes since v3:
 - Use min_not_zero()
 - Check revision
 - Remove unnecessary #ifdef in zone_sizes_init()
 
 arch/arm64/mm/init.c  |  3 ++-
 drivers/acpi/arm64/iort.c | 52 +++
 include/linux/acpi_iort.h |  4 +++
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a2ce8a9a71a6..b9dc3831dd6b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -190,7 +191,7 @@ static void __init zone_sizes_init(unsigned long min, 
unsigned long max)
 
 #ifdef CONFIG_ZONE_DMA
dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
-   zone_dma_bits = min(32U, dt_zone_dma_bits);
+   zone_dma_bits = min3(32U, dt_zone_dma_bits, 
acpi_iort_get_zone_dma_size());
arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
 #endif
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 9929ff50c0c0..05fe4a076bab 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1718,3 +1718,55 @@ void __init acpi_iort_init(void)
 
iort_init_platform_devices();
 }
+
+#ifdef CONFIG_ZONE_DMA
+/*
+ * Check the IORT whether any devices exist whose DMA mask is < 32 bits.
+ * If so, return the smallest value encountered, or 32 otherwise.
+ */
+unsigned int __init acpi_iort_get_zone_dma_size(void)
+{
+   struct acpi_table_iort *iort;
+   struct acpi_iort_node *node, *end;
+   acpi_status status;
+   u8 limit = 32;
+   int i;
+
+   if (acpi_disabled)
+   return limit;
+
+   status = acpi_get_table(ACPI_SIG_IORT, 0,
+   (struct acpi_table_header **));
+   if (ACPI_FAILURE(status))
+   return limit;
+
+   node = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->node_offset);
+   end = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->header.length);
+
+   for (i = 0; i < iort->node_count; i++) {
+   if (node >= end)
+   break;
+
+   switch (node->type) {
+   struct acpi_iort_named_component *ncomp;
+   struct acpi_iort_root_complex *rc;
+
+   case ACPI_IORT_NODE_NAMED_COMPONENT:
+   ncomp = (struct acpi_iort_named_component 
*)node->node_data;
+   limit = min_not_zero(limit, 
ncomp->memory_address_limit);
+   break;
+
+   case ACPI_IORT_NODE_PCI_ROOT_COMPLEX:
+   if (node->revision < 1)
+   break;
+
+   rc = (struct acpi_iort_root_complex *)node->node_data;
+   limit = min_not_zero(limit, rc->memory_address_limit);
+   break;
+   }
+   node 

[PATCH v5 5/7] arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges

2020-10-29 Thread Nicolas Saenz Julienne
We recently introduced a 1 GB sized ZONE_DMA to cater for platforms
incorporating masters that can address less than 32 bits of DMA, in
particular the Raspberry Pi 4, which has 4 or 8 GB of DRAM, but has
peripherals that can only address up to 1 GB (and its PCIe host
bridge can only access the bottom 3 GB)

The DMA layer also needs to be able to allocate memory that is
guaranteed to meet those DMA constraints, for bounce buffering as well
as allocating the backing for consistent mappings. This is why the 1 GB
ZONE_DMA was introduced recently. Unfortunately, it turns out the having
a 1 GB ZONE_DMA as well as a ZONE_DMA32 causes problems with kdump, and
potentially in other places where allocations cannot cross zone
boundaries. Therefore, we should avoid having two separate DMA zones
when possible.

So, with the help of of_dma_get_max_cpu_address() get the topmost
physical address accessible to all DMA masters in system and use that
information to fine-tune ZONE_DMA's size. In the absence of addressing
limited masters ZONE_DMA will span the whole 32-bit address space,
otherwise, in the case of the Raspberry Pi 4 it'll only span the 30-bit
address space, and have ZONE_DMA32 cover the rest of the 32-bit address
space.

Signed-off-by: Nicolas Saenz Julienne 

---

Changes since v4:
 - Use fls64 as we're now using the max address (as opposed to the
   limit)

Changes since v3:
 - Simplify code for readability.

Changes since v2:
 - Updated commit log by shamelessly copying Ard's ACPI commit log

 arch/arm64/mm/init.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 410721fc4fc0..a2ce8a9a71a6 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -42,8 +42,6 @@
 #include 
 #include 
 
-#define ARM64_ZONE_DMA_BITS30
-
 /*
  * We need to be able to catch inadvertent references to memstart_addr
  * that occur (potentially in generic code) before arm64_memblock_init()
@@ -188,9 +186,11 @@ static phys_addr_t __init max_zone_phys(unsigned int 
zone_bits)
 static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
+   unsigned int __maybe_unused dt_zone_dma_bits;
 
 #ifdef CONFIG_ZONE_DMA
-   zone_dma_bits = ARM64_ZONE_DMA_BITS;
+   dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
+   zone_dma_bits = min(32U, dt_zone_dma_bits);
arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
 #endif
-- 
2.29.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v18 0/4] iommu/arm-smmu: Add adreno-smmu implementation and bindings

2020-10-29 Thread Will Deacon
On Tue, Oct 27, 2020 at 04:34:04PM -0600, Jordan Crouse wrote:
> This short series adds support for the adreno-smmu implementation of the
> arm-smmu driver and the device-tree bindings to turn on the implementation
> for the sm845 and sc7180 GPUs. These changes are the last ones needed to 
> enable
> per-instance pagetables in the drm/msm driver.
> 
> No deltas in this patchset since the last go-around for 5.10 [1].
> 
> [1] https://patchwork.freedesktop.org/series/81393/
> 
> Jordan Crouse (3):
>   iommu/arm-smmu-qcom: Add implementation for the adreno GPU SMMU
>   dt-bindings: arm-smmu: Add compatible string for Adreno GPU SMMU
>   arm: dts: qcom: sm845: Set the compatible string for the GPU SMMU
> 
> Rob Clark (1):
>   iommu/arm-smmu: Add a way for implementations to influence SCTLR

FYI: this patch (patch 4/4) doesn't seem to have made it anywhere (I don't
have it, and neither does the archive).

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 2/7] arm64: mm: Move zone_dma_bits initialization into zone_sizes_init()

2020-10-29 Thread Nicolas Saenz Julienne
zone_dma_bits's initialization happens earlier that it's actually
needed, in arm64_memblock_init(). So move it into the more suitable
zone_sizes_init().

Signed-off-by: Nicolas Saenz Julienne 
Tested-by: Jeremy Linton 
---
 arch/arm64/mm/init.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index fc4ab0d6d5d2..410721fc4fc0 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -190,6 +190,8 @@ static void __init zone_sizes_init(unsigned long min, 
unsigned long max)
unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
 
 #ifdef CONFIG_ZONE_DMA
+   zone_dma_bits = ARM64_ZONE_DMA_BITS;
+   arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
 #endif
 #ifdef CONFIG_ZONE_DMA32
@@ -376,11 +378,6 @@ void __init arm64_memblock_init(void)
 
early_init_fdt_scan_reserved_mem();
 
-   if (IS_ENABLED(CONFIG_ZONE_DMA)) {
-   zone_dma_bits = ARM64_ZONE_DMA_BITS;
-   arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
-   }
-
if (IS_ENABLED(CONFIG_ZONE_DMA32))
arm64_dma32_phys_limit = max_zone_phys(32);
else
-- 
2.29.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 1/7] arm64: mm: Move reserve_crashkernel() into mem_init()

2020-10-29 Thread Nicolas Saenz Julienne
crashkernel might reserve memory located in ZONE_DMA. We plan to delay
ZONE_DMA's initialization after unflattening the devicetree and ACPI's
boot table initialization, so move it later in the boot process.
Specifically into mem_init(), this is the last place crashkernel will be
able to reserve the memory before the page allocator kicks in. There
isn't any apparent reason for doing this earlier.

Signed-off-by: Nicolas Saenz Julienne 
Tested-by: Jeremy Linton 
---
 arch/arm64/mm/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 095540667f0f..fc4ab0d6d5d2 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -386,8 +386,6 @@ void __init arm64_memblock_init(void)
else
arm64_dma32_phys_limit = PHYS_MASK + 1;
 
-   reserve_crashkernel();
-
reserve_elfcorehdr();
 
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
@@ -508,6 +506,8 @@ void __init mem_init(void)
else
swiotlb_force = SWIOTLB_NO_FORCE;
 
+   reserve_crashkernel();
+
set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-- 
2.29.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 3/7] of/address: Introduce of_dma_get_max_cpu_address()

2020-10-29 Thread Nicolas Saenz Julienne
Introduce of_dma_get_max_cpu_address(), which provides the highest CPU
physical address addressable by all DMA masters in the system. It's
specially useful for setting memory zones sizes at early boot time.

Signed-off-by: Nicolas Saenz Julienne 

---

Changes since v4:
 - Return max address, not address limit (one-off difference)

Changes since v3:
 - use u64 with cpu_end

Changes since v2:
 - Use PHYS_ADDR_MAX
 - return phys_dma_t
 - Rename function
 - Correct subject
 - Add support to start parsing from an arbitrary device node in order
   for the function to work with unit tests

 drivers/of/address.c | 42 ++
 include/linux/of.h   |  7 +++
 2 files changed, 49 insertions(+)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index eb9ab4f1e80b..09c0af7fd1c4 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -1024,6 +1024,48 @@ int of_dma_get_range(struct device_node *np, const 
struct bus_dma_region **map)
 }
 #endif /* CONFIG_HAS_DMA */
 
+/**
+ * of_dma_get_max_cpu_address - Gets highest CPU address suitable for DMA
+ * @np: The node to start searching from or NULL to start from the root
+ *
+ * Gets the highest CPU physical address that is addressable by all DMA masters
+ * in the sub-tree pointed by np, or the whole tree if NULL is passed. If no
+ * DMA constrained device is found, it returns PHYS_ADDR_MAX.
+ */
+phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np)
+{
+   phys_addr_t max_cpu_addr = PHYS_ADDR_MAX;
+   struct of_range_parser parser;
+   phys_addr_t subtree_max_addr;
+   struct device_node *child;
+   struct of_range range;
+   const __be32 *ranges;
+   u64 cpu_end = 0;
+   int len;
+
+   if (!np)
+   np = of_root;
+
+   ranges = of_get_property(np, "dma-ranges", );
+   if (ranges && len) {
+   of_dma_range_parser_init(, np);
+   for_each_of_range(, )
+   if (range.cpu_addr + range.size > cpu_end)
+   cpu_end = range.cpu_addr + range.size - 1;
+
+   if (max_cpu_addr > cpu_end)
+   max_cpu_addr = cpu_end;
+   }
+
+   for_each_available_child_of_node(np, child) {
+   subtree_max_addr = of_dma_get_max_cpu_address(child);
+   if (max_cpu_addr > subtree_max_addr)
+   max_cpu_addr = subtree_max_addr;
+   }
+
+   return max_cpu_addr;
+}
+
 /**
  * of_dma_is_coherent - Check if device is coherent
  * @np:device node
diff --git a/include/linux/of.h b/include/linux/of.h
index 5d51891cbf1a..9ed5b8532c30 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -558,6 +558,8 @@ int of_map_id(struct device_node *np, u32 id,
   const char *map_name, const char *map_mask_name,
   struct device_node **target, u32 *id_out);
 
+phys_addr_t of_dma_get_max_cpu_address(struct device_node *np);
+
 #else /* CONFIG_OF */
 
 static inline void of_core_init(void)
@@ -995,6 +997,11 @@ static inline int of_map_id(struct device_node *np, u32 id,
return -EINVAL;
 }
 
+static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np)
+{
+   return PHYS_ADDR_MAX;
+}
+
 #define of_match_ptr(_ptr) NULL
 #define of_match_node(_matches, _node) NULL
 #endif /* CONFIG_OF */
-- 
2.29.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread Arvind Sankar
On Thu, Oct 29, 2020 at 05:59:54PM +0100, Paolo Bonzini wrote:
> On 29/10/20 17:56, Arvind Sankar wrote:
> >> For those two just add:
> >>struct apic *apic = x86_system_apic;
> >> before all the assignments.
> >> Less churn and much better code.
> >>
> > Why would it be better code?
> > 
> 
> I think he means the compiler produces better code, because it won't
> read the global variable repeatedly.  Not sure if that's true,(*) but I
> think I do prefer that version if Arnd wants to do that tweak.
> 
> Paolo
> 
> (*) if it is, it might also be due to Linux using -fno-strict-aliasing
> 

Nope, it doesn't read it multiple times. I guess it still assumes that
apic isn't in the middle of what it points to: it would reload the
address if the first element of *apic was modified, but doesn't for
other elements. Interesting.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread Paolo Bonzini
On 29/10/20 17:56, Arvind Sankar wrote:
>> For those two just add:
>>  struct apic *apic = x86_system_apic;
>> before all the assignments.
>> Less churn and much better code.
>>
> Why would it be better code?
> 

I think he means the compiler produces better code, because it won't
read the global variable repeatedly.  Not sure if that's true,(*) but I
think I do prefer that version if Arnd wants to do that tweak.

Paolo

(*) if it is, it might also be due to Linux using -fno-strict-aliasing

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread Arvind Sankar
On Thu, Oct 29, 2020 at 03:05:31PM +, David Laight wrote:
> From: Arnd Bergmann
> > Sent: 28 October 2020 21:21
> > 
> > From: Arnd Bergmann 
> > 
> > There are hundreds of warnings in a W=2 build about a local
> > variable shadowing the global 'apic' definition:
> > 
> > arch/x86/kvm/lapic.h:149:65: warning: declaration of 'apic' shadows a 
> > global declaration [-Wshadow]
> > 
> > Avoid this by renaming the global 'apic' variable to the more descriptive
> > 'x86_system_apic'. It was originally called 'genapic', but both that
> > and the current 'apic' seem to be a little overly generic for a global
> > variable.
> > 
> > Fixes: c48f14966cc4 ("KVM: inline kvm_apic_present() and 
> > kvm_lapic_enabled()")
> > Fixes: c8d46cf06dc2 ("x86: rename 'genapic' to 'apic'")
> > Signed-off-by: Arnd Bergmann 
> > ---
> > v2: rename the global instead of the local variable in the header
> ...
> > diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> > index 284e73661a18..33e2dc78ca11 100644
> > --- a/arch/x86/hyperv/hv_apic.c
> > +++ b/arch/x86/hyperv/hv_apic.c
> > @@ -259,14 +259,14 @@ void __init hv_apic_init(void)
> > /*
> >  * Set the IPI entry points.
> >  */
> > -   orig_apic = *apic;
> > -
> > -   apic->send_IPI = hv_send_ipi;
> > -   apic->send_IPI_mask = hv_send_ipi_mask;
> > -   apic->send_IPI_mask_allbutself = hv_send_ipi_mask_allbutself;
> > -   apic->send_IPI_allbutself = hv_send_ipi_allbutself;
> > -   apic->send_IPI_all = hv_send_ipi_all;
> > -   apic->send_IPI_self = hv_send_ipi_self;
> > +   orig_apic = *x86_system_apic;
> > +
> > +   x86_system_apic->send_IPI = hv_send_ipi;
> > +   x86_system_apic->send_IPI_mask = hv_send_ipi_mask;
> > +   x86_system_apic->send_IPI_mask_allbutself = 
> > hv_send_ipi_mask_allbutself;
> > +   x86_system_apic->send_IPI_allbutself = hv_send_ipi_allbutself;
> > +   x86_system_apic->send_IPI_all = hv_send_ipi_all;
> > +   x86_system_apic->send_IPI_self = hv_send_ipi_self;
> > }
> > 
> > if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
> > @@ -285,10 +285,10 @@ void __init hv_apic_init(void)
> >  */
> > apic_set_eoi_write(hv_apic_eoi_write);
> > if (!x2apic_enabled()) {
> > -   apic->read  = hv_apic_read;
> > -   apic->write = hv_apic_write;
> > -   apic->icr_write = hv_apic_icr_write;
> > -   apic->icr_read  = hv_apic_icr_read;
> > +   x86_system_apic->read  = hv_apic_read;
> > +   x86_system_apic->write = hv_apic_write;
> > +   x86_system_apic->icr_write = hv_apic_icr_write;
> > +   x86_system_apic->icr_read  = hv_apic_icr_read;
> > }
> 
> For those two just add:
>   struct apic *apic = x86_system_apic;
> before all the assignments.
> Less churn and much better code.
> 

Why would it be better code?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread Thomas Gleixner
Arnd,

On Thu, Oct 29 2020 at 10:51, Arnd Bergmann wrote:
> On Thu, Oct 29, 2020 at 8:04 AM Paolo Bonzini  wrote:
>> On 28/10/20 22:20, Arnd Bergmann wrote:
>> > Avoid this by renaming the global 'apic' variable to the more descriptive
>> > 'x86_system_apic'. It was originally called 'genapic', but both that
>> > and the current 'apic' seem to be a little overly generic for a global
>> > variable.
>>
>> The 'apic' affects only the current CPU, so one of 'x86_local_apic',
>> 'x86_lapic' or 'x86_apic' is probably preferrable.
>
> Ok, I'll change it to x86_local_apic then, unless someone else has
> a preference between them.

x86_local_apic is fine with me.

> I think ideally there would be no global variable, withall accesses
> encapsulated in function calls, possibly using static_call() optimizations
> if any of them are performance critical.

There are a bunch, yes.

> It doesn't seem hard to do, but I'd rather leave that change to
> an x86 person ;-)

It's not hard, but I'm not really sure whether it buys much.

Can you please redo that against tip x86/apic. Much of what you are
touching got a major overhaul.

Thanks,

tglx
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread David Laight
From: Arnd Bergmann
> Sent: 29 October 2020 09:51
...
> I think ideally there would be no global variable, withall accesses
> encapsulated in function calls, possibly using static_call() optimizations
> if any of them are performance critical.

There isn't really a massive difference between global variables
and global access functions.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH] iommu/arm-smmu-v3: Add def_domain_type callback

2020-10-29 Thread Shameerali Kolothum Thodi


> -Original Message-
> From: Robin Murphy [mailto:robin.mur...@arm.com]
> Sent: 29 October 2020 15:54
> To: Shameerali Kolothum Thodi ;
> linux-arm-ker...@lists.infradead.org; iommu@lists.linux-foundation.org
> Cc: Linuxarm ; w...@kernel.org; j...@8bytes.org;
> jean-phili...@linaro.org; ashok@intel.com; John Garry
> ; Song Bao Hua (Barry Song)
> ; Jonathan Cameron
> 
> Subject: Re: [PATCH] iommu/arm-smmu-v3: Add def_domain_type callback
> 
> On 2020-10-29 15:41, Shameer Kolothum wrote:
> > With the introduction of def_domain_type in iommu_ops, vendor
> > drivers can now inform the iommu generic layer about any specific
> > default domain requirement for a device. Any pci dev marked as
> > untrusted is now prevented from having an IDENTITY mapping
> > domain.
> >
> > The callback is also required when the support for dynamically
> > changing the default domain of a group is available.
> 
> Yes, we want to allow the group type control to work for all drivers,
> ideally...
> 
> > Signed-off-by: Shameer Kolothum 
> > ---
> >   -Only devices downstream from externally exposed PCIe hierarchies
> >    (such as Thunderbolt outside the platform) are currently marked
> >as "untrusted". Not aware of any ARM64 platforms that may use
> >this type of device.
> >
> >    Nevertheless, the main motivation for this patch is to have the
> >flexibility of changing the iommu default domain for a group based
> >on the series[1] "iommu: Add support to change default domain of an
> >iommu group" and that mandates vendor iommu driver to provide this
> >callback.
> >
> >   -This is tested along with [1] and was able to change the default
> >    domain of an iommu group on an HiSilicon D06 hardware.
> >
> > 1.
> https://lore.kernel.org/linux-iommu/20200925190620.18732-1-ashok.raj@intel
> .com/
> > ---
> >   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26
> +
> >   1 file changed, 26 insertions(+)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index e634bbe60573..d5dbcee995db 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -2567,6 +2567,31 @@ static int arm_smmu_dev_disable_feature(struct
> device *dev,
> > }
> >   }
> >
> > +/*
> > + * Return the required default domain type for a specific device.
> > + *
> > + * @dev: the device in query
> > + *
> > + * Returns:
> > + *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
> > + *  - 0: both identity and dynamic domains work for this device
> > + */
> > +static int arm_smmu_def_domain_type(struct device *dev)
> > +{
> > +   if (dev_is_pci(dev)) {
> > +   struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +   /*
> > +* Prevent any device marked as untrusted from getting
> > +* placed into the Identity mapping domain.
> > +*/
> > +   if (pdev->untrusted)
> > +   return IOMMU_DOMAIN_DMA;
> > +   }
> 
> This should be somewhere in core code - it has nothing to do with SMMUv3.

Agree.

> > +
> > +   return 0;
> 
> I don't strictly object to adding a stub callback for that bit, but why
> can't iommu_change_dev_def_domain() simply assume it from a NULL
> callback? That works for everyone, for no extra cost ;)

Right.

Hi Ashok,
Do you have any plan to respin your series and is it possible to include
the above suggestions into that? Please let me know.

Thanks,
Shameer
 
> Robin.
> 
> > +}
> > +
> >   static struct iommu_ops arm_smmu_ops = {
> > .capable= arm_smmu_capable,
> > .domain_alloc   = arm_smmu_domain_alloc,
> > @@ -2589,6 +2614,7 @@ static struct iommu_ops arm_smmu_ops = {
> > .dev_feat_enabled   = arm_smmu_dev_feature_enabled,
> > .dev_enable_feat= arm_smmu_dev_enable_feature,
> > .dev_disable_feat   = arm_smmu_dev_disable_feature,
> > +   .def_domain_type= arm_smmu_def_domain_type,
> > .pgsize_bitmap  = -1UL, /* Restricted during device attach */
> >   };
> >
> >
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] iommu: Clarify .of_xlate assumptions

2020-10-29 Thread Maxime Ripard
On Thu, Oct 29, 2020 at 03:34:48PM +, Robin Murphy wrote:
> A common idiom for .of_xlate is to use of_find_device_by_node() to
> retrieve the relevant IOMMU instance data when translating IOMMU
> specifiers for a client device. Although it's slightly roundabout,
> this is simply looking up something we know exists - if it *were*
> to return NULL, that would mean that no platform device is associated
> with the given DT node, therefore the driver couldn't have probed
> and called iommu_device_register() with the ops that .of_xlate was
> called from in the first place. By construction, we can also assume
> that the instance data for any registered IOMMU must be valid.
> 
> This isn't necessarily obvious at first glance, though, so add some
> comments to document these assumptions in-place.
> 
> Suggested-by: Yu Kuai 
> Signed-off-by: Robin Murphy 

Acked-by: Maxime Ripard 

Thanks!
Maxime


signature.asc
Description: PGP signature
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] iommu/arm-smmu-v3: Add def_domain_type callback

2020-10-29 Thread Robin Murphy

On 2020-10-29 15:41, Shameer Kolothum wrote:

With the introduction of def_domain_type in iommu_ops, vendor
drivers can now inform the iommu generic layer about any specific
default domain requirement for a device. Any pci dev marked as
untrusted is now prevented from having an IDENTITY mapping
domain.

The callback is also required when the support for dynamically
changing the default domain of a group is available.


Yes, we want to allow the group type control to work for all drivers, 
ideally...



Signed-off-by: Shameer Kolothum 
---
  -Only devices downstream from externally exposed PCIe hierarchies
   (such as Thunderbolt outside the platform) are currently marked
   as "untrusted". Not aware of any ARM64 platforms that may use
   this type of device.

   Nevertheless, the main motivation for this patch is to have the
   flexibility of changing the iommu default domain for a group based
   on the series[1] "iommu: Add support to change default domain of an
   iommu group" and that mandates vendor iommu driver to provide this
   callback.

  -This is tested along with [1] and was able to change the default
   domain of an iommu group on an HiSilicon D06 hardware.
  
1. https://lore.kernel.org/linux-iommu/20200925190620.18732-1-ashok@intel.com/

---
  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 +
  1 file changed, 26 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e634bbe60573..d5dbcee995db 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2567,6 +2567,31 @@ static int arm_smmu_dev_disable_feature(struct device 
*dev,
}
  }
  
+/*

+ * Return the required default domain type for a specific device.
+ *
+ * @dev: the device in query
+ *
+ * Returns:
+ *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
+ *  - 0: both identity and dynamic domains work for this device
+ */
+static int arm_smmu_def_domain_type(struct device *dev)
+{
+   if (dev_is_pci(dev)) {
+   struct pci_dev *pdev = to_pci_dev(dev);
+
+   /*
+* Prevent any device marked as untrusted from getting
+* placed into the Identity mapping domain.
+*/
+   if (pdev->untrusted)
+   return IOMMU_DOMAIN_DMA;
+   }


This should be somewhere in core code - it has nothing to do with SMMUv3.


+
+   return 0;


I don't strictly object to adding a stub callback for that bit, but why 
can't iommu_change_dev_def_domain() simply assume it from a NULL 
callback? That works for everyone, for no extra cost ;)


Robin.


+}
+
  static struct iommu_ops arm_smmu_ops = {
.capable= arm_smmu_capable,
.domain_alloc   = arm_smmu_domain_alloc,
@@ -2589,6 +2614,7 @@ static struct iommu_ops arm_smmu_ops = {
.dev_feat_enabled   = arm_smmu_dev_feature_enabled,
.dev_enable_feat= arm_smmu_dev_enable_feature,
.dev_disable_feat   = arm_smmu_dev_disable_feature,
+   .def_domain_type= arm_smmu_def_domain_type,
.pgsize_bitmap  = -1UL, /* Restricted during device attach */
  };
  


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH] iommu/arm-smmu-v3: Add def_domain_type callback

2020-10-29 Thread Shameer Kolothum
With the introduction of def_domain_type in iommu_ops, vendor
drivers can now inform the iommu generic layer about any specific
default domain requirement for a device. Any pci dev marked as
untrusted is now prevented from having an IDENTITY mapping
domain. 

The callback is also required when the support for dynamically
changing the default domain of a group is available.

Signed-off-by: Shameer Kolothum 
---
 -Only devices downstream from externally exposed PCIe hierarchies
  (such as Thunderbolt outside the platform) are currently marked
  as "untrusted". Not aware of any ARM64 platforms that may use
  this type of device.  

  Nevertheless, the main motivation for this patch is to have the
  flexibility of changing the iommu default domain for a group based
  on the series[1] "iommu: Add support to change default domain of an
  iommu group" and that mandates vendor iommu driver to provide this
  callback.

 -This is tested along with [1] and was able to change the default
  domain of an iommu group on an HiSilicon D06 hardware.
 
1. 
https://lore.kernel.org/linux-iommu/20200925190620.18732-1-ashok@intel.com/
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 +
 1 file changed, 26 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e634bbe60573..d5dbcee995db 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2567,6 +2567,31 @@ static int arm_smmu_dev_disable_feature(struct device 
*dev,
}
 }
 
+/*
+ * Return the required default domain type for a specific device.
+ *
+ * @dev: the device in query
+ *
+ * Returns:
+ *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
+ *  - 0: both identity and dynamic domains work for this device
+ */
+static int arm_smmu_def_domain_type(struct device *dev)
+{
+   if (dev_is_pci(dev)) {
+   struct pci_dev *pdev = to_pci_dev(dev);
+
+   /*
+* Prevent any device marked as untrusted from getting
+* placed into the Identity mapping domain.
+*/
+   if (pdev->untrusted)
+   return IOMMU_DOMAIN_DMA;
+   }
+
+   return 0;
+}
+
 static struct iommu_ops arm_smmu_ops = {
.capable= arm_smmu_capable,
.domain_alloc   = arm_smmu_domain_alloc,
@@ -2589,6 +2614,7 @@ static struct iommu_ops arm_smmu_ops = {
.dev_feat_enabled   = arm_smmu_dev_feature_enabled,
.dev_enable_feat= arm_smmu_dev_enable_feature,
.dev_disable_feat   = arm_smmu_dev_disable_feature,
+   .def_domain_type= arm_smmu_def_domain_type,
.pgsize_bitmap  = -1UL, /* Restricted during device attach */
 };
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH] iommu: Clarify .of_xlate assumptions

2020-10-29 Thread Robin Murphy
A common idiom for .of_xlate is to use of_find_device_by_node() to
retrieve the relevant IOMMU instance data when translating IOMMU
specifiers for a client device. Although it's slightly roundabout,
this is simply looking up something we know exists - if it *were*
to return NULL, that would mean that no platform device is associated
with the given DT node, therefore the driver couldn't have probed
and called iommu_device_register() with the ops that .of_xlate was
called from in the first place. By construction, we can also assume
that the instance data for any registered IOMMU must be valid.

This isn't necessarily obvious at first glance, though, so add some
comments to document these assumptions in-place.

Suggested-by: Yu Kuai 
Signed-off-by: Robin Murphy 
---
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |  7 ---
 drivers/iommu/exynos-iommu.c| 15 ++-
 drivers/iommu/ipmmu-vmsa.c  | 13 ++---
 drivers/iommu/mtk_iommu.c   |  8 
 drivers/iommu/rockchip-iommu.c  |  5 -
 drivers/iommu/sun50i-iommu.c|  5 -
 6 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c 
b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index b30d6c966e2c..1dec28801eac 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -573,10 +573,11 @@ static int qcom_iommu_of_xlate(struct device *dev, struct 
of_phandle_args *args)
return -EINVAL;
}
 
+   /*
+* We're simply retrieving the same platform device that called
+* iommu_device_register() when it probed, so it must be valid.
+*/
iommu_pdev = of_find_device_by_node(args->np);
-   if (WARN_ON(!iommu_pdev))
-   return -EINVAL;
-
qcom_iommu = platform_get_drvdata(iommu_pdev);
 
/* make sure the asid specified in dt is valid, so we don't have
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index de324b4eedfe..73df251d5bcb 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -630,6 +630,8 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
if (ret)
return ret;
 
+   platform_set_drvdata(pdev, data);
+
iommu_device_set_ops(>iommu, _iommu_ops);
iommu_device_set_fwnode(>iommu, >of_node->fwnode);
 
@@ -637,8 +639,6 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
if (ret)
return ret;
 
-   platform_set_drvdata(pdev, data);
-
__sysmmu_get_version(data);
if (PG_ENT_SHIFT < 0) {
if (MMU_MAJ_VER(data->version) < 5) {
@@ -1291,14 +1291,11 @@ static int exynos_iommu_of_xlate(struct device *dev,
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data, *entry;
 
-   if (!sysmmu)
-   return -ENODEV;
-
+   /*
+* We're simply retrieving the same platform device that called
+* iommu_device_register() when it probed, so it must be valid.
+*/
data = platform_get_drvdata(sysmmu);
-   if (!data) {
-   put_device(>dev);
-   return -ENODEV;
-   }
 
if (!owner) {
owner = kzalloc(sizeof(*owner), GFP_KERNEL);
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 0f18abda0e20..6be8dea03d97 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -725,11 +725,11 @@ static int ipmmu_init_platform_device(struct device *dev,
  struct of_phandle_args *args)
 {
struct platform_device *ipmmu_pdev;
-
+   /*
+* We're simply retrieving the same platform device that called
+* iommu_device_register() when it probed, so it must be valid.
+*/
ipmmu_pdev = of_find_device_by_node(args->np);
-   if (!ipmmu_pdev)
-   return -ENODEV;
-
dev_iommu_priv_set(dev, platform_get_drvdata(ipmmu_pdev));
 
return 0;
@@ -1075,6 +1075,8 @@ static int ipmmu_probe(struct platform_device *pdev)
}
}
 
+   platform_set_drvdata(pdev, mmu);
+
/*
 * Register the IPMMU to the IOMMU subsystem in the following cases:
 * - R-Car Gen2 IPMMU (all devices registered)
@@ -1105,9 +1107,6 @@ static int ipmmu_probe(struct platform_device *pdev)
 * an IOMMU, which only happens when bus_set_iommu() is called in
 * ipmmu_init() after the probe function returns.
 */
-
-   platform_set_drvdata(pdev, mmu);
-
return 0;
 }
 
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index c072cee532c2..46cba18189ec 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -520,11 +520,11 @@ static int mtk_iommu_of_xlate(struct device *dev, struct 
of_phandle_args *args)
}
 
if 

RE: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread David Laight
From: Arnd Bergmann
> Sent: 28 October 2020 21:21
> 
> From: Arnd Bergmann 
> 
> There are hundreds of warnings in a W=2 build about a local
> variable shadowing the global 'apic' definition:
> 
> arch/x86/kvm/lapic.h:149:65: warning: declaration of 'apic' shadows a global 
> declaration [-Wshadow]
> 
> Avoid this by renaming the global 'apic' variable to the more descriptive
> 'x86_system_apic'. It was originally called 'genapic', but both that
> and the current 'apic' seem to be a little overly generic for a global
> variable.
> 
> Fixes: c48f14966cc4 ("KVM: inline kvm_apic_present() and kvm_lapic_enabled()")
> Fixes: c8d46cf06dc2 ("x86: rename 'genapic' to 'apic'")
> Signed-off-by: Arnd Bergmann 
> ---
> v2: rename the global instead of the local variable in the header
...
> diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> index 284e73661a18..33e2dc78ca11 100644
> --- a/arch/x86/hyperv/hv_apic.c
> +++ b/arch/x86/hyperv/hv_apic.c
> @@ -259,14 +259,14 @@ void __init hv_apic_init(void)
>   /*
>* Set the IPI entry points.
>*/
> - orig_apic = *apic;
> -
> - apic->send_IPI = hv_send_ipi;
> - apic->send_IPI_mask = hv_send_ipi_mask;
> - apic->send_IPI_mask_allbutself = hv_send_ipi_mask_allbutself;
> - apic->send_IPI_allbutself = hv_send_ipi_allbutself;
> - apic->send_IPI_all = hv_send_ipi_all;
> - apic->send_IPI_self = hv_send_ipi_self;
> + orig_apic = *x86_system_apic;
> +
> + x86_system_apic->send_IPI = hv_send_ipi;
> + x86_system_apic->send_IPI_mask = hv_send_ipi_mask;
> + x86_system_apic->send_IPI_mask_allbutself = 
> hv_send_ipi_mask_allbutself;
> + x86_system_apic->send_IPI_allbutself = hv_send_ipi_allbutself;
> + x86_system_apic->send_IPI_all = hv_send_ipi_all;
> + x86_system_apic->send_IPI_self = hv_send_ipi_self;
>   }
> 
>   if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
> @@ -285,10 +285,10 @@ void __init hv_apic_init(void)
>*/
>   apic_set_eoi_write(hv_apic_eoi_write);
>   if (!x2apic_enabled()) {
> - apic->read  = hv_apic_read;
> - apic->write = hv_apic_write;
> - apic->icr_write = hv_apic_icr_write;
> - apic->icr_read  = hv_apic_icr_read;
> + x86_system_apic->read  = hv_apic_read;
> + x86_system_apic->write = hv_apic_write;
> + x86_system_apic->icr_write = hv_apic_icr_write;
> + x86_system_apic->icr_read  = hv_apic_icr_read;
>   }

For those two just add:
struct apic *apic = x86_system_apic;
before all the assignments.
Less churn and much better code.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/rockchip: check return value of of_find_device_by_node() in rk_iommu_of_xlate()

2020-10-29 Thread Robin Murphy

On 2020-10-29 13:19, yukuai (C) wrote:


On 2020/10/29 18:08, Robin Murphy wrote:

On 2020-10-29 09:22, Yu Kuai wrote:

If of_find_device_by_node() failed in rk_iommu_of_xlate(), null pointer
dereference will be triggered. Thus return error code if
of_find_device_by_node() failed.


How can that happen? (Given that ".suppress_bind_attrs = true")

Robin.


I'm not sure if that could happen...

My thought is that it's better to do such checking to aviod any possible
problem.


->of_xlate() is only invoked on the specific set of ops returned by 
iommu_ops_from_fwnode(). In turn, iommu_ops_from_fwnode() will only 
return those ops if the driver has successfully probed and called 
iommu_register_device() with the relevant DT node. For the driver to 
have been able to probe at all, a platform device associated with that 
DT node must have been created, and therefore of_find_device_by_node() 
cannot fail.


If there ever were some problem serious enough to break that fundamental 
assumption, then I *want* these drivers to crash right here, with a nice 
clear stack trace to start debugging from. So no, I firmly disagree that 
adding redundant code, which will never do anything except attempt to 
paper over catastrophic memory corruption, is "better". Sorry :)


Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/rockchip: check return value of of_find_device_by_node() in rk_iommu_of_xlate()

2020-10-29 Thread yukuai (C)



On 2020/10/29 18:08, Robin Murphy wrote:

On 2020-10-29 09:22, Yu Kuai wrote:

If of_find_device_by_node() failed in rk_iommu_of_xlate(), null pointer
dereference will be triggered. Thus return error code if
of_find_device_by_node() failed.


How can that happen? (Given that ".suppress_bind_attrs = true")

Robin.


I'm not sure if that could happen...

My thought is that it's better to do such checking to aviod any possible
problem.

Thanks!
Yu Kuai
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/sun50i: check return value of of_find_device_by_node() in sun50i_iommu_of_xlate()

2020-10-29 Thread Robin Murphy

On 2020-10-29 09:22, Yu Kuai wrote:

If of_find_device_by_node() failed in sun50i_iommu_of_xlate(), null
pointer dereference will be triggered. Thus return error code if
of_find_device_by_node() failed.


Again, by what means can that ever actually happen?

Robin.


Fixes: 4100b8c229b3("iommu: Add Allwinner H6 IOMMU driver")
Signed-off-by: Yu Kuai 
---
  drivers/iommu/sun50i-iommu.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index ea6db1341916..ce94ffa15215 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -764,6 +764,9 @@ static int sun50i_iommu_of_xlate(struct device *dev,
struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
unsigned id = args->args[0];
  
+	if (!iommu_pdev)

+   return -ENODEV;
+
dev_iommu_priv_set(dev, platform_get_drvdata(iommu_pdev));
  
  	return iommu_fwspec_add_ids(dev, , 1);



___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/rockchip: check return value of of_find_device_by_node() in rk_iommu_of_xlate()

2020-10-29 Thread Robin Murphy

On 2020-10-29 09:22, Yu Kuai wrote:

If of_find_device_by_node() failed in rk_iommu_of_xlate(), null pointer
dereference will be triggered. Thus return error code if
of_find_device_by_node() failed.


How can that happen? (Given that ".suppress_bind_attrs = true")

Robin.


Fixes: 5fd577c3eac3("iommu/rockchip: Use OF_IOMMU to attach devices 
automatically")
Signed-off-by: Yu Kuai 
---
  drivers/iommu/rockchip-iommu.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index e5d86b7177de..090d149ef8e9 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1099,6 +1099,9 @@ static int rk_iommu_of_xlate(struct device *dev,
  
  	iommu_dev = of_find_device_by_node(args->np);
  
+	if (!iommu_dev)

+   return -ENODEV;
+
data->iommu = platform_get_drvdata(iommu_dev);
dev_iommu_priv_set(dev, data);
  


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH kernel v4 2/2] powerpc/dma: Fallback to dma_ops when persistent memory present

2020-10-29 Thread Michael Ellerman
Alexey Kardashevskiy  writes:
> So far we have been using huge DMA windows to map all the RAM available.
> The RAM is normally mapped to the VM address space contiguously, and
> there is always a reasonable upper limit for possible future hot plugged
> RAM which makes it easy to map all RAM via IOMMU.
>
> Now there is persistent memory ("ibm,pmemory" in the FDT) which (unlike
> normal RAM) can map anywhere in the VM space beyond the maximum RAM size
> and since it can be used for DMA, it requires extending the huge window
> up to MAX_PHYSMEM_BITS which requires hypervisor support for:
> 1. huge TCE tables;
> 2. multilevel TCE tables;
> 3. huge IOMMU pages.
>
> Certain hypervisors cannot do either so the only option left is
> restricting the huge DMA window to include only RAM and fallback to
> the default DMA window for persistent memory.
>
> This defines arch_dma_map_direct/etc to allow generic DMA code perform
> additional checks on whether direct DMA is still possible.
>
> This checks if the system has persistent memory. If it does not,
> the DMA bypass mode is selected, i.e.
> * dev->bus_dma_limit = 0
> * dev->dma_ops_bypass = true <- this avoid calling dma_ops for mapping.
>
> If there is such memory, this creates identity mapping only for RAM and
> sets the dev->bus_dma_limit to let the generic code decide whether to
> call into the direct DMA or the indirect DMA ops.
>
> This should not change the existing behaviour when no persistent memory
> as dev->dma_ops_bypass is expected to be set.
>
> Signed-off-by: Alexey Kardashevskiy 

Acked-by: Michael Ellerman 

cheers
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH kernel v3 2/2] powerpc/dma: Fallback to dma_ops when persistent memory present

2020-10-29 Thread Michael Ellerman
Alexey Kardashevskiy  writes:
> On 29/10/2020 11:40, Michael Ellerman wrote:
>> Alexey Kardashevskiy  writes:
>>> @@ -1126,7 +1129,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct 
>>> device_node *pdn)
>>>   
>>> mutex_lock(_window_init_mutex);
>>>   
>>> -   dma_addr = find_existing_ddw(pdn);
>>> +   dma_addr = find_existing_ddw(pdn, );
>> 
>> I don't see len used anywhere?
>> 
>>> if (dma_addr != 0)
>>> goto out_unlock;
>>>   
>>> @@ -1212,14 +1215,26 @@ static u64 enable_ddw(struct pci_dev *dev, struct 
>>> device_node *pdn)
>>> }
>>> /* verify the window * number of ptes will map the partition */
>>> /* check largest block * page size > max memory hotplug addr */
>>> -   max_addr = ddw_memory_hotplug_max();
>>> -   if (query.largest_available_block < (max_addr >> page_shift)) {
>>> -   dev_dbg(>dev, "can't map partition max 0x%llx with %llu "
>>> - "%llu-sized pages\n", max_addr,  
>>> query.largest_available_block,
>>> - 1ULL << page_shift);
>>> +   /*
>>> +* The "ibm,pmemory" can appear anywhere in the address space.
>>> +* Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
>>> +* for the upper limit and fallback to max RAM otherwise but this
>>> +* disables device::dma_ops_bypass.
>>> +*/
>>> +   len = max_ram_len;
>> 
>> Here you override whatever find_existing_ddw() wrote to len?
>
> Not always, there is a bunch of gotos before this line to the end of the 
> function and one (which returns the existing window) is legit. Thanks,

Ah yep I see it.

Gotos considered confusing ;)

cheers
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread Arnd Bergmann
On Thu, Oct 29, 2020 at 8:04 AM Paolo Bonzini  wrote:
>
> On 28/10/20 22:20, Arnd Bergmann wrote:
> > Avoid this by renaming the global 'apic' variable to the more descriptive
> > 'x86_system_apic'. It was originally called 'genapic', but both that
> > and the current 'apic' seem to be a little overly generic for a global
> > variable.
>
> The 'apic' affects only the current CPU, so one of 'x86_local_apic',
> 'x86_lapic' or 'x86_apic' is probably preferrable.

Ok, I'll change it to x86_local_apic then, unless someone else has
a preference between them.

> I don't have huge objections to renaming 'apic' variables and arguments
> in KVM to 'lapic'.  I do agree with Sean however that it's going to
> break again very soon.

I think ideally there would be no global variable, withall accesses
encapsulated in function calls, possibly using static_call() optimizations
if any of them are performance critical.

It doesn't seem hard to do, but I'd rather leave that change to
an x86 person ;-)

  Arnd
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/sun50i: check return value of of_find_device_by_node() in sun50i_iommu_of_xlate()

2020-10-29 Thread Maxime Ripard
On Thu, Oct 29, 2020 at 05:22:44PM +0800, Yu Kuai wrote:
> If of_find_device_by_node() failed in sun50i_iommu_of_xlate(), null
> pointer dereference will be triggered. Thus return error code if
> of_find_device_by_node() failed.
> 
> Fixes: 4100b8c229b3("iommu: Add Allwinner H6 IOMMU driver")
> Signed-off-by: Yu Kuai 

Acked-by: Maxime Ripard 

Thanks!
Maxime


signature.asc
Description: PGP signature
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH] iommu/sun50i: check return value of of_find_device_by_node() in sun50i_iommu_of_xlate()

2020-10-29 Thread Yu Kuai
If of_find_device_by_node() failed in sun50i_iommu_of_xlate(), null
pointer dereference will be triggered. Thus return error code if
of_find_device_by_node() failed.

Fixes: 4100b8c229b3("iommu: Add Allwinner H6 IOMMU driver")
Signed-off-by: Yu Kuai 
---
 drivers/iommu/sun50i-iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index ea6db1341916..ce94ffa15215 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -764,6 +764,9 @@ static int sun50i_iommu_of_xlate(struct device *dev,
struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
unsigned id = args->args[0];
 
+   if (!iommu_pdev)
+   return -ENODEV;
+
dev_iommu_priv_set(dev, platform_get_drvdata(iommu_pdev));
 
return iommu_fwspec_add_ids(dev, , 1);
-- 
2.25.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/rockchip: check return value of of_find_device_by_node() in rk_iommu_of_xlate()

2020-10-29 Thread Yu Kuai
If of_find_device_by_node() failed in rk_iommu_of_xlate(), null pointer
dereference will be triggered. Thus return error code if
of_find_device_by_node() failed.

Fixes: 5fd577c3eac3("iommu/rockchip: Use OF_IOMMU to attach devices 
automatically")
Signed-off-by: Yu Kuai 
---
 drivers/iommu/rockchip-iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index e5d86b7177de..090d149ef8e9 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1099,6 +1099,9 @@ static int rk_iommu_of_xlate(struct device *dev,
 
iommu_dev = of_find_device_by_node(args->np);
 
+   if (!iommu_dev)
+   return -ENODEV;
+
data->iommu = platform_get_drvdata(iommu_dev);
dev_iommu_priv_set(dev, data);
 
-- 
2.25.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] [v2] x86: apic: avoid -Wshadow warning in header

2020-10-29 Thread Paolo Bonzini
On 28/10/20 22:20, Arnd Bergmann wrote:
> Avoid this by renaming the global 'apic' variable to the more descriptive
> 'x86_system_apic'. It was originally called 'genapic', but both that
> and the current 'apic' seem to be a little overly generic for a global
> variable.

The 'apic' affects only the current CPU, so one of 'x86_local_apic',
'x86_lapic' or 'x86_apic' is probably preferrable.

I don't have huge objections to renaming 'apic' variables and arguments
in KVM to 'lapic'.  I do agree with Sean however that it's going to
break again very soon.

Paolo

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v1 2/3] iommu: Fix an issue in iommu_page_response() flags check

2020-10-29 Thread Yi Sun
On 20-10-28 10:13:56, Jean-Philippe Brucker wrote:
> Hi,
> 
> On Wed, Oct 28, 2020 at 09:36:57AM +0800, Yi Sun wrote:
> > From: Jacob Pan 
> > 
> > original code fails when LAST_PAGE is set in flags.
> 
> LAST_PAGE is not documented to be a valid flags for page_response.
> So isn't failing the right thing to do?
> 
> > 
> > Signed-off-by: Jacob Pan 
> > Signed-off-by: Liu Yi L 
> > Signed-off-by: Yi Sun 
> > ---
> >  drivers/iommu/iommu.c | 6 --
> >  1 file changed, 4 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index 8c470f4..053cec3 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -1200,9 +1200,11 @@ int iommu_page_response(struct device *dev,
> > return -EINVAL;
> >  
> > if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
> > -   msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
> > +   !(msg->flags & IOMMU_PAGE_RESP_PASID_VALID)) {
> 
> It should be OK not to have PASID_VALID set, we're just checking for
> undefined flags here.
> 
Thanks! You are right. Per published spec, we should not care LAST_PAGE
for page_response. I will remove this patch in next version.
 
> Thanks,
> Jean
> 
> > +   dev_warn_ratelimited(dev, "%s:Invalid ver %x: flags %x\n",
> > +   __func__, msg->version, msg->flags);
> > return -EINVAL;
> > -
> > +   }
> > /* Only send response if there is a fault report pending */
> > mutex_lock(>fault_param->lock);
> > if (list_empty(>fault_param->faults)) {
> > -- 
> > 2.7.4
> > 
> > ___
> > iommu mailing list
> > iommu@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/iommu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu