Re: [PATCH 00/13] iommu: Remove usage of dev->archdata.iommu

2020-06-30 Thread Joerg Roedel
On Thu, Jun 25, 2020 at 03:08:23PM +0200, Joerg Roedel wrote:
> Joerg Roedel (13):
>   iommu/exynos: Use dev_iommu_priv_get/set()
>   iommu/vt-d: Use dev_iommu_priv_get/set()
>   iommu/msm: Use dev_iommu_priv_get/set()
>   iommu/omap: Use dev_iommu_priv_get/set()
>   iommu/rockchip: Use dev_iommu_priv_get/set()
>   iommu/tegra: Use dev_iommu_priv_get/set()
>   iommu/pamu: Use dev_iommu_priv_get/set()
>   iommu/mediatek: Do no use dev->archdata.iommu
>   x86: Remove dev->archdata.iommu pointer
>   ia64: Remove dev->archdata.iommu pointer
>   arm: Remove dev->archdata.iommu pointer
>   arm64: Remove dev->archdata.iommu pointer
>   powerpc/dma: Remove dev->archdata.iommu_domain

Applied.


[PATCH 10/13] ia64: Remove dev->archdata.iommu pointer

2020-06-25 Thread Joerg Roedel
From: Joerg Roedel 

There are no users left, all drivers have been converted to use the
per-device private pointer offered by IOMMU core.

Signed-off-by: Joerg Roedel 
---
 arch/ia64/include/asm/device.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h
index 3eb397415381..918b198cd5bb 100644
--- a/arch/ia64/include/asm/device.h
+++ b/arch/ia64/include/asm/device.h
@@ -6,9 +6,6 @@
 #define _ASM_IA64_DEVICE_H
 
 struct dev_archdata {
-#ifdef CONFIG_IOMMU_API
-   void *iommu; /* hook for IOMMU specific extension */
-#endif
 };
 
 struct pdev_archdata {
-- 
2.27.0



[PATCH 08/13] iommu/mediatek: Do no use dev->archdata.iommu

2020-06-25 Thread Joerg Roedel
From: Joerg Roedel 

The iommu private pointer is already used in the Mediatek IOMMU v1
driver, so move the dma_iommu_mapping pointer into 'struct
mtk_iommu_data' and do not use dev->archdata.iommu anymore.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/mtk_iommu.h|  2 ++
 drivers/iommu/mtk_iommu_v1.c | 10 --
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index ea949a324e33..1682406e98dc 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -62,6 +62,8 @@ struct mtk_iommu_data {
struct iommu_device iommu;
const struct mtk_iommu_plat_data *plat_data;
 
+   struct dma_iommu_mapping*mapping; /* For mtk_iommu_v1.c */
+
struct list_headlist;
struct mtk_smi_larb_iommu   larb_imu[MTK_LARB_NR_MAX];
 };
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index c9d79cff4d17..82ddfe9170d4 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -269,7 +269,7 @@ static int mtk_iommu_attach_device(struct iommu_domain 
*domain,
int ret;
 
/* Only allow the domain created internally. */
-   mtk_mapping = data->dev->archdata.iommu;
+   mtk_mapping = data->mapping;
if (mtk_mapping->domain != domain)
return 0;
 
@@ -369,7 +369,6 @@ static int mtk_iommu_create_mapping(struct device *dev,
struct mtk_iommu_data *data;
struct platform_device *m4updev;
struct dma_iommu_mapping *mtk_mapping;
-   struct device *m4udev;
int ret;
 
if (args->args_count != 1) {
@@ -401,8 +400,7 @@ static int mtk_iommu_create_mapping(struct device *dev,
return ret;
 
data = dev_iommu_priv_get(dev);
-   m4udev = data->dev;
-   mtk_mapping = m4udev->archdata.iommu;
+   mtk_mapping = data->mapping;
if (!mtk_mapping) {
/* MTK iommu support 4GB iova address space. */
mtk_mapping = arm_iommu_create_mapping(_bus_type,
@@ -410,7 +408,7 @@ static int mtk_iommu_create_mapping(struct device *dev,
if (IS_ERR(mtk_mapping))
return PTR_ERR(mtk_mapping);
 
-   m4udev->archdata.iommu = mtk_mapping;
+   data->mapping = mtk_mapping;
}
 
return 0;
@@ -459,7 +457,7 @@ static void mtk_iommu_probe_finalize(struct device *dev)
int err;
 
data= dev_iommu_priv_get(dev);
-   mtk_mapping = data->dev->archdata.iommu;
+   mtk_mapping = data->mapping;
 
err = arm_iommu_attach_device(dev, mtk_mapping);
if (err)
-- 
2.27.0



[PATCH 06/13] iommu/tegra: Use dev_iommu_priv_get/set()

2020-06-25 Thread Joerg Roedel
From: Joerg Roedel 

Remove the use of dev->archdata.iommu and use the private per-device
pointer provided by IOMMU core code instead.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/tegra-gart.c | 8 
 drivers/iommu/tegra-smmu.c | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 5fbdff6ff41a..fac720273889 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -113,8 +113,8 @@ static int gart_iommu_attach_dev(struct iommu_domain 
*domain,
 
if (gart->active_domain && gart->active_domain != domain) {
ret = -EBUSY;
-   } else if (dev->archdata.iommu != domain) {
-   dev->archdata.iommu = domain;
+   } else if (dev_iommu_priv_get(dev) != domain) {
+   dev_iommu_priv_set(dev, domain);
gart->active_domain = domain;
gart->active_devices++;
}
@@ -131,8 +131,8 @@ static void gart_iommu_detach_dev(struct iommu_domain 
*domain,
 
spin_lock(>dom_lock);
 
-   if (dev->archdata.iommu == domain) {
-   dev->archdata.iommu = NULL;
+   if (dev_iommu_priv_get(dev) == domain) {
+   dev_iommu_priv_set(dev, NULL);
 
if (--gart->active_devices == 0)
gart->active_domain = NULL;
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 7426b7666e2b..124c8848ab7e 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -465,7 +465,7 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
 static int tegra_smmu_attach_dev(struct iommu_domain *domain,
 struct device *dev)
 {
-   struct tegra_smmu *smmu = dev->archdata.iommu;
+   struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
struct tegra_smmu_as *as = to_smmu_as(domain);
struct device_node *np = dev->of_node;
struct of_phandle_args args;
@@ -780,7 +780,7 @@ static struct iommu_device *tegra_smmu_probe_device(struct 
device *dev)
 * supported by the Linux kernel, so abort after the
 * first match.
 */
-   dev->archdata.iommu = smmu;
+   dev_iommu_priv_set(dev, smmu);
 
break;
}
@@ -797,7 +797,7 @@ static struct iommu_device *tegra_smmu_probe_device(struct 
device *dev)
 
 static void tegra_smmu_release_device(struct device *dev)
 {
-   dev->archdata.iommu = NULL;
+   dev_iommu_priv_set(dev, NULL);
 }
 
 static const struct tegra_smmu_group_soc *
@@ -856,7 +856,7 @@ static struct iommu_group *tegra_smmu_group_get(struct 
tegra_smmu *smmu,
 static struct iommu_group *tegra_smmu_device_group(struct device *dev)
 {
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
-   struct tegra_smmu *smmu = dev->archdata.iommu;
+   struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
struct iommu_group *group;
 
group = tegra_smmu_group_get(smmu, fwspec->ids[0]);
-- 
2.27.0



[PATCH 05/13] iommu/rockchip: Use dev_iommu_priv_get/set()

2020-06-25 Thread Joerg Roedel
From: Joerg Roedel 

Remove the use of dev->archdata.iommu and use the private per-device
pointer provided by IOMMU core code instead.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/rockchip-iommu.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index d25c2486ca07..e5d86b7177de 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -836,7 +836,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, 
unsigned long _iova,
 
 static struct rk_iommu *rk_iommu_from_dev(struct device *dev)
 {
-   struct rk_iommudata *data = dev->archdata.iommu;
+   struct rk_iommudata *data = dev_iommu_priv_get(dev);
 
return data ? data->iommu : NULL;
 }
@@ -1059,7 +1059,7 @@ static struct iommu_device *rk_iommu_probe_device(struct 
device *dev)
struct rk_iommudata *data;
struct rk_iommu *iommu;
 
-   data = dev->archdata.iommu;
+   data = dev_iommu_priv_get(dev);
if (!data)
return ERR_PTR(-ENODEV);
 
@@ -1073,7 +1073,7 @@ static struct iommu_device *rk_iommu_probe_device(struct 
device *dev)
 
 static void rk_iommu_release_device(struct device *dev)
 {
-   struct rk_iommudata *data = dev->archdata.iommu;
+   struct rk_iommudata *data = dev_iommu_priv_get(dev);
 
device_link_del(data->link);
 }
@@ -1100,7 +1100,7 @@ static int rk_iommu_of_xlate(struct device *dev,
iommu_dev = of_find_device_by_node(args->np);
 
data->iommu = platform_get_drvdata(iommu_dev);
-   dev->archdata.iommu = data;
+   dev_iommu_priv_set(dev, data);
 
platform_device_put(iommu_dev);
 
-- 
2.27.0



[PATCH 04/13] iommu/omap: Use dev_iommu_priv_get/set()

2020-06-25 Thread Joerg Roedel
From: Joerg Roedel 

Remove the use of dev->archdata.iommu and use the private per-device
pointer provided by IOMMU core code instead.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/omap-iommu.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index c8282cc212cb..e84ead6fb234 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -71,7 +71,7 @@ static struct omap_iommu_domain *to_omap_domain(struct 
iommu_domain *dom)
  **/
 void omap_iommu_save_ctx(struct device *dev)
 {
-   struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+   struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct omap_iommu *obj;
u32 *p;
int i;
@@ -101,7 +101,7 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
  **/
 void omap_iommu_restore_ctx(struct device *dev)
 {
-   struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+   struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct omap_iommu *obj;
u32 *p;
int i;
@@ -1398,7 +1398,7 @@ static size_t omap_iommu_unmap(struct iommu_domain 
*domain, unsigned long da,
 
 static int omap_iommu_count(struct device *dev)
 {
-   struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+   struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
int count = 0;
 
while (arch_data->iommu_dev) {
@@ -1459,8 +1459,8 @@ static void omap_iommu_detach_fini(struct 
omap_iommu_domain *odomain)
 static int
 omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
+   struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
-   struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
struct omap_iommu_device *iommu;
struct omap_iommu *oiommu;
int ret = 0;
@@ -1524,7 +1524,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct 
device *dev)
 static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
   struct device *dev)
 {
-   struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+   struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct omap_iommu_device *iommu = omap_domain->iommus;
struct omap_iommu *oiommu;
int i;
@@ -1650,7 +1650,7 @@ static struct iommu_device 
*omap_iommu_probe_device(struct device *dev)
int num_iommus, i;
 
/*
-* Allocate the archdata iommu structure for DT-based devices.
+* Allocate the per-device iommu structure for DT-based devices.
 *
 * TODO: Simplify this when removing non-DT support completely from the
 * IOMMU users.
@@ -1698,7 +1698,7 @@ static struct iommu_device 
*omap_iommu_probe_device(struct device *dev)
of_node_put(np);
}
 
-   dev->archdata.iommu = arch_data;
+   dev_iommu_priv_set(dev, arch_data);
 
/*
 * use the first IOMMU alone for the sysfs device linking.
@@ -1712,19 +1712,19 @@ static struct iommu_device 
*omap_iommu_probe_device(struct device *dev)
 
 static void omap_iommu_release_device(struct device *dev)
 {
-   struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+   struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
 
if (!dev->of_node || !arch_data)
return;
 
-   dev->archdata.iommu = NULL;
+   dev_iommu_priv_set(dev, NULL);
kfree(arch_data);
 
 }
 
 static struct iommu_group *omap_iommu_device_group(struct device *dev)
 {
-   struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+   struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct iommu_group *group = ERR_PTR(-EINVAL);
 
if (!arch_data)
-- 
2.27.0



[PATCH 01/13] iommu/exynos: Use dev_iommu_priv_get/set()

2020-06-25 Thread Joerg Roedel
From: Joerg Roedel 

Remove the use of dev->archdata.iommu and use the private per-device
pointer provided by IOMMU core code instead.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/exynos-iommu.c  | 20 +--
 .../media/platform/s5p-mfc/s5p_mfc_iommu.h|  4 +++-
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 60c8a56e4a3f..6a9b67302369 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -173,7 +173,7 @@ static u32 lv2ent_offset(sysmmu_iova_t iova)
 #define REG_V5_FAULT_AR_VA 0x070
 #define REG_V5_FAULT_AW_VA 0x080
 
-#define has_sysmmu(dev)(dev->archdata.iommu != NULL)
+#define has_sysmmu(dev)(dev_iommu_priv_get(dev) != NULL)
 
 static struct device *dma_dev;
 static struct kmem_cache *lv2table_kmem_cache;
@@ -226,7 +226,7 @@ static const struct sysmmu_fault_info sysmmu_v5_faults[] = {
 };
 
 /*
- * This structure is attached to dev.archdata.iommu of the master device
+ * This structure is attached to dev->iommu->priv of the master device
  * on device add, contains a list of SYSMMU controllers defined by device tree,
  * which are bound to given master device. It is usually referenced by 'owner'
  * pointer.
@@ -670,7 +670,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct 
device *dev)
struct device *master = data->master;
 
if (master) {
-   struct exynos_iommu_owner *owner = master->archdata.iommu;
+   struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
 
mutex_lock(>rpm_lock);
if (data->domain) {
@@ -688,7 +688,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct 
device *dev)
struct device *master = data->master;
 
if (master) {
-   struct exynos_iommu_owner *owner = master->archdata.iommu;
+   struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
 
mutex_lock(>rpm_lock);
if (data->domain) {
@@ -837,8 +837,8 @@ static void exynos_iommu_domain_free(struct iommu_domain 
*iommu_domain)
 static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain,
struct device *dev)
 {
-   struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
+   struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
phys_addr_t pagetable = virt_to_phys(domain->pgtable);
struct sysmmu_drvdata *data, *next;
unsigned long flags;
@@ -875,8 +875,8 @@ static void exynos_iommu_detach_device(struct iommu_domain 
*iommu_domain,
 static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
   struct device *dev)
 {
-   struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
+   struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data;
phys_addr_t pagetable = virt_to_phys(domain->pgtable);
unsigned long flags;
@@ -1237,7 +1237,7 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct 
iommu_domain *iommu_domain,
 
 static struct iommu_device *exynos_iommu_probe_device(struct device *dev)
 {
-   struct exynos_iommu_owner *owner = dev->archdata.iommu;
+   struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data;
 
if (!has_sysmmu(dev))
@@ -1263,7 +1263,7 @@ static struct iommu_device 
*exynos_iommu_probe_device(struct device *dev)
 
 static void exynos_iommu_release_device(struct device *dev)
 {
-   struct exynos_iommu_owner *owner = dev->archdata.iommu;
+   struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data;
 
if (!has_sysmmu(dev))
@@ -1287,8 +1287,8 @@ static void exynos_iommu_release_device(struct device 
*dev)
 static int exynos_iommu_of_xlate(struct device *dev,
 struct of_phandle_args *spec)
 {
-   struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct platform_device *sysmmu = of_find_device_by_node(spec->np);
+   struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data, *entry;
 
if (!sysmmu)
@@ -1305,7 +1305,7 @@ static int exynos_iommu_of_xlate(struct device *dev,
 
INIT_LIST_HEAD(>controllers);
mutex_init(>rpm_lock);
-   dev->archdata.iommu = owner;
+   dev_iommu_priv_set(dev, owner);
}
 
list_for_each_entry(entry, >controllers, owner_node)
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_iommu.h 
b/drivers/media/platform/s5p-mfc/s5p_mfc_iommu.h
index 152a713fff78..1a3226

[PATCH 02/13] iommu/vt-d: Use dev_iommu_priv_get/set()

2020-06-25 Thread Joerg Roedel
From: Joerg Roedel 

Remove the use of dev->archdata.iommu and use the private per-device
pointer provided by IOMMU core code instead.

Signed-off-by: Joerg Roedel 
---
 .../gpu/drm/i915/selftests/mock_gem_device.c   | 10 --
 drivers/iommu/intel/iommu.c| 18 +-
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c 
b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 9b105b811f1f..e08601905a64 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -24,6 +24,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -118,6 +119,9 @@ struct drm_i915_private *mock_gem_device(void)
 {
struct drm_i915_private *i915;
struct pci_dev *pdev;
+#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
+   struct dev_iommu iommu;
+#endif
int err;
 
pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);
@@ -136,8 +140,10 @@ struct drm_i915_private *mock_gem_device(void)
dma_coerce_mask_and_coherent(>dev, DMA_BIT_MASK(64));
 
 #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
-   /* hack to disable iommu for the fake device; force identity mapping */
-   pdev->dev.archdata.iommu = (void *)-1;
+   /* HACK HACK HACK to disable iommu for the fake device; force identity 
mapping */
+   memset(, 0, sizeof(iommu));
+   iommu.priv = (void *)-1;
+   pdev->dev.iommu = 
 #endif
 
pci_set_drvdata(pdev, i915);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..2ce490c2eab8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -372,7 +372,7 @@ struct device_domain_info *get_domain_info(struct device 
*dev)
if (!dev)
return NULL;
 
-   info = dev->archdata.iommu;
+   info = dev_iommu_priv_get(dev);
if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
 info == DEFER_DEVICE_DOMAIN_INFO))
return NULL;
@@ -743,12 +743,12 @@ struct context_entry *iommu_context_addr(struct 
intel_iommu *iommu, u8 bus,
 
 static int iommu_dummy(struct device *dev)
 {
-   return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+   return dev_iommu_priv_get(dev) == DUMMY_DEVICE_DOMAIN_INFO;
 }
 
 static bool attach_deferred(struct device *dev)
 {
-   return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+   return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
 }
 
 /**
@@ -2420,7 +2420,7 @@ static inline void unlink_domain_info(struct 
device_domain_info *info)
list_del(>link);
list_del(>global);
if (info->dev)
-   info->dev->archdata.iommu = NULL;
+   dev_iommu_priv_set(info->dev, NULL);
 }
 
 static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -2453,7 +2453,7 @@ static void do_deferred_attach(struct device *dev)
 {
struct iommu_domain *domain;
 
-   dev->archdata.iommu = NULL;
+   dev_iommu_priv_set(dev, NULL);
domain = iommu_get_domain_for_dev(dev);
if (domain)
intel_iommu_attach_device(domain, dev);
@@ -2599,7 +2599,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
list_add(>link, >devices);
list_add(>global, _domain_list);
if (dev)
-   dev->archdata.iommu = info;
+   dev_iommu_priv_set(dev, info);
spin_unlock_irqrestore(_domain_lock, flags);
 
/* PASID table is mandatory for a PCI device in scalable mode. */
@@ -4004,7 +4004,7 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev 
*pdev)
if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for 
Intel(R) QuickData Technology device\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
-   pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+   dev_iommu_priv_set(>dev, DUMMY_DEVICE_DOMAIN_INFO);
}
 }
 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, 
quirk_ioat_snb_local_iommu);
@@ -4043,7 +4043,7 @@ static void __init init_no_remapping_devices(void)
drhd->ignored = 1;
for_each_active_dev_scope(drhd->devices,
  drhd->devices_cnt, i, dev)
-   dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+   dev_iommu_priv_set(dev, 
DUMMY_DEVICE_DOMAIN_INFO);
}
}
 }
@@ -5665,7 +5665,7 @@ static struct iommu_device 
*intel_iommu_probe_device(struct device *dev)
return ERR_PTR(-ENODEV);
 
if (translation_pre_enabled(iommu))
-   dev->arc

[PATCH 00/13] iommu: Remove usage of dev->archdata.iommu

2020-06-25 Thread Joerg Roedel
From: Joerg Roedel 

Hi,

here is a patch-set to remove the usage of dev->archdata.iommu from
the IOMMU code in the kernel and replace its uses by the iommu per-device
private data field. The changes also remove the field entirely from
the architectures which no longer need it.

On PowerPC the field is called dev->archdata.iommu_domain and was only
used by the PAMU IOMMU driver. It gets removed as well.

The patches have been runtime tested on Intel VT-d and compile tested
with allyesconfig for:

* x86 (32 and 64 bit)
* arm and arm64
* ia64 (only drivers/ because build failed for me in
arch/ia64)
* PPC64

Besides that the changes also survived my IOMMU tree compile tests.

Please review.

Regards,

Joerg

Joerg Roedel (13):
  iommu/exynos: Use dev_iommu_priv_get/set()
  iommu/vt-d: Use dev_iommu_priv_get/set()
  iommu/msm: Use dev_iommu_priv_get/set()
  iommu/omap: Use dev_iommu_priv_get/set()
  iommu/rockchip: Use dev_iommu_priv_get/set()
  iommu/tegra: Use dev_iommu_priv_get/set()
  iommu/pamu: Use dev_iommu_priv_get/set()
  iommu/mediatek: Do no use dev->archdata.iommu
  x86: Remove dev->archdata.iommu pointer
  ia64: Remove dev->archdata.iommu pointer
  arm: Remove dev->archdata.iommu pointer
  arm64: Remove dev->archdata.iommu pointer
  powerpc/dma: Remove dev->archdata.iommu_domain

 arch/arm/include/asm/device.h |  3 ---
 arch/arm64/include/asm/device.h   |  3 ---
 arch/ia64/include/asm/device.h|  3 ---
 arch/powerpc/include/asm/device.h |  3 ---
 arch/x86/include/asm/device.h |  3 ---
 .../gpu/drm/i915/selftests/mock_gem_device.c  | 10 --
 drivers/iommu/exynos-iommu.c  | 20 +--
 drivers/iommu/fsl_pamu_domain.c   |  8 
 drivers/iommu/intel/iommu.c   | 18 -
 drivers/iommu/msm_iommu.c |  4 ++--
 drivers/iommu/mtk_iommu.h |  2 ++
 drivers/iommu/mtk_iommu_v1.c  | 10 --
 drivers/iommu/omap-iommu.c| 20 +--
 drivers/iommu/rockchip-iommu.c|  8 
 drivers/iommu/tegra-gart.c|  8 
 drivers/iommu/tegra-smmu.c|  8 
 .../media/platform/s5p-mfc/s5p_mfc_iommu.h|  4 +++-
 17 files changed, 64 insertions(+), 71 deletions(-)

-- 
2.27.0



[PATCH] mm: Move p?d_alloc_track to separate header file

2020-06-09 Thread Joerg Roedel
From: Joerg Roedel 

The functions are only used in two source files, so there is no need
for them to be in the global  header. Move them to the new
 header and include it only where needed.

Signed-off-by: Joerg Roedel 
---
 include/linux/mm.h| 45 ---
 include/linux/pgalloc-track.h | 51 +++
 lib/ioremap.c |  1 +
 mm/vmalloc.c  |  1 +
 4 files changed, 53 insertions(+), 45 deletions(-)
 create mode 100644 include/linux/pgalloc-track.h

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d6042178ca7..22d8b2a2c9bc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2092,51 +2092,11 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
p4d_t *p4d,
NULL : pud_offset(p4d, address);
 }
 
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
-unsigned long address,
-pgtbl_mod_mask *mod_mask)
-
-{
-   if (unlikely(pgd_none(*pgd))) {
-   if (__p4d_alloc(mm, pgd, address))
-   return NULL;
-   *mod_mask |= PGTBL_PGD_MODIFIED;
-   }
-
-   return p4d_offset(pgd, address);
-}
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
-unsigned long address,
-pgtbl_mod_mask *mod_mask)
-{
-   if (unlikely(p4d_none(*p4d))) {
-   if (__pud_alloc(mm, p4d, address))
-   return NULL;
-   *mod_mask |= PGTBL_P4D_MODIFIED;
-   }
-
-   return pud_offset(p4d, address);
-}
-
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long 
address)
 {
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
 }
-
-static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
-unsigned long address,
-pgtbl_mod_mask *mod_mask)
-{
-   if (unlikely(pud_none(*pud))) {
-   if (__pmd_alloc(mm, pud, address))
-   return NULL;
-   *mod_mask |= PGTBL_PUD_MODIFIED;
-   }
-
-   return pmd_offset(pud, address);
-}
 #endif /* CONFIG_MMU */
 
 #if USE_SPLIT_PTE_PTLOCKS
@@ -2252,11 +2212,6 @@ static inline void pgtable_pte_page_dtor(struct page 
*page)
((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
NULL: pte_offset_kernel(pmd, address))
 
-#define pte_alloc_kernel_track(pmd, address, mask) \
-   ((unlikely(pmd_none(*(pmd))) && \
- (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
-   NULL: pte_offset_kernel(pmd, address))
-
 #if USE_SPLIT_PMD_PTLOCKS
 
 static struct page *pmd_to_page(pmd_t *pmd)
diff --git a/include/linux/pgalloc-track.h b/include/linux/pgalloc-track.h
new file mode 100644
index ..1dcc865029a2
--- /dev/null
+++ b/include/linux/pgalloc-track.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PGALLLC_TRACK_H
+#define _LINUX_PGALLLC_TRACK_H
+
+#if defined(CONFIG_MMU)
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+unsigned long address,
+pgtbl_mod_mask *mod_mask)
+{
+   if (unlikely(pgd_none(*pgd))) {
+   if (__p4d_alloc(mm, pgd, address))
+   return NULL;
+   *mod_mask |= PGTBL_PGD_MODIFIED;
+   }
+
+   return p4d_offset(pgd, address);
+}
+
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+unsigned long address,
+pgtbl_mod_mask *mod_mask)
+{
+   if (unlikely(p4d_none(*p4d))) {
+   if (__pud_alloc(mm, p4d, address))
+   return NULL;
+   *mod_mask |= PGTBL_P4D_MODIFIED;
+   }
+
+   return pud_offset(p4d, address);
+}
+
+static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
+unsigned long address,
+pgtbl_mod_mask *mod_mask)
+{
+   if (unlikely(pud_none(*pud))) {
+   if (__pmd_alloc(mm, pud, address))
+   return NULL;
+   *mod_mask |= PGTBL_PUD_MODIFIED;
+   }
+
+   return pmd_offset(pud, address);
+}
+#endif /* CONFIG_MMU */
+
+#define pte_alloc_kernel_track(pmd, address, mask) \
+   ((unlikely(pmd_none(*(pmd))) && \
+ (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
+   NULL: pte_offset_kernel(pmd, address))
+
+#endif /* _LINUX_PGALLLC_TRACK_H */
diff --git a/lib

[PATCH] mm: Fix pud_alloc_track()

2020-06-04 Thread Joerg Roedel
From: Joerg Roedel 

The pud_alloc_track() needs to do different checks based on whether
__ARCH_HAS_5LEVEL_HACK is defined, like it already does in
pud_alloc(). Otherwise it causes boot failures on PowerPC.

Provide the correct implementations for both possible settings of
__ARCH_HAS_5LEVEL_HACK to fix the boot problems.

Reported-by: Abdul Haleem 
Tested-by: Abdul Haleem 
Tested-by: Satheesh Rajendran 
Fixes: d8626138009b ("mm: add functions to track page directory modifications")
Signed-off-by: Joerg Roedel 
---
 include/asm-generic/5level-fixup.h |  5 +
 include/linux/mm.h | 26 +-
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/include/asm-generic/5level-fixup.h 
b/include/asm-generic/5level-fixup.h
index 58046ddc08d0..afbab31fbd7e 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -17,6 +17,11 @@
((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
NULL : pud_offset(p4d, address))
 
+#define pud_alloc_track(mm, p4d, address, mask)
\
+   ((unlikely(pgd_none(*(p4d))) && 
\
+ (__pud_alloc(mm, p4d, address) || 
({*(mask)|=PGTBL_P4D_MODIFIED;0;})))?   \
+ NULL : pud_offset(p4d, address))
+
 #define p4d_alloc(mm, pgd, address)(pgd)
 #define p4d_alloc_track(mm, pgd, address, mask)(pgd)
 #define p4d_offset(pgd, start) (pgd)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 66e0977f970a..ad3b31c5bcc3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
p4d_t *p4d,
NULL : pud_offset(p4d, address);
 }
 
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
 unsigned long address,
 pgtbl_mod_mask *mod_mask)
-
 {
-   if (unlikely(pgd_none(*pgd))) {
-   if (__p4d_alloc(mm, pgd, address))
+   if (unlikely(p4d_none(*p4d))) {
+   if (__pud_alloc(mm, p4d, address))
return NULL;
-   *mod_mask |= PGTBL_PGD_MODIFIED;
+   *mod_mask |= PGTBL_P4D_MODIFIED;
}
 
-   return p4d_offset(pgd, address);
+   return pud_offset(p4d, address);
 }
 
-#endif /* !__ARCH_HAS_5LEVEL_HACK */
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
 unsigned long address,
 pgtbl_mod_mask *mod_mask)
+
 {
-   if (unlikely(p4d_none(*p4d))) {
-   if (__pud_alloc(mm, p4d, address))
+   if (unlikely(pgd_none(*pgd))) {
+   if (__p4d_alloc(mm, pgd, address))
return NULL;
-   *mod_mask |= PGTBL_P4D_MODIFIED;
+   *mod_mask |= PGTBL_PGD_MODIFIED;
}
 
-   return pud_offset(p4d, address);
+   return p4d_offset(pgd, address);
 }
 
+#endif /* !__ARCH_HAS_5LEVEL_HACK */
+
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long 
address)
 {
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
-- 
2.26.2



Re: [mainline][Oops][bisected 2ba3e6 ] 5.7.0 boot fails with kernel panic on powerpc

2020-06-03 Thread Joerg Roedel
On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> @Joerg, Could you please have a look?

Can you please try the attached patch?

diff --git a/include/asm-generic/5level-fixup.h 
b/include/asm-generic/5level-fixup.h
index 58046ddc08d0..afbab31fbd7e 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -17,6 +17,11 @@
((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
NULL : pud_offset(p4d, address))
 
+#define pud_alloc_track(mm, p4d, address, mask)
\
+   ((unlikely(pgd_none(*(p4d))) && 
\
+ (__pud_alloc(mm, p4d, address) || 
({*(mask)|=PGTBL_P4D_MODIFIED;0;})))?   \
+ NULL : pud_offset(p4d, address))
+
 #define p4d_alloc(mm, pgd, address)(pgd)
 #define p4d_alloc_track(mm, pgd, address, mask)(pgd)
 #define p4d_offset(pgd, start) (pgd)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7e07f4f490cb..d46bf03b804f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
p4d_t *p4d,
NULL : pud_offset(p4d, address);
 }
 
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
 unsigned long address,
 pgtbl_mod_mask *mod_mask)
-
 {
-   if (unlikely(pgd_none(*pgd))) {
-   if (__p4d_alloc(mm, pgd, address))
+   if (unlikely(p4d_none(*p4d))) {
+   if (__pud_alloc(mm, p4d, address))
return NULL;
-   *mod_mask |= PGTBL_PGD_MODIFIED;
+   *mod_mask |= PGTBL_P4D_MODIFIED;
}
 
-   return p4d_offset(pgd, address);
+   return pud_offset(p4d, address);
 }
 
-#endif /* !__ARCH_HAS_5LEVEL_HACK */
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
 unsigned long address,
 pgtbl_mod_mask *mod_mask)
+
 {
-   if (unlikely(p4d_none(*p4d))) {
-   if (__pud_alloc(mm, p4d, address))
+   if (unlikely(pgd_none(*pgd))) {
+   if (__p4d_alloc(mm, pgd, address))
return NULL;
-   *mod_mask |= PGTBL_P4D_MODIFIED;
+   *mod_mask |= PGTBL_PGD_MODIFIED;
}
 
-   return pud_offset(p4d, address);
+   return p4d_offset(pgd, address);
 }
 
+#endif /* !__ARCH_HAS_5LEVEL_HACK */
+
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long 
address)
 {
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?


Re: [mainline][Oops][bisected 2ba3e6 ] 5.7.0 boot fails with kernel panic on powerpc

2020-06-03 Thread Joerg Roedel
hi Abdul,

On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> Greeting's
> 
> Today's mainline kernel panics when booting on my powerpc lpar

Thanks for the report, I am looking into it with my limited powerpc
knowledge. But I have an idea and will send you something to test later
today.

Thanks,

Joerg



Re: [PATCH 3/4] dma-mapping: add a dma_ops_bypass flag to struct device

2020-04-19 Thread Joerg Roedel
On Sun, Apr 19, 2020 at 10:00:58AM +0200, Christoph Hellwig wrote:
> The difference is that NULL ops mean imply the direct mapping is always
> used, dma_ops_bypass means a direct mapping is used if no bounce buffering
> using swiotlb is needed, which should also answer your first question.
> The idea is to consolidate code in the core to use an opportunistic
> direct mapping instead of the dynamic iommu mapping.  I though the cover
> letter and commit log explained this well enough, but maybe I need to
> do a better job.

Ah right, now I see it, when dma_ops_bypass is set it will only use
direct mapping when the available memory fits into the device's
dma_masks, and calls into dma_ops otherwise.

I wonder how that will interact with an IOMMU driver, which has to make
sure that the direct mapping is accessible for the device at all.  It
can either put the device into a passthrough domain for direct mapping
or into a re-mapped domain, but then all DMA-API calls need to use dma-ops.
When the dma_mask covers available memory but coherent_mask doesn't,
the streaming calls will use dma-direct and alloc_coherent() calls into
dma-ops. There is no way for the IOMMU driver to ensure both works.

So what are the conditions under which an IOMMU driver would set
dma_ops_bypass to 1 and get a different result as to when setting
dev->dma_ops to NULL?

Regards,

Joerg


Re: [PATCH 3/4] dma-mapping: add a dma_ops_bypass flag to struct device

2020-04-18 Thread Joerg Roedel
Hi Christoph,

On Tue, Apr 14, 2020 at 02:25:05PM +0200, Christoph Hellwig wrote:
> +static inline bool dma_map_direct(struct device *dev,
> + const struct dma_map_ops *ops)
> +{
> + if (likely(!ops))
> + return true;
> + if (!dev->dma_ops_bypass)
> + return false;
> +
> + return min_not_zero(*dev->dma_mask, dev->bus_dma_limit) >=
> + dma_direct_get_required_mask(dev);

Why is the dma-mask check done here? The dma-direct code handles memory
outside of the devices dma-mask with swiotlb, no?

I also don't quite get what the difference between setting the
dma_ops_bypass flag non-zero and setting ops to NULL is.


Joerg




Re: [PATCH] iommu: spapr_tce: Disable compile testing to fix build on book3s_32 config

2020-04-18 Thread Joerg Roedel
On Tue, Apr 14, 2020 at 04:26:30PM +0200, Krzysztof Kozlowski wrote:
> Reported-by: Geert Uytterhoeven 
> Fixes: e93a1695d7fb ("iommu: Enable compile testing for some of drivers")
> Signed-off-by: Krzysztof Kozlowski 
> ---
>  drivers/iommu/Kconfig | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Applied, thanks.


Re: [PATCH v7 1/1] iommu: enhance IOMMU dma mode build options

2019-05-27 Thread Joerg Roedel
Hi Zhen Lei,

On Mon, May 20, 2019 at 09:59:47PM +0800, Zhen Lei wrote:
>  arch/ia64/kernel/pci-dma.c|  2 +-
>  arch/powerpc/platforms/powernv/pci-ioda.c |  3 ++-
>  arch/s390/pci/pci_dma.c   |  2 +-
>  arch/x86/kernel/pci-dma.c |  7 ++---
>  drivers/iommu/Kconfig | 44 
> ++-
>  drivers/iommu/amd_iommu_init.c|  3 ++-
>  drivers/iommu/intel-iommu.c   |  2 +-
>  drivers/iommu/iommu.c |  3 ++-
>  8 files changed, 48 insertions(+), 18 deletions(-)

This needs Acks from the arch maintainers of ia64, powerpc, s390 and
x86, at least.

It is easier for them if you split it up into the Kconfig change and
separete patches per arch and per iommu driver. Then collect the Acks on
the individual patches.

Thanks,

Joerg


Re: [PATCH v5 1/6] iommu: add generic boot option iommu.dma_mode

2019-04-12 Thread Joerg Roedel
On Tue, Apr 09, 2019 at 08:53:03PM +0800, Zhen Lei wrote:
> +static int __init iommu_dma_mode_setup(char *str)
> +{
> + if (!str)
> + goto fail;
> +
> + if (!strncmp(str, "passthrough", 11))
> + iommu_default_dma_mode = IOMMU_DMA_MODE_PASSTHROUGH;
> + else if (!strncmp(str, "lazy", 4))
> + iommu_default_dma_mode = IOMMU_DMA_MODE_LAZY;
> + else if (!strncmp(str, "strict", 6))
> + iommu_default_dma_mode = IOMMU_DMA_MODE_STRICT;
> + else
> + goto fail;
> +
> + pr_info("Force dma mode to be %d\n", iommu_default_dma_mode);

Printing a number is not very desriptive or helpful to the user. Please
print the name of the mode instead.


Regards,

Joerg


Re: [PATCH 1/2] iommu: Tidy up window attributes

2018-09-25 Thread Joerg Roedel
On Wed, Sep 19, 2018 at 11:12:57AM +0100, Robin Murphy wrote:
> The external interface to get/set window attributes is already
> abstracted behind iommu_domain_{get,set}_attr(), so there's no real
> reason for the internal interface to be different. Since we only have
> one window-based driver anyway, clean up the core code by just moving
> the DOMAIN_ATTR_WINDOWS handling directly into the PAMU driver.
> 
> Signed-off-by: Robin Murphy 
> ---
> 
> Just a cleanup opportunity I spotted whilst poking around in the area.
> 
>  drivers/iommu/fsl_pamu_domain.c | 20 
>  drivers/iommu/iommu.c   | 20 
>  2 files changed, 20 insertions(+), 20 deletions(-)

Nice cleanup, applied both patches, thanks Robin.



Re: [PATCH 0/7 v7] Support for fsl-mc bus and its devices in SMMU

2018-09-25 Thread Joerg Roedel
On Mon, Sep 10, 2018 at 07:19:14PM +0530, Nipun Gupta wrote:
> Nipun Gupta (7):
>   Documentation: fsl-mc: add iommu-map device-tree binding for fsl-mc
> bus
>   iommu/of: make of_pci_map_rid() available for other devices too
>   iommu/of: support iommu configuration for fsl-mc devices
>   iommu/arm-smmu: Add support for the fsl-mc bus
>   bus: fsl-mc: support dma configure for devices on fsl-mc bus
>   bus: fsl-mc: set coherent dma mask for devices on fsl-mc bus
>   arm64: dts: ls208xa: comply with the iommu map binding for fsl_mc

Applied, thanks Nipun.


Re: [PATCH] PCI: call dma_debug_add_bus for pci_bus_type in common code

2018-07-31 Thread Joerg Roedel
On Mon, Jul 30, 2018 at 04:17:13PM -0500, Bjorn Helgaas wrote:
> [+cc Joerg]
> 
> On Mon, Jul 30, 2018 at 09:38:42AM +0200, Christoph Hellwig wrote:
> > There is nothing arch specific about PCI or dma-debug, so move this
> > call to common code just after registering the bus type.
> 
> I assume that previously, even if the user set CONFIG_DMA_API_DEBUG=y
> we only got PCI DMA debug on powerpc, sh, and x86.  And after this
> patch, we'll get PCI DMA debug on *all* arches?
> 
> If that's true, I'll add a comment to that effect to the commitlog
> since that new functionality might be of interest to other arches.

There should be implicit support for dma-debug for all arches that use
the generic dma_ops code. The dma_debug_add_bus() function just adds the
reporting of pending dma-allocations on driver-unload for a device. 

Regards,

Joerg



Re: [PATCH -next] soc/qbman: fix implicit header dependency now causing build fails

2017-05-03 Thread Joerg Roedel
Hi Stephen,

On Wed, May 03, 2017 at 07:15:24PM +1000, Stephen Rothwell wrote:
> It looks like there is at least one more:
> 
> drivers/soc/fsl/qbman/qman.c: In function 'qman_init_fq':
> drivers/soc/fsl/qbman/qman.c:1787:4: error: implicit declaration of function 
> 'dma_map_single' [-Werror=implicit-function-declaration]
> drivers/soc/fsl/qbman/qman.c:1788:21: error: 'DMA_TO_DEVICE' undeclared 
> (first use in this function)
> drivers/soc/fsl/qbman/qman.c:1789:4: error: implicit declaration of function 
> 'dma_mapping_error' [-Werror=implicit-function-declaration]
> 
> This is from a powerpc orenet64_smp_defconfig build of today's
> linux-next.

Thanks, I'll fix that up too later today. Please let me know if you find
more of that in your compile-testing.


Joerg



Re: [PATCH -next] soc/qbman: fix implicit header dependency now causing build fails

2017-05-03 Thread Joerg Roedel
Hi Paul,

On Tue, May 02, 2017 at 06:21:12PM -0400, Paul Gortmaker wrote:
> In commit 461a6946b1f9 ("iommu: Remove pci.h include from
> trace/events/iommu.h") that header shuffle uncovered an implicit
> include in this driver, manifesting as:
> 
> CC  drivers/soc/fsl/qbman/qman_portal.o
> drivers/soc/fsl/qbman/qman_portal.c: In function 'qman_portal_probe':
> drivers/soc/fsl/qbman/qman_portal.c:299:2: error: implicit declaration of 
> function 'dma_set_mask'
> drivers/soc/fsl/qbman/qman_portal.c:299:2: error: implicit declaration of 
> function 'DMA_BIT_MASK'
> if (dma_set_mask(dev, DMA_BIT_MASK(40))) {
> ^
> 
> on the corenet32_smp_defconfig (and 64 bit respectively.)  The above
> commit was singled out via git bisect.
> 
> The header it was implictly relying on getting was dma-mapping.h - so
> we explicitly add it here.
> 
> Fixes: 461a6946b1f9 ("iommu: Remove pci.h include from trace/events/iommu.h")
> Cc: Joerg Roedel <jroe...@suse.de>
> Cc: Scott Wood <o...@buserror.net>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Paul Gortmaker <paul.gortma...@windriver.com>

Thanks for catching that, I though I found all breakages caused by
removing this include. Obviously this wasn't true :)

I applied the fix to the iommu/core branch.


Joerg



Re: [PATCH v2] powerpc: Fix incorrect PPC32 PAMU dependency

2016-04-07 Thread Joerg Roedel
On Wed, Mar 16, 2016 at 11:15:44PM -0500, Andy Fleming wrote:
> The Freescale PAMU can be enabled on both 32 and 64-bit Power
> chips. Commit 477ab7a19cec8409e4e2dd10e7348e4cac3c06e5
> (iommu: Make more drivers depend on COMPILE_TEST)
> restricted PAMU to PPC32. PPC covers both.
> 
> Signed-off-by: Andy Fleming 
> ---
> 
> v2: Implemented Michael Ellerman's suggestion to clean up the
> dependency chain

Applied, thanks.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v7, 4/5] powerpc/fsl: move mpc85xx.h to include/linux/fsl

2016-04-05 Thread Joerg Roedel
On Fri, Apr 01, 2016 at 11:07:30AM +0800, Yangbo Lu wrote:
> Move mpc85xx.h to include/linux/fsl and rename it to svr.h as
> a common header file. It has been used for mpc85xx and it will
> be used for ARM-based SoC as well.
> 
> Signed-off-by: Yangbo Lu <yangbo...@nxp.com>
> Acked-by: Wolfram Sang <w...@the-dreams.de>

Acked-by: Joerg Roedel <jroe...@suse.de>

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/iommu: Rename iommu_[un]map_sg functions

2014-11-06 Thread Joerg Roedel
Hi,

here is a patch that fixes a compile failure on powerpc with the recent
iommu tree. If this patch is okay with you guys I'd like to carry it in
the iommu tree too.

Thanks,

Joerg

From ff39a0301d01ad24f7097718b4ec8215eb0c1141 Mon Sep 17 00:00:00 2001
From: Joerg Roedel jroe...@suse.de
Date: Wed, 5 Nov 2014 15:28:30 +0100
Subject: [PATCH] powerpc/iommu: Rename iommu_[un]map_sg functions

The IOMMU-API gained support for a new iommu_map_sg
function. This causes compile failures on powerpc because
the function name is already globally used there.
This patch renames adds a ppc_ prefix to these functions to
solve the compile problem.

Signed-off-by: Joerg Roedel jroe...@suse.de
---
 arch/powerpc/include/asm/iommu.h| 17 ++---
 arch/powerpc/kernel/dma-iommu.c |  8 
 arch/powerpc/kernel/iommu.c | 16 
 arch/powerpc/platforms/cell/iommu.c |  9 +
 4 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 42632c7..9cfa370 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -137,13 +137,16 @@ static inline void set_iommu_table_base_and_group(struct 
device *dev,
iommu_add_device(dev);
 }
 
-extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
-   struct scatterlist *sglist, int nelems,
-   unsigned long mask, enum dma_data_direction direction,
-   struct dma_attrs *attrs);
-extern void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
-  int nelems, enum dma_data_direction direction,
-  struct dma_attrs *attrs);
+extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+   struct scatterlist *sglist, int nelems,
+   unsigned long mask,
+   enum dma_data_direction direction,
+   struct dma_attrs *attrs);
+extern void ppc_iommu_unmap_sg(struct iommu_table *tbl,
+  struct scatterlist *sglist,
+  int nelems,
+  enum dma_data_direction direction,
+  struct dma_attrs *attrs);
 
 extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
  size_t size, dma_addr_t *dma_handle,
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 54d0116..4c68bfe 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -60,16 +60,16 @@ static int dma_iommu_map_sg(struct device *dev, struct 
scatterlist *sglist,
int nelems, enum dma_data_direction direction,
struct dma_attrs *attrs)
 {
-   return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
-   device_to_mask(dev), direction, attrs);
+   return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
+   device_to_mask(dev), direction, attrs);
 }
 
 static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
struct dma_attrs *attrs)
 {
-   iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction,
-  attrs);
+   ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+  direction, attrs);
 }
 
 /* We support DMA to/from any memory page via the iommu */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index a10642a..a83cf5e 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -428,10 +428,10 @@ static void iommu_free(struct iommu_table *tbl, 
dma_addr_t dma_addr,
ppc_md.tce_flush(tbl);
 }
 
-int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
-struct scatterlist *sglist, int nelems,
-unsigned long mask, enum dma_data_direction direction,
-struct dma_attrs *attrs)
+int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+struct scatterlist *sglist, int nelems,
+unsigned long mask, enum dma_data_direction direction,
+struct dma_attrs *attrs)
 {
dma_addr_t dma_next = 0, dma_addr;
struct scatterlist *s, *outs, *segstart;
@@ -539,7 +539,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table 
*tbl,
 
DBG(mapped %d elements:\n, outcount);
 
-   /* For the sake of iommu_unmap_sg, we clear out the length in the
+   /* For the sake of ppc_iommu_unmap_sg, we clear out the length in the
 * next entry of the sglist if we didn't fill the list completely
 */
if (outcount  incount) {
@@ -572,9

Re: linux-next: build failure after merge of the iommu tree

2014-11-05 Thread Joerg Roedel
On Wed, Nov 05, 2014 at 01:47:31PM +1100, Stephen Rothwell wrote:
 Hi Joerg,
 
 After merging the iommu tree, today's linux-next build (powerpc
 pc64_defconfig) failed like this:
 
 In file included from arch/powerpc/platforms/powernv/pci.c:33:0:
 arch/powerpc/include/asm/iommu.h:140:12: error: conflicting types for 
 'iommu_map_sg'
  extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 ^
 In file included from arch/powerpc/platforms/powernv/pci.c:23:0:
 include/linux/iommu.h:311:22: note: previous definition of 'iommu_map_sg' was 
 here
  static inline size_t iommu_map_sg(struct iommu_domain *domain,
   ^
 
 Caused by commit 315786ebbf4a (iommu: Add iommu_map_sg() function).
 Grep is your friend ...
 
 I have used the iommu tree from next-20141104 for today.

Thanks Stephen, I exluded the my core branch from next for now until the
issue is fixed.


Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2] iommu/fsl: Fix warning resulting from adding PCI device twice

2014-09-05 Thread Joerg Roedel
On Thu, Sep 04, 2014 at 05:08:45PM +0530, Varun Sethi wrote:
 iommu_group_get_for_dev determines the iommu group for the PCI device and adds
 the device to the group.
 
 In the PAMU driver we were again adding the device to the same group without 
 checking
 if the device already had an iommu group. This resulted in the following 
 warning.

 [...]
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
 v2 changes
 - directly check for the device iommu_group
 
  drivers/iommu/fsl_pamu_domain.c |   10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

Applied to iommu/fixes and added stable tag, thanks.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] iommu/fsl: Fix warning resulting from adding PCI device twice

2014-09-04 Thread Joerg Roedel
On Thu, Sep 04, 2014 at 11:33:42AM +0530, Varun Sethi wrote:
 + if (!iommu_group_get(dev))
 + ret = iommu_group_add_device(group, dev);
  
   iommu_group_put(group);
   return ret;

Doesn't this additional call to iommu_group_get take a reference to the
iommu_group that needs to be released?


Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 0/3] iommu/fsl: Fixes for the PAMU driver.

2014-07-07 Thread Joerg Roedel
On Tue, Jun 24, 2014 at 07:27:14PM +0530, Varun Sethi wrote:
 This patch set contains fixes for the PAMU driver. 
 The patches are based on 3.16-rc1.
 
 Varun Sethi (3):
   Fix PAMU window size check. 
   Fix the device domain attach condition.
   Fix the error condition during iommu group creation.
 
  drivers/iommu/fsl_pamu.c|8 
  drivers/iommu/fsl_pamu_domain.c |   19 +--
  2 files changed, 13 insertions(+), 14 deletions(-)

Applied to iommu/fixes, thanks.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/3] iommu/fsl: Fix PAMU window size check.

2014-07-04 Thread Joerg Roedel
On Tue, Jun 24, 2014 at 07:27:15PM +0530, Varun Sethi wrote:
   /* window size is 2^(WSE+1) bytes */
 - return __ffs(addrspace_size) - 1;
 + return fls64(addrspace_size) - 2;

This looks bogus, why do you replace ffs (find-first-bit) by fls
(find-last-bit)?


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/3] iommu/fsl: Fix the device domain attach condition.

2014-07-04 Thread Joerg Roedel
Hmm,

On Tue, Jun 24, 2014 at 07:27:16PM +0530, Varun Sethi wrote:
 - old_domain_info = find_domain(dev);
 + old_domain_info = dev-archdata.iommu_domain;
   if (old_domain_info  old_domain_info-domain != dma_domain) {
   spin_unlock_irqrestore(device_domain_lock, flags);
   detach_device(dev, old_domain_info-domain);

Wouldn't this set dev-archdata.iommu_domain to NULL anyway, so that ...

 @@ -399,7 +394,7 @@ static void attach_device(struct fsl_dma_domain 
 *dma_domain, int liodn, struct d
* the info for the first LIODN as all
* LIODNs share the same domain
*/
 - if (!old_domain_info)
 + if (!dev-archdata.iommu_domain)
   dev-archdata.iommu_domain = info;

We already know that it _must_ be NULL here?

   spin_unlock_irqrestore(device_domain_lock, flags);

This would shrink down the patch to:

diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 93072ba..d21b554 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -399,8 +399,7 @@ static void attach_device(struct fsl_dma_domain 
*dma_domain, int liodn, struct d
 * the info for the first LIODN as all
 * LIODNs share the same domain
 */
-   if (!old_domain_info)
-   dev-archdata.iommu_domain = info;
+   dev-archdata.iommu_domain = info;
spin_unlock_irqrestore(device_domain_lock, flags);
 
 }


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] iommu: Add empty stub for iommu_group_get_by_id()

2013-12-30 Thread Joerg Roedel
On Thu, Nov 21, 2013 at 05:41:14PM +1100, Alexey Kardashevskiy wrote:
 Almost every function in include/linux/iommu.h has an empty stub
 but the iommu_group_get_by_id() did not get one by mistake.
 
 This adds an empty stub for iommu_group_get_by_id() for IOMMU_API
 disabled config.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru

Applied, thanks.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/3] powerpc: Add iommu domain pointer to device archdata

2013-08-14 Thread Joerg Roedel
On Mon, Jul 15, 2013 at 10:20:55AM +0530, Varun Sethi wrote:
 Add an iommu domain pointer to device (powerpc) archdata.  Devices
 are attached to iommu domains and this pointer provides a mechanism
 to correlate between a device and the associated iommu domain.  This
 field is set when a device is attached to a domain.
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com
 Acked-by: Kumar Gala ga...@kernel.crashing.org
 ---
 - rebased patch to 3.11-rc1
  arch/powerpc/include/asm/device.h |3 +++
  1 files changed, 3 insertions(+), 0 deletions(-)

Okay, I applied these patches to my ppc/pamu branch. But before I merge
it to my next branch (so that it can go upstream) I want to have a
compile-test-case first. Can you send me a working .config which
includes this driver?

Thanks,

Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/3] powerpc: Add iommu domain pointer to device archdata

2013-08-14 Thread Joerg Roedel
On Wed, Aug 14, 2013 at 09:56:11AM +, Sethi Varun-B16395 wrote:
 Please find the .config file attached with this mail.

Fantastic, thanks. The build works fine, I'll include the driver into my
next-branch. I also have two minor clean-up patches on-top, just if you
where wondering.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/3 v16] iommu/fsl: Freescale PAMU driver and iommu implementation.

2013-06-24 Thread Joerg Roedel
On Thu, Jun 20, 2013 at 09:31:28PM +0530, Varun Sethi wrote:
 This patch provides the PAMU driver (fsl_pamu.c) and the corresponding IOMMU
 API implementation (fsl_pamu_domain.c). The PAMU hardware driver (fsl_pamu.c)
 has been derived from the work done by Ashish Kalra and Timur Tabi.

AlexW,

can you please have a look at the group-code again and ack the patch if
it looks right to you?

Thanks,

Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/2 v15] iommu/fsl: Add additional iommu attributes required by the PAMU driver.

2013-06-20 Thread Joerg Roedel
Varun,

On Wed, Apr 24, 2013 at 05:05:50PM +0530, Varun Sethi wrote:
 Added the following domain attributes for the FSL PAMU driver:
 1. Added new iommu stash attribute, which allows setting of the
LIODN specific stash id parameter through IOMMU API.
 2. Added an attribute for enabling/disabling DMA to a particular
memory window.
 3. Added domain attribute to check for PAMUV1 specific constraints.
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com

Can you please rebase the driver tp v3.10-rc6 and resend asap? I will
give it another review then and request AlexW to look over the
iommu-group stuff. So we can probably get this merged for v3.11.


Thank,

Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/3 v14] iommu/fsl: Add additional iommu attributes required by the PAMU driver.

2013-04-24 Thread Joerg Roedel
On Tue, Apr 23, 2013 at 02:10:25PM +, Sethi Varun-B16395 wrote:
 I think it's fine to have the header under linux, actually I also the
 intel-iommu header under linux.

Yes, the difference is that VT-d runs on x86 and on ia64. So there is no
single arch where the header could be placed. The amd-iommu.h file on
the other hand is x86 only and should also be moved to asm/, as I just
found out :)

And as long as PAMU is only needed on a single architecture the header
should also be arch-specific. If that changes someday the header can be
moved to a generic place.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/3 v2] iommu: Move swap_pci_ref function to pci.h.

2013-04-23 Thread Joerg Roedel
On Tue, Apr 23, 2013 at 10:05:24AM +0530, Varun Sethi wrote:
 +#ifndef __PCI_H
 +#define __PCI_H

Using __PCI_H is not a wise choice, it has certainly a high risk of a
collision. Anyway, I changed it to __IOMMU_PCI_H and applied the patch.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/3 v14] iommu/fsl: Add additional iommu attributes required by the PAMU driver.

2013-04-23 Thread Joerg Roedel
On Tue, Apr 23, 2013 at 10:05:25AM +0530, Varun Sethi wrote:
 Added the following domain attributes for the FSL PAMU driver:
 1. Added new iommu stash attribute, which allows setting of the
LIODN specific stash id parameter through IOMMU API.
 2. Added an attribute for enabling/disabling DMA to a particular
memory window.
 3. Added domain attribute to check for PAMUV1 specific constraints.
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
 v14 changes:
 - Add FSL prefix to PAMU attributes.
 v13 changes:
 - created a new file include/linux/fsl_pamu_stash.h for stash
 attributes.
 v12 changes:
 - Moved PAMU specifc stash ids and structures to PAMU header file.
 - no change in v11.
 - no change in v10.
  include/linux/fsl_pamu_stash.h |   39 +++

Isn't asm/ for your architecture a better place for that header?


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/3] iommu: Move swap_pci_ref function to pci.h.

2013-04-15 Thread Joerg Roedel
On Mon, Apr 15, 2013 at 12:42:00AM +0530, Varun Sethi wrote:
 swap_pci_ref function is used by the IOMMU API code for swapping pci device
 pointers, while determining the iommu group for the device.
 Currently this function was being implemented for different IOMMU drivers.
 This patch moves the function to pci.h so that the implementation can be
 shared across various IOMMU drivers.

The function is only used in IOMMU code, so I think its fine to keep it
there (unless Bjorn disagrees and wants it in PCI code).


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 4/5 v11] iommu/fsl: Add additional iommu attributes required by the PAMU driver.

2013-04-03 Thread Joerg Roedel
On Wed, Apr 03, 2013 at 05:21:16AM +, Sethi Varun-B16395 wrote:
  I would prefer these PAMU specific enum and struct to be in a pamu-
  specific iommu-header.
  
 
 [Sethi Varun-B16395] But, these would be used by the IOMMU API users
 (e.g. VFIO), they shouldn't depend on PAMU specific headers.

Drivers that use PAMU specifics (like the PAMU specific VFIO parts) can
also include the PAMU specific header.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/5 v11] powerpc: Add iommu domain pointer to device archdata

2013-04-02 Thread Joerg Roedel
On Fri, Mar 29, 2013 at 01:23:59AM +0530, Varun Sethi wrote:
 Add an iommu domain pointer to device (powerpc) archdata.  Devices
 are attached to iommu domains and this pointer provides a mechanism
 to correlate between a device and the associated iommu domain.  This
 field is set when a device is attached to a domain.
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com

This patch needs to be Acked by the PPC maintainers.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 4/5 v11] iommu/fsl: Add additional iommu attributes required by the PAMU driver.

2013-04-02 Thread Joerg Roedel
On Fri, Mar 29, 2013 at 01:24:01AM +0530, Varun Sethi wrote:
 +/* cache stash targets */
 +enum stash_target {
 + IOMMU_ATTR_CACHE_L1 = 1,
 + IOMMU_ATTR_CACHE_L2,
 + IOMMU_ATTR_CACHE_L3,
 +};
 +
 +/* This attribute corresponds to IOMMUs capable of generating
 + * a stash transaction. A stash transaction is typically a
 + * hardware initiated prefetch of data from memory to cache.
 + * This attribute allows configuring stashig specific parameters
 + * in the IOMMU hardware.
 + */
 +
 +struct iommu_stash_attribute {
 + u32 cpu;/* cpu number */
 + u32 cache;  /* cache to stash to: L1,L2,L3 */
 +};
 +

I would prefer these PAMU specific enum and struct to be in a
pamu-specific iommu-header.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 5/5 v11] iommu/fsl: Freescale PAMU driver and iommu implementation.

2013-04-02 Thread Joerg Roedel
Cc'ing Alex Williamson

Alex, can you please review the iommu-group part of this patch?

My comments so far are below:

On Fri, Mar 29, 2013 at 01:24:02AM +0530, Varun Sethi wrote:
 +config FSL_PAMU
 + bool Freescale IOMMU support
 + depends on PPC_E500MC
 + select IOMMU_API
 + select GENERIC_ALLOCATOR
 + help
 +   Freescale PAMU support.

A bit lame for a help text. Can you elaborate more what PAMU is and when
it should be enabled?

 +int pamu_enable_liodn(int liodn)
 +{
 + struct paace *ppaace;
 +
 + ppaace = pamu_get_ppaace(liodn);
 + if (!ppaace) {
 + pr_err(Invalid primary paace entry\n);
 + return -ENOENT;
 + }
 +
 + if (!get_bf(ppaace-addr_bitfields, PPAACE_AF_WSE)) {
 + pr_err(liodn %d not configured\n, liodn);
 + return -EINVAL;
 + }
 +
 + /* Ensure that all other stores to the ppaace complete first */
 + mb();
 +
 + ppaace-addr_bitfields |= PAACE_V_VALID;
 + mb();

Why is it sufficient to set the bit in a variable when enabling liodn
but when disabling it set_bf needs to be called? This looks a bit
assymetric.

 +/* Derive the window size encoding for a particular PAACE entry */
 +static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size)
 +{
 + /* Bug if not a power of 2 */
 + BUG_ON((addrspace_size  (addrspace_size - 1)));

Please use is_power_of_2 here.

 +
 + /* window size is 2^(WSE+1) bytes */
 + return __ffs(addrspace_size  PAMU_PAGE_SHIFT) + PAMU_PAGE_SHIFT - 1;

The PAMU_PAGE_SHIFT shifting and adding looks redundant.

 + if ((win_size  (win_size - 1)) || win_size  PAMU_PAGE_SIZE) {
 + pr_err(window size too small or not a power of two %llx\n, 
 win_size);
 + return -EINVAL;
 + }
 +
 + if (win_addr  (win_size - 1)) {
 + pr_err(window address is not aligned with window size\n);
 + return -EINVAL;
 + }

Again, use is_power_of_2 instead of hand-coding.

 + if (~stashid != 0)
 + set_bf(paace-impl_attr, PAACE_IA_CID, stashid);
 +
 + smp_wmb();
 +
 + if (enable)
 + paace-addr_bitfields |= PAACE_V_VALID;

Havn't you written a helper funtion to set this bit?

 +irqreturn_t pamu_av_isr(int irq, void *arg)
 +{
 + struct pamu_isr_data *data = arg;
 + phys_addr_t phys;
 + unsigned int i, j;
 +
 + pr_emerg(fsl-pamu: access violation interrupt\n);
 +
 + for (i = 0; i  data-count; i++) {
 + void __iomem *p = data-pamu_reg_base + i * PAMU_OFFSET;
 + u32 pics = in_be32(p + PAMU_PICS);
 +
 + if (pics  PAMU_ACCESS_VIOLATION_STAT) {
 + pr_emerg(POES1=%08x\n, in_be32(p + PAMU_POES1));
 + pr_emerg(POES2=%08x\n, in_be32(p + PAMU_POES2));
 + pr_emerg(AVS1=%08x\n, in_be32(p + PAMU_AVS1));
 + pr_emerg(AVS2=%08x\n, in_be32(p + PAMU_AVS2));
 + pr_emerg(AVA=%016llx\n, make64(in_be32(p + PAMU_AVAH),
 + in_be32(p + PAMU_AVAL)));
 + pr_emerg(UDAD=%08x\n, in_be32(p + PAMU_UDAD));
 + pr_emerg(POEA=%016llx\n, make64(in_be32(p + 
 PAMU_POEAH),
 + in_be32(p + PAMU_POEAL)));
 +
 + phys = make64(in_be32(p + PAMU_POEAH),
 + in_be32(p + PAMU_POEAL));
 +
 + /* Assume that POEA points to a PAACE */
 + if (phys) {
 + u32 *paace = phys_to_virt(phys);
 +
 + /* Only the first four words are relevant */
 + for (j = 0; j  4; j++)
 + pr_emerg(PAACE[%u]=%08x\n, j, 
 in_be32(paace + j));
 + }
 + }
 + }
 +
 + panic(\n);

A kernel panic seems like an over-reaction to an access violation.
Besides the device that caused the violation the system should still
work, no?

 +#define make64(high, low) (((u64)(high)  32) | (low))

You redefined this make64 here.

 +static int map_subwins(int liodn, struct fsl_dma_domain *dma_domain)
 +{
 + struct dma_window *sub_win_ptr =
 + dma_domain-win_arr[0];
 + int i, ret;
 + unsigned long rpn;
 +
 + for (i = 0; i  dma_domain-win_cnt; i++) {
 + if (sub_win_ptr[i].valid) {
 + rpn = sub_win_ptr[i].paddr 
 +  PAMU_PAGE_SHIFT;
 + spin_lock(iommu_lock);

IOMMU code might run in interrupt context, so please use
spin_lock_irqsave for the iommu_lock.

 +static void detach_device(struct device *dev, struct fsl_dma_domain 
 *dma_domain)
 +{
 + struct device_domain_info *info;
 + struct list_head *entry, *tmp;
 + unsigned long flags;
 +
 + spin_lock_irqsave(dma_domain-domain_lock, flags);
 + /* Remove the device from the domain device list */
 + 

Re: [PATCH 0/5 v11] iommu/fsl: Freescale PAMU driver and IOMMU API implementation.

2013-04-02 Thread Joerg Roedel
On Fri, Mar 29, 2013 at 01:23:57AM +0530, Varun Sethi wrote:
 This patchset provides the Freescale PAMU (Peripheral Access Management Unit) 
 driver
 and the corresponding IOMMU API implementation. PAMU is the IOMMU present on 
 Freescale
 QorIQ platforms. PAMU can authorize memory access, remap the memory address, 
 and remap 
 the I/O transaction type.
 
 This set consists of the following patches:
 1.  Make iova dma_addr_t in the iommu_iova_to_phys API.
 2. Addition of new field in the device (powerpc) archdata structure for 
 storing iommu domain information
pointer.
 3. Add window permission flags in the iommu_domain_window_enable API.
 4. Add domain attributes for FSL PAMU driver.
 5. PAMU driver and IOMMU API implementation.

Okay, I am about to apply patches 1 and 3 to a new ppc/pamu branch in my
tree.

As a general question, how did you test the IOMMU driver and what will
you do in the future to avoid regressions?


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/6] powerpc/fsl_pci: Store the platform device information corresponding to the pci controller.

2013-03-07 Thread Joerg Roedel
Yes, please base your patches on the latest upstream-tag. I will move my
tree to v3.9-rc1 soon, there are some fixes that need to go upstream.

On Thu, Mar 07, 2013 at 09:14:21AM +, Sethi Varun-B16395 wrote:
 Hi Joerg,
 I have to post the next version of my patchset, should I base it on top of 
 3.9-rc1?
 By when would you move the iommu git tree to 3.9-rc1?
 
 Regards
 Varun
 
  -Original Message-
  From: Kumar Gala [mailto:ga...@kernel.crashing.org]
  Sent: Thursday, February 28, 2013 9:15 PM
  To: Sethi Varun-B16395
  Cc: Joerg Roedel; Stuart Yoder; io...@lists.linux-foundation.org;
  linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org; Wood Scott-
  B07421; Yoder Stuart-B08248
  Subject: Re: [PATCH 2/6] powerpc/fsl_pci: Store the platform device
  information corresponding to the pci controller.
  
  
  On Feb 27, 2013, at 4:56 AM, Sethi Varun-B16395 wrote:
  
   This patch is present in the next branch of linux ppc tree maintained
  by Kumar Gala.
   Following is the commit id:
   52c5affc545053d37c0b05224bbf70f5336caa20
  
   I am not sure if this would be part of 3.9-rc1.
  
   Regards
   varun
  
  This is now in Linus's tree so will be in 3.9-rc1
  
  - k
  
 
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/6] powerpc/fsl_pci: Store the platform device information corresponding to the pci controller.

2013-02-27 Thread Joerg Roedel
On Tue, Feb 26, 2013 at 06:16:10AM +, Sethi Varun-B16395 wrote:
 This patch is not present in Joerg's tree and the add_device API in
 the PAMU driver requires this patch.

Will this patch be part of v3.9-rc1?


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/6 v8] iommu/fsl: Store iommu domain information pointer in archdata.

2013-02-27 Thread Joerg Roedel
On Mon, Feb 18, 2013 at 06:22:14PM +0530, Varun Sethi wrote:
 Add a new field in the device (powerpc) archdata structure for storing iommu 
 domain
 information pointer. This pointer is stored when the device is attached to a 
 particular
 domain.
 
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
 - no change.
  arch/powerpc/include/asm/device.h |4 
  1 files changed, 4 insertions(+), 0 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/device.h 
 b/arch/powerpc/include/asm/device.h
 index 77e97dd..6dc79fe 100644
 --- a/arch/powerpc/include/asm/device.h
 +++ b/arch/powerpc/include/asm/device.h
 @@ -28,6 +28,10 @@ struct dev_archdata {
   void*iommu_table_base;
   } dma_data;
  
 + /* IOMMU domain information pointer. This would be set
 +  * when this device is attached to an iommu_domain.
 +  */
 + void*iommu_domain;

Please Cc the PowerPC Maintainers on this, so that they can have a look
at it. This also must be put this into an #ifdef CONFIG_IOMMU_API.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/6] powerpc/fsl_pci: Added defines for the FSL PCI controller BRR1 register.

2013-02-27 Thread Joerg Roedel
On Mon, Feb 18, 2013 at 06:22:16PM +0530, Varun Sethi wrote:
 Macros for checking FSL PCI controller version.
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
  arch/powerpc/include/asm/pci-bridge.h |4 
  1 files changed, 4 insertions(+), 0 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/pci-bridge.h 
 b/arch/powerpc/include/asm/pci-bridge.h
 index 025a130..c12ed78 100644
 --- a/arch/powerpc/include/asm/pci-bridge.h
 +++ b/arch/powerpc/include/asm/pci-bridge.h
 @@ -14,6 +14,10 @@
  
  struct device_node;
  
 +/* FSL PCI controller BRR1 register */
 +#define PCI_FSL_BRR1  0xbf8
 +#define PCI_FSL_BRR1_VER 0x
 +


Please merge this patch with the one where you actually make use of
these defines for the first time.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 5/6 v8] iommu/fsl: Add addtional attributes specific to the PAMU driver.

2013-02-27 Thread Joerg Roedel
On Mon, Feb 18, 2013 at 06:22:18PM +0530, Varun Sethi wrote:
 Added the following domain attributes for the FSL PAMU driver:
 1. Added new iommu stash attribute, which allows setting of the
LIODN specific stash id parameter through IOMMU API.
 2. Added an attribute for enabling/disabling DMA to a particular
memory window.
 3. Added domain attribute to check for PAMUV1 specific constraints.
 
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
  include/linux/iommu.h |   33 +
  1 files changed, 33 insertions(+), 0 deletions(-)
 
 diff --git a/include/linux/iommu.h b/include/linux/iommu.h
 index 529987c..c44e38b 100644
 --- a/include/linux/iommu.h
 +++ b/include/linux/iommu.h
 @@ -40,6 +40,23 @@ struct notifier_block;
  typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
   struct device *, unsigned long, int, void *);
  
 +/* cache stash targets */
 +#define IOMMU_ATTR_CACHE_L1 1
 +#define IOMMU_ATTR_CACHE_L2 2
 +#define IOMMU_ATTR_CACHE_L3 3
 +
 +/* This attribute corresponds to IOMMUs capable of generating
 + * a stash transaction. A stash transaction is typically a
 + * hardware initiated prefetch of data from memory to cache.
 + * This attribute allows configuring stashig specific parameters
 + * in the IOMMU hardware.
 + */
 +
 +struct iommu_stash_attribute {
 + u32 cpu;/* cpu number */
 + u32 cache;  /* cache to stash to: L1,L2,L3 */
 +};

Please make the cache-attribute an enum instead of using defines.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 0/4] iommu/fsl: Freescale PAMU driver and IOMMU API implementation.

2013-01-06 Thread Joerg Roedel
Hi Varun,

On Thu, Jan 03, 2013 at 05:21:09AM +, Sethi Varun-B16395 wrote:
 It's been a while since I submitted this patch. I have tried to
 address your comments regarding the subwindow attribute. I would
 really appreciate if I can get some feedback on this patch.

I have some ideas in mind how we can abstract this in the IOMMU-API
(with an extension to the API). I will send a RFC patchset soon to add
these changes and then we can discuss it.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/4 v5] iommu/fsl: Add iommu domain attributes required by fsl PAMU driver.

2012-12-03 Thread Joerg Roedel
On Mon, Dec 03, 2012 at 04:57:29PM +, Sethi Varun-B16395 wrote:
 
 
  -Original Message-
  From: iommu-boun...@lists.linux-foundation.org [mailto:iommu-
  boun...@lists.linux-foundation.org] On Behalf Of Joerg Roedel
  Sent: Sunday, December 02, 2012 7:33 PM
  To: Sethi Varun-B16395
  Cc: linux-ker...@vger.kernel.org; io...@lists.linux-foundation.org; Wood
  Scott-B07421; linuxppc-dev@lists.ozlabs.org; Tabi Timur-B04825
  Subject: Re: [PATCH 3/4 v5] iommu/fsl: Add iommu domain attributes
  required by fsl PAMU driver.
  
  Hmm, we need to work out a good abstraction for this.
  
  On Tue, Nov 20, 2012 at 07:24:56PM +0530, Varun Sethi wrote:
   Added the following domain attributes required by FSL PAMU driver:
   1. Subwindows field added to the iommu domain geometry attribute.
  
  Are the Subwindows mapped with full size or do you map only parts of the
  subwindows?
  
 [Sethi Varun-B16395] It's possible to map a part of the subwindow i.e.
 size of the mapping can be less than the sub window size.

Yes, I know that this is possible. But do you plan to support that or is
it sufficient when the whole subwindow is mapped?


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/4 v5] iommu/fsl: Add iommu domain attributes required by fsl PAMU driver.

2012-12-02 Thread Joerg Roedel
Hmm, we need to work out a good abstraction for this.

On Tue, Nov 20, 2012 at 07:24:56PM +0530, Varun Sethi wrote:
 Added the following domain attributes required by FSL PAMU driver:
 1. Subwindows field added to the iommu domain geometry attribute.

Are the Subwindows mapped with full size or do you map only parts of the
subwindows?

 +  * This attribute indicates number of DMA subwindows supported by
 +  * the geometry. If there is a single window that maps the entire
 +  * geometry, attribute must be set to 1. A value of 0 implies
 +  * that this mechanism is not used at all(normal paging is used).
 +  * Value other than* 0 or 1 indicates the actual number of
 +  * subwindows.
 +  */

This semantic is ugly, how about a feature detection mechanism?

 +struct iommu_stash_attribute {
 + u32 cpu;/* cpu number */
 + u32 cache;  /* cache to stash to: L1,L2,L3 */
  };
  
  struct iommu_domain {
 @@ -60,6 +95,14 @@ struct iommu_domain {
  enum iommu_attr {
   DOMAIN_ATTR_MAX,
   DOMAIN_ATTR_GEOMETRY,
 + /* Set the IOMMU hardware stashing
 +  * parameters.
 +  */
 + DOMAIN_ATTR_STASH,
 + /* Explicity enable/disable DMA for a
 + * particular memory window.
 + */
 + DOMAIN_ATTR_ENABLE,
  };

When you add implementation specific attributes please add some
indication to the names that it is only for PAMU. DOMAIN_ATTR_STASH
sounds too generic.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 0/9] dma_debug: add debug_dma_mapping_error support to architectures that support DMA_DEBUG_API

2012-12-02 Thread Joerg Roedel
Hi Marek,
 
On Mon, Nov 26, 2012 at 11:57:19AM +0100, Marek Szyprowski wrote:

 I've took all the patches to the next-dma-debug branch in my tree, I sorry
 that You have to wait so long for it. My branch is based on Joerg's
 dma-debug branch and I've included it for testing in linux-next branch.

The patches are now two times in next. One version from my tree and one
from yours. Please remove the version from your tree, the patches should
go upstream via my dma-debug branch.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 0/9] dma_debug: add debug_dma_mapping_error support to architectures that support DMA_DEBUG_API

2012-11-26 Thread Joerg Roedel
On Mon, Nov 26, 2012 at 11:57:19AM +0100, Marek Szyprowski wrote:
 I've took all the patches to the next-dma-debug branch in my tree, I sorry
 that You have to wait so long for it. My branch is based on Joerg's
 dma-debug branch and I've included it for testing in linux-next branch.
 
 Joerg: would You mind if I handle pushing the whole branch to v3.8
 via my kernel tree? Those changes should be kept close together to
 avoid build breaks for bisecting.

I'll apply the patches to my tree soon enough. But before that I'll wait
a little bit longer to give the arch maintainers the chance to add the
missing Acked-bys.


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 0/9] dma_debug: add debug_dma_mapping_error support to architectures that support DMA_DEBUG_API

2012-11-26 Thread Joerg Roedel
Hi Shuah,

On Fri, Nov 23, 2012 at 02:29:02PM -0700, Shuah Khan wrote:
 x86 - done in the first patch that added the feature.
 
 ARM64: dma_debug: add debug_dma_mapping_error support
 c6x: dma_debug: add debug_dma_mapping_error support
 ia64: dma_debug: add debug_dma_mapping_error support
 microblaze: dma-mapping: support debug_dma_mapping_error
 mips: dma_debug: add debug_dma_mapping_error support
 powerpc: dma_debug: add debug_dma_mapping_error support
 sh: dma_debug: add debug_dma_mapping_error support
 sparc: dma_debug: add debug_dma_mapping_error support
 tile: dma_debug: add debug_dma_mapping_error support

Have you compile-tested the invididual archs you are changing here?


Joerg


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-30 Thread Joerg Roedel
On Fri, Aug 26, 2011 at 12:04:22PM -0600, Alex Williamson wrote:
 On Thu, 2011-08-25 at 20:05 +0200, Joerg Roedel wrote:

  If we really expect segment numbers that need the full 16 bit then this
  would be the way to go. Otherwise I would prefer returning the group-id
  directly and partition the group-id space for the error values (s32 with
  negative numbers being errors).
 
 It's unlikely to have segments using the top bit, but it would be broken
 for an iommu driver to define it's group numbers using pci s:b:d.f if we
 don't have that bit available.  Ben/David, do PEs have an identifier of
 a convenient size?  I'd guess any hardware based identifier is going to
 use a full unsigned bit width.

Okay, if we want to go the secure way I am fine with the int *group
parameter. Another option is to just return u64 and use the extended
number space for errors. But that is even worse as an interface, I
think.

Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-30 Thread Joerg Roedel
On Sun, Aug 28, 2011 at 05:04:32PM +0300, Avi Kivity wrote:
 On 08/28/2011 04:56 PM, Joerg Roedel wrote:

 This can't be secured by a lock, because it introduces potential
 A-B--B-A lock problem when two processes try to take each others mm.
 It could probably be solved by a task-real_mm pointer, havn't thought
 about this yet...


 Or a workqueue -  you get a kernel thread context with a bit of boilerplate.

Right, a workqueue might do the trick. We'll evaluate that. Thanks for
the idea :)

Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-28 Thread Joerg Roedel
On Sun, Aug 28, 2011 at 04:14:00PM +0300, Avi Kivity wrote:
 On 08/26/2011 12:24 PM, Roedel, Joerg wrote:

 The biggest problem with this approach is that it has to happen in the
 context of the given process. Linux can't really modify an mm which
 which belong to another context in a safe way.


 Is use_mm() insufficient?

Yes, it introduces a set of race conditions when a process that already
has an mm wants to take over another processes mm temporarily (and when
use_mm is modified to actually provide this functionality). It is only
save when used from kernel-thread context.

One example:

Process A   Process B   Process C
.   .   .
.   -- takes A-mm .
.   and assignes as B-mm   .
.   .   -- Wants to take
.   .   B-mm, but gets
A-mm now

This can't be secured by a lock, because it introduces potential
A-B--B-A lock problem when two processes try to take each others mm.
It could probably be solved by a task-real_mm pointer, havn't thought
about this yet...

Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-26 Thread Joerg Roedel
On Fri, Aug 26, 2011 at 09:07:35AM -0500, Alexander Graf wrote:
 On 26.08.2011, at 04:33, Roedel, Joerg wrote:
  
  The reason is that you mean the usability for the programmer and I mean
  it for the actual user of qemu :)
 
 No, we mean the actual user of qemu. The reason being that making a
 device available for any user space application is an administrative
 task.

 Forget the KVM case for a moment and think of a user space device
 driver. I as a user am not root. But I as a user when having access to
 /dev/vfioX want to be able to access the device and manage it - and
 only it. The admin of that box needs to set it up properly for me to
 be able to access it.

Right, and that task is being performed by attaching the device(s) in
question to the vfio driver. The rights-management happens on the
/dev/vfio/$group file.

 So having two steps is really the correct way to go:
 
   * create VFIO group
   * use VFIO group
 
 because the two are done by completely different users. It's similar
 to how tun/tap works in Linux too. Of course nothing keeps you from
 also creating a group on the fly, but it shouldn't be the only
 interface available. The persistent setup is definitely more useful.

I see the use-case. But to make it as easy as possible for the end-user
we can do both.

So the user of (qemu again) does this:

# vfio-ctl attach 00:01.0
vfio-ctl: attached to group 8
# vfio-ctl attach 00:02.0
vfio-ctl: attached to group 16
$ qemu -device vfio-pci,host=00:01.0 -device vfio,host=00:01.0 ...

which should cover the usecase you prefer. Qemu still creates the
meta-group that allow the devices to share the same page-table. But what
should also be possible is:

# qemu -device vfio-pci,host=00:01.0 -device vfio-pci,host=00:02.0

In that case qemu detects that the devices are not yet bound to vfio and
will do so and also unbinds them afterwards (essentially the developer
use-case).

Your interface which requires pre-binding of devices into one group by
the administrator only makes sense if you want to force userspace to
use certain devices (which do not belong to the same hw-group) only
together. But I don't see a usecase for defining such constraints (yet).

Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-25 Thread Joerg Roedel
On Thu, Aug 25, 2011 at 11:20:30AM -0600, Alex Williamson wrote:
 On Thu, 2011-08-25 at 12:54 +0200, Roedel, Joerg wrote:

  We need to solve this differently. ARM is starting to use the iommu-api
  too and this definitly does not work there. One possible solution might
  be to make the iommu-ops per-bus.
 
 That sounds good.  Is anyone working on it?  It seems like it doesn't
 hurt to use this in the interim, we may just be watching the wrong bus
 and never add any sysfs group info.

I'll cook something up for RFC over the weekend.

  Also the return type should not be long but something that fits into
  32bit on all platforms. Since you use -ENODEV, probably s32 is a good
  choice.
 
 The convenience of using seg|bus|dev|fn was too much to resist, too bad
 it requires a full 32bits.  Maybe I'll change it to:
 int iommu_device_group(struct device *dev, unsigned int *group)

If we really expect segment numbers that need the full 16 bit then this
would be the way to go. Otherwise I would prefer returning the group-id
directly and partition the group-id space for the error values (s32 with
negative numbers being errors).

   @@ -438,6 +439,10 @@ static int __init intel_iommu_setup(char *str)
 printk(KERN_INFO
 Intel-IOMMU: disable supported super page\n);
 intel_iommu_superpage = 0;
   + } else if (!strncmp(str, no_mf_groups, 12)) {
   + printk(KERN_INFO
   + Intel-IOMMU: disable separate groups for 
   multifunction devices\n);
   + intel_iommu_no_mf_groups = 1;
  
  This should really be a global iommu option and not be VT-d specific.
 
 You think?  It's meaningless on benh's power systems.

But it is not meaningless on AMD-Vi systems :) There should be one
option for both.
On the other hand this requires an iommu= parameter on ia64, but thats
probably not that bad.

  This looks like code duplication in the VT-d driver. It doesn't need to
  be generalized now, but we should keep in mind to do a more general
  solution later.
  Maybe it is beneficial if the IOMMU drivers only setup the number in
  dev-arch.iommu.groupid and the iommu-api fetches it from there then.
  But as I said, this is some more work and does not need to be done for
  this patch(-set).
 
 The iommu-api reaches into dev-arch.iommu.groupid?  I figured we should
 at least start out with a lightweight, optional interface without the
 overhead of predefining groupids setup by bus notification callbacks in
 each iommu driver.  Thanks,

As I said, this is just an idea for an later optimization. It is fine
for now as it is in this patch.

Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-24 Thread Joerg Roedel
On Tue, Aug 23, 2011 at 03:30:06PM -0400, Alex Williamson wrote:
 On Tue, 2011-08-23 at 07:01 +1000, Benjamin Herrenschmidt wrote:

  Could be tho in what form ? returning sysfs pathes ?
 
 I'm at a loss there, please suggest.  I think we need an ioctl that
 returns some kind of array of devices within the group and another that
 maybe takes an index from that array and returns an fd for that device.
 A sysfs path string might be a reasonable array element, but it sounds
 like a pain to work with.

Limiting to PCI we can just pass the BDF as the argument to optain the
device-fd. For a more generic solution we need a unique identifier in
some way which is unique across all 'struct device' instances in the
system. As far as I know we don't have that yet (besides the sysfs-path)
so we either add that or stick with bus-specific solutions.

  1:1 process has the advantage of linking to an -mm which makes the whole
  mmu notifier business doable. How do you want to track down mappings and
  do the second level translation in the case of explicit map/unmap (like
  on power) if you are not tied to an mm_struct ?
 
 Right, I threw away the mmu notifier code that was originally part of
 vfio because we can't do anything useful with it yet on x86.  I
 definitely don't want to prevent it where it makes sense though.  Maybe
 we just record current-mm on open and restrict subsequent opens to the
 same.

Hmm, I think we need io-page-fault support in the iommu-api then.

  Another aspect I don't see discussed is how we represent these things to
  the guest.
  
  On Power for example, I have a requirement that a given iommu domain is
  represented by a single dma window property in the device-tree. What
  that means is that that property needs to be either in the node of the
  device itself if there's only one device in the group or in a parent
  node (ie a bridge or host bridge) if there are multiple devices.
  
  Now I do -not- want to go down the path of simulating P2P bridges,
  besides we'll quickly run out of bus numbers if we go there.
  
  For us the most simple and logical approach (which is also what pHyp
  uses and what Linux handles well) is really to expose a given PCI host
  bridge per group to the guest. Believe it or not, it makes things
  easier :-)
 
 I'm all for easier.  Why does exposing the bridge use less bus numbers
 than emulating a bridge?
 
 On x86, I want to maintain that our default assignment is at the device
 level.  A user should be able to pick single or multiple devices from
 across several groups and have them all show up as individual,
 hotpluggable devices on bus 0 in the guest.  Not surprisingly, we've
 also seen cases where users try to attach a bridge to the guest,
 assuming they'll get all the devices below the bridge, so I'd be in
 favor of making this just work if possible too, though we may have to
 prevent hotplug of those.

A side-note: Might it be better to expose assigned devices in a guest on
a seperate bus? This will make it easier to emulate an IOMMU for the
guest inside qemu.


Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-24 Thread Joerg Roedel
On Tue, Aug 23, 2011 at 01:33:14PM -0400, Aaron Fabbri wrote:
 On 8/23/11 10:01 AM, Alex Williamson alex.william...@redhat.com wrote:
  The iommu domain would probably be allocated when the first device is
  bound to vfio.  As each device is bound, it gets attached to the group.
  DMAs are done via an ioctl on the group.
  
  I think group + uiommu leads to effectively reliving most of the
  problems with the current code.  The only benefit is the group
  assignment to enforce hardware restrictions.  We still have the problem
  that uiommu open() = iommu_domain_alloc(), whose properties are
  meaningless without attached devices (groups).  Which I think leads to
  the same awkward model of attaching groups to define the domain, then we
  end up doing mappings via the group to enforce ordering.
 
 Is there a better way to allow groups to share an IOMMU domain?
 
 Maybe, instead of having an ioctl to allow a group A to inherit the same
 iommu domain as group B, we could have an ioctl to fully merge two groups
 (could be what Ben was thinking):
 
 A.ioctl(MERGE_TO_GROUP, B)
 
 The group A now goes away and its devices join group B.  If A ever had an
 iommu domain assigned (and buffers mapped?) we fail.
 
 Groups cannot get smaller (they are defined as minimum granularity of an
 IOMMU, initially).  They can get bigger if you want to share IOMMU
 resources, though.
 
 Any downsides to this approach?

As long as this is a 2-way road its fine. There must be a way to split
the groups again after the guest exits. But then we are again at the
super-groups (aka meta-groups, aka uiommu) point.

Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-23 Thread Joerg Roedel
On Mon, Aug 22, 2011 at 08:52:18PM -0400, aafabbri wrote:
 You have to enforce group/iommu domain assignment whether you have the
 existing uiommu API, or if you change it to your proposed
 ioctl(inherit_iommu) API.
 
 The only change needed to VFIO here should be to make uiommu fd assignment
 happen on the groups instead of on device fds.  That operation fails or
 succeeds according to the group semantics (all-or-none assignment/same
 uiommu).

That is makes uiommu basically the same as the meta-groups, right?

Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-23 Thread Joerg Roedel
On Tue, Aug 23, 2011 at 02:54:43AM -0400, Benjamin Herrenschmidt wrote:
 Possibly, the question that interest me the most is what interface will
 KVM end up using. I'm also not terribly fan with the (perceived)
 discrepancy between using uiommu to create groups but using the group fd
 to actually do the mappings, at least if that is still the plan.
 
 If the separate uiommu interface is kept, then anything that wants to be
 able to benefit from the ability to put multiple devices (or existing
 groups) into such a meta group would need to be explicitly modified to
 deal with the uiommu APIs.
 
 I tend to prefer such meta groups as being something you create
 statically using a configuration interface, either via sysfs, netlink or
 ioctl's to a control vfio device driven by a simple command line tool
 (which can have the configuration stored in /etc and re-apply it at
 boot).

Hmm, I don't think that these groups are static for the systems
run-time. They only exist for the lifetime of a guest per default, at
least on x86. Thats why I prefer to do this grouping using VFIO and not
some sysfs interface (which would be the third interface beside the
ioctls and netlink a VFIO user needs to be aware of). Doing this in the
ioctl interface just makes things easier.

Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-22 Thread Joerg Roedel
On Mon, Aug 22, 2011 at 02:30:26AM -0400, Avi Kivity wrote:
 On 08/20/2011 07:51 PM, Alex Williamson wrote:
  We need to address both the description and enforcement of device
  groups.  Groups are formed any time the iommu does not have resolution
  between a set of devices.  On x86, this typically happens when a
  PCI-to-PCI bridge exists between the set of devices and the iommu.  For
  Power, partitionable endpoints define a group.  Grouping information
  needs to be exposed for both userspace and kernel internal usage.  This
  will be a sysfs attribute setup by the iommu drivers.  Perhaps:
 
  # cat /sys/devices/pci:00/:00:19.0/iommu_group
  42
 
 
 $ readlink /sys/devices/pci:00/:00:19.0/iommu_group
 ../../../path/to/device/which/represents/the/resource/constraint
 
 (the pci-to-pci bridge on x86, or whatever node represents partitionable 
 endpoints on power)

That does not work. The bridge in question may not even be visible as a
PCI device, so you can't link to it. This is the case on a few PCIe
cards which only have a PCIx chip and a PCIe-2-PCIx bridge to implement
the PCIe interface (yes, I have seen those cards).

Regards,

Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-22 Thread Joerg Roedel
On Sat, Aug 20, 2011 at 12:51:39PM -0400, Alex Williamson wrote:
 We had an extremely productive VFIO BoF on Monday.  Here's my attempt to
 capture the plan that I think we agreed to:
 
 We need to address both the description and enforcement of device
 groups.  Groups are formed any time the iommu does not have resolution
 between a set of devices.  On x86, this typically happens when a
 PCI-to-PCI bridge exists between the set of devices and the iommu.  For
 Power, partitionable endpoints define a group.  Grouping information
 needs to be exposed for both userspace and kernel internal usage.  This
 will be a sysfs attribute setup by the iommu drivers.  Perhaps:
 
 # cat /sys/devices/pci:00/:00:19.0/iommu_group
 42

Right, that is mainly for libvirt to provide that information to the
user in a meaningful way. So userspace is aware that other devices might
not work anymore when it assigns one to a guest.

 
 (I use a PCI example here, but attribute should not be PCI specific)
 
 From there we have a few options.  In the BoF we discussed a model where
 binding a device to vfio creates a /dev/vfio$GROUP character device
 file.  This group fd provides provides dma mapping ioctls as well as
 ioctls to enumerate and return a device fd for each attached member of
 the group (similar to KVM_CREATE_VCPU).  We enforce grouping by
 returning an error on open() of the group fd if there are members of the
 group not bound to the vfio driver.  Each device fd would then support a
 similar set of ioctls and mapping (mmio/pio/config) interface as current
 vfio, except for the obvious domain and dma ioctls superseded by the
 group fd.
 
 Another valid model might be that /dev/vfio/$GROUP is created for all
 groups when the vfio module is loaded.  The group fd would allow open()
 and some set of iommu querying and device enumeration ioctls, but would
 error on dma mapping and retrieving device fds until all of the group
 devices are bound to the vfio driver.

I am in favour of /dev/vfio/$GROUP. If multiple devices should be
assigned to a guest, there can also be an ioctl to bind a group to an
address-space of another group (certainly needs some care to not allow
that both groups belong to different processes).

Btw, a problem we havn't talked about yet entirely is
driver-deassignment. User space can decide to de-assign the device from
vfio while a fd is open on it. With PCI there is no way to let this fail
(the .release function returns void last time i checked). Is this a
problem, and yes, how we handle that?


Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-05 Thread Joerg Roedel
On Fri, Aug 05, 2011 at 08:26:11PM +1000, Benjamin Herrenschmidt wrote:
 On Thu, 2011-08-04 at 12:41 +0200, Joerg Roedel wrote:
  On Mon, Aug 01, 2011 at 02:27:36PM -0600, Alex Williamson wrote:
   It's not clear to me how we could skip it.  With VT-d, we'd have to
   implement an emulated interrupt remapper and hope that the guest picks
   unused indexes in the host interrupt remapping table before it could do
   anything useful with direct access to the MSI-X table.  Maybe AMD IOMMU
   makes this easier?
  
  AMD IOMMU provides remapping tables per-device, and not a global one.
  But that does not make direct guest-access to the MSI-X table safe. The
  table contains the table contains the interrupt-type and the vector
  which is used as an index into the remapping table by the IOMMU. So when
  the guest writes into its MSI-X table the remapping-table in the host
  needs to be updated too.
 
 Right, you need paravirt to avoid filtering :-)

Or a shadow MSI-X table like done on x86. How to handle this seems to be
platform specific. As you indicate there is a standardized paravirt
interface for that on Power.

 IE the problem is two fold:
 
  - Getting the right value in the table / remapper so things work
 (paravirt)
 
  - Protecting against the guest somewhat managing to change the value in
 the table (either directly or via a backdoor access to its own config
 space).
 
 The later for us comes from the HW PE filtering of the MSI transactions.

Right. The second part of the problem can be avoided with
interrupt-remapping/filtering hardware in the IOMMUs.

Joerg
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-05 Thread Joerg Roedel
On Fri, Aug 05, 2011 at 08:42:38PM +1000, Benjamin Herrenschmidt wrote:

 Right. In fact to try to clarify the problem for everybody, I think we
 can distinguish two different classes of constraints that can
 influence the grouping of devices:
 
  1- Hard constraints. These are typically devices using the same RID or
 where the RID cannot be reliably guaranteed (the later is the case with
 some PCIe-PCIX bridges which will take ownership of some transactions
 such as split but not all). Devices like that must be in the same
 domain. This is where PowerPC adds to what x86 does today the concept
 that the domains are pre-existing, since we use the RID for error
 isolation  MMIO segmenting as well. so we need to create those domains
 at boot time.

Domains (in the iommu-sense) are created at boot time on x86 today.
Every device needs at least a domain to provide dma-mapping
functionality to the drivers. So all the grouping is done too at
boot-time. This is specific to the iommu-drivers today but can be
generalized I think.

  2- Softer constraints. Those constraints derive from the fact that not
 applying them risks enabling the guest to create side effects outside of
 its sandbox. To some extent, there can be degrees of badness between
 the various things that can cause such constraints. Examples are shared
 LSIs (since trusting DisINTx can be chancy, see earlier discussions),
 potentially any set of functions in the same device can be problematic
 due to the possibility to get backdoor access to the BARs etc...

Hmm, there is no sane way to handle such constraints in a safe way,
right? We can either blacklist devices which are know to have such
backdoors or we just ignore the problem.

 Now, what I derive from the discussion we've had so far, is that we need
 to find a proper fix for #1, but Alex and Avi seem to prefer that #2
 remains a matter of libvirt/user doing the right thing (basically
 keeping a loaded gun aimed at the user's foot with a very very very
 sweet trigger but heh, let's not start a flamewar here :-)
 
 So let's try to find a proper solution for #1 now, and leave #2 alone
 for the time being.

Yes, and the solution for #1 should be entirely in the kernel. The
question is how to do that. Probably the most sane way is to introduce a
concept of device ownership. The ownership can either be a kernel driver
or a userspace process. Giving ownership of a device to userspace is
only possible if all devices in the same group are unbound from its
respective drivers. This is a very intrusive concept, no idea if it
has a chance of acceptance :-)
But the advantage is clearly that this allows better semantics in the
IOMMU drivers and a more stable handover of devices from host drivers to
kvm guests.

 Maybe the right option is for x86 to move toward pre-existing domains
 like powerpc does, or maybe we can just expose some kind of ID.

As I said, the domains are created a iommu driver initialization time
(usually boot time). But the groups are internal to the iommu drivers
and not visible somewhere else.

 Ah you started answering to my above questions :-)
 
 We could do what you propose. It depends what we want to do with
 domains. Practically speaking, we could make domains pre-existing (with
 the ability to group several PEs into larger domains) or we could keep
 the concepts different, possibly with the limitation that on powerpc, a
 domain == a PE.
 
 I suppose we -could- make arbitrary domains on ppc as well by making the
 various PE's iommu's in HW point to the same in-memory table, but that's
 a bit nasty in practice due to the way we manage those, and it would to
 some extent increase the risk of a failing device/driver stomping on
 another one and thus taking it down with itself. IE. isolation of errors
 is an important feature for us.

These arbitrary domains exist in the iommu-api. It would be good to
emulate them on Power too. Can't you put a PE into an isolated
error-domain when something goes wrong with it? This should provide the
same isolation as before.
What you derive the group number from is your business :-) On x86 it is
certainly the best to use the RID these devices share together with the
PCI segment number.

Regards,

Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-04 Thread Joerg Roedel
On Sat, Jul 30, 2011 at 12:20:08PM -0600, Alex Williamson wrote:
 On Sat, 2011-07-30 at 09:58 +1000, Benjamin Herrenschmidt wrote:
  - The -minimum- granularity of pass-through is not always a single
  device and not always under SW control
 
 But IMHO, we need to preserve the granularity of exposing a device to a
 guest as a single device.  That might mean some devices are held hostage
 by an agent on the host.

Thats true. There is a difference between unassign a group from the host
and make single devices in that PE visible to the guest. But we need
to make sure that no device in a PE is used by the host while at least
one device is assigned to a guest.

Unlike the other proposals to handle this in libvirt, I think this
belongs into the kernel. Doing this in userspace may break the entire
system if done wrong.

For example, if one device from e PE is assigned to a guest while
another one is not unbound from its host driver, the driver may get very
confused when DMA just stops working. This may crash the entire system
or lead to silent data corruption in the guest. The behavior is
basically undefined then. The kernel must not not allow that.


Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: kvm PCI assignment VFIO ramblings

2011-08-04 Thread Joerg Roedel
Hi Ben,

thanks for your detailed introduction to the requirements for POWER. Its
good to know that the granularity problem is not x86-only.

On Sat, Jul 30, 2011 at 09:58:53AM +1000, Benjamin Herrenschmidt wrote:
 In IBM POWER land, we call this a partitionable endpoint (the term
 endpoint here is historic, such a PE can be made of several PCIe
 endpoints). I think partitionable is a pretty good name tho to
 represent the constraints, so I'll call this a partitionable group
 from now on.

On x86 this is mostly an issue of the IOMMU and which set of devices use
the same request-id. I used to call that an alias-group because the
devices have a request-id alias to the pci-bridge.

 - The -minimum- granularity of pass-through is not always a single
 device and not always under SW control

Correct.
 
 - Having a magic heuristic in libvirt to figure out those constraints is
 WRONG. This reeks of XFree 4 PCI layer trying to duplicate the kernel
 knowledge of PCI resource management and getting it wrong in many many
 cases, something that took years to fix essentially by ripping it all
 out. This is kernel knowledge and thus we need the kernel to expose in a
 way or another what those constraints are, what those partitionable
 groups are.

I agree. Managing the ownership of a group should be done in the kernel.
Doing this in userspace is just too dangerous.

The problem to be solved here is how to present these PEs inside the
kernel and to userspace. I thought a bit about making this visbible
through the iommu-api for in-kernel users. That is probably the most
logical place.

For userspace I would like to propose a new device attribute in sysfs.
This attribute contains the group number. All devices with the same
group number belong to the same PE. Libvirt needs to scan the whole
device tree to build the groups but that is probalbly not a big deal.


Joerg

 
 - That does -not- mean that we cannot specify for each individual device
 within such a group where we want to put it in qemu (what devfn etc...).
 As long as there is a clear understanding that the ownership of the
 device goes with the group, this is somewhat orthogonal to how they are
 represented in qemu. (Not completely... if the iommu is exposed to the
 guest ,via paravirt for example, some of these constraints must be
 exposed but I'll talk about that more later).
 
 The interface currently proposed for VFIO (and associated uiommu)
 doesn't handle that problem at all. Instead, it is entirely centered
 around a specific feature of the VTd iommu's for creating arbitrary
 domains with arbitrary devices (tho those devices -do- have the same
 constraints exposed above, don't try to put 2 legacy PCI devices behind
 the same bridge into 2 different domains !), but the API totally ignores
 the problem, leaves it to libvirt magic foo and focuses on something
 that is both quite secondary in the grand scheme of things, and quite
 x86 VTd specific in the implementation and API definition.
 
 Now, I'm not saying these programmable iommu domains aren't a nice
 feature and that we shouldn't exploit them when available, but as it is,
 it is too much a central part of the API.
 
 I'll talk a little bit more about recent POWER iommu's here to
 illustrate where I'm coming from with my idea of groups:
 
 On p7ioc (the IO chip used on recent P7 machines), there -is- a concept
 of domain and a per-RID filtering. However it differs from VTd in a few
 ways:
 
 The domains (aka PEs) encompass more than just an iommu filtering
 scheme. The MMIO space and PIO space are also segmented, and those
 segments assigned to domains. Interrupts (well, MSI ports at least) are
 assigned to domains. Inbound PCIe error messages are targeted to
 domains, etc...
 
 Basically, the PEs provide a very strong isolation feature which
 includes errors, and has the ability to immediately isolate a PE on
 the first occurence of an error. For example, if an inbound PCIe error
 is signaled by a device on a PE or such a device does a DMA to a
 non-authorized address, the whole PE gets into error state. All
 subsequent stores (both DMA and MMIO) are swallowed and reads return all
 1's, interrupts are blocked. This is designed to prevent any propagation
 of bad data, which is a very important feature in large high reliability
 systems.
 
 Software then has the ability to selectively turn back on MMIO and/or
 DMA, perform diagnostics, reset devices etc...
 
 Because the domains encompass more than just DMA, but also segment the
 MMIO space, it is not practical at all to dynamically reconfigure them
 at runtime to move devices into domains. The firmware or early kernel
 code (it depends) will assign devices BARs using an algorithm that keeps
 them within PE segment boundaries, etc
 
 Additionally (and this is indeed a restriction compared to VTd, though
 I expect our future IO chips to lift it to some extent), PE don't get
 separate DMA address spaces. There is one 64-bit DMA address space per

Re: kvm PCI assignment VFIO ramblings

2011-08-04 Thread Joerg Roedel
On Mon, Aug 01, 2011 at 02:27:36PM -0600, Alex Williamson wrote:
 It's not clear to me how we could skip it.  With VT-d, we'd have to
 implement an emulated interrupt remapper and hope that the guest picks
 unused indexes in the host interrupt remapping table before it could do
 anything useful with direct access to the MSI-X table.  Maybe AMD IOMMU
 makes this easier?

AMD IOMMU provides remapping tables per-device, and not a global one.
But that does not make direct guest-access to the MSI-X table safe. The
table contains the table contains the interrupt-type and the vector
which is used as an index into the remapping table by the IOMMU. So when
the guest writes into its MSI-X table the remapping-table in the host
needs to be updated too.

Joerg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] Revert Merge branch 'x86/iommu' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip into for-linus

2008-08-01 Thread Joerg Roedel
On Fri, Aug 01, 2008 at 08:51:23AM +0900, FUJITA Tomonori wrote:
 On Fri, 1 Aug 2008 09:43:23 +1000
 Stephen Rothwell [EMAIL PROTECTED] wrote:
 
  This reverts commit 29111f579f4f3f2a07385f931854ab0527ae7ea5.
  
  This undoes the hasty addition of a global version of iommu_num_pages()
  that broke both the powerpc and sparc builds.  This function can be
  revisited later.
  
  Signed-off-by: Stephen Rothwell [EMAIL PROTECTED]
  ---
   arch/x86/kernel/amd_iommu.c   |   13 -
   arch/x86/kernel/pci-gart_64.c |   11 +++
   include/linux/iommu-helper.h  |1 -
   lib/iommu-helper.c|8 
   4 files changed, 15 insertions(+), 18 deletions(-)
  
  This patch comes from
  git revert -m 1 29111f579f4f3f2a07385f931854ab0527ae7ea5
  
  I have test built powerpc ppc64_defconfig and sparc64 defconfig.  The only
  references to iommu_num_pages() after this is applied are in the powerpc
  and sparc code.
  
  Linus, please apply.  This is impacting on both powerpc and sparc
  development and even the author of the patches said that those patches
  were not urgent.
 
 Ingo has a patch to fix this problem in the x86 tree:
 
 http://marc.info/?l=linux-kernelm=121754062325903w=2

FUJITA,

can you send your fix directly to Linus again please? Andrew mentioned
that the x86 maintainers are on vacation. That may be the reason that
your fix is not yet upstream.

Joerg

-- 
   |   AMD Saxony Limited Liability Company  Co. KG
 Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
 System|  Register Court Dresden: HRA 4896
 Research  |  General Partner authorized to represent:
 Center| AMD Saxony LLC (Wilmington, Delaware, US)
   | General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev