[PATCH v5 5/5] vfio/iommu_type1: Simplify group attachment

2022-07-01 Thread Nicolin Chen via iommu
Un-inline the domain specific logic from the attach/detach_group ops into
two paired functions vfio_iommu_alloc_attach_domain() and
vfio_iommu_detach_destroy_domain() that strictly deal with creating and
destroying struct vfio_domains.

Add the logic to check for EMEDIUMTYPE return code of iommu_attach_group()
and avoid the extra domain allocations and attach/detach sequences of the
old code. This allows properly detecting an actual attach error, like
-ENOMEM, vs treating all attach errors as an incompatible domain.

Reviewed-by: Kevin Tian 
Co-developed-by: Jason Gunthorpe 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 333 +---
 1 file changed, 180 insertions(+), 153 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 5624bbf02ab7..d3a4cedcd082 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2155,14 +2155,179 @@ static int vfio_iommu_domain_alloc(struct device *dev, 
void *data)
return 1; /* Don't iterate */
 }
 
+static struct vfio_domain *
+vfio_iommu_alloc_attach_domain(struct vfio_iommu *iommu,
+  struct vfio_iommu_group *group,
+  struct list_head *group_resv_regions)
+{
+   struct iommu_domain *new_domain;
+   struct vfio_domain *domain;
+   phys_addr_t resv_msi_base;
+   int ret = 0;
+
+   /* Try to match an existing compatible domain */
+   list_for_each_entry (domain, &iommu->domain_list, next) {
+   ret = iommu_attach_group(domain->domain, group->iommu_group);
+   /* -EMEDIUMTYPE means an incompatible domain, so try next one */
+   if (ret == -EMEDIUMTYPE)
+   continue;
+   if (ret)
+   return ERR_PTR(ret);
+   goto done;
+   }
+
+   /*
+* Going via the iommu_group iterator avoids races, and trivially gives
+* us a representative device for the IOMMU API call. We don't actually
+* want to iterate beyond the first device (if any).
+*/
+   iommu_group_for_each_dev(group->iommu_group, &new_domain,
+vfio_iommu_domain_alloc);
+   if (!new_domain)
+   return ERR_PTR(-EIO);
+
+   if (iommu->nesting) {
+   ret = iommu_enable_nesting(new_domain);
+   if (ret)
+   goto out_free_iommu_domain;
+   }
+
+   ret = iommu_attach_group(new_domain, group->iommu_group);
+   if (ret)
+   goto out_free_iommu_domain;
+
+   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+   if (!domain) {
+   ret = -ENOMEM;
+   goto out_detach;
+   }
+
+   domain->domain = new_domain;
+   vfio_test_domain_fgsp(domain);
+
+   /*
+* If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+* no-snoop set) then VFIO always turns this feature on because on Intel
+* platforms it optimizes KVM to disable wbinvd emulation.
+*/
+   if (new_domain->ops->enforce_cache_coherency)
+   domain->enforce_cache_coherency =
+   new_domain->ops->enforce_cache_coherency(new_domain);
+
+   /* replay mappings on new domains */
+   ret = vfio_iommu_replay(iommu, domain);
+   if (ret)
+   goto out_free_domain;
+
+   if (vfio_iommu_has_sw_msi(group_resv_regions, &resv_msi_base)) {
+   ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
+   if (ret && ret != -ENODEV)
+   goto out_free_domain;
+   }
+
+   INIT_LIST_HEAD(&domain->group_list);
+   list_add(&domain->next, &iommu->domain_list);
+   vfio_update_pgsize_bitmap(iommu);
+
+done:
+   list_add(&group->next, &domain->group_list);
+
+   /*
+* An iommu backed group can dirty memory directly and therefore
+* demotes the iommu scope until it declares itself dirty tracking
+* capable via the page pinning interface.
+*/
+   iommu->num_non_pinned_groups++;
+
+   return domain;
+
+out_free_domain:
+   kfree(domain);
+out_detach:
+   iommu_detach_group(new_domain, group->iommu_group);
+out_free_iommu_domain:
+   iommu_domain_free(new_domain);
+   return ERR_PTR(ret);
+}
+
+static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
+{
+   struct rb_node *node;
+
+   while ((node = rb_first(&iommu->dma_list)))
+   vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
+}
+
+static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
+{
+   struct rb_node *n, *p;
+
+   n = rb_first(&iommu->dma_list);
+   for (; n; n = rb_next(n)) {
+   struct vfio_dma *dma;
+   long locked = 0, unlocked = 0;
+
+   dma = rb_entry(n, struct vfio_

[PATCH v5 4/5] vfio/iommu_type1: Clean up update_dirty_scope in detach_group()

2022-07-01 Thread Nicolin Chen via iommu
All devices in emulated_iommu_groups have pinned_page_dirty_scope
set, so the update_dirty_scope in the first list_for_each_entry
is always false. Clean it up, and move the "if update_dirty_scope"
part from the detach_group_done routine to the domain_list part.

Suggested-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 27 ---
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 5992ee2345a0..5624bbf02ab7 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2456,14 +2456,12 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain;
struct vfio_iommu_group *group;
-   bool update_dirty_scope = false;
LIST_HEAD(iova_copy);
 
mutex_lock(&iommu->lock);
list_for_each_entry(group, &iommu->emulated_iommu_groups, next) {
if (group->iommu_group != iommu_group)
continue;
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
kfree(group);
 
@@ -2472,7 +2470,8 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
WARN_ON(iommu->notifier.head);
vfio_iommu_unmap_unpin_all(iommu);
}
-   goto detach_group_done;
+   mutex_unlock(&iommu->lock);
+   return;
}
 
/*
@@ -2488,9 +2487,7 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
continue;
 
iommu_detach_group(domain->domain, group->iommu_group);
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
-   kfree(group);
/*
 * Group ownership provides privilege, if the group list is
 * empty, the domain goes away. If it's the last domain with
@@ -2513,6 +2510,16 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
vfio_iommu_aper_expand(iommu, _copy);
vfio_update_pgsize_bitmap(iommu);
}
+   /*
+* Removal of a group without dirty tracking may allow
+* the iommu scope to be promoted.
+*/
+   if (!group->pinned_page_dirty_scope) {
+   iommu->num_non_pinned_groups--;
+   if (iommu->dirty_page_tracking)
+   vfio_iommu_populate_bitmap_full(iommu);
+   }
+   kfree(group);
break;
}
 
@@ -2521,16 +2528,6 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
else
vfio_iommu_iova_free(_copy);
 
-detach_group_done:
-   /*
-* Removal of a group without dirty tracking may allow the iommu scope
-* to be promoted.
-*/
-   if (update_dirty_scope) {
-   iommu->num_non_pinned_groups--;
-   if (iommu->dirty_page_tracking)
-   vfio_iommu_populate_bitmap_full(iommu);
-   }
mutex_unlock(>lock);
 }
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 3/5] vfio/iommu_type1: Remove the domain->ops comparison

2022-07-01 Thread Nicolin Chen via iommu
The domain->ops validation was added, as a precaution, for mixed-driver
systems.

Per Robin's remarks,
* While bus_set_iommu() still exists, the core code prevents multiple
  drivers from registering, so we can't really run into a situation of
  having a mixed-driver system:
  https://lore.kernel.org/kvm/6e1280c5-4b22-ebb3-3912-6c72bc169...@arm.com/

* And there are plenty more significant problems than this to fix; in future
  when multiple drivers can be permitted, we will rely on the IOMMU core code
  to check the domain->ops:
  https://lore.kernel.org/kvm/6575de6d-94ba-c427-5b1e-967750ddf...@arm.com/

So remove the check in VFIO for simplicity.

Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 7530f0d727e5..5992ee2345a0 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2280,29 +2280,19 @@ static int vfio_iommu_type1_attach_group(void 
*iommu_data,
domain->domain->ops->enforce_cache_coherency(
domain->domain);
 
-   /*
-* Try to match an existing compatible domain.  We don't want to
-* preclude an IOMMU driver supporting multiple bus_types and being
-* able to include different bus_types in the same IOMMU domain, so
-* we test whether the domains use the same iommu_ops rather than
-* testing if they're on the same bus_type.
-*/
+   /* Try to match an existing compatible domain */
list_for_each_entry(d, >domain_list, next) {
-   if (d->domain->ops == domain->domain->ops) {
-   iommu_detach_group(domain->domain, group->iommu_group);
-   if (!iommu_attach_group(d->domain,
-   group->iommu_group)) {
-   list_add(>next, >group_list);
-   iommu_domain_free(domain->domain);
-   kfree(domain);
-   goto done;
-   }
-
-   ret = iommu_attach_group(domain->domain,
-group->iommu_group);
-   if (ret)
-   goto out_domain;
+   iommu_detach_group(domain->domain, group->iommu_group);
+   if (!iommu_attach_group(d->domain, group->iommu_group)) {
+   list_add(>next, >group_list);
+   iommu_domain_free(domain->domain);
+   kfree(domain);
+   goto done;
}
+
+   ret = iommu_attach_group(domain->domain,  group->iommu_group);
+   if (ret)
+   goto out_domain;
}
 
vfio_test_domain_fgsp(domain);
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 2/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

2022-07-01 Thread Nicolin Chen via iommu
From: Jason Gunthorpe 

The KVM mechanism for controlling wbinvd is based on OR of the coherency
property of all devices attached to a guest, no matter whether those
devices are attached to a single domain or multiple domains.

On the other hand, the benefit to using separate domains was that those
devices attached to domains supporting enforced cache coherency always
mapped with the attributes necessary to provide that feature, therefore
if a non-enforced domain was dropped, the associated group removal would
re-trigger an evaluation by KVM.

In practice however, the only known cases of such mixed domains included
an Intel IGD device behind an IOMMU lacking snoop control, where such
devices do not support hotplug, therefore this scenario lacks testing and
is not considered sufficiently relevant to support.

After all, KVM won't take advantage of trying to push a device that could
do enforced cache coherency to a dedicated domain vs re-using an existing
domain, which is non-coherent.

Simplify this code and eliminate the test. This removes the only logic
that needed to have a dummy domain attached prior to searching for a
matching domain and simplifies the next patches.

It's unclear whether we want to further optimize the Intel driver to
update the domain coherency after a device is detached from it, at
least not before KVM can be verified to handle such dynamics in related
emulation paths (wbinvd, vcpu load, write_cr0, ept, etc.). In reality
we don't see a usage requiring such optimization, as the only device
which imposes such non-coherency is the Intel GPU, which doesn't even
support hotplug/hot remove.

Signed-off-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Reviewed-by: Lu Baolu 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c496b7d0b96f..7530f0d727e5 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2288,9 +2288,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 * testing if they're on the same bus_type.
 */
list_for_each_entry(d, >domain_list, next) {
-   if (d->domain->ops == domain->domain->ops &&
-   d->enforce_cache_coherency ==
-   domain->enforce_cache_coherency) {
+   if (d->domain->ops == domain->domain->ops) {
iommu_detach_group(domain->domain, group->iommu_group);
if (!iommu_attach_group(d->domain,
group->iommu_group)) {
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-07-01 Thread Nicolin Chen via iommu
Cases like VFIO wish to attach a device to an existing domain that was
not allocated specifically from the device. This raises a condition
where the IOMMU driver can fail the domain attach because the domain and
device are incompatible with each other.

This is a soft failure that can be resolved by using a different domain.

Provide a dedicated errno from the IOMMU driver during attach to indicate
that the reason the attach failed is domain incompatibility. EMEDIUMTYPE
is chosen because it is never used within the iommu subsystem today and
evokes a sense that the 'medium' aka the domain is incompatible.

VFIO can use this to know attach is a soft failure and it should continue
searching. Otherwise the attach will be a hard failure and VFIO will
return the code to userspace.

Update all drivers to return EMEDIUMTYPE in their failure paths that are
related to domain incompatibility. Also remove adjacent error prints for
these soft failures, to prevent kernel log spam, since -EMEDIUMTYPE is
clear enough to indicate an incompatibility error.

Add kdocs describing this behavior.

Suggested-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Reviewed-by: Lu Baolu 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/amd/iommu.c   |  2 +-
 drivers/iommu/apple-dart.c  |  4 +--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +++
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |  5 +---
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |  9 ++-
 drivers/iommu/intel/iommu.c | 10 +++-
 drivers/iommu/iommu.c   | 28 +
 drivers/iommu/ipmmu-vmsa.c  |  4 +--
 drivers/iommu/omap-iommu.c  |  3 +--
 drivers/iommu/s390-iommu.c  |  2 +-
 drivers/iommu/sprd-iommu.c  |  6 ++---
 drivers/iommu/tegra-gart.c  |  2 +-
 drivers/iommu/virtio-iommu.c|  3 +--
 13 files changed, 47 insertions(+), 46 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 840831d5d2ad..ad499658a6b6 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1662,7 +1662,7 @@ static int attach_device(struct device *dev,
if (domain->flags & PD_IOMMUV2_MASK) {
struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
 
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
goto out;
 
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 8af0242a90d9..e58dc310afd7 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -495,10 +495,10 @@ static int apple_dart_attach_dev(struct iommu_domain 
*domain,
 
if (cfg->stream_maps[0].dart->force_bypass &&
domain->type != IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
if (!cfg->stream_maps[0].dart->supports_bypass &&
domain->type == IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
 
ret = apple_dart_finalize_domain(domain, cfg);
if (ret)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 88817a3376ef..5b64138f549d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2420,24 +2420,15 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
goto out_unlock;
}
} else if (smmu_domain->smmu != smmu) {
-   dev_err(dev,
-   "cannot attach to SMMU %s (upstream of %s)\n",
-   dev_name(smmu_domain->smmu->dev),
-   dev_name(smmu->dev));
-   ret = -ENXIO;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
-   dev_err(dev,
-   "cannot attach to incompatible domain (%u SSID bits != 
%u)\n",
-   smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   smmu_domain->stall_enabled != master->stall_enabled) {
-   dev_err(dev, "cannot attach to stall-%s domain\n",
-   smmu_domain->stall_enabled ? "enabled" : "disabled");
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
}
 
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drive

[PATCH v5 0/5] cover-letter: Simplify vfio_iommu_type1 attach/detach routine

2022-07-01 Thread Nicolin Chen via iommu
This is a preparatory series for IOMMUFD v2 patches. It enforces error
code -EMEDIUMTYPE in iommu_attach_device() and iommu_attach_group() when
an IOMMU domain and a device/group are incompatible. It also drops the
useless domain->ops check since it won't fail in the current environment.

These allow VFIO iommu code to simplify its group attachment routine, by
avoiding the extra IOMMU domain allocations and attach/detach sequences
of the old code.

It is worth mentioning that the exact match for enforce_cache_coherency is
removed with this series, since there is very little value in doing that as
KVM will not be able to take advantage of it -- this just wastes domain memory.
Instead, we rely on the Intel IOMMU driver taking care of that internally.

This is on github:
https://github.com/nicolinc/iommufd/commits/vfio_iommu_attach

Changelog
v5:
 * Rebased on top of Robin's "Simplify bus_type determination".
 * Fixed a wrong change returning -EMEDIUMTYPE in arm-smmu driver.
 * Added Baolu's "Reviewed-by".
v4:
 * Dropped -EMEDIUMTYPE change in mtk_v1 driver per Robin's input
 * Added Baolu's and Kevin's Reviewed-by lines
v3: https://lore.kernel.org/kvm/20220623200029.26007-1-nicol...@nvidia.com/
 * Dropped all dev_err since -EMEDIUMTYPE clearly indicates what error.
 * Updated commit message of enforce_cache_coherency removing patch.
 * Updated commit message of domain->ops removing patch.
 * Replaced "goto out_unlock" with simply mutex_unlock() and return.
 * Added a line of comments for -EMEDIUMTYPE return check.
 * Moved iommu_get_msi_cookie() into alloc_attach_domain() as a cookie
   should be logically tied to the lifetime of a domain itself.
 * Added Kevin's "Reviewed-by".
v2: https://lore.kernel.org/kvm/20220616000304.23890-1-nicol...@nvidia.com/
 * Added -EMEDIUMTYPE to more IOMMU drivers that fit the category.
 * Changed dev_err to dev_dbg for -EMEDIUMTYPE to avoid kernel log spam.
 * Dropped iommu_ops patch, and removed domain->ops in VFIO directly,
   since there's no mixed-driver use case that would fail the sanity.
 * Updated commit log of the patch removing enforce_cache_coherency.
 * Fixed a misplace of "num_non_pinned_groups--" in detach_group patch.
 * Moved "num_non_pinned_groups++" in PATCH-5 to the common path between
   domain-reusing and new-domain pathways, like the code previously did.
 * Fixed a typo in EMEDIUMTYPE patch.
v1: https://lore.kernel.org/kvm/20220606061927.26049-1-nicol...@nvidia.com/

Jason Gunthorpe (1):
  vfio/iommu_type1: Prefer to reuse domains vs match enforced cache
coherency

Nicolin Chen (4):
  iommu: Return -EMEDIUMTYPE for incompatible domain and device/group
  vfio/iommu_type1: Remove the domain->ops comparison
  vfio/iommu_type1: Clean up update_dirty_scope in detach_group()
  vfio/iommu_type1: Simplify group attachment

 drivers/iommu/amd/iommu.c   |   2 +-
 drivers/iommu/apple-dart.c  |   4 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  15 +-
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |   5 +-
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |   9 +-
 drivers/iommu/intel/iommu.c |  10 +-
 drivers/iommu/iommu.c   |  28 ++
 drivers/iommu/ipmmu-vmsa.c  |   4 +-
 drivers/iommu/omap-iommu.c  |   3 +-
 drivers/iommu/s390-iommu.c  |   2 +-
 drivers/iommu/sprd-iommu.c  |   6 +-
 drivers/iommu/tegra-gart.c  |   2 +-
 drivers/iommu/virtio-iommu.c|   3 +-
 drivers/vfio/vfio_iommu_type1.c | 352 ++--
 14 files changed, 229 insertions(+), 216 deletions(-)

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-07-01 Thread Nicolin Chen via iommu
On Fri, Jul 01, 2022 at 07:17:38PM +0100, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 01/07/2022 5:43 pm, Nicolin Chen wrote:
> > On Fri, Jul 01, 2022 at 11:21:48AM +0100, Robin Murphy wrote:
> > 
> > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
> > > > b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > index 2ed3594f384e..072cac5ab5a4 100644
> > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > @@ -1135,10 +1135,8 @@ static int arm_smmu_attach_dev(struct 
> > > > iommu_domain *domain, struct device *dev)
> > > >struct arm_smmu_device *smmu;
> > > >int ret;
> > > > 
> > > > - if (!fwspec || fwspec->ops != _smmu_ops) {
> > > > - dev_err(dev, "cannot attach to SMMU, is it on the same 
> > > > bus?\n");
> > > > - return -ENXIO;
> > > > - }
> > > > + if (!fwspec || fwspec->ops != _smmu_ops)
> > > > + return -EMEDIUMTYPE;
> > > 
> > > This is the wrong check, you want the "if (smmu_domain->smmu != smmu)"
> > > condition further down. If this one fails it's effectively because the
> > > device doesn't have an IOMMU at all, and similar to patch #3 it will be
> > 
> > Thanks for the review! I will fix that. The "on the same bus" is
> > quite eye-catching.
> > 
> > > removed once the core code takes over properly (I even have both those
> > > patches written now!)
> > 
> > Actually in my v1 the proposal for ops check returned -EMEDIUMTYPE
> > also upon an ops mismatch, treating that too as an incompatibility.
> > Do you mean that we should have fine-grained it further?
> 
> On second look, I think this particular check was already entirely
> redundant by the time I made the fwspec conversion to it, oh well. Since
> it remains harmless for the time being, let's just ignore it entirely
> until we can confidently say goodbye to the whole lot[1].

That looks cleaner!

> I don't think there's any need to differentiate an instance mismatch
> from a driver mismatch, once the latter becomes realistically possible,
> mostly due to iommu_domain_alloc() also having to become device-aware to
> know which driver to allocate from. Thus as far as a user is concerned,
> if attaching a device to an existing domain fails with -EMEDIUMTYPE,
> allocating a new domain using the given device, and attaching to that,
> can be expected to succeed, regardless of why the original attempt was
> rejected. In fact even in the theoretical different-driver-per-bus model
> the same principle still holds up.

I see. Thanks for the explanation. 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-07-01 Thread Nicolin Chen via iommu
On Fri, Jul 01, 2022 at 11:21:48AM +0100, Robin Murphy wrote:

> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
> > b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > index 2ed3594f384e..072cac5ab5a4 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > @@ -1135,10 +1135,8 @@ static int arm_smmu_attach_dev(struct iommu_domain 
> > *domain, struct device *dev)
> >   struct arm_smmu_device *smmu;
> >   int ret;
> > 
> > - if (!fwspec || fwspec->ops != _smmu_ops) {
> > - dev_err(dev, "cannot attach to SMMU, is it on the same 
> > bus?\n");
> > - return -ENXIO;
> > - }
> > + if (!fwspec || fwspec->ops != _smmu_ops)
> > + return -EMEDIUMTYPE;
> 
> This is the wrong check, you want the "if (smmu_domain->smmu != smmu)"
> condition further down. If this one fails it's effectively because the
> device doesn't have an IOMMU at all, and similar to patch #3 it will be

Thanks for the review! I will fix that. The "on the same bus" is
quite eye-catching.

> removed once the core code takes over properly (I even have both those
> patches written now!)

Actually in my v1 the proposal for ops check returned -EMEDIUMTYPE
also upon an ops mismatch, treating that too as an incompatibility.
Do you mean that we should have fine-grained it further?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 4/5] vfio/iommu_type1: Clean up update_dirty_scope in detach_group()

2022-06-30 Thread Nicolin Chen via iommu
All devices in emulated_iommu_groups have pinned_page_dirty_scope
set, so the update_dirty_scope in the first list_for_each_entry
is always false. Clean it up, and move the "if update_dirty_scope"
part from the detach_group_done routine to the domain_list part.

Suggested-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 27 ---
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 11be5f95580b..b9ccb3cfac5d 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2453,14 +2453,12 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain;
struct vfio_iommu_group *group;
-   bool update_dirty_scope = false;
LIST_HEAD(iova_copy);
 
mutex_lock(>lock);
list_for_each_entry(group, >emulated_iommu_groups, next) {
if (group->iommu_group != iommu_group)
continue;
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
kfree(group);
 
@@ -2469,7 +2467,8 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
WARN_ON(iommu->notifier.head);
vfio_iommu_unmap_unpin_all(iommu);
}
-   goto detach_group_done;
+   mutex_unlock(>lock);
+   return;
}
 
/*
@@ -2485,9 +2484,7 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
continue;
 
iommu_detach_group(domain->domain, group->iommu_group);
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
-   kfree(group);
/*
 * Group ownership provides privilege, if the group list is
 * empty, the domain goes away. If it's the last domain with
@@ -2510,6 +2507,16 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
vfio_iommu_aper_expand(iommu, _copy);
vfio_update_pgsize_bitmap(iommu);
}
+   /*
+* Removal of a group without dirty tracking may allow
+* the iommu scope to be promoted.
+*/
+   if (!group->pinned_page_dirty_scope) {
+   iommu->num_non_pinned_groups--;
+   if (iommu->dirty_page_tracking)
+   vfio_iommu_populate_bitmap_full(iommu);
+   }
+   kfree(group);
break;
}
 
@@ -2518,16 +2525,6 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
else
vfio_iommu_iova_free(_copy);
 
-detach_group_done:
-   /*
-* Removal of a group without dirty tracking may allow the iommu scope
-* to be promoted.
-*/
-   if (update_dirty_scope) {
-   iommu->num_non_pinned_groups--;
-   if (iommu->dirty_page_tracking)
-   vfio_iommu_populate_bitmap_full(iommu);
-   }
mutex_unlock(>lock);
 }
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 5/5] vfio/iommu_type1: Simplify group attachment

2022-06-30 Thread Nicolin Chen via iommu
Un-inline the domain specific logic from the attach/detach_group ops into
two paired functions vfio_iommu_alloc_attach_domain() and
vfio_iommu_detach_destroy_domain() that strictly deal with creating and
destroying struct vfio_domains.

Add the logic to check for EMEDIUMTYPE return code of iommu_attach_group()
and avoid the extra domain allocations and attach/detach sequences of the
old code. This allows properly detecting an actual attach error, like
-ENOMEM, vs treating all attach errors as an incompatible domain.

Reviewed-by: Kevin Tian 
Co-developed-by: Jason Gunthorpe 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 321 +---
 1 file changed, 174 insertions(+), 147 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b9ccb3cfac5d..3ffa4e2d9d18 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2153,15 +2153,174 @@ static void vfio_iommu_iova_insert_copy(struct 
vfio_iommu *iommu,
list_splice_tail(iova_copy, iova);
 }
 
+static struct vfio_domain *
+vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu *iommu,
+  struct vfio_iommu_group *group,
+  struct list_head *group_resv_regions)
+{
+   struct iommu_domain *new_domain;
+   struct vfio_domain *domain;
+   phys_addr_t resv_msi_base;
+   int ret = 0;
+
+   /* Try to match an existing compatible domain */
+   list_for_each_entry (domain, >domain_list, next) {
+   ret = iommu_attach_group(domain->domain, group->iommu_group);
+   /* -EMEDIUMTYPE means an incompatible domain, so try next one */
+   if (ret == -EMEDIUMTYPE)
+   continue;
+   if (ret)
+   return ERR_PTR(ret);
+   goto done;
+   }
+
+   new_domain = iommu_domain_alloc(bus);
+   if (!new_domain)
+   return ERR_PTR(-EIO);
+
+   if (iommu->nesting) {
+   ret = iommu_enable_nesting(new_domain);
+   if (ret)
+   goto out_free_iommu_domain;
+   }
+
+   ret = iommu_attach_group(new_domain, group->iommu_group);
+   if (ret)
+   goto out_free_iommu_domain;
+
+   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+   if (!domain) {
+   ret = -ENOMEM;
+   goto out_detach;
+   }
+
+   domain->domain = new_domain;
+   vfio_test_domain_fgsp(domain);
+
+   /*
+* If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+* no-snoop set) then VFIO always turns this feature on because on Intel
+* platforms it optimizes KVM to disable wbinvd emulation.
+*/
+   if (new_domain->ops->enforce_cache_coherency)
+   domain->enforce_cache_coherency =
+   new_domain->ops->enforce_cache_coherency(new_domain);
+
+   /* replay mappings on new domains */
+   ret = vfio_iommu_replay(iommu, domain);
+   if (ret)
+   goto out_free_domain;
+
+   if (vfio_iommu_has_sw_msi(group_resv_regions, _msi_base)) {
+   ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
+   if (ret && ret != -ENODEV)
+   goto out_free_domain;
+   }
+
+   INIT_LIST_HEAD(>group_list);
+   list_add(>next, >domain_list);
+   vfio_update_pgsize_bitmap(iommu);
+
+done:
+   list_add(>next, >group_list);
+
+   /*
+* An iommu backed group can dirty memory directly and therefore
+* demotes the iommu scope until it declares itself dirty tracking
+* capable via the page pinning interface.
+*/
+   iommu->num_non_pinned_groups++;
+
+   return domain;
+
+out_free_domain:
+   kfree(domain);
+out_detach:
+   iommu_detach_group(new_domain, group->iommu_group);
+out_free_iommu_domain:
+   iommu_domain_free(new_domain);
+   return ERR_PTR(ret);
+}
+
+static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
+{
+   struct rb_node *node;
+
+   while ((node = rb_first(>dma_list)))
+   vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
+}
+
+static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
+{
+   struct rb_node *n, *p;
+
+   n = rb_first(>dma_list);
+   for (; n; n = rb_next(n)) {
+   struct vfio_dma *dma;
+   long locked = 0, unlocked = 0;
+
+   dma = rb_entry(n, struct vfio_dma, node);
+   unlocked += vfio_unmap_unpin(iommu, dma, false);
+   p = rb_first(>pfn_list);
+   for (; p; p = rb_next(p)) {
+   struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
+ 

[PATCH v4 2/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

2022-06-30 Thread Nicolin Chen via iommu
From: Jason Gunthorpe 

The KVM mechanism for controlling wbinvd is based on OR of the coherency
property of all devices attached to a guest, no matter whether those
devices are attached to a single domain or multiple domains.

On the other hand, the benefit to using separate domains was that those
devices attached to domains supporting enforced cache coherency always
mapped with the attributes necessary to provide that feature, therefore
if a non-enforced domain was dropped, the associated group removal would
re-trigger an evaluation by KVM.

In practice however, the only known cases of such mixed domains included
an Intel IGD device behind an IOMMU lacking snoop control, where such
devices do not support hotplug, therefore this scenario lacks testing and
is not considered sufficiently relevant to support.

After all, KVM won't take advantage of trying to push a device that could
do enforced cache coherency to a dedicated domain vs re-using an existing
domain, which is non-coherent.

Simplify this code and eliminate the test. This removes the only logic
that needed to have a dummy domain attached prior to searching for a
matching domain and simplifies the next patches.

It's unclear whether we want to further optimize the Intel driver to
update the domain coherency after a device is detached from it, at
least not before KVM can be verified to handle such dynamics in related
emulation paths (wbinvd, vcpu load, write_cr0, ept, etc.). In reality
we don't see a usage requiring such optimization, as the only device
which imposes such non-coherency is the Intel GPU, which doesn't even
support hotplug/hot remove.

Signed-off-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Reviewed-by: Lu Baolu 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e357..f4e3b423a453 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2285,9 +2285,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 * testing if they're on the same bus_type.
 */
list_for_each_entry(d, >domain_list, next) {
-   if (d->domain->ops == domain->domain->ops &&
-   d->enforce_cache_coherency ==
-   domain->enforce_cache_coherency) {
+   if (d->domain->ops == domain->domain->ops) {
iommu_detach_group(domain->domain, group->iommu_group);
if (!iommu_attach_group(d->domain,
group->iommu_group)) {
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 3/5] vfio/iommu_type1: Remove the domain->ops comparison

2022-06-30 Thread Nicolin Chen via iommu
The domain->ops validation was added, as a precaution, for mixed-driver
systems.

Per Robin's remarks,
* While bus_set_iommu() still exists, the core code prevents multiple
  drivers from registering, so we can't really run into a situation of
  having a mixed-driver system:
  https://lore.kernel.org/kvm/6e1280c5-4b22-ebb3-3912-6c72bc169...@arm.com/

* And there's plenty more significant problems than this to fix; in future
  when many can be permitted, we will rely on the IOMMU core code to check
  the domain->ops:
  https://lore.kernel.org/kvm/6575de6d-94ba-c427-5b1e-967750ddf...@arm.com/

So remove the check in VFIO for simplicity.

Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index f4e3b423a453..11be5f95580b 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2277,29 +2277,19 @@ static int vfio_iommu_type1_attach_group(void 
*iommu_data,
domain->domain->ops->enforce_cache_coherency(
domain->domain);
 
-   /*
-* Try to match an existing compatible domain.  We don't want to
-* preclude an IOMMU driver supporting multiple bus_types and being
-* able to include different bus_types in the same IOMMU domain, so
-* we test whether the domains use the same iommu_ops rather than
-* testing if they're on the same bus_type.
-*/
+   /* Try to match an existing compatible domain */
list_for_each_entry(d, >domain_list, next) {
-   if (d->domain->ops == domain->domain->ops) {
-   iommu_detach_group(domain->domain, group->iommu_group);
-   if (!iommu_attach_group(d->domain,
-   group->iommu_group)) {
-   list_add(>next, >group_list);
-   iommu_domain_free(domain->domain);
-   kfree(domain);
-   goto done;
-   }
-
-   ret = iommu_attach_group(domain->domain,
-group->iommu_group);
-   if (ret)
-   goto out_domain;
+   iommu_detach_group(domain->domain, group->iommu_group);
+   if (!iommu_attach_group(d->domain, group->iommu_group)) {
+   list_add(>next, >group_list);
+   iommu_domain_free(domain->domain);
+   kfree(domain);
+   goto done;
}
+
+   ret = iommu_attach_group(domain->domain,  group->iommu_group);
+   if (ret)
+   goto out_domain;
}
 
vfio_test_domain_fgsp(domain);
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-30 Thread Nicolin Chen via iommu
Cases like VFIO wish to attach a device to an existing domain that was
not allocated specifically from the device. This raises a condition
where the IOMMU driver can fail the domain attach because the domain and
device are incompatible with each other.

This is a soft failure that can be resolved by using a different domain.

Provide a dedicated errno from the IOMMU driver during attach to indicate
that the reason the attach failed is domain incompatibility. EMEDIUMTYPE
is chosen because it is never used within the iommu subsystem today and
evokes a sense that the 'medium' aka the domain is incompatible.

VFIO can use this to know attach is a soft failure and it should continue
searching. Otherwise the attach will be a hard failure and VFIO will
return the code to userspace.

Update all drivers to return EMEDIUMTYPE in their failure paths that are
related to domain incompatibility. Also remove adjacent error prints for
these soft failures, to prevent kernel log spam, since -EMEDIUMTYPE is
clear enough to indicate an incompatibility error.

Add kdocs describing this behavior.

Suggested-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/amd/iommu.c   |  2 +-
 drivers/iommu/apple-dart.c  |  4 +--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +++
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |  6 ++---
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |  9 ++-
 drivers/iommu/intel/iommu.c | 10 +++-
 drivers/iommu/iommu.c   | 28 +
 drivers/iommu/ipmmu-vmsa.c  |  4 +--
 drivers/iommu/omap-iommu.c  |  3 +--
 drivers/iommu/s390-iommu.c  |  2 +-
 drivers/iommu/sprd-iommu.c  |  6 ++---
 drivers/iommu/tegra-gart.c  |  2 +-
 drivers/iommu/virtio-iommu.c|  3 +--
 13 files changed, 48 insertions(+), 46 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index a56a9ad3273e..e851c3e91145 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1743,7 +1743,7 @@ static int attach_device(struct device *dev,
if (domain->flags & PD_IOMMUV2_MASK) {
struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
 
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
goto out;
 
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 8af0242a90d9..e58dc310afd7 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -495,10 +495,10 @@ static int apple_dart_attach_dev(struct iommu_domain 
*domain,
 
if (cfg->stream_maps[0].dart->force_bypass &&
domain->type != IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
if (!cfg->stream_maps[0].dart->supports_bypass &&
domain->type == IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
 
ret = apple_dart_finalize_domain(domain, cfg);
if (ret)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 88817a3376ef..5b64138f549d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2420,24 +2420,15 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
goto out_unlock;
}
} else if (smmu_domain->smmu != smmu) {
-   dev_err(dev,
-   "cannot attach to SMMU %s (upstream of %s)\n",
-   dev_name(smmu_domain->smmu->dev),
-   dev_name(smmu->dev));
-   ret = -ENXIO;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
-   dev_err(dev,
-   "cannot attach to incompatible domain (%u SSID bits != 
%u)\n",
-   smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   smmu_domain->stall_enabled != master->stall_enabled) {
-   dev_err(dev, "cannot attach to stall-%s domain\n",
-   smmu_domain->stall_enabled ? "enabled" : "disabled");
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
}
 
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/ar

[PATCH v4 0/5] Simplify vfio_iommu_type1 attach/detach routine

2022-06-30 Thread Nicolin Chen via iommu
This is a preparatory series for IOMMUFD v2 patches. It enforces error
code -EMEDIUMTYPE in iommu_attach_device() and iommu_attach_group() when
an IOMMU domain and a device/group are incompatible. It also drops the
useless domain->ops check since it won't fail in current environment.

These allow VFIO iommu code to simplify its group attachment routine, by
avoiding the extra IOMMU domain allocations and attach/detach sequences
of the old code.

It is worth mentioning that the exact match for enforce_cache_coherency is
removed with this series, since there's very little value in doing that as
KVM will not be able to take advantage of it -- this just wastes domain
memory. Instead, we rely on the Intel IOMMU driver taking care of that
internally.

This is on github:
https://github.com/nicolinc/iommufd/commits/vfio_iommu_attach

Changelog
v4:
 * Dropped EMEDIUMTYPE change in mtk_v1 driver per Robin's input
 * Added Baolu's and Kevin's Reviewed-by lines
v3: https://lore.kernel.org/kvm/20220623200029.26007-1-nicol...@nvidia.com/
 * Dropped all dev_err since -EMEDIUMTYPE clearly indicates what error.
 * Updated commit message of enforce_cache_coherency removing patch.
 * Updated commit message of domain->ops removing patch.
 * Replaced "goto out_unlock" with simply mutex_unlock() and return.
 * Added a line of comments for -EMEDIUMTYPE return check.
 * Moved iommu_get_msi_cookie() into alloc_attach_domain() as a cookie
   should be logically tied to the lifetime of a domain itself.
 * Added Kevin's "Reviewed-by".
v2: https://lore.kernel.org/kvm/20220616000304.23890-1-nicol...@nvidia.com/
 * Added -EMEDIUMTYPE to more IOMMU drivers that fit the category.
 * Changed dev_err to dev_dbg for -EMEDIUMTYPE to avoid kernel log spam.
 * Dropped iommu_ops patch, and removed domain->ops in VFIO directly,
   since there's no mixed-driver use case that would fail the sanity.
 * Updated commit log of the patch removing enforce_cache_coherency.
 * Fixed a misplace of "num_non_pinned_groups--" in detach_group patch.
 * Moved "num_non_pinned_groups++" in PATCH-5 to the common path between
   domain-reusing and new-domain pathways, like the code previously did.
 * Fixed a typo in EMEDIUMTYPE patch.
v1: https://lore.kernel.org/kvm/20220606061927.26049-1-nicol...@nvidia.com/

Jason Gunthorpe (1):
  vfio/iommu_type1: Prefer to reuse domains vs match enforced cache
coherency

Nicolin Chen (4):
  iommu: Return -EMEDIUMTYPE for incompatible domain and device/group
  vfio/iommu_type1: Remove the domain->ops comparison
  vfio/iommu_type1: Clean up update_dirty_scope in detach_group()
  vfio/iommu_type1: Simplify group attachment

 drivers/iommu/amd/iommu.c   |   2 +-
 drivers/iommu/apple-dart.c  |   4 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  15 +-
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |   6 +-
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |   9 +-
 drivers/iommu/intel/iommu.c |  10 +-
 drivers/iommu/iommu.c   |  28 ++
 drivers/iommu/ipmmu-vmsa.c  |   4 +-
 drivers/iommu/omap-iommu.c  |   3 +-
 drivers/iommu/s390-iommu.c  |   2 +-
 drivers/iommu/sprd-iommu.c  |   6 +-
 drivers/iommu/tegra-gart.c  |   2 +-
 drivers/iommu/virtio-iommu.c|   3 +-
 drivers/vfio/vfio_iommu_type1.c | 340 ++--
 14 files changed, 224 insertions(+), 210 deletions(-)

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-30 Thread Nicolin Chen via iommu
On Thu, Jun 30, 2022 at 09:21:42AM +0100, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022-06-29 20:47, Nicolin Chen wrote:
> > On Fri, Jun 24, 2022 at 03:19:43PM -0300, Jason Gunthorpe wrote:
> > > On Fri, Jun 24, 2022 at 06:35:49PM +0800, Yong Wu wrote:
> > > 
> > > > > > It's not used in VFIO context. "return 0" just satisfy the iommu
> > > > > > framework to go ahead. and yes, here we only allow the shared
> > > > > > "mapping-domain" (All the devices share a domain created
> > > > > > internally).
> > > 
> > > What part of the iommu framework is trying to attach a domain and
> > > wants to see success when the domain was not actually attached ?
> > > 
> > > > > What prevent this driver from being used in VFIO context?
> > > > 
> > > > Nothing prevent this. Just I didn't test.
> > > 
> > > This is why it is wrong to return success here.
> > 
> > Hi Yong, would you or someone you know be able to confirm whether
> > this "return 0" is still a must or not?
> 
> From memory, it is unfortunately required, due to this driver being in
> the rare position of having to support multiple devices in a single
> address space on 32-bit ARM. Since the old ARM DMA code doesn't
> understand groups, the driver sets up its own canonical
> dma_iommu_mapping to act like a default domain, but then has to politely
> say "yeah OK" to arm_setup_iommu_dma_ops() for each device so that they
> do all end up with the right DMA ops rather than dying in screaming
> failure (the ARM code's per-device mappings then get leaked, but we
> can't really do any better).
> 
> The whole mess disappears in the proper default domain conversion, but
> in the meantime, it's still safe to assume that nobody's doing VFIO with
> embedded display/video codec/etc. blocks that don't even have reset drivers.

Thanks for the input! I'll just respin it by dropping mtk_v1 diff.

Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-30 Thread Nicolin Chen via iommu
On Thu, Jun 30, 2022 at 05:33:16PM +0800, Yong Wu wrote:
> External email: Use caution opening links or attachments
> 
> 
> On Wed, 2022-06-29 at 12:47 -0700, Nicolin Chen wrote:
> > On Fri, Jun 24, 2022 at 03:19:43PM -0300, Jason Gunthorpe wrote:
> > > On Fri, Jun 24, 2022 at 06:35:49PM +0800, Yong Wu wrote:
> > >
> > > > > > It's not used in VFIO context. "return 0" just satisfy the
> > > > > > iommu
> > > > > > framework to go ahead. and yes, here we only allow the shared
> > > > > > "mapping-domain" (All the devices share a domain created
> > > > > > internally).
> > >
> > > What part of the iommu framework is trying to attach a domain and
> > > wants to see success when the domain was not actually attached ?
> > >
> > > > > What prevent this driver from being used in VFIO context?
> > > >
> > > > Nothing prevent this. Just I didn't test.
> > >
> > > This is why it is wrong to return success here.
> >
> > Hi Yong, would you or someone you know be able to confirm whether
> > this "return 0" is still a must or not?
> >
> > Considering that it's an old 32-bit platform for MTK, if it would
> > take time to do so, I'd like to drop the change in MTK driver and
> > note in commit log for you or other MTK folks to change in future.
> 
> Yes. Please help drop the change in this file.
> 
> Sorry I don't have the board at hand right now and I could not list the
> backtrace where this is needed(should be bus_iommu_probe from the
> previous debug...)

OK. Thanks for the reply.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-29 Thread Nicolin Chen via iommu
On Fri, Jun 24, 2022 at 03:19:43PM -0300, Jason Gunthorpe wrote:
> On Fri, Jun 24, 2022 at 06:35:49PM +0800, Yong Wu wrote:
> 
> > > > It's not used in VFIO context. "return 0" just satisfy the iommu
> > > > framework to go ahead. and yes, here we only allow the shared
> > > > "mapping-domain" (All the devices share a domain created
> > > > internally).
> 
> What part of the iommu framework is trying to attach a domain and
> wants to see success when the domain was not actually attached ?
> 
> > > What prevent this driver from being used in VFIO context?
> > 
> > Nothing prevent this. Just I didn't test.
> 
> This is why it is wrong to return success here.

Hi Yong, would you or someone you know be able to confirm whether
this "return 0" is still a must or not?

Considering that it's an old 32-bit platform for MTK, if it would
take time to do so, I'd like to drop the change in MTK driver and
note in commit log for you or other MTK folks to change in future.

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-23 Thread Nicolin Chen via iommu
On Fri, Jun 24, 2022 at 01:38:58PM +0800, Yong Wu wrote:

> > > > diff --git a/drivers/iommu/mtk_iommu_v1.c
> > > > b/drivers/iommu/mtk_iommu_v1.c
> > > > index e1cb51b9866c..5386d889429d 100644
> > > > --- a/drivers/iommu/mtk_iommu_v1.c
> > > > +++ b/drivers/iommu/mtk_iommu_v1.c
> > > > @@ -304,7 +304,7 @@ static int mtk_iommu_v1_attach_device(struct
> > > > iommu_domain *domain, struct device
> > > >   /* Only allow the domain created internally. */
> > > >   mtk_mapping = data->mapping;
> > > >   if (mtk_mapping->domain != domain)
> > > > - return 0;
> > > > + return -EMEDIUMTYPE;
> > > >
> > > >   if (!data->m4u_dom) {
> > > >   data->m4u_dom = dom;
> > >
> > > This change looks odd. It turns the return value from success to
> > > failure. Is it a bug? If so, it should go through a separated fix
> > > patch.
> 
> Thanks for the review:)
> 
> >
> > Makes sense.
> >
> > I read the commit log of the original change:
> >
> https://lore.kernel.org/r/1589530123-30240-1-git-send-email-yong...@mediatek.com
> >
> > It doesn't seem to allow devices to get attached to different
> > domains other than the shared mapping->domain, created in the
> > in the mtk_iommu_probe_device(). So it looks like returning 0
> > is intentional. Though I am still very confused by this return
> > value here, I doubt it has ever been used in a VFIO context.
> 
> It's not used in VFIO context. "return 0" just satisfy the iommu
> framework to go ahead. and yes, here we only allow the shared "mapping-
> >domain" (All the devices share a domain created internally).
> 
> thus I think we should still keep "return 0" here.

Thanks for the reply. I will just drop the change of this file.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-23 Thread Nicolin Chen via iommu
On Fri, Jun 24, 2022 at 09:35:49AM +0800, Baolu Lu wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022/6/24 04:00, Nicolin Chen wrote:
> > diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
> > index e1cb51b9866c..5386d889429d 100644
> > --- a/drivers/iommu/mtk_iommu_v1.c
> > +++ b/drivers/iommu/mtk_iommu_v1.c
> > @@ -304,7 +304,7 @@ static int mtk_iommu_v1_attach_device(struct 
> > iommu_domain *domain, struct device
> >   /* Only allow the domain created internally. */
> >   mtk_mapping = data->mapping;
> >   if (mtk_mapping->domain != domain)
> > - return 0;
> > + return -EMEDIUMTYPE;
> > 
> >   if (!data->m4u_dom) {
> >   data->m4u_dom = dom;
> 
> This change looks odd. It turns the return value from success to
> failure. Is it a bug? If so, it should go through a separated fix patch.

Makes sense.

I read the commit log of the original change:
https://lore.kernel.org/r/1589530123-30240-1-git-send-email-yong...@mediatek.com

It doesn't seem to allow devices to get attached to different
domains other than the shared mapping->domain, created in
mtk_iommu_probe_device(). So it looks like returning 0
is intentional. Though I am still very confused by this return
value here, I doubt it has ever been used in a VFIO context.

Yong, would you please give us some input?

Overall, I feel it's better to play it safe here by dropping
this part. If we later confirm there is a need to fix it, we
will do that in a separate patch anyway.

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 3/5] vfio/iommu_type1: Remove the domain->ops comparison

2022-06-23 Thread Nicolin Chen via iommu
The domain->ops validation was added, as a precaution, for mixed-driver
systems.

Per Robin's remarks,
* While bus_set_iommu() still exists, the core code prevents multiple
  drivers from registering, so we can't really run into a situation of
  having a mixed-driver system:
  https://lore.kernel.org/kvm/6e1280c5-4b22-ebb3-3912-6c72bc169...@arm.com/

* And there's plenty more significant problems than this to fix; in future
  when many can be permitted, we will rely on the IOMMU core code to check
  the domain->ops:
  https://lore.kernel.org/kvm/6575de6d-94ba-c427-5b1e-967750ddf...@arm.com/

So remove the check in VFIO for simplicity.

Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index f4e3b423a453..11be5f95580b 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2277,29 +2277,19 @@ static int vfio_iommu_type1_attach_group(void 
*iommu_data,
domain->domain->ops->enforce_cache_coherency(
domain->domain);
 
-   /*
-* Try to match an existing compatible domain.  We don't want to
-* preclude an IOMMU driver supporting multiple bus_types and being
-* able to include different bus_types in the same IOMMU domain, so
-* we test whether the domains use the same iommu_ops rather than
-* testing if they're on the same bus_type.
-*/
+   /* Try to match an existing compatible domain */
list_for_each_entry(d, >domain_list, next) {
-   if (d->domain->ops == domain->domain->ops) {
-   iommu_detach_group(domain->domain, group->iommu_group);
-   if (!iommu_attach_group(d->domain,
-   group->iommu_group)) {
-   list_add(>next, >group_list);
-   iommu_domain_free(domain->domain);
-   kfree(domain);
-   goto done;
-   }
-
-   ret = iommu_attach_group(domain->domain,
-group->iommu_group);
-   if (ret)
-   goto out_domain;
+   iommu_detach_group(domain->domain, group->iommu_group);
+   if (!iommu_attach_group(d->domain, group->iommu_group)) {
+   list_add(>next, >group_list);
+   iommu_domain_free(domain->domain);
+   kfree(domain);
+   goto done;
}
+
+   ret = iommu_attach_group(domain->domain,  group->iommu_group);
+   if (ret)
+   goto out_domain;
}
 
vfio_test_domain_fgsp(domain);
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 4/5] vfio/iommu_type1: Clean up update_dirty_scope in detach_group()

2022-06-23 Thread Nicolin Chen via iommu
All devices in emulated_iommu_groups have pinned_page_dirty_scope
set, so the update_dirty_scope in the first list_for_each_entry
is always false. Clean it up, and move the "if update_dirty_scope"
part from the detach_group_done routine to the domain_list part.

Suggested-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 27 ---
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 11be5f95580b..b9ccb3cfac5d 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2453,14 +2453,12 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain;
struct vfio_iommu_group *group;
-   bool update_dirty_scope = false;
LIST_HEAD(iova_copy);
 
mutex_lock(>lock);
list_for_each_entry(group, >emulated_iommu_groups, next) {
if (group->iommu_group != iommu_group)
continue;
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
kfree(group);
 
@@ -2469,7 +2467,8 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
WARN_ON(iommu->notifier.head);
vfio_iommu_unmap_unpin_all(iommu);
}
-   goto detach_group_done;
+   mutex_unlock(>lock);
+   return;
}
 
/*
@@ -2485,9 +2484,7 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
continue;
 
iommu_detach_group(domain->domain, group->iommu_group);
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
-   kfree(group);
/*
 * Group ownership provides privilege, if the group list is
 * empty, the domain goes away. If it's the last domain with
@@ -2510,6 +2507,16 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
vfio_iommu_aper_expand(iommu, _copy);
vfio_update_pgsize_bitmap(iommu);
}
+   /*
+* Removal of a group without dirty tracking may allow
+* the iommu scope to be promoted.
+*/
+   if (!group->pinned_page_dirty_scope) {
+   iommu->num_non_pinned_groups--;
+   if (iommu->dirty_page_tracking)
+   vfio_iommu_populate_bitmap_full(iommu);
+   }
+   kfree(group);
break;
}
 
@@ -2518,16 +2525,6 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
else
vfio_iommu_iova_free(_copy);
 
-detach_group_done:
-   /*
-* Removal of a group without dirty tracking may allow the iommu scope
-* to be promoted.
-*/
-   if (update_dirty_scope) {
-   iommu->num_non_pinned_groups--;
-   if (iommu->dirty_page_tracking)
-   vfio_iommu_populate_bitmap_full(iommu);
-   }
mutex_unlock(>lock);
 }
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 5/5] vfio/iommu_type1: Simplify group attachment

2022-06-23 Thread Nicolin Chen via iommu
Un-inline the domain specific logic from the attach/detach_group ops into
two paired functions vfio_iommu_alloc_attach_domain() and
vfio_iommu_detach_destroy_domain() that strictly deal with creating and
destroying struct vfio_domains.

Add the logic to check for EMEDIUMTYPE return code of iommu_attach_group()
and avoid the extra domain allocations and attach/detach sequences of the
old code. This allows properly detecting an actual attach error, like
-ENOMEM, vs treating all attach errors as an incompatible domain.

Co-developed-by: Jason Gunthorpe 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 321 +---
 1 file changed, 174 insertions(+), 147 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b9ccb3cfac5d..3ffa4e2d9d18 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2153,15 +2153,174 @@ static void vfio_iommu_iova_insert_copy(struct 
vfio_iommu *iommu,
list_splice_tail(iova_copy, iova);
 }
 
+static struct vfio_domain *
+vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu *iommu,
+  struct vfio_iommu_group *group,
+  struct list_head *group_resv_regions)
+{
+   struct iommu_domain *new_domain;
+   struct vfio_domain *domain;
+   phys_addr_t resv_msi_base;
+   int ret = 0;
+
+   /* Try to match an existing compatible domain */
+   list_for_each_entry (domain, >domain_list, next) {
+   ret = iommu_attach_group(domain->domain, group->iommu_group);
+   /* -EMEDIUMTYPE means an incompatible domain, so try next one */
+   if (ret == -EMEDIUMTYPE)
+   continue;
+   if (ret)
+   return ERR_PTR(ret);
+   goto done;
+   }
+
+   new_domain = iommu_domain_alloc(bus);
+   if (!new_domain)
+   return ERR_PTR(-EIO);
+
+   if (iommu->nesting) {
+   ret = iommu_enable_nesting(new_domain);
+   if (ret)
+   goto out_free_iommu_domain;
+   }
+
+   ret = iommu_attach_group(new_domain, group->iommu_group);
+   if (ret)
+   goto out_free_iommu_domain;
+
+   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+   if (!domain) {
+   ret = -ENOMEM;
+   goto out_detach;
+   }
+
+   domain->domain = new_domain;
+   vfio_test_domain_fgsp(domain);
+
+   /*
+* If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+* no-snoop set) then VFIO always turns this feature on because on Intel
+* platforms it optimizes KVM to disable wbinvd emulation.
+*/
+   if (new_domain->ops->enforce_cache_coherency)
+   domain->enforce_cache_coherency =
+   new_domain->ops->enforce_cache_coherency(new_domain);
+
+   /* replay mappings on new domains */
+   ret = vfio_iommu_replay(iommu, domain);
+   if (ret)
+   goto out_free_domain;
+
+   if (vfio_iommu_has_sw_msi(group_resv_regions, _msi_base)) {
+   ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
+   if (ret && ret != -ENODEV)
+   goto out_free_domain;
+   }
+
+   INIT_LIST_HEAD(>group_list);
+   list_add(>next, >domain_list);
+   vfio_update_pgsize_bitmap(iommu);
+
+done:
+   list_add(>next, >group_list);
+
+   /*
+* An iommu backed group can dirty memory directly and therefore
+* demotes the iommu scope until it declares itself dirty tracking
+* capable via the page pinning interface.
+*/
+   iommu->num_non_pinned_groups++;
+
+   return domain;
+
+out_free_domain:
+   kfree(domain);
+out_detach:
+   iommu_detach_group(new_domain, group->iommu_group);
+out_free_iommu_domain:
+   iommu_domain_free(new_domain);
+   return ERR_PTR(ret);
+}
+
+static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
+{
+   struct rb_node *node;
+
+   while ((node = rb_first(>dma_list)))
+   vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
+}
+
+static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
+{
+   struct rb_node *n, *p;
+
+   n = rb_first(>dma_list);
+   for (; n; n = rb_next(n)) {
+   struct vfio_dma *dma;
+   long locked = 0, unlocked = 0;
+
+   dma = rb_entry(n, struct vfio_dma, node);
+   unlocked += vfio_unmap_unpin(iommu, dma, false);
+   p = rb_first(>pfn_list);
+   for (; p; p = rb_next(p)) {
+   struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
+ 

[PATCH v3 2/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

2022-06-23 Thread Nicolin Chen via iommu
From: Jason Gunthorpe 

The KVM mechanism for controlling wbinvd is based on OR of the coherency
property of all devices attached to a guest, no matter whether those
devices are attached to a single domain or multiple domains.

On the other hand, the benefit to using separate domains was that those
devices attached to domains supporting enforced cache coherency always
mapped with the attributes necessary to provide that feature, therefore
if a non-enforced domain was dropped, the associated group removal would
re-trigger an evaluation by KVM.

In practice however, the only known cases of such mixed domains included
an Intel IGD device behind an IOMMU lacking snoop control, where such
devices do not support hotplug, therefore this scenario lacks testing and
is not considered sufficiently relevant to support.

After all, KVM won't take advantage of trying to push a device that could
do enforced cache coherency to a dedicated domain vs re-using an existing
domain, which is non-coherent.

Simplify this code and eliminate the test. This removes the only logic
that needed to have a dummy domain attached prior to searching for a
matching domain and simplifies the next patches.

It's unclear whether we want to further optimize the Intel driver to
update the domain coherency after a device is detached from it, at
least not before KVM can be verified to handle such dynamics in related
emulation paths (wbinvd, vcpu load, write_cr0, ept, etc.). In reality
we don't see a usage requiring such optimization, as the only device
which imposes such non-coherency is the Intel GPU, which doesn't even
support hotplug/hot remove.

Signed-off-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e357..f4e3b423a453 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2285,9 +2285,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 * testing if they're on the same bus_type.
 */
list_for_each_entry(d, >domain_list, next) {
-   if (d->domain->ops == domain->domain->ops &&
-   d->enforce_cache_coherency ==
-   domain->enforce_cache_coherency) {
+   if (d->domain->ops == domain->domain->ops) {
iommu_detach_group(domain->domain, group->iommu_group);
if (!iommu_attach_group(d->domain,
group->iommu_group)) {
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-23 Thread Nicolin Chen via iommu
Cases like VFIO wish to attach a device to an existing domain that was
not allocated specifically from the device. This raises a condition
where the IOMMU driver can fail the domain attach because the domain and
device are incompatible with each other.

This is a soft failure that can be resolved by using a different domain.

Provide a dedicated errno from the IOMMU driver during attach to indicate
that the reason the attach failed is domain incompatibility. EMEDIUMTYPE
is chosen because it is never used within the iommu subsystem today and
evokes a sense that the 'medium' aka the domain is incompatible.

VFIO can use this to know attach is a soft failure and it should continue
searching. Otherwise the attach will be a hard failure and VFIO will
return the code to userspace.

Update all drivers to return EMEDIUMTYPE in their failure paths that are
related to domain incompatibility. Also remove adjacent error prints for
these soft failures, to prevent kernel log spam, since -EMEDIUMTYPE is
clear enough to indicate an incompatibility error.

Add kdocs describing this behavior.

Suggested-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/amd/iommu.c   |  2 +-
 drivers/iommu/apple-dart.c  |  4 +--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +++
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |  6 ++---
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |  9 ++-
 drivers/iommu/intel/iommu.c | 10 +++-
 drivers/iommu/iommu.c   | 28 +
 drivers/iommu/ipmmu-vmsa.c  |  4 +--
 drivers/iommu/mtk_iommu_v1.c|  2 +-
 drivers/iommu/omap-iommu.c  |  3 +--
 drivers/iommu/s390-iommu.c  |  2 +-
 drivers/iommu/sprd-iommu.c  |  6 ++---
 drivers/iommu/tegra-gart.c  |  2 +-
 drivers/iommu/virtio-iommu.c|  3 +--
 14 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 840831d5d2ad..ad499658a6b6 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1662,7 +1662,7 @@ static int attach_device(struct device *dev,
if (domain->flags & PD_IOMMUV2_MASK) {
struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
 
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
goto out;
 
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 8af0242a90d9..e58dc310afd7 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -495,10 +495,10 @@ static int apple_dart_attach_dev(struct iommu_domain 
*domain,
 
if (cfg->stream_maps[0].dart->force_bypass &&
domain->type != IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
if (!cfg->stream_maps[0].dart->supports_bypass &&
domain->type == IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
 
ret = apple_dart_finalize_domain(domain, cfg);
if (ret)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 88817a3376ef..5b64138f549d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2420,24 +2420,15 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
goto out_unlock;
}
} else if (smmu_domain->smmu != smmu) {
-   dev_err(dev,
-   "cannot attach to SMMU %s (upstream of %s)\n",
-   dev_name(smmu_domain->smmu->dev),
-   dev_name(smmu->dev));
-   ret = -ENXIO;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
-   dev_err(dev,
-   "cannot attach to incompatible domain (%u SSID bits != 
%u)\n",
-   smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   smmu_domain->stall_enabled != master->stall_enabled) {
-   dev_err(dev, "cannot attach to stall-%s domain\n",
-   smmu_domain->stall_enabled ? "enabled" : "disabled");
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
}
 
diff --git a/drivers/iommu

[PATCH v3 0/5] Simplify vfio_iommu_type1 attach/detach routine

2022-06-23 Thread Nicolin Chen via iommu
This is a preparatory series for IOMMUFD v2 patches. It enforces error
code -EMEDIUMTYPE in iommu_attach_device() and iommu_attach_group() when
an IOMMU domain and a device/group are incompatible. It also drops the
useless domain->ops check since it won't fail in current environment.

These allow VFIO iommu code to simplify its group attachment routine, by
avoiding the extra IOMMU domain allocations and attach/detach sequences
of the old code.

It is worth mentioning that the exact match for enforce_cache_coherency is
removed with this series, since there is very little value in doing that:
KVM won't be able to take advantage of it -- this just wastes domain memory.
Instead, we rely on the Intel IOMMU driver taking care of that internally.

This is on github:
https://github.com/nicolinc/iommufd/commits/vfio_iommu_attach

Changelog
v3:
 * Dropped all dev_err since -EMEDIUMTYPE clearly indicates what error.
 * Updated commit message of enforce_cache_coherency removing patch.
 * Updated commit message of domain->ops removing patch.
 * Replaced "goto out_unlock" with simply mutex_unlock() and return.
 * Added a line of comments for -EMEDIUMTYPE return check.
 * Moved iommu_get_msi_cookie() into alloc_attach_domain() as a cookie
   should be logically tied to the lifetime of a domain itself.
 * Added Kevin's "Reviewed-by".
v2: https://lore.kernel.org/kvm/20220616000304.23890-1-nicol...@nvidia.com/
 * Added -EMEDIUMTYPE to more IOMMU drivers that fit the category.
 * Changed dev_err to dev_dbg for -EMEDIUMTYPE to avoid kernel log spam.
 * Dropped iommu_ops patch, and removed domain->ops in VFIO directly,
   since there's no mixed-driver use case that would fail the sanity.
 * Updated commit log of the patch removing enforce_cache_coherency.
 * Fixed a misplace of "num_non_pinned_groups--" in detach_group patch.
 * Moved "num_non_pinned_groups++" in PATCH-5 to the common path between
   domain-reusing and new-domain pathways, like the code previously did.
 * Fixed a typo in EMEDIUMTYPE patch.
v1: https://lore.kernel.org/kvm/20220606061927.26049-1-nicol...@nvidia.com/

Jason Gunthorpe (1):
  vfio/iommu_type1: Prefer to reuse domains vs match enforced cache
coherency

Nicolin Chen (4):
  iommu: Return -EMEDIUMTYPE for incompatible domain and device/group
  vfio/iommu_type1: Remove the domain->ops comparison
  vfio/iommu_type1: Clean up update_dirty_scope in detach_group()
  vfio/iommu_type1: Simplify group attachment

 drivers/iommu/amd/iommu.c   |   2 +-
 drivers/iommu/apple-dart.c  |   4 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  15 +-
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |   6 +-
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |   9 +-
 drivers/iommu/intel/iommu.c |  10 +-
 drivers/iommu/iommu.c   |  28 ++
 drivers/iommu/ipmmu-vmsa.c  |   4 +-
 drivers/iommu/mtk_iommu_v1.c|   2 +-
 drivers/iommu/omap-iommu.c  |   3 +-
 drivers/iommu/s390-iommu.c  |   2 +-
 drivers/iommu/sprd-iommu.c  |   6 +-
 drivers/iommu/tegra-gart.c  |   2 +-
 drivers/iommu/virtio-iommu.c|   3 +-
 drivers/vfio/vfio_iommu_type1.c | 340 ++--
 15 files changed, 225 insertions(+), 211 deletions(-)

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 3/5] vfio/iommu_type1: Remove the domain->ops comparison

2022-06-23 Thread Nicolin Chen via iommu
On Thu, Jun 23, 2022 at 03:50:22AM +0000, Tian, Kevin wrote:
> External email: Use caution opening links or attachments
> 
> 
> > From: Robin Murphy 
> > Sent: Wednesday, June 22, 2022 3:55 PM
> >
> > On 2022-06-16 23:23, Nicolin Chen wrote:
> > > On Thu, Jun 16, 2022 at 06:40:14AM +0000, Tian, Kevin wrote:
> > >
> > >>> The domain->ops validation was added, as a precaution, for mixed-
> > driver
> > >>> systems. However, at this moment only one iommu driver is possible. So
> > >>> remove it.
> > >>
> > >> It's true on a physical platform. But I'm not sure whether a virtual
> > platform
> > >> is allowed to include multiple e.g. one virtio-iommu alongside a virtual 
> > >> VT-
> > d
> > >> or a virtual smmu. It might be clearer to claim that (as Robin pointed 
> > >> out)
> > >> there is plenty more significant problems than this to solve instead of
> > simply
> > >> saying that only one iommu driver is possible if we don't have explicit
> > code
> > >> to reject such configuration. 
> > >
> > > Will edit this part. Thanks!
> >
> > Oh, physical platforms with mixed IOMMUs definitely exist already. The
> > main point is that while bus_set_iommu still exists, the core code
> > effectively *does* prevent multiple drivers from registering - even in
> > emulated cases like the example above, virtio-iommu and VT-d would both
> > try to bus_set_iommu(&pci_bus_type), and one of them will lose. The
> > aspect which might warrant clarification is that there's no combination
> > of supported drivers which claim non-overlapping buses *and* could
> > appear in the same system - even if you tried to contrive something by
> > emulating, say, VT-d (PCI) alongside rockchip-iommu (platform), you
> > could still only describe one or the other due to ACPI vs. Devicetree.
> >
> 
> This explanation is much clearer! thanks.

Thanks +1

I've also updated the commit log.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v2 2/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

2022-06-21 Thread Nicolin Chen via iommu
On Tue, Jun 21, 2022 at 04:46:02PM -0600, Alex Williamson wrote:
> External email: Use caution opening links or attachments
> 
> 
> On Wed, 15 Jun 2022 17:03:01 -0700
> Nicolin Chen  wrote:
> 
> > From: Jason Gunthorpe 
> >
> > The KVM mechanism for controlling wbinvd is based on OR of the coherency
> > property of all devices attached to a guest, no matter those devices are
> > attached to a single domain or multiple domains.
> >
> > So, there is no value in trying to push a device that could do enforced
> > cache coherency to a dedicated domain vs re-using an existing domain
> > which is non-coherent since KVM won't be able to take advantage of it.
> > This just wastes domain memory.
> >
> > Simplify this code and eliminate the test. This removes the only logic
> > that needed to have a dummy domain attached prior to searching for a
> > matching domain and simplifies the next patches.
> >
> > It's unclear whether we want to further optimize the Intel driver to
> > update the domain coherency after a device is detached from it, at
> > least not before KVM can be verified to handle such dynamics in related
> > emulation paths (wbinvd, vcpu load, write_cr0, ept, etc.). In reality
> > we don't see an usage requiring such optimization as the only device
> > which imposes such non-coherency is Intel GPU which even doesn't
> > support hotplug/hot remove.
> 
> The 2nd paragraph above is quite misleading in this respect.  I think
> it would be more accurate to explain that the benefit to using separate
> domains was that devices attached to domains supporting enforced cache
> coherency always mapped with the attributes necessary to provide that
> feature, therefore if a non-enforced domain was dropped, the associated
> group removal would re-trigger an evaluation by KVM.  We can then go on
> to discuss that in practice the only known cases of such mixed domains
> included an Intel IGD device behind an IOMMU lacking snoop control,
> where such devices do not support hotplug, therefore this scenario lacks
> testing and is not considered sufficiently relevant to support.  Thanks,

Thanks for the input. I integrated that into the commit log:

vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

The KVM mechanism for controlling wbinvd is based on OR of the coherency
property of all devices attached to a guest, no matter whether those
devices are attached to a single domain or multiple domains.

On the other hand, the benefit to using separate domains was that those
devices attached to domains supporting enforced cache coherency always
mapped with the attributes necessary to provide that feature, therefore
if a non-enforced domain was dropped, the associated group removal would
re-trigger an evaluation by KVM.

In practice however, the only known cases of such mixed domains included
an Intel IGD device behind an IOMMU lacking snoop control, where such
devices do not support hotplug, therefore this scenario lacks testing and
is not considered sufficiently relevant to support.

After all, KVM won't take advantage of trying to push a device that could
do enforced cache coherency to a dedicated domain vs re-using an existing
domain, which is non-coherent.

Simplify this code and eliminate the test. This removes the only logic
that needed to have a dummy domain attached prior to searching for a
matching domain and simplifies the next patches.

It's unclear whether we want to further optimize the Intel driver to
update the domain coherency after a device is detached from it, at
least not before KVM can be verified to handle such dynamics in related
emulation paths (wbinvd, vcpu load, write_cr0, ept, etc.). In reality
we don't see a usage requiring such optimization, as the only device
which imposes such non-coherency is the Intel GPU, which doesn't even
support hotplug/hot remove.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/5] vfio/iommu_type1: Simplify group attachment

2022-06-21 Thread Nicolin Chen via iommu
On Mon, Jun 20, 2022 at 11:11:01AM +0100, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022-06-17 03:53, Tian, Kevin wrote:
> > > From: Nicolin Chen 
> > > Sent: Friday, June 17, 2022 6:41 AM
> > > 
> > > > ...
> > > > > - if (resv_msi) {
> > > > > + if (resv_msi && !domain->msi_cookie) {
> > > > >ret = iommu_get_msi_cookie(domain->domain,
> > > > > resv_msi_base);
> > > > >if (ret && ret != -ENODEV)
> > > > >goto out_detach;
> > > > > + domain->msi_cookie = true;
> > > > >}
> > > > 
> > > > why not moving to alloc_attach_domain() then no need for the new
> > > > domain field? It's required only when a new domain is allocated.
> > > 
> > > When reusing an existing domain that doesn't have an msi_cookie,
> > > we can do iommu_get_msi_cookie() if resv_msi is found. So it is
> > > not limited to a new domain.
> > 
> > Looks msi_cookie requirement is per platform (currently only
> > for smmu. see arm_smmu_get_resv_regions()). If there is
> > no mixed case then above check is not required.
> > 
> > But let's hear whether Robin has a different thought here.
> 
> Yes, the cookie should logically be tied to the lifetime of the domain
> itself. In the relevant context, "an existing domain that doesn't have
> an msi_cookie" should never exist.

Thanks for the explanation. I will move the iommu_get_msi_cookie()
into alloc_attach_domain(), as Kevin suggested.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/5] vfio/iommu_type1: Simplify group attachment

2022-06-21 Thread Nicolin Chen via iommu
On Mon, Jun 20, 2022 at 01:03:17AM -0300, Jason Gunthorpe wrote:
> On Fri, Jun 17, 2022 at 04:07:20PM -0700, Nicolin Chen wrote:
> 
> > > > > > + vfio_iommu_aper_expand(iommu, _copy);
> > > > >
> > > > > but now it's done for every group detach. The aperture is decided
> > > > > by domain geometry which is not affected by attached groups.
> > > >
> > > > Yea, I've noticed this part. Actually Jason did this change for
> > > > simplicity, and I think it'd be safe to do so?
> > > 
> > > Perhaps detach_destroy() can return a Boolean to indicate whether
> > > a domain is destroyed.
> > 
> > It could be a solution but doesn't feel that common for a clean
> > function to have a return value indicating a special case. Maybe
> > passing in "&domain" so that we can check if it's NULL after?
> 
> It is harmless to do every time, it just burns a few CPU cycles on a
> slow path. We don't need complexity to optimize it.

OK. I will keep it simple then. If Kevin or other has a further
objection, please let us know.

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/5] vfio/iommu_type1: Simplify group attachment

2022-06-17 Thread Nicolin Chen via iommu
On Fri, Jun 17, 2022 at 02:53:13AM +0000, Tian, Kevin wrote:
> > > ...
> > > > - if (resv_msi) {
> > > > + if (resv_msi && !domain->msi_cookie) {
> > > >   ret = iommu_get_msi_cookie(domain->domain,
> > > > resv_msi_base);
> > > >   if (ret && ret != -ENODEV)
> > > >   goto out_detach;
> > > > + domain->msi_cookie = true;
> > > >   }
> > >
> > > why not moving to alloc_attach_domain() then no need for the new
> > > domain field? It's required only when a new domain is allocated.
> >
> > When reusing an existing domain that doesn't have an msi_cookie,
> > we can do iommu_get_msi_cookie() if resv_msi is found. So it is
> > not limited to a new domain.
> 
> Looks msi_cookie requirement is per platform (currently only
> for smmu. see arm_smmu_get_resv_regions()). If there is
> no mixed case then above check is not required.

Do you mean "reusing existing domain" for the "mixed case"?

> But let's hear whether Robin has a different thought here.

Yea, sure.

> > > > - iommu_domain_free(domain->domain);
> > > > - list_del(>next);
> > > > - kfree(domain);
> > > > - vfio_iommu_aper_expand(iommu, _copy);
> > >
> > > Previously the aperture is adjusted when a domain is freed...
> > >
> > > > - vfio_update_pgsize_bitmap(iommu);
> > > > - }
> > > > - /*
> > > > -  * Removal of a group without dirty tracking may allow
> > > > -  * the iommu scope to be promoted.
> > > > -  */
> > > > - if (!group->pinned_page_dirty_scope) {
> > > > - iommu->num_non_pinned_groups--;
> > > > - if (iommu->dirty_page_tracking)
> > > > - vfio_iommu_populate_bitmap_full(iommu);
> > > > - }
> > > > + vfio_iommu_detach_destroy_domain(domain, iommu,
> > > > group);
> > > >   kfree(group);
> > > >   break;
> > > >   }
> > > >
> > > > + vfio_iommu_aper_expand(iommu, _copy);
> > >
> > > but now it's done for every group detach. The aperture is decided
> > > by domain geometry which is not affected by attached groups.
> >
> > Yea, I've noticed this part. Actually Jason did this change for
> > simplicity, and I think it'd be safe to do so?
> 
> Perhaps detach_destroy() can return a Boolean to indicate whether
> a domain is destroyed.

It could be a solution but doesn't feel that common for a clean
function to have a return value indicating a special case. Maybe
passing in "&domain" so that we can check if it's NULL after?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/5] vfio/iommu_type1: Simplify group attachment

2022-06-16 Thread Nicolin Chen via iommu
On Thu, Jun 16, 2022 at 07:08:10AM +0000, Tian, Kevin wrote:
> ...
> > +static struct vfio_domain *
> > +vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu
> > *iommu,
> > +struct vfio_iommu_group *group)
> > +{
> > + struct iommu_domain *new_domain;
> > + struct vfio_domain *domain;
> > + int ret = 0;
> > +
> > + /* Try to match an existing compatible domain */
> > + list_for_each_entry (domain, >domain_list, next) {
> > + ret = iommu_attach_group(domain->domain, group-
> > >iommu_group);
> > + if (ret == -EMEDIUMTYPE)
> > + continue;
> 
> Probably good to add one line comment here for what EMEDIUMTYPE
> represents. It's not a widely-used retry type like EAGAIN. A comment
> can save the time of digging out the fact by jumping to iommu file.

Sure. I can add that.

> ...
> > - if (resv_msi) {
> > + if (resv_msi && !domain->msi_cookie) {
> >   ret = iommu_get_msi_cookie(domain->domain,
> > resv_msi_base);
> >   if (ret && ret != -ENODEV)
> >   goto out_detach;
> > + domain->msi_cookie = true;
> >   }
> 
> why not moving to alloc_attach_domain() then no need for the new
> domain field? It's required only when a new domain is allocated.

When reusing an existing domain that doesn't have an msi_cookie,
we can do iommu_get_msi_cookie() if resv_msi is found. So it is
not limited to a new domain.

> ...
> > - if (list_empty(>group_list)) {
> > - if (list_is_singular(>domain_list)) {
> > - if (list_empty(
> > >emulated_iommu_groups)) {
> > - WARN_ON(iommu->notifier.head);
> > -
> >   vfio_iommu_unmap_unpin_all(iommu);
> > - } else {
> > -
> >   vfio_iommu_unmap_unpin_reaccount(iommu);
> > - }
> > - }
> > - iommu_domain_free(domain->domain);
> > - list_del(>next);
> > - kfree(domain);
> > - vfio_iommu_aper_expand(iommu, _copy);
> 
> Previously the aperture is adjusted when a domain is freed...
> 
> > - vfio_update_pgsize_bitmap(iommu);
> > - }
> > - /*
> > -  * Removal of a group without dirty tracking may allow
> > -  * the iommu scope to be promoted.
> > -  */
> > - if (!group->pinned_page_dirty_scope) {
> > - iommu->num_non_pinned_groups--;
> > - if (iommu->dirty_page_tracking)
> > - vfio_iommu_populate_bitmap_full(iommu);
> > - }
> > + vfio_iommu_detach_destroy_domain(domain, iommu,
> > group);
> >   kfree(group);
> >   break;
> >   }
> >
> > + vfio_iommu_aper_expand(iommu, _copy);
> 
> but now it's done for every group detach. The aperture is decided
> by domain geometry which is not affected by attached groups.

Yea, I've noticed this part. Actually Jason did this change for
simplicity, and I think it'd be safe to do so?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 4/5] vfio/iommu_type1: Clean up update_dirty_scope in detach_group()

2022-06-16 Thread Nicolin Chen via iommu
On Thu, Jun 16, 2022 at 06:45:09AM +0000, Tian, Kevin wrote:

> > +out_unlock:
> >   mutex_unlock(>lock);
> >  }
> >
> 
> I'd just replace the goto with a direct unlock and then return there.
> the readability is slightly better.

OK. Will do that.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 3/5] vfio/iommu_type1: Remove the domain->ops comparison

2022-06-16 Thread Nicolin Chen via iommu
On Thu, Jun 16, 2022 at 06:40:14AM +0000, Tian, Kevin wrote:

> > The domain->ops validation was added, as a precaution, for mixed-driver
> > systems. However, at this moment only one iommu driver is possible. So
> > remove it.
> 
> It's true on a physical platform. But I'm not sure whether a virtual platform
> is allowed to include multiple e.g. one virtio-iommu alongside a virtual VT-d
> or a virtual smmu. It might be clearer to claim that (as Robin pointed out)
> there is plenty more significant problems than this to solve instead of simply
> saying that only one iommu driver is possible if we don't have explicit code
> to reject such configuration. 

Will edit this part. Thanks!
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v2 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-15 Thread Nicolin Chen via iommu
On Thu, Jun 16, 2022 at 10:09:49AM +0800, Baolu Lu wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022/6/16 08:03, Nicolin Chen wrote:
> > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> > index 44016594831d..0dd13330fe12 100644
> > --- a/drivers/iommu/intel/iommu.c
> > +++ b/drivers/iommu/intel/iommu.c
> > @@ -4323,7 +4323,7 @@ static int prepare_domain_attach_device(struct 
> > iommu_domain *domain,
> >   return -ENODEV;
> > 
> >   if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
> > - return -EOPNOTSUPP;
> > + return -EMEDIUMTYPE;
> > 
> >   /* check if this iommu agaw is sufficient for max mapped address */
> >   addr_width = agaw_to_width(iommu->agaw);
> > @@ -4331,10 +4331,10 @@ static int prepare_domain_attach_device(struct 
> > iommu_domain *domain,
> >   addr_width = cap_mgaw(iommu->cap);
> > 
> >   if (dmar_domain->max_addr > (1LL << addr_width)) {
> > - dev_err(dev, "%s: iommu width (%d) is not "
> > + dev_dbg(dev, "%s: iommu width (%d) is not "
> >   "sufficient for the mapped address (%llx)\n",
> >   __func__, addr_width, dmar_domain->max_addr);
> > - return -EFAULT;
> > + return -EMEDIUMTYPE;
> >   }
> >   dmar_domain->gaw = addr_width;
> 
> Can we simply remove the dev_err()? As the return value has explicitly
> explained the failure reason, putting a print statement won't help much.

Yes. As long as no one has objection, I can remove that in the next
version.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 5/5] vfio/iommu_type1: Simplify group attachment

2022-06-15 Thread Nicolin Chen via iommu
Un-inline the domain specific logic from the attach/detach_group ops into
two paired functions vfio_iommu_alloc_attach_domain() and
vfio_iommu_detach_destroy_domain() that strictly deal with creating and
destroying struct vfio_domains.

Add the logic to check for EMEDIUMTYPE return code of iommu_attach_group()
and avoid the extra domain allocations and attach/detach sequences of the
old code. This allows properly detecting an actual attach error, like
-ENOMEM, vs treating all attach errors as an incompatible domain.

Co-developed-by: Jason Gunthorpe 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 298 +---
 1 file changed, 163 insertions(+), 135 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 573caf320788..5986c68e59ee 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -86,6 +86,7 @@ struct vfio_domain {
struct list_headgroup_list;
boolfgsp : 1;   /* Fine-grained super pages */
boolenforce_cache_coherency : 1;
+   boolmsi_cookie : 1;
 };
 
 struct vfio_dma {
@@ -2153,12 +2154,163 @@ static void vfio_iommu_iova_insert_copy(struct 
vfio_iommu *iommu,
list_splice_tail(iova_copy, iova);
 }
 
+static struct vfio_domain *
+vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu *iommu,
+  struct vfio_iommu_group *group)
+{
+   struct iommu_domain *new_domain;
+   struct vfio_domain *domain;
+   int ret = 0;
+
+   /* Try to match an existing compatible domain */
+   list_for_each_entry (domain, >domain_list, next) {
+   ret = iommu_attach_group(domain->domain, group->iommu_group);
+   if (ret == -EMEDIUMTYPE)
+   continue;
+   if (ret)
+   return ERR_PTR(ret);
+   goto done;
+   }
+
+   new_domain = iommu_domain_alloc(bus);
+   if (!new_domain)
+   return ERR_PTR(-EIO);
+
+   if (iommu->nesting) {
+   ret = iommu_enable_nesting(new_domain);
+   if (ret)
+   goto out_free_iommu_domain;
+   }
+
+   ret = iommu_attach_group(new_domain, group->iommu_group);
+   if (ret)
+   goto out_free_iommu_domain;
+
+   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+   if (!domain) {
+   ret = -ENOMEM;
+   goto out_detach;
+   }
+
+   domain->domain = new_domain;
+   vfio_test_domain_fgsp(domain);
+
+   /*
+* If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+* no-snoop set) then VFIO always turns this feature on because on Intel
+* platforms it optimizes KVM to disable wbinvd emulation.
+*/
+   if (new_domain->ops->enforce_cache_coherency)
+   domain->enforce_cache_coherency =
+   new_domain->ops->enforce_cache_coherency(new_domain);
+
+   /* replay mappings on new domains */
+   ret = vfio_iommu_replay(iommu, domain);
+   if (ret)
+   goto out_free_domain;
+
+   INIT_LIST_HEAD(>group_list);
+   list_add(>next, >domain_list);
+   vfio_update_pgsize_bitmap(iommu);
+
+done:
+   list_add(>next, >group_list);
+
+   /*
+* An iommu backed group can dirty memory directly and therefore
+* demotes the iommu scope until it declares itself dirty tracking
+* capable via the page pinning interface.
+*/
+   iommu->num_non_pinned_groups++;
+
+   return domain;
+
+out_free_domain:
+   kfree(domain);
+out_detach:
+   iommu_detach_group(new_domain, group->iommu_group);
+out_free_iommu_domain:
+   iommu_domain_free(new_domain);
+   return ERR_PTR(ret);
+}
+
+static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
+{
+   struct rb_node *node;
+
+   while ((node = rb_first(>dma_list)))
+   vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
+}
+
+static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
+{
+   struct rb_node *n, *p;
+
+   n = rb_first(>dma_list);
+   for (; n; n = rb_next(n)) {
+   struct vfio_dma *dma;
+   long locked = 0, unlocked = 0;
+
+   dma = rb_entry(n, struct vfio_dma, node);
+   unlocked += vfio_unmap_unpin(iommu, dma, false);
+   p = rb_first(>pfn_list);
+   for (; p; p = rb_next(p)) {
+   struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
+node);
+
+   if (!is_invalid_reserved_pfn(vpfn->pfn))
+   locked++;
+   }
+   vfio_lo

[PATCH v2 3/5] vfio/iommu_type1: Remove the domain->ops comparison

2022-06-15 Thread Nicolin Chen via iommu
The domain->ops validation was added, as a precaution, for mixed-driver
systems. However, at this moment only one iommu driver is possible. So
remove it.

Per discussion with Robin, in future when many can be permitted we will
rely on the IOMMU core code to check the domain->ops:
https://lore.kernel.org/linux-iommu/6575de6d-94ba-c427-5b1e-967750ddf...@arm.com/

Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index f4e3b423a453..11be5f95580b 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2277,29 +2277,19 @@ static int vfio_iommu_type1_attach_group(void 
*iommu_data,
domain->domain->ops->enforce_cache_coherency(
domain->domain);
 
-   /*
-* Try to match an existing compatible domain.  We don't want to
-* preclude an IOMMU driver supporting multiple bus_types and being
-* able to include different bus_types in the same IOMMU domain, so
-* we test whether the domains use the same iommu_ops rather than
-* testing if they're on the same bus_type.
-*/
+   /* Try to match an existing compatible domain */
list_for_each_entry(d, >domain_list, next) {
-   if (d->domain->ops == domain->domain->ops) {
-   iommu_detach_group(domain->domain, group->iommu_group);
-   if (!iommu_attach_group(d->domain,
-   group->iommu_group)) {
-   list_add(>next, >group_list);
-   iommu_domain_free(domain->domain);
-   kfree(domain);
-   goto done;
-   }
-
-   ret = iommu_attach_group(domain->domain,
-group->iommu_group);
-   if (ret)
-   goto out_domain;
+   iommu_detach_group(domain->domain, group->iommu_group);
+   if (!iommu_attach_group(d->domain, group->iommu_group)) {
+   list_add(>next, >group_list);
+   iommu_domain_free(domain->domain);
+   kfree(domain);
+   goto done;
}
+
+   ret = iommu_attach_group(domain->domain,  group->iommu_group);
+   if (ret)
+   goto out_domain;
}
 
vfio_test_domain_fgsp(domain);
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 4/5] vfio/iommu_type1: Clean up update_dirty_scope in detach_group()

2022-06-15 Thread Nicolin Chen via iommu
All devices in emulated_iommu_groups have pinned_page_dirty_scope
set, so the update_dirty_scope in the first list_for_each_entry
is always false. Clean it up, and move the "if update_dirty_scope"
part from the detach_group_done routine to the domain_list part.

Rename the "detach_group_done" goto label accordingly.

Suggested-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 27 ---
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 11be5f95580b..573caf320788 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2453,14 +2453,12 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain;
struct vfio_iommu_group *group;
-   bool update_dirty_scope = false;
LIST_HEAD(iova_copy);
 
mutex_lock(>lock);
list_for_each_entry(group, >emulated_iommu_groups, next) {
if (group->iommu_group != iommu_group)
continue;
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
kfree(group);
 
@@ -2469,7 +2467,7 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
WARN_ON(iommu->notifier.head);
vfio_iommu_unmap_unpin_all(iommu);
}
-   goto detach_group_done;
+   goto out_unlock;
}
 
/*
@@ -2485,9 +2483,7 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
continue;
 
iommu_detach_group(domain->domain, group->iommu_group);
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
-   kfree(group);
/*
 * Group ownership provides privilege, if the group list is
 * empty, the domain goes away. If it's the last domain with
@@ -2510,6 +2506,16 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
vfio_iommu_aper_expand(iommu, _copy);
vfio_update_pgsize_bitmap(iommu);
}
+   /*
+* Removal of a group without dirty tracking may allow
+* the iommu scope to be promoted.
+*/
+   if (!group->pinned_page_dirty_scope) {
+   iommu->num_non_pinned_groups--;
+   if (iommu->dirty_page_tracking)
+   vfio_iommu_populate_bitmap_full(iommu);
+   }
+   kfree(group);
break;
}
 
@@ -2518,16 +2524,7 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
else
vfio_iommu_iova_free(_copy);
 
-detach_group_done:
-   /*
-* Removal of a group without dirty tracking may allow the iommu scope
-* to be promoted.
-*/
-   if (update_dirty_scope) {
-   iommu->num_non_pinned_groups--;
-   if (iommu->dirty_page_tracking)
-   vfio_iommu_populate_bitmap_full(iommu);
-   }
+out_unlock:
mutex_unlock(>lock);
 }
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-15 Thread Nicolin Chen via iommu
Cases like VFIO wish to attach a device to an existing domain that was
not allocated specifically from the device. This raises a condition
where the IOMMU driver can fail the domain attach because the domain and
device are incompatible with each other.

This is a soft failure that can be resolved by using a different domain.

Provide a dedicated errno that the IOMMU driver returns during attach to
indicate that the attach failed because of domain incompatibility.
EMEDIUMTYPE is chosen because it is never used within the iommu subsystem
today and evokes a sense that the 'medium' aka the domain is incompatible.

VFIO can use this to know attach is a soft failure and it should continue
searching. Otherwise the attach will be a hard failure and VFIO will
return the code to userspace.

Update all drivers to return EMEDIUMTYPE in their failure paths that are
related to domain incompatibility. Also turn adjacent error prints into
debug prints, for these soft failures, to prevent kernel log spam.

Add kdocs describing this behavior.

Suggested-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/amd/iommu.c   |  2 +-
 drivers/iommu/apple-dart.c  |  4 +--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 12 -
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |  4 +--
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |  4 +--
 drivers/iommu/intel/iommu.c |  6 ++---
 drivers/iommu/iommu.c   | 28 +
 drivers/iommu/ipmmu-vmsa.c  |  4 +--
 drivers/iommu/mtk_iommu_v1.c|  2 +-
 drivers/iommu/omap-iommu.c  |  4 +--
 drivers/iommu/s390-iommu.c  |  2 +-
 drivers/iommu/sprd-iommu.c  |  4 +--
 drivers/iommu/tegra-gart.c  |  2 +-
 drivers/iommu/virtio-iommu.c|  4 +--
 14 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 840831d5d2ad..ad499658a6b6 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1662,7 +1662,7 @@ static int attach_device(struct device *dev,
if (domain->flags & PD_IOMMUV2_MASK) {
struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
 
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
goto out;
 
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 8af0242a90d9..e58dc310afd7 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -495,10 +495,10 @@ static int apple_dart_attach_dev(struct iommu_domain 
*domain,
 
if (cfg->stream_maps[0].dart->force_bypass &&
domain->type != IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
if (!cfg->stream_maps[0].dart->supports_bypass &&
domain->type == IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
 
ret = apple_dart_finalize_domain(domain, cfg);
if (ret)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 88817a3376ef..1c66e4b6d852 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2420,24 +2420,24 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
goto out_unlock;
}
} else if (smmu_domain->smmu != smmu) {
-   dev_err(dev,
+   dev_dbg(dev,
"cannot attach to SMMU %s (upstream of %s)\n",
dev_name(smmu_domain->smmu->dev),
dev_name(smmu->dev));
-   ret = -ENXIO;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
-   dev_err(dev,
+   dev_dbg(dev,
"cannot attach to incompatible domain (%u SSID bits != 
%u)\n",
smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   smmu_domain->stall_enabled != master->stall_enabled) {
-   dev_err(dev, "cannot attach to stall-%s domain\n",
+   dev_dbg(dev, "cannot attach to stall-%s domain\n",
smmu_domain->stall_enabled ? "enabled" : "disabled");
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock

[PATCH v2 2/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

2022-06-15 Thread Nicolin Chen via iommu
From: Jason Gunthorpe 

The KVM mechanism for controlling wbinvd is based on the OR of the coherency
property of all devices attached to a guest, no matter whether those devices
are attached to a single domain or multiple domains.

So, there is no value in trying to push a device that could do enforced
cache coherency to a dedicated domain vs re-using an existing domain
which is non-coherent since KVM won't be able to take advantage of it.
This just wastes domain memory.

Simplify this code and eliminate the test. This removes the only logic
that needed to have a dummy domain attached prior to searching for a
matching domain and simplifies the next patches.

It's unclear whether we want to further optimize the Intel driver to
update the domain coherency after a device is detached from it, at
least not before KVM can be verified to handle such dynamics in related
emulation paths (wbinvd, vcpu load, write_cr0, ept, etc.). In reality
we don't see a usage requiring such optimization, as the only device
which imposes such non-coherency is the Intel GPU, which doesn't even
support hotplug/hot remove.

Signed-off-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e357..f4e3b423a453 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2285,9 +2285,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 * testing if they're on the same bus_type.
 */
list_for_each_entry(d, >domain_list, next) {
-   if (d->domain->ops == domain->domain->ops &&
-   d->enforce_cache_coherency ==
-   domain->enforce_cache_coherency) {
+   if (d->domain->ops == domain->domain->ops) {
iommu_detach_group(domain->domain, group->iommu_group);
if (!iommu_attach_group(d->domain,
group->iommu_group)) {
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 0/5] Simplify vfio_iommu_type1 attach/detach routine

2022-06-15 Thread Nicolin Chen via iommu
This is a preparatory series for IOMMUFD v2 patches. It enforces error
code -EMEDIUMTYPE in iommu_attach_device() and iommu_attach_group() when
an IOMMU domain and a device/group are incompatible. It also drops the
useless domain->ops check since it won't fail in current environment.

These allow VFIO iommu code to simplify its group attachment routine, by
avoiding the extra IOMMU domain allocations and attach/detach sequences
of the old code.

It is worth mentioning that the exact match for enforce_cache_coherency is
removed with this series, since there is very little value in doing that: KVM
won't be able to take advantage of it -- this just wastes domain memory.
Instead, we rely on the Intel IOMMU driver taking care of that internally.

This is on github: https://github.com/nicolinc/iommufd/commits/vfio_iommu_attach

Changelog
v2:
 * Added -EMEDIUMTYPE to more IOMMU drivers that fit the category.
 * Changed dev_err to dev_dbg for -EMEDIUMTYPE to avoid kernel log spam.
 * Dropped iommu_ops patch, and removed domain->ops in VFIO directly,
   since there's no mixed-driver use case that would fail the sanity.
 * Updated commit log of the patch removing enforce_cache_coherency.
 * Fixed a misplaced "num_non_pinned_groups--" in the detach_group patch.
 * Moved "num_non_pinned_groups++" in PATCH-5 to the common path between
   domain-reusing and new-domain pathways, like the code previously did.
 * Fixed a typo in EMEDIUMTYPE patch.
v1: https://lore.kernel.org/kvm/20220606061927.26049-1-nicol...@nvidia.com/

Jason Gunthorpe (1):
  vfio/iommu_type1: Prefer to reuse domains vs match enforced cache
    coherency

Nicolin Chen (4):
  iommu: Return -EMEDIUMTYPE for incompatible domain and device/group
  vfio/iommu_type1: Remove the domain->ops comparison
  vfio/iommu_type1: Clean up update_dirty_scope in detach_group()
  vfio/iommu_type1: Simplify group attachment

 drivers/iommu/amd/iommu.c   |   2 +-
 drivers/iommu/apple-dart.c  |   4 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  12 +-
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |   4 +-
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |   4 +-
 drivers/iommu/intel/iommu.c |   6 +-
 drivers/iommu/iommu.c   |  28 ++
 drivers/iommu/ipmmu-vmsa.c  |   4 +-
 drivers/iommu/mtk_iommu_v1.c|   2 +-
 drivers/iommu/omap-iommu.c  |   4 +-
 drivers/iommu/s390-iommu.c  |   2 +-
 drivers/iommu/sprd-iommu.c  |   4 +-
 drivers/iommu/tegra-gart.c  |   2 +-
 drivers/iommu/virtio-iommu.c|   4 +-
 drivers/vfio/vfio_iommu_type1.c | 317 ++--
 15 files changed, 220 insertions(+), 179 deletions(-)

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

2022-06-15 Thread Nicolin Chen via iommu
On Wed, Jun 15, 2022 at 07:35:00AM +0000, Tian, Kevin wrote:
> External email: Use caution opening links or attachments
> 
> 
> > From: Nicolin Chen 
> > Sent: Wednesday, June 15, 2022 4:45 AM
> >
> > Hi Kevin,
> >
> > On Wed, Jun 08, 2022 at 11:48:27PM +, Tian, Kevin wrote:
> > > > > > The KVM mechanism for controlling wbinvd is only triggered during
> > > > > > kvm_vfio_group_add(), meaning it is a one-shot test done once the
> > > > devices
> > > > > > are setup.
> > > > >
> > > > > It's not one-shot. kvm_vfio_update_coherency() is called in both
> > > > > group_add() and group_del(). Then the coherency property is
> > > > > checked dynamically in wbinvd emulation:
> > > >
> > > > From the perspective of managing the domains that is still
> > > > one-shot. It doesn't get updated when individual devices are
> > > > added/removed to domains.
> > >
> > > It's unchanged per-domain but dynamic per-vm when multiple
> > > domains are added/removed (i.e. kvm->arch.noncoherent_dma_count).
> > > It's the latter being checked in the kvm.
> >
> > I am going to send a v2, yet not quite getting the point here.
> > Meanwhile, Jason is on leave.
> >
> > What, in your opinion, would be an accurate description here?
> >
> 
> Something like below:
> --
> The KVM mechanism for controlling wbinvd is based on OR of
> the coherency property of all devices attached to a guest, no matter
> those devices  are attached to a single domain or multiple domains.
> 
> So, there is no value in trying to push a device that could do enforced
> cache coherency to a dedicated domain vs re-using an existing domain
> which is non-coherent since KVM won't be able to take advantage of it.
> This just wastes domain memory.
> 
> Simplify this code and eliminate the test. This removes the only logic
> that needed to have a dummy domain attached prior to searching for a
> matching domain and simplifies the next patches.
> 
> It's unclear whether we want to further optimize the Intel driver to
> update the domain coherency after a device is detached from it, at
> least not before KVM can be verified to handle such dynamics in related
> emulation paths (wbinvd, vcpu load, write_cr0, ept, etc.). In reality
> we don't see an usage requiring such optimization as the only device
> which imposes such non-coherency is Intel GPU which even doesn't
> support hotplug/hot remove.

Thanks! I just updated that and will send v2.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

2022-06-14 Thread Nicolin Chen via iommu
Hi Kevin,

On Wed, Jun 08, 2022 at 11:48:27PM +0000, Tian, Kevin wrote:
> > > > The KVM mechanism for controlling wbinvd is only triggered during
> > > > kvm_vfio_group_add(), meaning it is a one-shot test done once the
> > devices
> > > > are setup.
> > >
> > > It's not one-shot. kvm_vfio_update_coherency() is called in both
> > > group_add() and group_del(). Then the coherency property is
> > > checked dynamically in wbinvd emulation:
> >
> > From the perspective of managing the domains that is still
> > one-shot. It doesn't get updated when individual devices are
> > added/removed to domains.
> 
> It's unchanged per-domain but dynamic per-vm when multiple
> domains are added/removed (i.e. kvm->arch.noncoherent_dma_count).
> It's the latter being checked in the kvm.

I am going to send a v2, yet not quite getting the point here.
Meanwhile, Jason is on leave.

What, in your opinion, would be an accurate description here?

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 4/5] vfio/iommu_type1: Clean up update_dirty_scope in detach_group()

2022-06-08 Thread Nicolin Chen via iommu
On Wed, Jun 08, 2022 at 08:35:47AM +0000, Tian, Kevin wrote:

> > @@ -2519,7 +2515,17 @@ static void vfio_iommu_type1_detach_group(void
> > *iommu_data,
> >   kfree(domain);
> >   vfio_iommu_aper_expand(iommu, _copy);
> >   vfio_update_pgsize_bitmap(iommu);
> > + /*
> > +  * Removal of a group without dirty tracking may
> > allow
> > +  * the iommu scope to be promoted.
> > +  */
> > + if (!group->pinned_page_dirty_scope) {
> > + iommu->num_non_pinned_groups--;
> > + if (iommu->dirty_page_tracking)
> > +
> >   vfio_iommu_populate_bitmap_full(iommu);
> 
> This doesn't look correct. The old code decrements
> num_non_pinned_groups for every detach group without dirty
> tracking. But now it's only done when the domain is about to
> be released...

Hmm..you are right. It should be placed outside:
if (list_empty(>group_list)) {
...
}
+   if (!group->pinned_page_dirty_scope) {
+   ...
+   }

Will fix this and the same problem in PATCH-5 too.

Thanks!
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-08 Thread Nicolin Chen via iommu
Hi Kevin,

On Wed, Jun 08, 2022 at 07:49:10AM +0000, Tian, Kevin wrote:
> External email: Use caution opening links or attachments
> 
> 
> > From: Nicolin Chen
> > Sent: Monday, June 6, 2022 2:19 PM
> >
> > Cases like VFIO wish to attach a device to an existing domain that was
> > not allocated specifically from the device. This raises a condition
> > where the IOMMU driver can fail the domain attach because the domain and
> > device are incompatible with each other.
> >
> > This is a soft failure that can be resolved by using a different domain.
> >
> > Provide a dedicated errno from the IOMMU driver during attach that the
> > reason attached failed is because of domain incompatability. EMEDIUMTYPE
> > is chosen because it is never used within the iommu subsystem today and
> > evokes a sense that the 'medium' aka the domain is incompatible.
> >
> > VFIO can use this to know attach is a soft failure and it should continue
> > searching. Otherwise the attach will be a hard failure and VFIO will
> > return the code to userspace.
> >
> > Update all drivers to return EMEDIUMTYPE in their failure paths that are
> > related to domain incompatability.
> 
> Seems not all drivers are converted, e.g.:

Thank you for going through all of them!

> mtk_iommu_v1_attach_device():
> /* Only allow the domain created internally. */
> mtk_mapping = data->mapping;
> if (mtk_mapping->domain != domain)
> return 0;
> ** the current code sounds incorrect which should return an error

I agree.
 
> s390_iommu_attach_device():
> /* Allow only devices with identical DMA range limits */
> } else if (domain->geometry.aperture_start != zdev->start_dma ||
> domain->geometry.aperture_end != zdev->end_dma) {
> rc = -EINVAL;
>
> sprd_iommu_attach_device():
> if (dom->sdev) {
> pr_err("There's already a device attached to this domain.\n");
> return -EINVAL;
> }
> 
> 
> gart_iommu_attach_dev():
> if (gart->active_domain && gart->active_domain != domain) {
> ret = -EBUSY;

Will add these.

> arm_smmu_attach_dev():
> if (!fwspec || fwspec->ops != _smmu_ops) {
> dev_err(dev, "cannot attach to SMMU, is it on the same 
> bus?\n");
> return -ENXIO;
> }
> **probably this check can be covered by next patch which moves bus ops
> check into iommu core?

I was thinking that it could be covered. Yet, we are about to drop
that ops check, as Robin pointed out that we don't need that ops
sanity check since we don't have mixed-driver systems yet. So perhaps
this would be a different ops check and should return -EMEDIUMTYPE too,
I think.

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-06 Thread Nicolin Chen via iommu
On Tue, Jun 07, 2022 at 11:23:27AM +0800, Baolu Lu wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022/6/6 14:19, Nicolin Chen wrote:
> > +/**
> > + * iommu_attach_group - Attach an IOMMU group to an IOMMU domain
> > + * @domain: IOMMU domain to attach
> > + * @dev: IOMMU group that will be attached
> 
> Nit: @group: ...

Oh...Thanks!
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/5] iommu: Ensure device has the same iommu_ops as the domain

2022-06-06 Thread Nicolin Chen via iommu
On Mon, Jun 06, 2022 at 06:50:33PM +0100, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022-06-06 17:51, Nicolin Chen wrote:
> > Hi Robin,
> > 
> > On Mon, Jun 06, 2022 at 03:33:42PM +0100, Robin Murphy wrote:
> > > On 2022-06-06 07:19, Nicolin Chen wrote:
> > > > The core code should not call an iommu driver op with a struct device
> > > > parameter unless it knows that the dev_iommu_priv_get() for that struct
> > > > device was setup by the same driver. Otherwise in a mixed driver system
> > > > the iommu_priv could be casted to the wrong type.
> > > 
> > > We don't have mixed-driver systems, and there are plenty more
> > > significant problems than this one to solve before we can (but thanks
> > > for pointing it out - I hadn't got as far as auditing the public
> > > interfaces yet). Once domains are allocated via a particular device's
> > > IOMMU instance in the first place, there will be ample opportunity for
> > > the core to stash suitable identifying information in the domain for
> > > itself. TBH even the current code could do it without needing the
> > > weirdly invasive changes here.
> > 
> > Do you have an alternative and less invasive solution in mind?
> > 
> > > > Store the iommu_ops pointer in the iommu_domain and use it as a check to
> > > > validate that the struct device is correct before invoking any domain op
> > > > that accepts a struct device.
> > > 
> > > In fact this even describes exactly that - "Store the iommu_ops pointer
> > > in the iommu_domain", vs. the "Store the iommu_ops pointer in the
> > > iommu_domain_ops" which the patch is actually doing :/
> > 
> > Will fix that.
> 
> Well, as before I'd prefer to make the code match the commit message -
> if I really need to spell it out, see below - since I can't imagine that
> we should ever have need to identify a set of iommu_domain_ops in
> isolation, therefore I think it's considerably clearer to use the
> iommu_domain itself. However, either way we really don't need this yet,
> so we may as well just go ahead and remove the redundant test from VFIO
> anyway, and I can add some form of this patch to my dev branch for now.

I see. The version below is much cleaner. Yet, it would mean having one
pointer per iommu_domain vs. one pointer per driver. Jason pointed out
to me earlier that doing so would waste memory unnecessarily on
platforms that have considerable numbers of masters.

Since we know that it'd be safe to exclude this single change from
this series, I can drop it in next version, if you don't like the
change.

Thanks!
Nic

> ->8-
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index cde2e1d6ab9b..72990edc9314 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1902,6 +1902,7 @@ static struct iommu_domain
> *__iommu_domain_alloc(struct device *dev,
>domain->type = type;
>/* Assume all sizes by default; the driver may override this later */
>domain->pgsize_bitmap = ops->pgsize_bitmap;
> +   domain->owner = ops;
>if (!domain->ops)
>domain->ops = ops->default_domain_ops;
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 6f64cbbc6721..79e557207f53 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -89,6 +89,7 @@ struct iommu_domain_geometry {
> 
>  struct iommu_domain {
>unsigned type;
> +   const struct iommu_ops *owner; /* Who allocated this domain */
>const struct iommu_domain_ops *ops;
>unsigned long pgsize_bitmap;/* Bitmap of page sizes in use */
>iommu_fault_handler_t handler;
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/5] iommu: Ensure device has the same iommu_ops as the domain

2022-06-06 Thread Nicolin Chen via iommu
Hi Robin,

On Mon, Jun 06, 2022 at 03:33:42PM +0100, Robin Murphy wrote:
> On 2022-06-06 07:19, Nicolin Chen wrote:
> > The core code should not call an iommu driver op with a struct device
> > parameter unless it knows that the dev_iommu_priv_get() for that struct
> > device was setup by the same driver. Otherwise in a mixed driver system
> > the iommu_priv could be casted to the wrong type.
> 
> We don't have mixed-driver systems, and there are plenty more
> significant problems than this one to solve before we can (but thanks
> for pointing it out - I hadn't got as far as auditing the public
> interfaces yet). Once domains are allocated via a particular device's
> IOMMU instance in the first place, there will be ample opportunity for
> the core to stash suitable identifying information in the domain for
> itself. TBH even the current code could do it without needing the
> weirdly invasive changes here.

Do you have an alternative and less invasive solution in mind?

> > Store the iommu_ops pointer in the iommu_domain and use it as a check to
> > validate that the struct device is correct before invoking any domain op
> > that accepts a struct device.
> 
> In fact this even describes exactly that - "Store the iommu_ops pointer
> in the iommu_domain", vs. the "Store the iommu_ops pointer in the
> iommu_domain_ops" which the patch is actually doing :/

Will fix that.

> [...]
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index 19cf28d40ebe..8a1f437a51f2 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -1963,6 +1963,10 @@ static int __iommu_attach_device(struct iommu_domain 
> > *domain,
> >   {
> >   int ret;
> > 
> > + /* Ensure the device was probe'd onto the same driver as the domain */
> > + if (dev->bus->iommu_ops != domain->ops->iommu_ops)
> 
> Nope, dev_iommu_ops(dev) please. Furthermore I think the logical place
> to put this is in iommu_group_do_attach_device(), since that's the
> gateway for the public interfaces - we shouldn't need to second-guess
> ourselves for internal default-domain-related calls.

Will move to iommu_group_do_attach_device and change to dev_iommu_ops.

Thanks!
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 4/5] vfio/iommu_type1: Clean up update_dirty_scope in detach_group()

2022-06-06 Thread Nicolin Chen via iommu
All devices in emulated_iommu_groups have pinned_page_dirty_scope
set, so the update_dirty_scope in the first list_for_each_entry
is always false. Clean it up, and move the "if update_dirty_scope"
part from the detach_group_done routine to the domain_list part.

Rename the "detach_group_done" goto label accordingly.

Suggested-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 27 ---
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index f4e3b423a453..b45b1cc118ef 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2463,14 +2463,12 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain;
struct vfio_iommu_group *group;
-   bool update_dirty_scope = false;
LIST_HEAD(iova_copy);
 
mutex_lock(>lock);
list_for_each_entry(group, >emulated_iommu_groups, next) {
if (group->iommu_group != iommu_group)
continue;
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
kfree(group);
 
@@ -2479,7 +2477,7 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
WARN_ON(iommu->notifier.head);
vfio_iommu_unmap_unpin_all(iommu);
}
-   goto detach_group_done;
+   goto out_unlock;
}
 
/*
@@ -2495,9 +2493,7 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
continue;
 
iommu_detach_group(domain->domain, group->iommu_group);
-   update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(>next);
-   kfree(group);
/*
 * Group ownership provides privilege, if the group list is
 * empty, the domain goes away. If it's the last domain with
@@ -2519,7 +2515,17 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
kfree(domain);
vfio_iommu_aper_expand(iommu, _copy);
vfio_update_pgsize_bitmap(iommu);
+   /*
+* Removal of a group without dirty tracking may allow
+* the iommu scope to be promoted.
+*/
+   if (!group->pinned_page_dirty_scope) {
+   iommu->num_non_pinned_groups--;
+   if (iommu->dirty_page_tracking)
+   vfio_iommu_populate_bitmap_full(iommu);
+   }
}
+   kfree(group);
break;
}
 
@@ -2528,16 +2534,7 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
else
vfio_iommu_iova_free(_copy);
 
-detach_group_done:
-   /*
-* Removal of a group without dirty tracking may allow the iommu scope
-* to be promoted.
-*/
-   if (update_dirty_scope) {
-   iommu->num_non_pinned_groups--;
-   if (iommu->dirty_page_tracking)
-   vfio_iommu_populate_bitmap_full(iommu);
-   }
+out_unlock:
mutex_unlock(>lock);
 }
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 5/5] vfio/iommu_type1: Simplify group attachment

2022-06-06 Thread Nicolin Chen via iommu
Un-inline the domain specific logic from the attach/detach_group ops into
two paired functions vfio_iommu_alloc_attach_domain() and
vfio_iommu_detach_destroy_domain() that strictly deal with creating and
destroying struct vfio_domains.

Add the logic to check for EMEDIUMTYPE return code of iommu_attach_group()
and avoid the extra domain allocations and attach/detach sequences of the
old code. This allows properly detecting an actual attach error, like
-ENOMEM, vs treating all attach errors as an incompatible domain.

Remove the duplicated domain->ops comparison that is taken care of in the
IOMMU core.

Co-developed-by: Jason Gunthorpe 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 306 +---
 1 file changed, 161 insertions(+), 145 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b45b1cc118ef..c6f937e1d71f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -86,6 +86,7 @@ struct vfio_domain {
struct list_headgroup_list;
boolfgsp : 1;   /* Fine-grained super pages */
boolenforce_cache_coherency : 1;
+   boolmsi_cookie : 1;
 };
 
 struct vfio_dma {
@@ -2153,12 +2154,161 @@ static void vfio_iommu_iova_insert_copy(struct 
vfio_iommu *iommu,
list_splice_tail(iova_copy, iova);
 }
 
+static struct vfio_domain *
+vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu *iommu,
+  struct vfio_iommu_group *group)
+{
+   struct iommu_domain *new_domain;
+   struct vfio_domain *domain;
+   int ret = 0;
+
+   /* Try to match an existing compatible domain */
+   list_for_each_entry (domain, >domain_list, next) {
+   ret = iommu_attach_group(domain->domain, group->iommu_group);
+   if (ret == -EMEDIUMTYPE)
+   continue;
+   if (ret)
+   return ERR_PTR(ret);
+   list_add(>next, >group_list);
+   return domain;
+   }
+
+   new_domain = iommu_domain_alloc(bus);
+   if (!new_domain)
+   return ERR_PTR(-EIO);
+
+   if (iommu->nesting) {
+   ret = iommu_enable_nesting(new_domain);
+   if (ret)
+   goto out_free_iommu_domain;
+   }
+
+   ret = iommu_attach_group(new_domain, group->iommu_group);
+   if (ret)
+   goto out_free_iommu_domain;
+
+   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+   if (!domain) {
+   ret = -ENOMEM;
+   goto out_detach;
+   }
+
+   domain->domain = new_domain;
+   vfio_test_domain_fgsp(domain);
+
+   /*
+* If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+* no-snoop set) then VFIO always turns this feature on because on Intel
+* platforms it optimizes KVM to disable wbinvd emulation.
+*/
+   if (new_domain->ops->enforce_cache_coherency)
+   domain->enforce_cache_coherency =
+   new_domain->ops->enforce_cache_coherency(new_domain);
+
+   /* replay mappings on new domains */
+   ret = vfio_iommu_replay(iommu, domain);
+   if (ret)
+   goto out_free_domain;
+
+   /*
+* An iommu backed group can dirty memory directly and therefore
+* demotes the iommu scope until it declares itself dirty tracking
+* capable via the page pinning interface.
+*/
+   iommu->num_non_pinned_groups++;
+
+   INIT_LIST_HEAD(>group_list);
+   list_add(>next, >group_list);
+   list_add(>next, >domain_list);
+   vfio_update_pgsize_bitmap(iommu);
+   return domain;
+
+out_free_domain:
+   kfree(domain);
+out_detach:
+   iommu_detach_group(domain->domain, group->iommu_group);
+out_free_iommu_domain:
+   iommu_domain_free(new_domain);
+   return ERR_PTR(ret);
+}
+
+static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
+{
+   struct rb_node *node;
+
+   while ((node = rb_first(>dma_list)))
+   vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
+}
+
+static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
+{
+   struct rb_node *n, *p;
+
+   n = rb_first(>dma_list);
+   for (; n; n = rb_next(n)) {
+   struct vfio_dma *dma;
+   long locked = 0, unlocked = 0;
+
+   dma = rb_entry(n, struct vfio_dma, node);
+   unlocked += vfio_unmap_unpin(iommu, dma, false);
+   p = rb_first(>pfn_list);
+   for (; p; p = rb_next(p)) {
+   struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
+

[PATCH 3/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

2022-06-06 Thread Nicolin Chen via iommu
From: Jason Gunthorpe 

The KVM mechanism for controlling wbinvd is only triggered during
kvm_vfio_group_add(), meaning it is a one-shot test done once the devices
are setup.

So, there is no value in trying to push a device that could do enforced
cache coherency to a dedicated domain vs re-using an existing domain since
KVM won't be able to take advantage of it. This just wastes domain memory.

Simplify this code and eliminate the test. This removes the only logic
that needed to have a dummy domain attached prior to searching for a
matching domain and simplifies the next patches.

If someday we want to try and optimize this further the better approach is
to update the Intel driver so that enforce_cache_coherency() can work on a
domain that already has IOPTEs and then call the enforce_cache_coherency()
after detaching a device from a domain to upgrade the whole domain to
enforced cache coherency mode.

Signed-off-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/vfio/vfio_iommu_type1.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e357..f4e3b423a453 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2285,9 +2285,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 * testing if they're on the same bus_type.
 */
list_for_each_entry(d, &iommu->domain_list, next) {
-   if (d->domain->ops == domain->domain->ops &&
-   d->enforce_cache_coherency ==
-   domain->enforce_cache_coherency) {
+   if (d->domain->ops == domain->domain->ops) {
iommu_detach_group(domain->domain, group->iommu_group);
if (!iommu_attach_group(d->domain,
group->iommu_group)) {
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/5] iommu: Ensure device has the same iommu_ops as the domain

2022-06-06 Thread Nicolin Chen via iommu
The core code should not call an iommu driver op with a struct device
parameter unless it knows that the dev_iommu_priv_get() for that struct
device was set up by the same driver. Otherwise in a mixed driver system
the iommu_priv could be cast to the wrong type.

Store the iommu_ops pointer in the iommu_domain and use it as a check to
validate that the struct device is correct before invoking any domain op
that accepts a struct device.

This allows removing the check of the domain op equality in VFIO.

Co-developed-by: Jason Gunthorpe 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/amd/iommu.c   | 1 +
 drivers/iommu/apple-dart.c  | 1 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 +
 drivers/iommu/arm/arm-smmu/arm-smmu.c   | 1 +
 drivers/iommu/arm/arm-smmu/qcom_iommu.c | 1 +
 drivers/iommu/exynos-iommu.c| 1 +
 drivers/iommu/fsl_pamu_domain.c | 1 +
 drivers/iommu/intel/iommu.c | 1 +
 drivers/iommu/iommu.c   | 4 
 drivers/iommu/ipmmu-vmsa.c  | 1 +
 drivers/iommu/msm_iommu.c   | 1 +
 drivers/iommu/mtk_iommu.c   | 1 +
 drivers/iommu/mtk_iommu_v1.c| 1 +
 drivers/iommu/omap-iommu.c  | 1 +
 drivers/iommu/rockchip-iommu.c  | 1 +
 drivers/iommu/s390-iommu.c  | 1 +
 drivers/iommu/sprd-iommu.c  | 1 +
 drivers/iommu/sun50i-iommu.c| 1 +
 drivers/iommu/tegra-gart.c  | 1 +
 drivers/iommu/tegra-smmu.c  | 1 +
 drivers/iommu/virtio-iommu.c| 1 +
 include/linux/iommu.h   | 2 ++
 22 files changed, 26 insertions(+)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index ad499658a6b6..679f7a265013 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2285,6 +2285,7 @@ const struct iommu_ops amd_iommu_ops = {
.pgsize_bitmap  = AMD_IOMMU_PGSIZES,
.def_domain_type = amd_iommu_def_domain_type,
.default_domain_ops = &(const struct iommu_domain_ops) {
+   .iommu_ops  = &amd_iommu_ops,
.attach_dev = amd_iommu_attach_device,
.detach_dev = amd_iommu_detach_device,
.map= amd_iommu_map,
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index e58dc310afd7..3d36d9a12aa7 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -775,6 +775,7 @@ static const struct iommu_ops apple_dart_iommu_ops = {
.pgsize_bitmap = -1UL, /* Restricted during dart probe */
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
+   .iommu_ops  = &apple_dart_iommu_ops,
.attach_dev = apple_dart_attach_dev,
.detach_dev = apple_dart_detach_dev,
.map_pages  = apple_dart_map_pages,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 6c393cd84925..471ceb60427c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2859,6 +2859,7 @@ static struct iommu_ops arm_smmu_ops = {
.pgsize_bitmap  = -1UL, /* Restricted during device attach */
.owner  = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
+   .iommu_ops  = &arm_smmu_ops,
.attach_dev = arm_smmu_attach_dev,
.map_pages  = arm_smmu_map_pages,
.unmap_pages= arm_smmu_unmap_pages,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 2ed3594f384e..52c2589a4deb 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1597,6 +1597,7 @@ static struct iommu_ops arm_smmu_ops = {
.pgsize_bitmap  = -1UL, /* Restricted during device attach */
.owner  = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
+   .iommu_ops  = &arm_smmu_ops,
.attach_dev = arm_smmu_attach_dev,
.map_pages  = arm_smmu_map_pages,
.unmap_pages= arm_smmu_unmap_pages,
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c 
b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index a8b63b855ffb..8806a621f81e 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -596,6 +596,7 @@ static const struct iommu_ops qcom_iommu_ops = {
.of_xlate   = qcom_iommu_of_xlate,
.pgsize_bitmap  = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
.default_domain_ops = &(const struct iommu_domain_ops) {
+   .iommu_o

[PATCH 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-06 Thread Nicolin Chen via iommu
Cases like VFIO wish to attach a device to an existing domain that was
not allocated specifically from the device. This raises a condition
where the IOMMU driver can fail the domain attach because the domain and
device are incompatible with each other.

This is a soft failure that can be resolved by using a different domain.

Provide a dedicated errno from the IOMMU driver during attach to indicate
that the attach failed because of domain incompatibility. EMEDIUMTYPE
is chosen because it is never used within the iommu subsystem today and
evokes a sense that the 'medium', aka the domain, is incompatible.

VFIO can use this to know attach is a soft failure and it should continue
searching. Otherwise the attach will be a hard failure and VFIO will
return the code to userspace.

Update all drivers to return EMEDIUMTYPE in their failure paths that are
related to domain incompatibility.

Add kdocs describing this behavior.

Suggested-by: Jason Gunthorpe 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/amd/iommu.c   |  2 +-
 drivers/iommu/apple-dart.c  |  4 ++--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  6 +++---
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |  2 +-
 drivers/iommu/intel/iommu.c |  4 ++--
 drivers/iommu/iommu.c   | 22 +
 drivers/iommu/ipmmu-vmsa.c  |  2 +-
 drivers/iommu/omap-iommu.c  |  2 +-
 drivers/iommu/virtio-iommu.c|  2 +-
 9 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 840831d5d2ad..ad499658a6b6 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1662,7 +1662,7 @@ static int attach_device(struct device *dev,
if (domain->flags & PD_IOMMUV2_MASK) {
struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
 
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
goto out;
 
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 8af0242a90d9..e58dc310afd7 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -495,10 +495,10 @@ static int apple_dart_attach_dev(struct iommu_domain 
*domain,
 
if (cfg->stream_maps[0].dart->force_bypass &&
domain->type != IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
if (!cfg->stream_maps[0].dart->supports_bypass &&
domain->type == IOMMU_DOMAIN_IDENTITY)
-   return -EINVAL;
+   return -EMEDIUMTYPE;
 
ret = apple_dart_finalize_domain(domain, cfg);
if (ret)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 88817a3376ef..6c393cd84925 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2424,20 +2424,20 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
"cannot attach to SMMU %s (upstream of %s)\n",
dev_name(smmu_domain->smmu->dev),
dev_name(smmu->dev));
-   ret = -ENXIO;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
dev_err(dev,
"cannot attach to incompatible domain (%u SSID bits != 
%u)\n",
smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   smmu_domain->stall_enabled != master->stall_enabled) {
dev_err(dev, "cannot attach to stall-%s domain\n",
smmu_domain->stall_enabled ? "enabled" : "disabled");
-   ret = -EINVAL;
+   ret = -EMEDIUMTYPE;
goto out_unlock;
}
 
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c 
b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 4c077c38fbd6..a8b63b855ffb 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -386,7 +386,7 @@ static int qcom_iommu_attach_dev(struct iommu_domain 
*domain, struct device *dev
"attached to domain on IOMMU %s\n",
dev_name(qcom_domain->iommu->dev),
dev_name(qcom_iommu->dev));
-   return -EINVAL;
+   return -EMEDIUMTYPE;
}
 
return 0;
diff --git a/

[PATCH 0/5] Simplify vfio_iommu_type1 attach/detach routine

2022-06-06 Thread Nicolin Chen via iommu
This is a preparatory series for IOMMUFD v2 patches. It enforces error
code -EMEDIUMTYPE in iommu_attach_device() and iommu_attach_group() when
an IOMMU domain and a device/group are incompatible. It also moves the
domain->ops check into __iommu_attach_device(). These allow VFIO iommu
code to simplify its group attachment routine, by avoiding the extra
IOMMU domain allocations and attach/detach sequences of the old code.

It is worth mentioning that the exact match for enforce_cache_coherency is
removed with this series, since there is very little value in doing that:
KVM won't be able to take advantage of it -- this just wastes domain memory.
Instead, we rely on Intel IOMMU driver taking care of that internally.

This is on github: https://github.com/nicolinc/iommufd/commits/vfio_iommu_attach

Jason Gunthorpe (1):
  vfio/iommu_type1: Prefer to reuse domains vs match enforced cache
coherency

Nicolin Chen (4):
  iommu: Return -EMEDIUMTYPE for incompatible domain and device/group
  iommu: Ensure device has the same iommu_ops as the domain
  vfio/iommu_type1: Clean up update_dirty_scope in detach_group()
  vfio/iommu_type1: Simplify group attachment

 drivers/iommu/amd/iommu.c   |   3 +-
 drivers/iommu/apple-dart.c  |   5 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |   7 +-
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |   1 +
 drivers/iommu/arm/arm-smmu/qcom_iommu.c |   3 +-
 drivers/iommu/exynos-iommu.c|   1 +
 drivers/iommu/fsl_pamu_domain.c |   1 +
 drivers/iommu/intel/iommu.c |   5 +-
 drivers/iommu/iommu.c   |  26 ++
 drivers/iommu/ipmmu-vmsa.c  |   3 +-
 drivers/iommu/msm_iommu.c   |   1 +
 drivers/iommu/mtk_iommu.c   |   1 +
 drivers/iommu/mtk_iommu_v1.c|   1 +
 drivers/iommu/omap-iommu.c  |   3 +-
 drivers/iommu/rockchip-iommu.c  |   1 +
 drivers/iommu/s390-iommu.c  |   1 +
 drivers/iommu/sprd-iommu.c  |   1 +
 drivers/iommu/sun50i-iommu.c|   1 +
 drivers/iommu/tegra-gart.c  |   1 +
 drivers/iommu/tegra-smmu.c  |   1 +
 drivers/iommu/virtio-iommu.c|   3 +-
 drivers/vfio/vfio_iommu_type1.c | 315 ++--
 include/linux/iommu.h   |   2 +
 23 files changed, 223 insertions(+), 164 deletions(-)

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/vt-d: Try info->iommu in device_to_iommu()

2022-05-13 Thread Nicolin Chen via iommu
On Fri, May 13, 2022 at 08:50:32AM -0300, Jason Gunthorpe wrote:

> > Perhaps, we can make device_to_iommu() only for probe_device() where the
> > per-device info data is not initialized yet. After probe_device(), iommu
> > and sid are retrieved through other helpers by looking up the device
> > info directly?
> 
> This design makes the most sense to me... Nicolin you said there was a
> case where attach was happening before probe though??

I was testing on top of our IOMMUFD dev branch actually, which
is still on 5.17 where priv/info seems to be set at the end of
first ->attach_dev() call. In 5.18, Baolu has already cleaned
away some code. So now, just eyeballing here, it should be set
at driver's ->probe_device() call, so it'd be safe to get info
in attach_dev (and in the new op that we are adding).

I am also rebasing our dev branch to more recent version btw.

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/vt-d: Try info->iommu in device_to_iommu()

2022-05-12 Thread Nicolin Chen via iommu
On Fri, May 13, 2022 at 11:32:11AM +0800, Baolu Lu wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022/5/13 08:32, Nicolin Chen wrote:
> > Local boot test and VFIO sanity test show that info->iommu can be
> > used in device_to_iommu() as a fast path. So this patch adds it.
> > 
> > Signed-off-by: Nicolin Chen 
> > ---
> >   drivers/iommu/intel/iommu.c | 5 +
> >   1 file changed, 5 insertions(+)
> > 
> > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> > index 2990f80c5e08..412fca5ab9cd 100644
> > --- a/drivers/iommu/intel/iommu.c
> > +++ b/drivers/iommu/intel/iommu.c
> > @@ -777,6 +777,7 @@ static bool iommu_is_dummy(struct intel_iommu *iommu, 
> > struct device *dev)
> >   struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 
> > *devfn)
> >   {
> >   struct dmar_drhd_unit *drhd = NULL;
> > + struct device_domain_info *info;
> >   struct pci_dev *pdev = NULL;
> >   struct intel_iommu *iommu;
> >   struct device *tmp;
> > @@ -786,6 +787,10 @@ struct intel_iommu *device_to_iommu(struct device 
> > *dev, u8 *bus, u8 *devfn)
> >   if (!dev)
> >   return NULL;
> > 
> > + info = dev_iommu_priv_get(dev);
> > + if (info)
> > + return info->iommu;
> 
> device_to_iommu() also returns device source id (@bus, @devfn).
> 
> Perhaps, we can make device_to_iommu() only for probe_device() where the
> per-device info data is not initialized yet. After probe_device(), iommu
> and sid are retrieved through other helpers by looking up the device
> info directly?

That should work I think. I was just not sure when the priv
could be unset. But it seems that you have cleaned up those
places other than probe/release_device() in recent version :)

Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/vt-d: Try info->iommu in device_to_iommu()

2022-05-12 Thread Nicolin Chen via iommu
Local boot test and VFIO sanity test show that info->iommu can be
used in device_to_iommu() as a fast path. So this patch adds it.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/intel/iommu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 2990f80c5e08..412fca5ab9cd 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -777,6 +777,7 @@ static bool iommu_is_dummy(struct intel_iommu *iommu, 
struct device *dev)
 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 {
struct dmar_drhd_unit *drhd = NULL;
+   struct device_domain_info *info;
struct pci_dev *pdev = NULL;
struct intel_iommu *iommu;
struct device *tmp;
@@ -786,6 +787,10 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 
*bus, u8 *devfn)
if (!dev)
return NULL;
 
+   info = dev_iommu_priv_get(dev);
+   if (info)
+   return info->iommu;
+
if (dev_is_pci(dev)) {
struct pci_dev *pf_pdev;
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] vfio: Remove VFIO_TYPE1_NESTING_IOMMU

2022-05-10 Thread Nicolin Chen via iommu
On Tue, May 10, 2022 at 01:55:24PM -0300, Jason Gunthorpe wrote:
> This control causes the ARM SMMU drivers to choose a stage 2
> implementation for the IO pagetable (vs the stage 1 usual default),
> however this choice has no visible impact to the VFIO user. Further qemu
> never implemented this and no other userspace user is known.
> 
> The original description in commit f5c9ecebaf2a ("vfio/iommu_type1: add
> new VFIO_TYPE1_NESTING_IOMMU IOMMU type") suggested this was to "provide
> SMMU translation services to the guest operating system" however the rest
> of the API to set the guest table pointer for the stage 1 was never
> completed, or at least never upstreamed, rendering this part useless dead
> code.
> 
> Since the current patches to enable nested translation, aka userspace page
> tables, rely on iommufd and will not use the enable_nesting()
> iommu_domain_op, remove this infrastructure. However, don't cut too deep
> into the SMMU drivers for now expecting the iommufd work to pick it up -
> we still need to create S2 IO page tables.
> 
> Remove VFIO_TYPE1_NESTING_IOMMU and everything under it including the
> enable_nesting iommu_domain_op.
> 
> Just in-case there is some userspace using this continue to treat
> requesting it as a NOP, but do not advertise support any more.
> 
> Signed-off-by: Jason Gunthorpe 

Sanity-tested with qemu-system-aarch64 using "iommu=nested-smmuv3"
(unmerged feature) and "iommu=none" strings on top of vfio next.

Tested-by: Nicolin Chen 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu: iommu_group_claim_dma_owner() must always assign a domain

2022-05-04 Thread Nicolin Chen via iommu
On Tue, May 03, 2022 at 09:11:02PM -0300, Jason Gunthorpe wrote:

> This is based on Robins draft here:
> 
> https://lore.kernel.org/linux-iommu/18831161-473f-e04f-4a81-1c7062ad1...@arm.com/
> 
> With some rework. I re-organized the call chains instead of introducing
> iommu_group_user_attached(), fixed a recursive locking for
> iommu_group_get_purgatory(), and made a proper commit message.
> 
> Still only compile tested, so RFCish.
> 
> Nicolin/Lu? What do you think, can you check it?

I am able to repro the issue on ARM64 and give this a quick try.
But the patch seems to need to include the following change too.

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 94d99768023c..9bb108d01baa 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2040,7 +2040,8 @@ static int __iommu_attach_group(struct iommu_domain 
*domain,
 {
int ret;
 
-   if (group->domain && group->domain != group->default_domain)
+   if (group->domain && group->domain != group->default_domain &&
+   group->domain != group->blocking_domain)
return -EBUSY;
 
ret = __iommu_group_for_each_dev(group, domain,

> @@ -2072,38 +2072,66 @@ static int iommu_group_do_detach_device(struct device 
> *dev, void *data)
>   return 0;
>  }
>  
> -static void __iommu_detach_group(struct iommu_domain *domain,
> -  struct iommu_group *group)
> +static int __iommu_group_attach_domain(struct iommu_group *group,
> +struct iommu_domain *new_domain)
>  {
>   int ret;
>  
> + if (group->domain == new_domain)
> + return 0;
> +
>   /*
> -  * If the group has been claimed already, do not re-attach the default
> -  * domain.
> +  * A NULL domain means to call the detach_dev() op. New drivers should
> +  * use a IOMMU_DOMAIN_IDENTITY domain instead of a NULL default_domain

an IOMMU_DOMAIN_IDENTITY?

Just a nit here. I will take a closer look at the change tomorrow.

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu-v3: Fix size calculation in arm_smmu_mm_invalidate_range()

2022-04-19 Thread Nicolin Chen via iommu
On Tue, Apr 19, 2022 at 08:10:34PM -0300, Jason Gunthorpe wrote:

> > - size_t size = end - start + 1;
> > + size_t size;
> > +
> > + /*
> > +  * The mm_types defines vm_end as the first byte after the end 
> > address,
> > +  * different from IOMMU subsystem using the last address of an address
> > +  * range. So do a simple translation here by calculating size 
> > correctly.
> > +  */
> > + size = end - start;
> 
> I would skip the comment though

It's a bit of highlight here to help us remember in the future,
per Robin's comments at my previous patch.

Thanks!
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/arm-smmu-v3: Fix size calculation in arm_smmu_mm_invalidate_range()

2022-04-19 Thread Nicolin Chen via iommu
The arm_smmu_mm_invalidate_range function is designed to be called
by mm core for Shared Virtual Addressing purpose between IOMMU and
CPU MMU. However, the ways of two subsystems defining their "end"
addresses are slightly different. IOMMU defines its "end" address
using the last address of an address range, while mm core defines
that using the following address of an address range:

include/linux/mm_types.h:
unsigned long vm_end;
/* The first byte after our end address ...

This mismatch resulted in an incorrect calculation for size so it
failed to be page-size aligned. Further, it caused a dead loop at
"while (iova < end)" check in __arm_smmu_tlb_inv_range function.

This patch fixes the issue by doing the calculation correctly.

Fixes: 2f7e8c553e98d ("iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops")
Cc: sta...@vger.kernel.org
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 22ddd05bbdcd..c623dae1e115 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -183,7 +183,14 @@ static void arm_smmu_mm_invalidate_range(struct 
mmu_notifier *mn,
 {
struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
-   size_t size = end - start + 1;
+   size_t size;
+
+   /*
+* The mm_types defines vm_end as the first byte after the end address,
+* different from IOMMU subsystem using the last address of an address
+* range. So do a simple translation here by calculating size correctly.
+*/
+   size = end - start;
 
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid,
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu-v3: Align size in __arm_smmu_tlb_inv_range

2022-04-19 Thread Nicolin Chen via iommu
On Tue, Apr 19, 2022 at 05:02:33PM -0300, Jason Gunthorpe wrote:

> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c 
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> > index d816759a6bcf..e280568bb513 100644
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> > @@ -183,7 +183,7 @@ static void arm_smmu_mm_invalidate_range(struct 
> > mmu_notifier *mn,
> >  {
> > struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
> > struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
> > -   size_t size = end - start + 1;
> > +   size_t size = end - start;
> 
> +1 to this bug fix. You should send a formal patch for stable with a fixes/etc
> 
> mmu notifiers uses 'end' not 'last' in alignment with how VMA's work:
> 
> include/linux/mm_types.h:   unsigned long vm_end;   /* The first 
> byte after our end address

Thanks for the review!

Yea, I will send a new patch.

Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu-v3: Align size in __arm_smmu_tlb_inv_range

2022-04-15 Thread Nicolin Chen via iommu
On Thu, Apr 14, 2022 at 11:32:38AM +0100, Robin Murphy wrote:
> > By looking at the call trace within arm_smmu_* functions:
> >__arm_smmu_tlb_inv_range
> >arm_smmu_tlb_inv_range_asid
> >arm_smmu_mm_invalidate_range
> >{from mm_notifier_* functions}
> > 
> > There's no address alignment check. Although I do think we
> > should fix the source who passes down the non-page-aligned
> > parameter, the SMMU driver shouldn't silently dead loop if
> > a set of unaligned inputs are given, IMHO.
> 
> Oh, sure, I'm not saying we definitely don't need to fix anything, I'd
> just like to get a better understanding of *what* we're fixing. I'd have
> (naively) expected the mm layer to give us page-aligned quantities even
> in the SVA notifier case, so if we've got a clear off-by-one somewhere
> in that path we should fix that before just blindly over-invalidating to
> paper over it; if we still also want to be robust at the SMMU driver end
> just in case, something like "if (WARN_ON(num_pages == 0)) num_pages =
> 1;" might be more appropriate. However if it turns out that we *can*
> actually end up with unsanitised input from some userspace unmap
> interface getting this far, then a silent fixup is the best option, but
> if so I'd still like to confirm that we're rounding in the same
> direction as whoever touched the pagetables (since it can't have been us).

I got some details:

[ 1008.868735] mmap: ---__do_munmap: range [a4fd, a4fe] len 
1
[ 1008.869183] ---arm_smmu_mm_invalidate_range: range [a4fd, 
a4fe] len 10001
[ 1009.056127] [ cut here ]
[ 1009.345791] WARNING: CPU: 0 PID: 131 at 
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c:189 
arm_smmu_mm_invalidate_range+0x4c/0xa8
[ 1009.605439] Modules linked in: nvidia(O)
[ 1009.692799] CPU: 0 PID: 131 Comm: dmaTest Tainted: GW  O  
5.15.0-tegra #30
[ 1009.865535] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
[ 1010.015871] pstate: 4045 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 1010.168191] pc : arm_smmu_mm_invalidate_range+0x4c/0xa8
[ 1010.283136] lr : arm_smmu_mm_invalidate_range+0x48/0xa8
[ 1010.397119] sp : 80001436fa60
[ 1010.469568] x29: 80001436fa60 x28: 1840be80 x27: 07b3fff0
[ 1010.629631] x26: 00e8589f0f43 x25: 1aa20288 x24: 
[ 1010.786432] x23: 138c1000 x22: 1783aa00 x21: 1c021380
[ 1010.98] x20: a4fd x19: 00010001 x18: 
[ 1011.101568] x17: 8e4b x16: 80001001 x15: 81a13a744e89
[ 1011.259839] x14: 00ce x13: 00ce x12: 
[ 1011.415616] x11: 0010 x10: 09c0 x9 : 80001436f7f0
[ 1011.575552] x8 : 13563420 x7 : 1feb9180 x6 : 35aa
[ 1011.731775] x5 :  x4 : 1feb29e0 x3 : 1feb5a78
[ 1011.887615] x2 : 66f9034381513000 x1 :  x0 : 0051
[ 1012.042944] Call trace:
[ 1012.097919]  arm_smmu_mm_invalidate_range+0x4c/0xa8
[ 1012.204480]  __mmu_notifier_invalidate_range+0x68/0xb0
[ 1012.318208]  unmap_page_range+0x730/0x740
[ 1012.405951]  unmap_single_vma+0x4c/0xb0
[ 1012.521920]  unmap_vmas+0x70/0xf0
[ 1012.633727]  unmap_region+0xb0/0x110
[ 1012.753856]  __do_munmap+0x36c/0x460
[ 1012.855168]  __vm_munmap+0x70/0xd0
[ 1012.929791]  __arm64_sys_munmap+0x34/0x50
[ 1013.018944]  invoke_syscall.constprop.0+0x4c/0xe0
[ 1013.122047]  do_el0_svc+0x50/0x150
[ 1013.196415]  el0_svc+0x28/0xc0
[ 1013.262848]  el0t_64_sync_handler+0xb0/0xc0
[ 1013.355584]  el0t_64_sync+0x1a0/0x1a4
[ 1013.435903] ---[ end trace c95eb7dc909f29ba ]---

We can see from call trace and logs that the invalidation range
comes from __do_munmap() with end address = 0xa4fe.

The problem seems to be the difference between how mm and iommu
cores express their end addresses: mm core calculates end using
start + size, while iommu core subtracts 1 from that. So that
end address 0xa4fe should be 0xa4fd in iommu's
way.

Perhaps we should simply do something like the following?

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index d816759a6bcf..e280568bb513 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -183,7 +183,7 @@ static void arm_smmu_mm_invalidate_range(struct 
mmu_notifier *mn,
 {
struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
-   size_t size = end - start + 1;
+   size_t size = end - start;

if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid,

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org

Re: [PATCH] iommu/arm-smmu-v3: Align size in __arm_smmu_tlb_inv_range

2022-04-14 Thread Nicolin Chen via iommu
On Thu, Apr 14, 2022 at 11:32:38AM +0100, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022-04-13 21:19, Nicolin Chen wrote:
> > Hi Robin,
> > 
> > On Wed, Apr 13, 2022 at 02:40:31PM +0100, Robin Murphy wrote:
> > > On 2022-04-13 05:17, Nicolin Chen wrote:
> > > > To calculate num_pages, the size should be aligned with
> > > > "page size", determined by the tg value. Otherwise, its
> > > > following "while (iova < end)" might become an infinite
> > > > loop if unaligned size is slightly greater than 1 << tg.
> > > 
> > > Hmm, how does a non-page-aligned invalidation request get generated in
> > > the first place?
> > 
> > I don't have the testing environment because it was a bug,
> > reported by a client who uses SVA feature on top of SMMU.
> > 
> > But judging from the log, the non-page-aligned inv request
> > was coming from an likely incorrect end address, e.g.
> >   { start = 0xff1, end = 0xff2 }
> > So the size turned out to be 0x10001, unaligned.
> > 
> > I don't have a full call trace on hand right now to see if
> > upper callers are doing something wrong when calculate the
> > end address, though I've asked the owner to check.
> > 
> > By looking at the call trace within arm_smmu_* functions:
> >__arm_smmu_tlb_inv_range
> >arm_smmu_tlb_inv_range_asid
> >arm_smmu_mm_invalidate_range
> >{from mm_notifier_* functions}
> > 
> > There's no address alignment check. Although I do think we
> > should fix the source who passes down the non-page-aligned
> > parameter, the SMMU driver shouldn't silently dead loop if
> > a set of unaligned inputs are given, IMHO.
> 
> Oh, sure, I'm not saying we definitely don't need to fix anything, I'd
> just like to get a better understanding of *what* we're fixing. I'd have
> (naively) expected the mm layer to give us page-aligned quantities even
> in the SVA notifier case, so if we've got a clear off-by-one somewhere
> in that path we should fix that before just blindly over-invalidating to
> paper over it;

I see. Yea, definitely should fix the source too. I've asked
the owner to track it down. I sent the change, thinking that
we could do it in parallel.

> if we still also want to be robust at the SMMU driver end
> just in case, something like "if (WARN_ON(num_pages == 0)) num_pages =
> 1;" might be more appropriate. However if it turns out that we *can*
> actually end up with unsanitised input from some userspace unmap
> interface getting this far, then a silent fixup is the best option, but
> if so I'd still like to confirm that we're rounding in the same
> direction as whoever touched the pagetables (since it can't have been us).

I see. I'll give an update once I have the debugging result.

Thanks!
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu-v3: Align size in __arm_smmu_tlb_inv_range

2022-04-13 Thread Nicolin Chen via iommu
Hi Robin,

On Wed, Apr 13, 2022 at 02:40:31PM +0100, Robin Murphy wrote:
> On 2022-04-13 05:17, Nicolin Chen wrote:
> > To calculate num_pages, the size should be aligned with
> > "page size", determined by the tg value. Otherwise, its
> > following "while (iova < end)" might become an infinite
> > loop if unaligned size is slightly greater than 1 << tg.
> 
> Hmm, how does a non-page-aligned invalidation request get generated in
> the first place?

I don't have the testing environment because it was a bug,
reported by a client who uses SVA feature on top of SMMU.

But judging from the log, the non-page-aligned inv request
was coming from a likely incorrect end address, e.g.
{ start = 0xff1, end = 0xff2 }
So the size turned out to be 0x10001, unaligned.

I don't have a full call trace on hand right now to see if
upper callers are doing something wrong when calculating the
end address, though I've asked the owner to check.

By looking at the call trace within arm_smmu_* functions:
  __arm_smmu_tlb_inv_range
  arm_smmu_tlb_inv_range_asid
  arm_smmu_mm_invalidate_range
  {from mm_notifier_* functions}

There's no address alignment check. Although I do think we
should fix the source who passes down the non-page-aligned
parameter, the SMMU driver shouldn't silently dead loop if
a set of unaligned inputs are given, IMHO.

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/arm-smmu-v3: Align size in __arm_smmu_tlb_inv_range

2022-04-12 Thread Nicolin Chen via iommu
To calculate num_pages, the size should be aligned with
"page size", determined by the tg value. Otherwise, its
following "while (iova < end)" might become an infinite
loop if unaligned size is slightly greater than 1 << tg.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 627a3ed5ee8f..8249dad5ae44 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1879,7 +1879,7 @@ static void __arm_smmu_tlb_inv_range(struct 
arm_smmu_cmdq_ent *cmd,
/* Determine what level the granule is at */
cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
 
-   num_pages = size >> tg;
+   num_pages = ALIGN(size, 1 << tg) >> tg;
}
 
cmds.num = 0;
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 5/5] iommu/nvidia-grace-cmdqv: Limit CMDs for guest owned VINTF

2021-12-27 Thread Nicolin Chen via iommu
On Fri, Dec 24, 2021 at 12:13:57PM +, Robin Murphy wrote:

> > > > > > @@ -176,6 +177,24 @@ struct arm_smmu_cmdq 
> > > > > > *nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device *smmu)
> > > > > > if (!FIELD_GET(VINTF_STATUS, vintf0->status))
> > > > > > return >cmdq;
> > > > > > 
> > > > > > + /* Check for supported CMDs if VINTF is owned by guest (not 
> > > > > > hypervisor) */
> > > > > > + if (!FIELD_GET(VINTF_HYP_OWN, vintf0->cfg)) {
> > > > > > + u64 opcode = (n) ? FIELD_GET(CMDQ_0_OP, cmds[0]) : 
> > > > > > CMDQ_OP_CMD_SYNC;
> > > > > 
> > > > > I'm not sure there was ever a conscious design decision that batches
> > > > > only ever contain one type of command - if something needs to start
> > > > 
> > > > Hmm, I think that's a good catch -- as it could be a potential
> > > > bug here. Though the SMMUv3 driver currently seems to use loop
> > > > by adding one type of cmds to any batch and submitting it right
> > > > away so checking opcode of cmds[0] alone seems to be sufficient
> > > > at this moment, yet it might not be so in the future. We'd need
> > > > to apply certain constraints on the type of cmds in the batch in
> > > > SMMUv3 driver upon smmu->nvidia_grace_cmdqv, or fall back to the
> > > > SMMUv3's CMDQ pathway here if one of cmds is not supported.
> > > > 
> > > > > depending on that behaviour then that dependency probably wants to be
> > > > > clearly documented. Also, a sync on its own gets trapped to the main
> > > > > cmdq but a sync on the end of a batch of TLBIs or ATCIs goes to the
> > > > > VCMDQ, huh?
> > > > 
> > > > Yea...looks like an implication again where cmds must have SYNC
> > > > at the end of the batch. I will see if any simple change can be
> > > > done to fix these two. If you have suggestions for them, I would
> > > > love to hear too.
> > > 
> > > Can you explain the current logic here? It's not entirely clear to me
> > > whether the VCMDQ is actually meant to support CMD_SYNC or not.
> > 
> > Yes. It's designed to take CMD_SYNC in same queue too. Though it
> > also has features, such as HW-inserted-SYNC when scheduler moves
> > away from the current queue or when the number of cmds in vcmdq
> > meets a MAX-BATCH-SIZE setting (in config register), yet it'd be
> > safer for software to ensure the CMD_SYNC is inserted to the end
> > of the batch.
> 
> OK, so the bug here is just that we're missing CMDQ_OP_CMD_SYNC from the
> switch statement? That's reassuring at least. Having to trap to the host
> to issue a sync would be horrible, and largely defeat the point of the
> whole exercise.

Hmm..I'm not sure why we need CMD_SYNC in the switch statement.
I thought that you pointed out a potential corner case where a
batch could be submitted separately, e.g. Batch A {TLBI_NH_VAx2}
and then Batch B {CMD_SYNC}. Right now the SMMUv3 driver submits
all TLBI commands with sync=true, so we don't run into a problem
so far.

> It's not generally much use to software to know that the hardware may or
> may not have automatically inserted syncs at arbitrary points in the
> timeline; certainly for our flow in Linux, which I don't think is
> atypical, we need to know for sure that specific invalidation commands
> have completed before we can safely reuse resources associated with the
> invalidated translations, and the only way to guarantee that is to
> explicitly observe the consumption of a CMD_SYNC from a later queue index.

Hmm, if I capture it correctly, for the potential issue that I
listed above, we could simply ensure each TLBI batch to contain
TLBI commands only and to have CMD_SYNC at the end.

> > > > > > +
> > > > > > + /* List all supported CMDs for vintf->cmdq pathway */
> > > > > > + switch (opcode) {
> > > > > > + case CMDQ_OP_TLBI_NH_ASID:
> > > > > > + case CMDQ_OP_TLBI_NH_VA:
> > > > > > + case CMDQ_OP_TLBI_S12_VMALL:
> > > > > > + case CMDQ_OP_TLBI_S2_IPA:
> > > > > 
> > > > > Fun! Can the guest invalidate any VMID it feels like, or is there some
> > > > > additional magic on the host side that we're missing here?
> > > > 
> > > > Yes. VINTF has a register for SW to program VMID so that the HW
> > > > can replace VMIDs in the cmds in the VCMDQs of that VINTF with
> > > > the programmed VMID. That was the reason why we had numbers of
> > > > patches in v2 to route the VMID between guest and host.
> > > > 
> > > > > > + case CMDQ_OP_ATC_INV:
> > > > > > + break;
> > > > > Ditto for StreamID here.
> > > > 
> > > > Yes. StreamID works similarly by the HW: each VINTF provides us
> > > > 16 pairs of MATCH+REPLACE registers to program host and guest's
> > > > StreamIDs. Our previous mdev implementation in v2 can be a good
> > > > reference code:
> > > > https://lore.kernel.org/kvm/20210831101549.237151fa.alex.william...@redhat.com/T/#m903a1b44935d9e0376439a0c63e832eb464fbaee
> > > 
> > > Ah, sorry, I haven't had the 

Re: [PATCH v3 5/5] iommu/nvidia-grace-cmdqv: Limit CMDs for guest owned VINTF

2021-12-24 Thread Nicolin Chen via iommu
On Thu, Dec 23, 2021 at 11:14:17AM +, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2021-12-22 22:52, Nicolin Chen wrote:
> > On Wed, Dec 22, 2021 at 12:32:29PM +, Robin Murphy wrote:
> > > External email: Use caution opening links or attachments
> > > 
> > > 
> > > On 2021-11-19 07:19, Nicolin Chen via iommu wrote:
> > > > When VCMDQs are assigned to a VINTF that is owned by a guest, not
> > > > hypervisor (HYP_OWN bit is unset), only TLB invalidation commands
> > > > are supported. This requires get_cmd() function to scan the input
> > > > cmd before selecting cmdq between smmu->cmdq and vintf->vcmdq, so
> > > > unsupported commands can still go through emulated smmu->cmdq.
> > > > 
> > > > Also the guest shouldn't have HYP_OWN bit being set regardless of
> > > > guest kernel driver writing it or not, i.e. the user space driver
> > > > running in the host OS should wire this bit to zero when trapping
> > > > a write access to this VINTF_CONFIG register from a guest kernel.
> > > > So instead of using the existing regval, this patch reads out the
> > > > register value explicitly to cache in vintf->cfg.
> > > > 
> > > > Signed-off-by: Nicolin Chen 
> > > > ---
> > > >drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  6 ++--
> > > >drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  5 +--
> > > >.../arm/arm-smmu-v3/nvidia-grace-cmdqv.c  | 32 
> > > > +--
> > > >3 files changed, 36 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> > > > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > > index b1182dd825fd..73941ccc1a3e 100644
> > > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > > @@ -337,10 +337,10 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, 
> > > > struct arm_smmu_cmdq_ent *ent)
> > > >return 0;
> > > >}
> > > > 
> > > > -static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device 
> > > > *smmu)
> > > > +static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device 
> > > > *smmu, u64 *cmds, int n)
> > > >{
> > > >if (smmu->nvidia_grace_cmdqv)
> > > > - return nvidia_grace_cmdqv_get_cmdq(smmu);
> > > > + return nvidia_grace_cmdqv_get_cmdq(smmu, cmds, n);
> > > > 
> > > >return >cmdq;
> > > >}
> > > > @@ -747,7 +747,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct 
> > > > arm_smmu_device *smmu,
> > > >u32 prod;
> > > >unsigned long flags;
> > > >bool owner;
> > > > - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
> > > > + struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu, cmds, n);
> > > >struct arm_smmu_ll_queue llq, head;
> > > >int ret = 0;
> > > > 
> > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
> > > > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > > index 24f93444aeeb..085c775c2eea 100644
> > > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > > @@ -832,7 +832,8 @@ struct nvidia_grace_cmdqv *
> > > >nvidia_grace_cmdqv_acpi_probe(struct arm_smmu_device *smmu,
> > > >  struct acpi_iort_node *node);
> > > >int nvidia_grace_cmdqv_device_reset(struct arm_smmu_device *smmu);
> > > > -struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct 
> > > > arm_smmu_device *smmu);
> > > > +struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct 
> > > > arm_smmu_device *smmu,
> > > > +   u64 *cmds, int n);
> > > >#else /* CONFIG_NVIDIA_GRACE_CMDQV */
> > > >static inline struct nvidia_grace_cmdqv *
> > > >nvidia_grace_cmdqv_acpi_probe(struct arm_smmu_device *smmu,
> > > > @@ -847,7 +848,7 @@ static inline int 
> > > > nvidia_grace_cmdqv_device_reset(struct arm_smmu_device *smmu)
> > > >}
> > > > 
> > > >static inline struct arm_

Re: [PATCH v3 5/5] iommu/nvidia-grace-cmdqv: Limit CMDs for guest owned VINTF

2021-12-22 Thread Nicolin Chen via iommu
On Wed, Dec 22, 2021 at 12:32:29PM +, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2021-11-19 07:19, Nicolin Chen via iommu wrote:
> > When VCMDQs are assigned to a VINTF that is owned by a guest, not
> > hypervisor (HYP_OWN bit is unset), only TLB invalidation commands
> > are supported. This requires get_cmd() function to scan the input
> > cmd before selecting cmdq between smmu->cmdq and vintf->vcmdq, so
> > unsupported commands can still go through emulated smmu->cmdq.
> > 
> > Also the guest shouldn't have HYP_OWN bit being set regardless of
> > guest kernel driver writing it or not, i.e. the user space driver
> > running in the host OS should wire this bit to zero when trapping
> > a write access to this VINTF_CONFIG register from a guest kernel.
> > So instead of using the existing regval, this patch reads out the
> > register value explicitly to cache in vintf->cfg.
> > 
> > Signed-off-by: Nicolin Chen 
> > ---
> >   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  6 ++--
> >   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  5 +--
> >   .../arm/arm-smmu-v3/nvidia-grace-cmdqv.c  | 32 +--
> >   3 files changed, 36 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index b1182dd825fd..73941ccc1a3e 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -337,10 +337,10 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct 
> > arm_smmu_cmdq_ent *ent)
> >   return 0;
> >   }
> > 
> > -static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device 
> > *smmu)
> > +static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device 
> > *smmu, u64 *cmds, int n)
> >   {
> >   if (smmu->nvidia_grace_cmdqv)
> > - return nvidia_grace_cmdqv_get_cmdq(smmu);
> > + return nvidia_grace_cmdqv_get_cmdq(smmu, cmds, n);
> > 
> >   return >cmdq;
> >   }
> > @@ -747,7 +747,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct 
> > arm_smmu_device *smmu,
> >   u32 prod;
> >   unsigned long flags;
> >   bool owner;
> > - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
> > + struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu, cmds, n);
> >   struct arm_smmu_ll_queue llq, head;
> >   int ret = 0;
> > 
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > index 24f93444aeeb..085c775c2eea 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > @@ -832,7 +832,8 @@ struct nvidia_grace_cmdqv *
> >   nvidia_grace_cmdqv_acpi_probe(struct arm_smmu_device *smmu,
> > struct acpi_iort_node *node);
> >   int nvidia_grace_cmdqv_device_reset(struct arm_smmu_device *smmu);
> > -struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device 
> > *smmu);
> > +struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device 
> > *smmu,
> > +   u64 *cmds, int n);
> >   #else /* CONFIG_NVIDIA_GRACE_CMDQV */
> >   static inline struct nvidia_grace_cmdqv *
> >   nvidia_grace_cmdqv_acpi_probe(struct arm_smmu_device *smmu,
> > @@ -847,7 +848,7 @@ static inline int 
> > nvidia_grace_cmdqv_device_reset(struct arm_smmu_device *smmu)
> >   }
> > 
> >   static inline struct arm_smmu_cmdq *
> > -nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device *smmu)
> > +nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device *smmu, u64 *cmds, int n)
> >   {
> >   return NULL;
> >   }
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c 
> > b/drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c
> > index c0d7351f13e2..71f6bc684e64 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c
> > @@ -166,7 +166,8 @@ static int nvidia_grace_cmdqv_init_one_vcmdq(struct 
> > nvidia_grace_cmdqv *cmdqv,
> >   return arm_smmu_cmdq_init(cmdqv->smmu, cmdq);
> >   }
> > 
> > -struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device 
> > *smmu)
> > +struct arm_smmu_cmdq *
> > +nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device *smmu, u64 *cmds, int n)
> >   {
> > 

Re: [PATCH v3 4/5] iommu/arm-smmu-v3: Add host support for NVIDIA Grace CMDQ-V

2021-12-21 Thread Nicolin Chen via iommu
On Tue, Dec 21, 2021 at 06:55:20PM +, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2021-12-20 19:27, Nicolin Chen wrote:
> > Hi Robin,
> > 
> > Thank you for the reply!
> > 
> > On Mon, Dec 20, 2021 at 06:42:26PM +, Robin Murphy wrote:
> > > On 2021-11-19 07:19, Nicolin Chen wrote:
> > > > From: Nate Watterson 
> > > > 
> > > > NVIDIA's Grace Soc has a CMDQ-Virtualization (CMDQV) hardware,
> > > > which extends the standard ARM SMMU v3 IP to support multiple
> > > > VCMDQs with virtualization capabilities. In-kernel of host OS,
> > > > they're used to reduce contention on a single queue. In terms
> > > > of command queue, they are very like the standard CMDQ/ECMDQs,
> > > > but only support CS_NONE in the CS field of CMD_SYNC command.
> > > > 
> > > > This patch adds a new nvidia-grace-cmdqv file and inserts its
> > > > structure pointer into the existing arm_smmu_device, and then
> > > > adds related function calls in the arm-smmu-v3 driver.
> > > > 
> > > > In the CMDQV driver itself, this patch only adds minimal part
> > > > for host kernel support. Upon probe(), VINTF0 is reserved for
> > > > in-kernel use. And some of the VCMDQs are assigned to VINTF0.
> > > > Then the driver will select one of VCMDQs in the VINTF0 based
> > > > on the CPU currently executing, to issue commands.
> > > 
> > > Is there a tangible difference to DMA API or VFIO performance?
> > 
> > Our testing environment is currently running on a single-core
> > CPU, so unfortunately we don't have a perf data at this point.
> 
> OK, as for the ECMDQ patches I think we'll need some investigation with
> real workloads to judge whether we can benefit from these things enough
> to justify the complexity, and whether the design is right.
> 
> My gut feeling is that if these multi-queue schemes really can live up
> to their promise of making contention negligible, then they should
> further stand to benefit from bypassing the complex lock-free command
> batching in favour of something more lightweight, which could change the
> direction of much of the refactoring.

Makes sense. We will share our perf data once we have a certain
level of support on our test environment.

> > > [...]
> > > > +struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct 
> > > > arm_smmu_device *smmu)
> > > > +{
> > > > + struct nvidia_grace_cmdqv *cmdqv = smmu->nvidia_grace_cmdqv;
> > > > + struct nvidia_grace_cmdqv_vintf *vintf0 = >vintf0;
> > > > + u16 qidx;
> > > > +
> > > > + /* Check error status of vintf0 */
> > > > + if (!FIELD_GET(VINTF_STATUS, vintf0->status))
> > > > + return >cmdq;
> > > > +
> > > > + /*
> > > > +  * Select a vcmdq to use. Here we use a temporal solution to
> > > > +  * balance out traffic on cmdq issuing: each cmdq has its own
> > > > +  * lock, if all cpus issue cmdlist using the same cmdq, only
> > > > +  * one CPU at a time can enter the process, while the others
> > > > +  * will be spinning at the same lock.
> > > > +  */
> > > > + qidx = smp_processor_id() % cmdqv->num_vcmdqs_per_vintf;
> > > 
> > > How does ordering work between queues? Do they follow a global order
> > > such that a sync on any queue is guaranteed to complete all prior
> > > commands on all queues?
> > 
> > CMDQV internal scheduler would insert a SYNC when (for example)
> > switching from VCMDQ0 to VCMDQ1 while last command in VCMDQ0 is
> > not SYNC. HW has a configuration bit in the register to disable
> > this feature, which is by default enabled.
> 
> Interesting, thanks. So it sounds like this is something you can get
> away with for the moment, but may need to revisit once people chasing
> real-world performance start wanting to turn that bit off.

Yea, we have limitations on both testing setup and available
clients for an in-depth perf measurement at this moment. But
we surely will do as you mentioned. Anyway, this is just for
initial support.

> > > The challenge to make ECMDQ useful to Linux is how to make sure that all
> > > the commands expected to be within scope of a future CMND_SYNC plus that
> > > sync itself all get issued on the same queue, so I'd be mildly surprised
> > > if you didn't have the same problem.
> > 
> > PATCH-3 in this

Re: [PATCH v3 4/5] iommu/arm-smmu-v3: Add host support for NVIDIA Grace CMDQ-V

2021-12-20 Thread Nicolin Chen via iommu
Hi Robin,

Thank you for the reply!

On Mon, Dec 20, 2021 at 06:42:26PM +, Robin Murphy wrote:
> On 2021-11-19 07:19, Nicolin Chen wrote:
> > From: Nate Watterson 
> > 
> > NVIDIA's Grace Soc has a CMDQ-Virtualization (CMDQV) hardware,
> > which extends the standard ARM SMMU v3 IP to support multiple
> > VCMDQs with virtualization capabilities. In-kernel of host OS,
> > they're used to reduce contention on a single queue. In terms
> > of command queue, they are very like the standard CMDQ/ECMDQs,
> > but only support CS_NONE in the CS field of CMD_SYNC command.
> > 
> > This patch adds a new nvidia-grace-cmdqv file and inserts its
> > structure pointer into the existing arm_smmu_device, and then
> > adds related function calls in the arm-smmu-v3 driver.
> > 
> > In the CMDQV driver itself, this patch only adds minimal part
> > for host kernel support. Upon probe(), VINTF0 is reserved for
> > in-kernel use. And some of the VCMDQs are assigned to VINTF0.
> > Then the driver will select one of VCMDQs in the VINTF0 based
> > on the CPU currently executing, to issue commands.
> 
> Is there a tangible difference to DMA API or VFIO performance?

Our testing environment is currently running on a single-core
CPU, so unfortunately we don't have any perf data at this point.

> [...]
> > +struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device 
> > *smmu)
> > +{
> > + struct nvidia_grace_cmdqv *cmdqv = smmu->nvidia_grace_cmdqv;
> > + struct nvidia_grace_cmdqv_vintf *vintf0 = >vintf0;
> > + u16 qidx;
> > +
> > + /* Check error status of vintf0 */
> > + if (!FIELD_GET(VINTF_STATUS, vintf0->status))
> > + return >cmdq;
> > +
> > + /*
> > +  * Select a vcmdq to use. Here we use a temporal solution to
> > +  * balance out traffic on cmdq issuing: each cmdq has its own
> > +  * lock, if all cpus issue cmdlist using the same cmdq, only
> > +  * one CPU at a time can enter the process, while the others
> > +  * will be spinning at the same lock.
> > +  */
> > + qidx = smp_processor_id() % cmdqv->num_vcmdqs_per_vintf;
> 
> How does ordering work between queues? Do they follow a global order
> such that a sync on any queue is guaranteed to complete all prior
> commands on all queues?

CMDQV internal scheduler would insert a SYNC when (for example)
switching from VCMDQ0 to VCMDQ1 while last command in VCMDQ0 is
not SYNC. HW has a configuration bit in the register to disable
this feature, which is by default enabled.

> The challenge to make ECMDQ useful to Linux is how to make sure that all
> the commands expected to be within scope of a future CMND_SYNC plus that
> sync itself all get issued on the same queue, so I'd be mildly surprised
> if you didn't have the same problem.

PATCH-3 in this series actually helps align the command queues,
between issued commands and SYNC, if bool sync == true. Yet, if
doing something like issue->issue->issue_with_sync, it could be
trickier.

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-09 Thread Nicolin Chen via iommu
On Thu, Dec 09, 2021 at 10:58:15PM +0300, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 09.12.2021 22:51, Nicolin Chen пишет:
> > On Thu, Dec 09, 2021 at 10:40:42PM +0300, Dmitry Osipenko wrote:
> >> External email: Use caution opening links or attachments
> >>
> >>
> >> 09.12.2021 22:32, Nicolin Chen пишет:
> >>> On Thu, Dec 09, 2021 at 05:47:18PM +0300, Dmitry Osipenko wrote:
> >>>> External email: Use caution opening links or attachments
> >>>>
> >>>>
> >>>> 09.12.2021 10:38, Nicolin Chen пишет:
> >>>>> @@ -545,6 +719,15 @@ static void tegra_smmu_detach_as(struct tegra_smmu 
> >>>>> *smmu,
> >>>>>   if (group->swgrp != swgrp)
> >>>>>   continue;
> >>>>>   group->as = NULL;
> >>>>> +
> >>>>> + if (smmu->debugfs_mappings) {
> >>>> Do we really need this check?
> >>>>
> >>>> Looks like all debugfs_create_dir() usages in this driver are incorrect,
> >>>> that function never returns NULL. Please fix this.
> >>> debugfs_create_dir returns ERR_PTR on failure. So here should be
> >>> to check !IS_ERR. Thanks for pointing it out!
> >>>
> >>
> >> All debugfs functions handle IS_ERR(). GregKH removes all such checks
> >> all over the kernel. So the check shouldn't be needed at all, please
> >> remove it if it's unneeded or prove that it's needed.
> >
> > debugfs_create_file can handle a NULL parent, but not ERR_PTR one,
> > and then it puts the new node under the root. So either passing an
> > ERR_PTR parent or creating orphan nodes here doesn't sound good...
> >
> 
> What makes you say so? Please show the exact source code that will cause
> the problem.
> 
> The smmu->debugfs_mappings can't ever be NULL and debugfs_create_file
> handles the ERR_PTR [1][2].

Ah...my tool jumps to start_creating in fs/tracefs/inode.c instead.

Thanks for the reply. I will remove the if line then.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v8 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-09 Thread Nicolin Chen via iommu
On Thu, Dec 09, 2021 at 10:58:32PM +0300, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 09.12.2021 22:54, Nicolin Chen пишет:
> > On Thu, Dec 09, 2021 at 10:44:25PM +0300, Dmitry Osipenko wrote:
> >> External email: Use caution opening links or attachments
> >>
> >>
> >> 09.12.2021 22:24, Nicolin Chen пишет:
> >>> On Thu, Dec 09, 2021 at 05:49:09PM +0300, Dmitry Osipenko wrote:
> >>>> External email: Use caution opening links or attachments
> >>>>
> >>>>
> >>>> 09.12.2021 10:38, Nicolin Chen пишет:
> >>>>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned 
> >>>>> int pt_index)
> >>>>> +{
> >>>>> + return (pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> >>>>> +(pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> >>>>> +}
> >>>>
> >>>> I'd change the return type to u32 here, for consistency.
> >>>
> >>> The whole file defines iova using "unsigned long", which I see
> >>> as the consistency... If we change it to u32, it'd be probably
> >>> necessary to change all iova types to u32 too... So I'd rather
> >>> keep it "unsigned long" here. If you see a big necessity to do
> >>> that, an additional patch would be nicer IMHO.
> >>>
> >>
> >> In general IOVA is "unsigned long", of course. But in case of Tegra
> >> SMMU, we know that is always u32.
> >>
> >> Alright, I see now that there are other places in the driver code that
> >> use "unsigned long", so need to change it in this patch.
> >
> > I think we should do that in a separate patch that changes the iova
> > type in the entire tegra-smmu file. I can add one in this series, if
> > this makes you happy...
> >
> 
> Please keep it "unsigned long", no need for extra patches.

Oh, I guess that "so need to change it in this patch" should be
"so (no) need to change it in this patch" then?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v8 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-09 Thread Nicolin Chen via iommu
On Thu, Dec 09, 2021 at 10:44:25PM +0300, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 09.12.2021 22:24, Nicolin Chen пишет:
> > On Thu, Dec 09, 2021 at 05:49:09PM +0300, Dmitry Osipenko wrote:
> >> External email: Use caution opening links or attachments
> >>
> >>
> >> 09.12.2021 10:38, Nicolin Chen пишет:
> >>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned 
> >>> int pt_index)
> >>> +{
> >>> + return (pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> >>> +(pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> >>> +}
> >>
> >> I'd change the return type to u32 here, for consistency.
> >
> > The whole file defines iova using "unsigned long", which I see
> > as the consistency... If we change it to u32, it'd be probably
> > necessary to change all iova types to u32 too... So I'd rather
> > keep it "unsigned long" here. If you see a big necessity to do
> > that, an additional patch would be nicer IMHO.
> >
> 
> In general IOVA is "unsigned long", of course. But in case of Tegra
> SMMU, we know that is always u32.
> 
> Alright, I see now that there are other places in the driver code that
> use "unsigned long", so need to change it in this patch.

I think we should do that in a separate patch that changes the iova
type in the entire tegra-smmu file. I can add one in this series, if
this makes you happy...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v8 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-09 Thread Nicolin Chen via iommu
On Thu, Dec 09, 2021 at 10:40:42PM +0300, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 09.12.2021 22:32, Nicolin Chen пишет:
> > On Thu, Dec 09, 2021 at 05:47:18PM +0300, Dmitry Osipenko wrote:
> >> External email: Use caution opening links or attachments
> >>
> >>
> >> 09.12.2021 10:38, Nicolin Chen пишет:
> >>> @@ -545,6 +719,15 @@ static void tegra_smmu_detach_as(struct tegra_smmu 
> >>> *smmu,
> >>>   if (group->swgrp != swgrp)
> >>>   continue;
> >>>   group->as = NULL;
> >>> +
> >>> + if (smmu->debugfs_mappings) {
> >> Do we really need this check?
> >>
> >> Looks like all debugfs_create_dir() usages in this driver are incorrect,
> >> that function never returns NULL. Please fix this.
> > debugfs_create_dir returns ERR_PTR on failure. So here should be
> > to check !IS_ERR. Thanks for pointing it out!
> >
> 
> All debugfs functions handle IS_ERR(). GregKH removes all such checks
> all over the kernel. So the check shouldn't be needed at all, please
> remove it if it's unneeded or prove that it's needed.

debugfs_create_file can handle a NULL parent, but not ERR_PTR one,
and then it puts the new node under the root. So either passing an
ERR_PTR parent or creating orphan nodes here doesn't sound good...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v8 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-09 Thread Nicolin Chen via iommu
On Thu, Dec 09, 2021 at 05:47:18PM +0300, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 09.12.2021 10:38, Nicolin Chen пишет:
> > @@ -545,6 +719,15 @@ static void tegra_smmu_detach_as(struct tegra_smmu 
> > *smmu,
> >   if (group->swgrp != swgrp)
> >   continue;
> >   group->as = NULL;
> > +
> > + if (smmu->debugfs_mappings) {
> 
> Do we really need this check?
> 
> Looks like all debugfs_create_dir() usages in this driver are incorrect,
> that function never returns NULL. Please fix this.

debugfs_create_dir returns ERR_PTR on failure. So the check here
should be !IS_ERR. Thanks for pointing it out!

> > + struct dentry *d;
> 
> The file name is wrong here.
> 
> if (group->soc)
> name = group->soc->name;
> else
> name = group->swgrp->name;

Yea, I'll add this.

> 
> > + d = debugfs_lookup(group->swgrp->name,
> > +smmu->debugfs_mappings);
> > + debugfs_remove(d);
> > + }
> 
> This now looks problematic to me. You created debugfs file when the
> first member of the shared group was attached to AS, now you remove this
> file when any device is detached. The shared debugfs file should be
> refcounted or something.

Will see how to handle it.

Thanks
Nic
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v8 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-09 Thread Nicolin Chen via iommu
On Thu, Dec 09, 2021 at 05:49:09PM +0300, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 09.12.2021 10:38, Nicolin Chen пишет:
> > +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int 
> > pt_index)
> > +{
> > + return (pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> > +(pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> > +}
> 
> I'd change the return type to u32 here, for consistency.

The whole file defines iova using "unsigned long", which I see
as the consistency... If we change it to u32, it'd be probably
necessary to change all iova types to u32 too... So I'd rather
keep it "unsigned long" here. If you see a big necessity to do
that, an additional patch would be nicer IMHO.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH v8 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-08 Thread Nicolin Chen via iommu
This patch dumps all active mapping entries from pagetable to a
debugfs directory named "mappings".

Part of this patch for listing all swgroup names in a group_soc
is provided by Dmitry Osipenko 

Attaching an example:

[SWGROUP: xusb_host] [as: (id: 5), (attr: R|W|-), (pd_dma: 0x80005000)]
{
[index: 1023] 0xf0080007 (count: 52)
{
PTE RANGE  | ATTR | PHYS   | IOVA   | SIZE
[#913 , #913 ] | 0x7  | 0x000101fbe000 | 0xfff91000 | 0x1000
[#914 , #914 ] | 0x7  | 0x000101fbd000 | 0xfff92000 | 0x1000
[#915 , #915 ] | 0x7  | 0x000101fbc000 | 0xfff93000 | 0x1000
[#916 , #916 ] | 0x7  | 0x000101fbb000 | 0xfff94000 | 0x1000
[#921 , #921 ] | 0x7  | 0xfcc02000 | 0xfff99000 | 0x1000
[#922 , #922 ] | 0x7  | 0x000101fb7000 | 0xfff9a000 | 0x1000
[#923 , #923 ] | 0x7  | 0x000101fb5000 | 0xfff9b000 | 0x1000
[#948 , #948 ] | 0x7  | 0x000101fb2000 | 0xfffb4000 | 0x1000
[#949 , #949 ] | 0x7  | 0x000101fb1000 | 0xfffb5000 | 0x1000
[#950 , #950 ] | 0x7  | 0x000101faf000 | 0xfffb6000 | 0x1000
[#951 , #951 ] | 0x7  | 0x000101fae000 | 0xfffb7000 | 0x1000
[#952 , #952 ] | 0x7  | 0x00010263d000 | 0xfffb8000 | 0x1000
[#953 , #953 ] | 0x7  | 0x00010263c000 | 0xfffb9000 | 0x1000
[#954 , #954 ] | 0x7  | 0x00010263b000 | 0xfffba000 | 0x1000
[#955 , #955 ] | 0x7  | 0x00010263a000 | 0xfffbb000 | 0x1000
[#956 , #956 ] | 0x7  | 0x000102639000 | 0xfffbc000 | 0x1000
[#957 , #957 ] | 0x7  | 0x000102638000 | 0xfffbd000 | 0x1000
[#958 , #958 ] | 0x7  | 0x000102637000 | 0xfffbe000 | 0x1000
[#959 , #959 ] | 0x7  | 0x000102636000 | 0xfffbf000 | 0x1000
[#960 , #992 ] | 0x7  | 0x000102613000 | 0xfffc0000 | 0x21000
}
}
Total PDEs: 1, total PTEs: 52

Note that the example above was output after I locally enabled
IOMMU_DOMAIN_DMA, which is not merged to mainline yet due to a
known framebuffer issue.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 185 +
 1 file changed, 185 insertions(+)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 454504aa6602..cbd1a52f2a9f 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -47,6 +47,7 @@ struct tegra_smmu {
struct list_head list;
 
struct dentry *debugfs;
+   struct dentry *debugfs_mappings;
 
struct iommu_device iommu;  /* IOMMU Core code handle */
 };
@@ -154,6 +155,9 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, 
unsigned long offset)
 
 #define SMMU_PDE_ATTR  (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
 SMMU_PDE_NONSECURE)
+#define SMMU_PTE_ATTR  (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
+SMMU_PTE_NONSECURE)
+#define SMMU_PTE_ATTR_SHIFT29
 
 static unsigned int iova_pd_index(unsigned long iova)
 {
@@ -165,6 +169,12 @@ static unsigned int iova_pt_index(unsigned long iova)
return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1);
 }
 
+static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int 
pt_index)
+{
+   return (pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
+  (pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
+}
+
 static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr)
 {
addr >>= 12;
@@ -498,6 +508,156 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu 
*smmu,
mutex_unlock(>lock);
 }
 
+static int tegra_smmu_debugfs_mappings_show(struct seq_file *s, void *data)
+{
+   struct tegra_smmu_group *group = s->private;
+   const struct tegra_smmu_group_soc *soc;
+   const struct tegra_smmu_swgroup *swgrp;
+   struct tegra_smmu_as *as;
+   struct tegra_smmu *smmu;
+   unsigned int pd_index;
+   unsigned int pt_index;
+   unsigned long flags;
+   u64 pte_count = 0;
+   u32 pde_count = 0;
+   u32 *pd, val;
+
+   if (!group || !group->as || !group->swgrp)
+   return 0;
+
+   swgrp = group->swgrp;
+   smmu = group->smmu;
+   soc = group->soc;
+   as = group->as;
+
+   mutex_lock(>lock);
+
+   val = smmu_readl(smmu, swgrp->reg);
+   if (!(val & SMMU_ASID_ENABLE))
+   goto unlock;
+
+   pd = page_address(as->pd);
+   if (!pd)
+   goto unlock;
+
+   seq_puts(s, "[SWGROUP: ");
+   /* List all the swgroup names in the same group_soc */
+   if (soc) {
+   bool first_swgroup = true;
+   unsigned int

[PATCH v8 4/6] iommu/tegra-smmu: Use swgrp pointer instead of swgroup id

2021-12-08 Thread Nicolin Chen via iommu
This patch changes struct tegra_smmu_group to hold a swgrp
pointer instead of the swgroup id, as a preparatory change for
the "mappings" debugfs feature.

Acked-by: Thierry Reding 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 05a386036fce..532c843eb631 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -24,8 +24,8 @@ struct tegra_smmu_group {
struct list_head list;
struct tegra_smmu *smmu;
const struct tegra_smmu_group_soc *soc;
+   const struct tegra_smmu_swgroup *swgrp;
struct iommu_group *grp;
-   unsigned int swgroup;
 };
 
 struct tegra_smmu {
@@ -899,18 +899,22 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
const struct tegra_smmu_group_soc *soc;
+   const struct tegra_smmu_swgroup *swgrp;
unsigned int swgroup = fwspec->ids[0];
struct tegra_smmu_group *group;
struct iommu_group *grp;
 
+   /* Find swgrp according to the swgroup id */
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+
/* Find group_soc associating with swgroup */
soc = tegra_smmu_find_group_soc(smmu, swgroup);
 
mutex_lock(>lock);
 
-   /* Find existing iommu_group associating with swgroup or group_soc */
+   /* Find existing iommu_group associating with swgrp or group_soc */
list_for_each_entry(group, >groups, list)
-   if ((group->swgroup == swgroup) || (soc && group->soc == soc)) {
+   if ((swgrp && group->swgrp == swgrp) || (soc && group->soc == 
soc)) {
grp = iommu_group_ref_get(group->grp);
mutex_unlock(>lock);
return grp;
@@ -923,7 +927,7 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
}
 
INIT_LIST_HEAD(>list);
-   group->swgroup = swgroup;
+   group->swgrp = swgrp;
group->smmu = smmu;
group->soc = soc;
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 2/6] iommu/tegra-smmu: Rename tegra_smmu_find_group to tegra_smmu_find_group_soc

2021-12-08 Thread Nicolin Chen via iommu
The existing function tegra_smmu_find_group actually finds the
group->soc pointer, so the name "find_group" is not clear when read
on its own. This patch renames it to tegra_smmu_find_group_soc in
order to disambiguate the use of "group" in this driver.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index fd9ef08cb7d9..5628865c04b0 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -872,7 +872,7 @@ static struct iommu_device *tegra_smmu_probe_device(struct 
device *dev)
 static void tegra_smmu_release_device(struct device *dev) {}
 
 static const struct tegra_smmu_group_soc *
-tegra_smmu_find_group(struct tegra_smmu *smmu, unsigned int swgroup)
+tegra_smmu_find_group_soc(struct tegra_smmu *smmu, unsigned int swgroup)
 {
unsigned int i, j;
 
@@ -904,7 +904,7 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
struct iommu_group *grp;
 
/* Find group_soc associating with swgroup */
-   soc = tegra_smmu_find_group(smmu, swgroup);
+   soc = tegra_smmu_find_group_soc(smmu, swgroup);
 
mutex_lock(>lock);
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 3/6] iommu/tegra-smmu: Rename struct tegra_smmu_swgroup *group to *swgrp

2021-12-08 Thread Nicolin Chen via iommu
Both struct tegra_smmu_swgroup and struct tegra_smmu_group use
"group" for their pointer instances, which sometimes makes the
driver confusing to read.

So this patch renames "group" of struct tegra_smmu_swgroup to
"swgrp" as a cleanup. It also renames its "find" function.

Note that "swgroup" is already used for an unsigned int variable
inside struct tegra_smmu_swgroup, so the pointer cannot be named
"swgroup" and has to be something like "swgrp" instead.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 5628865c04b0..05a386036fce 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -336,35 +336,35 @@ static void tegra_smmu_domain_free(struct iommu_domain 
*domain)
 }
 
 static const struct tegra_smmu_swgroup *
-tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup)
+tegra_smmu_find_swgrp(struct tegra_smmu *smmu, unsigned int swgroup)
 {
-   const struct tegra_smmu_swgroup *group = NULL;
+   const struct tegra_smmu_swgroup *swgrp = NULL;
unsigned int i;
 
for (i = 0; i < smmu->soc->num_swgroups; i++) {
if (smmu->soc->swgroups[i].swgroup == swgroup) {
-   group = >soc->swgroups[i];
+   swgrp = >soc->swgroups[i];
break;
}
}
 
-   return group;
+   return swgrp;
 }
 
 static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup,
  unsigned int asid)
 {
-   const struct tegra_smmu_swgroup *group;
+   const struct tegra_smmu_swgroup *swgrp;
unsigned int i;
u32 value;
 
-   group = tegra_smmu_find_swgroup(smmu, swgroup);
-   if (group) {
-   value = smmu_readl(smmu, group->reg);
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+   if (swgrp) {
+   value = smmu_readl(smmu, swgrp->reg);
value &= ~SMMU_ASID_MASK;
value |= SMMU_ASID_VALUE(asid);
value |= SMMU_ASID_ENABLE;
-   smmu_writel(smmu, value, group->reg);
+   smmu_writel(smmu, value, swgrp->reg);
} else {
pr_warn("%s group from swgroup %u not found\n", __func__,
swgroup);
@@ -387,17 +387,17 @@ static void tegra_smmu_enable(struct tegra_smmu *smmu, 
unsigned int swgroup,
 static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup,
   unsigned int asid)
 {
-   const struct tegra_smmu_swgroup *group;
+   const struct tegra_smmu_swgroup *swgrp;
unsigned int i;
u32 value;
 
-   group = tegra_smmu_find_swgroup(smmu, swgroup);
-   if (group) {
-   value = smmu_readl(smmu, group->reg);
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+   if (swgrp) {
+   value = smmu_readl(smmu, swgrp->reg);
value &= ~SMMU_ASID_MASK;
value |= SMMU_ASID_VALUE(asid);
value &= ~SMMU_ASID_ENABLE;
-   smmu_writel(smmu, value, group->reg);
+   smmu_writel(smmu, value, swgrp->reg);
}
 
for (i = 0; i < smmu->soc->num_clients; i++) {
@@ -1009,11 +1009,11 @@ static int tegra_smmu_swgroups_show(struct seq_file *s, 
void *data)
seq_printf(s, "\n");
 
for (i = 0; i < smmu->soc->num_swgroups; i++) {
-   const struct tegra_smmu_swgroup *group = 
>soc->swgroups[i];
+   const struct tegra_smmu_swgroup *swgrp = 
>soc->swgroups[i];
const char *status;
unsigned int asid;
 
-   value = smmu_readl(smmu, group->reg);
+   value = smmu_readl(smmu, swgrp->reg);
 
if (value & SMMU_ASID_ENABLE)
status = "yes";
@@ -1022,7 +1022,7 @@ static int tegra_smmu_swgroups_show(struct seq_file *s, 
void *data)
 
asid = value & SMMU_ASID_MASK;
 
-   seq_printf(s, "%-9s  %-7s  %#04x\n", group->name, status,
+   seq_printf(s, "%-9s  %-7s  %#04x\n", swgrp->name, status,
   asid);
}
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 1/6] iommu/tegra-smmu: Rename struct iommu_group *group to *grp

2021-12-08 Thread Nicolin Chen via iommu
A few structs use "group" for their pointer instances, which
sometimes gets confusing. The struct iommu_group instance is
already referred to by the alias "grp" in a local function, which
distinguishes it from the others.

So this patch simply renames "group" to "grp" as a cleanup.

Acked-by: Thierry Reding 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 8e906504882d..fd9ef08cb7d9 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -24,7 +24,7 @@ struct tegra_smmu_group {
struct list_head list;
struct tegra_smmu *smmu;
const struct tegra_smmu_group_soc *soc;
-   struct iommu_group *group;
+   struct iommu_group *grp;
unsigned int swgroup;
 };
 
@@ -911,7 +911,7 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
/* Find existing iommu_group associating with swgroup or group_soc */
list_for_each_entry(group, >groups, list)
if ((group->swgroup == swgroup) || (soc && group->soc == soc)) {
-   grp = iommu_group_ref_get(group->group);
+   grp = iommu_group_ref_get(group->grp);
mutex_unlock(>lock);
return grp;
}
@@ -928,23 +928,23 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
group->soc = soc;
 
if (dev_is_pci(dev))
-   group->group = pci_device_group(dev);
+   group->grp = pci_device_group(dev);
else
-   group->group = generic_device_group(dev);
+   group->grp = generic_device_group(dev);
 
-   if (IS_ERR(group->group)) {
+   if (IS_ERR(group->grp)) {
devm_kfree(smmu->dev, group);
mutex_unlock(>lock);
return NULL;
}
 
-   iommu_group_set_iommudata(group->group, group, 
tegra_smmu_group_release);
+   iommu_group_set_iommudata(group->grp, group, tegra_smmu_group_release);
if (soc)
-   iommu_group_set_name(group->group, soc->name);
+   iommu_group_set_name(group->grp, soc->name);
list_add_tail(>list, >groups);
mutex_unlock(>lock);
 
-   return group->group;
+   return group->grp;
 }
 
 static int tegra_smmu_of_xlate(struct device *dev,
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 5/6] iommu/tegra-smmu: Attach as pointer to tegra_smmu_group

2021-12-08 Thread Nicolin Chen via iommu
This makes it easier for the driver to access the corresponding
as pointer when it only has a tegra_smmu_group pointer, which
helps the new mappings debugfs nodes.

Also move tegra_smmu_find_group_soc() upward, for use in the new
tegra_smmu_attach_as(); it is also better to keep all the
tegra_smmu_find_* functions together.

Acked-by: Thierry Reding 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 96 +++---
 1 file changed, 80 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 532c843eb631..454504aa6602 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -25,6 +25,7 @@ struct tegra_smmu_group {
struct tegra_smmu *smmu;
const struct tegra_smmu_group_soc *soc;
const struct tegra_smmu_swgroup *swgrp;
+   struct tegra_smmu_as *as;
struct iommu_group *grp;
 };
 
@@ -351,6 +352,19 @@ tegra_smmu_find_swgrp(struct tegra_smmu *smmu, unsigned 
int swgroup)
return swgrp;
 }
 
+static const struct tegra_smmu_group_soc *
+tegra_smmu_find_group_soc(struct tegra_smmu *smmu, unsigned int swgroup)
+{
+   unsigned int i, j;
+
+   for (i = 0; i < smmu->soc->num_groups; i++)
+   for (j = 0; j < smmu->soc->groups[i].num_swgroups; j++)
+   if (smmu->soc->groups[i].swgroups[j] == swgroup)
+   return >soc->groups[i];
+
+   return NULL;
+}
+
 static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup,
  unsigned int asid)
 {
@@ -484,6 +498,59 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu 
*smmu,
mutex_unlock(>lock);
 }
 
+static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
+struct tegra_smmu_as *as,
+unsigned int swgroup)
+{
+   const struct tegra_smmu_swgroup *swgrp;
+   struct tegra_smmu_group *group;
+
+   /* Find swgrp according to the swgroup id */
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+   if (!swgrp)
+   return;
+
+   mutex_lock(>lock);
+
+   list_for_each_entry(group, >groups, list) {
+   if (group->swgrp != swgrp)
+   continue;
+   if (group->as == as)
+   break;
+
+   if (group->as)
+   dev_warn(smmu->dev,
+"overwriting group->as for swgroup: %s\n", 
swgrp->name);
+   group->as = as;
+   break;
+   }
+
+   mutex_unlock(>lock);
+}
+
+static void tegra_smmu_detach_as(struct tegra_smmu *smmu,
+unsigned int swgroup)
+{
+   const struct tegra_smmu_swgroup *swgrp;
+   struct tegra_smmu_group *group;
+
+   /* Find swgrp according to the swgroup id */
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+   if (!swgrp)
+   return;
+
+   mutex_lock(>lock);
+
+   list_for_each_entry(group, >groups, list) {
+   if (group->swgrp != swgrp)
+   continue;
+   group->as = NULL;
+   break;
+   }
+
+   mutex_unlock(>lock);
+}
+
 static int tegra_smmu_attach_dev(struct iommu_domain *domain,
 struct device *dev)
 {
@@ -497,11 +564,15 @@ static int tegra_smmu_attach_dev(struct iommu_domain 
*domain,
return -ENOENT;
 
for (index = 0; index < fwspec->num_ids; index++) {
+   unsigned int swgroup = fwspec->ids[index];
+
err = tegra_smmu_as_prepare(smmu, as);
if (err)
goto disable;
 
-   tegra_smmu_enable(smmu, fwspec->ids[index], as->id);
+   tegra_smmu_attach_as(smmu, as, swgroup);
+
+   tegra_smmu_enable(smmu, swgroup, as->id);
}
 
if (index == 0)
@@ -511,7 +582,10 @@ static int tegra_smmu_attach_dev(struct iommu_domain 
*domain,
 
 disable:
while (index--) {
-   tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
+   unsigned int swgroup = fwspec->ids[index];
+
+   tegra_smmu_disable(smmu, swgroup, as->id);
+   tegra_smmu_detach_as(smmu, swgroup);
tegra_smmu_as_unprepare(smmu, as);
}
 
@@ -529,7 +603,10 @@ static void tegra_smmu_detach_dev(struct iommu_domain 
*domain, struct device *de
return;
 
for (index = 0; index < fwspec->num_ids; index++) {
-   tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
+   unsigned int swgroup = fwspec->ids[index];
+
+   tegra_smmu_disable(smmu, swgroup, as->id);
+   tegra_smmu_detach_as(smmu, swgroup);
tegra_smmu_as_unprepare(smmu, as);
}
 

[PATCH v8 0/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-08 Thread Nicolin Chen via iommu
This series of patches adds a new mappings node to debugfs for
the tegra-smmu driver. The first five patches are all preparatory
changes for PATCH-6, based on Thierry's review feedback against
v5.

Changelog
v8:
 * No changes for PATCH 1-4
 * PATCH-5:
 * * bypassed "group->as == as" to fix KMSG bug reported by Dmitry
 * PATCH-6:
 * * changed to use u32 casting for IOVA outputs
 * * squashed Dmitry's change to list all swgroup names in the same
 group_soc since they share the same as pointer
v7: 
https://lore.kernel.org/linux-iommu/20211208084732.23363-1-nicol...@nvidia.com/T/
 * Added "Acked-by" from Thierry to PATCH1,4,5
 * No other changes for PATCH1,3,4,5
 * PATCH-2: dropped "s/soc/group_soc" change
 * PATCH-6:
 * * avoided forward declaration
 * * dropped castings in pd_pt_index_iova()
 * * used "'-' : 'S'" for non-secure attribute
 * * changed multi-line outputs to single-line format
v6: https://lore.kernel.org/linux-iommu/20210915043806.GA19185@Asurada-Nvidia/t/
 * Added PATCH1-3 for better naming conventions
 * Added PATCH4-5 to embed previous struct tegra_smmu_group_debug
   into struct tegra_smmu_group
 * Dropped parentheses at SMMU_PTE_ATTR_SHIFT
 * Dropped swgrp->reg print
 * Replaced ptb_reg contents with as->attr and as->pd_dma
 * Added "index" and "count" in the PD entries for readability
 * Removed Dmitry's Tested-by and Reviewed-by for the big change
   from v5 to v6.
v5: 
https://lore.kernel.org/linux-iommu/20210315203631.24990-1-nicoleots...@gmail.com/
 * Fixed a typo in commit message
 * Split a long line into two lines
 * Rearranged variable defines by length
 * Added Tested-by and Reviewed-by from Dmitry
v4: https://lore.kernel.org/lkml/20210315033504.23937-1-nicoleots...@gmail.com/
 * Changed %d to %u for unsigned variables
 * Fixed print format mismatch warnings on ARM32
v3: https://lore.kernel.org/linux-iommu/20210315031530.GA15245@Asurada-Nvidia/T/
 * Fixed PHYS and IOVA print formats
 * Changed variables to unsigned int type
 * Changed the table outputs to be compact
v2: https://lore.kernel.org/linux-iommu/20210312010932.GB29926@Asurada-Nvidia/T/
 * Expanded mutex range to the entire function
 * Added as->lock to protect pagetable walkthrough
 * Replaced devm_kzalloc with devm_kcalloc for group_debug
 * Added "PTE RANGE" and "SIZE" columns to group contiguous mappings
 * Dropped as->count check
 * Added WARN_ON when as->count mismatches pd[pd_index]
v1: https://lkml.org/lkml/2020/9/26/70

Nicolin Chen (6):
  iommu/tegra-smmu: Rename struct iommu_group *group to *grp
  iommu/tegra-smmu: Rename tegra_smmu_find_group to
tegra_smmu_find_group_soc
  iommu/tegra-smmu: Rename struct tegra_smmu_swgroup *group to *swgrp
  iommu/tegra-smmu: Use swgrp pointer instead of swgroup id
  iommu/tegra-smmu: Attach as pointer to tegra_smmu_group
  iommu/tegra-smmu: Add pagetable mappings to debugfs

 drivers/iommu/tegra-smmu.c | 345 -
 1 file changed, 299 insertions(+), 46 deletions(-)

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-08 Thread Nicolin Chen via iommu
On Wed, Dec 08, 2021 at 07:09:37PM +0300, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 08.12.2021 11:47, Nicolin Chen пишет:
> >  static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
> >struct tegra_smmu_as *as,
> >unsigned int swgroup)
> > @@ -517,6 +646,12 @@ static void tegra_smmu_attach_as(struct tegra_smmu 
> > *smmu,
> >   dev_warn(smmu->dev,
> >"overwriting group->as for swgroup: %s\n", 
> > swgrp->name);
> >   group->as = as;
> > +
> > + if (smmu->debugfs_mappings)
> > + debugfs_create_file(group->swgrp->name, 0444,
> > + smmu->debugfs_mappings, group,
> > + 
> > _smmu_debugfs_mappings_fops);
> 
> I noticed this in KMSG:
> 
>  tegra-mc 7000f000.memory-controller: overwriting group->as for swgroup: g2
>  debugfs: File 'g2' in directory 'mappings' already present!
>  tegra-mc 7000f000.memory-controller: overwriting group->as for swgroup: g2
>  debugfs: File 'g2' in directory 'mappings' already present
> 
> Doesn't look okay, please fix.

Thanks for testing and reporting!

I will see and fix it along with your other comments.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH v7 0/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-08 Thread Nicolin Chen via iommu
This series of patches adds a new mappings node to debugfs for
the tegra-smmu driver. The first five patches are all preparatory
changes for PATCH-6, based on Thierry's review feedback against
v5.

Changelog
v7:
 * Added "Acked-by" from Thierry to PATCH1,4,5
 * No other changes for PATCH1,3,4,5
 * PATCH-2: dropped "s/soc/group_soc" change
 * PATCH-6:
 * * avoided forward declaration
 * * dropped castings in pd_pt_index_iova()
 * * used "'-' : 'S'" for non-secure attribute
 * * changed multi-line outputs to single-line format
v6: https://lore.kernel.org/linux-iommu/20210915043806.GA19185@Asurada-Nvidia/t/
 * Added PATCH1-3 for better naming conventions
 * Added PATCH4-5 to embed previous struct tegra_smmu_group_debug
   into struct tegra_smmu_group
 * Dropped parentheses at SMMU_PTE_ATTR_SHIFT
 * Dropped swgrp->reg print
 * Replaced ptb_reg contents with as->attr and as->pd_dma
 * Added "index" and "count" in the PD entries for readability
 * Removed Dmitry's Tested-by and Reviewed-by for the big change
   from v5 to v6.
v5: 
https://lore.kernel.org/linux-iommu/20210315203631.24990-1-nicoleots...@gmail.com/
 * Fixed a typo in commit message
 * Split a long line into two lines
 * Rearranged variable defines by length
 * Added Tested-by and Reviewed-by from Dmitry
v4: https://lore.kernel.org/lkml/20210315033504.23937-1-nicoleots...@gmail.com/
 * Changed %d to %u for unsigned variables
 * Fixed print format mismatch warnings on ARM32
v3: https://lore.kernel.org/linux-iommu/20210315031530.GA15245@Asurada-Nvidia/T/
 * Fixed PHYS and IOVA print formats
 * Changed variables to unsigned int type
 * Changed the table outputs to be compact
v2: https://lore.kernel.org/linux-iommu/20210312010932.GB29926@Asurada-Nvidia/T/
 * Expanded mutex range to the entire function
 * Added as->lock to protect pagetable walkthrough
 * Replaced devm_kzalloc with devm_kcalloc for group_debug
 * Added "PTE RANGE" and "SIZE" columns to group contiguous mappings
 * Dropped as->count check
 * Added WARN_ON when as->count mismatches pd[pd_index]
v1: https://lkml.org/lkml/2020/9/26/70

Nicolin Chen (6):
  iommu/tegra-smmu: Rename struct iommu_group *group to *grp
  iommu/tegra-smmu: Rename tegra_smmu_find_group to
tegra_smmu_find_group_soc
  iommu/tegra-smmu: Rename struct tegra_smmu_swgroup *group to *swgrp
  iommu/tegra-smmu: Use swgrp pointer instead of swgroup id
  iommu/tegra-smmu: Attach as pointer to tegra_smmu_group
  iommu/tegra-smmu: Add pagetable mappings to debugfs

 drivers/iommu/tegra-smmu.c | 301 +++--
 1 file changed, 255 insertions(+), 46 deletions(-)

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 1/6] iommu/tegra-smmu: Rename struct iommu_group *group to *grp

2021-12-08 Thread Nicolin Chen via iommu
A few structs use "group" for their pointer instances, which
sometimes gets confusing. The struct iommu_group instance is
already referred to by the alias "grp" in a local function, which
distinguishes it from the others.

So this patch simply renames "group" to "grp" as a cleanup.

Acked-by: Thierry Reding 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index c5fa8b8673b6..f874ee2600e5 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -23,7 +23,7 @@ struct tegra_smmu_group {
struct list_head list;
struct tegra_smmu *smmu;
const struct tegra_smmu_group_soc *soc;
-   struct iommu_group *group;
+   struct iommu_group *grp;
unsigned int swgroup;
 };
 
@@ -909,7 +909,7 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
/* Find existing iommu_group associating with swgroup or group_soc */
list_for_each_entry(group, >groups, list)
if ((group->swgroup == swgroup) || (soc && group->soc == soc)) {
-   grp = iommu_group_ref_get(group->group);
+   grp = iommu_group_ref_get(group->grp);
mutex_unlock(>lock);
return grp;
}
@@ -926,23 +926,23 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
group->soc = soc;
 
if (dev_is_pci(dev))
-   group->group = pci_device_group(dev);
+   group->grp = pci_device_group(dev);
else
-   group->group = generic_device_group(dev);
+   group->grp = generic_device_group(dev);
 
-   if (IS_ERR(group->group)) {
+   if (IS_ERR(group->grp)) {
devm_kfree(smmu->dev, group);
mutex_unlock(>lock);
return NULL;
}
 
-   iommu_group_set_iommudata(group->group, group, 
tegra_smmu_group_release);
+   iommu_group_set_iommudata(group->grp, group, tegra_smmu_group_release);
if (soc)
-   iommu_group_set_name(group->group, soc->name);
+   iommu_group_set_name(group->grp, soc->name);
list_add_tail(>list, >groups);
mutex_unlock(>lock);
 
-   return group->group;
+   return group->grp;
 }
 
 static int tegra_smmu_of_xlate(struct device *dev,
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-12-08 Thread Nicolin Chen via iommu
This patch dumps all active mapping entries from pagetable to a
debugfs directory named "mappings".

Attaching an example:

[SWGROUP: xusb_host] [as: (id: 5), (attr: R|W|-), (pd_dma: 0x80005000)]
{
[index: 1023] 0xf0080040 (count: 52)
{
PTE RANGE  | ATTR | PHYS   | IOVA   | SIZE
[#913 , #913 ] | 0x7  | 0x000102674000 | 0xfff91000 | 0x1000
[#914 , #914 ] | 0x7  | 0x000102672000 | 0xfff92000 | 0x1000
[#915 , #915 ] | 0x7  | 0x000102671000 | 0xfff93000 | 0x1000
[#916 , #916 ] | 0x7  | 0x000102670000 | 0xfff94000 | 0x1000
[#921 , #921 ] | 0x7  | 0x0000fcc00000 | 0xfff99000 | 0x1000
[#922 , #922 ] | 0x7  | 0x00010266d000 | 0xfff9a000 | 0x1000
[#923 , #923 ] | 0x7  | 0x00010266c000 | 0xfff9b000 | 0x1000
[#948 , #948 ] | 0x7  | 0x000102668000 | 0xfffb4000 | 0x1000
[#949 , #949 ] | 0x7  | 0x000102667000 | 0xfffb5000 | 0x1000
[#950 , #950 ] | 0x7  | 0x000102666000 | 0xfffb6000 | 0x1000
[#951 , #951 ] | 0x7  | 0x000102665000 | 0xfffb7000 | 0x1000
[#952 , #952 ] | 0x7  | 0x00010264b000 | 0xfffb8000 | 0x1000
[#953 , #953 ] | 0x7  | 0x00010264a000 | 0xfffb9000 | 0x1000
[#954 , #954 ] | 0x7  | 0x000102649000 | 0xfffba000 | 0x1000
[#955 , #955 ] | 0x7  | 0x000102648000 | 0xfffbb000 | 0x1000
[#956 , #956 ] | 0x7  | 0x00010260f000 | 0xfffbc000 | 0x1000
[#957 , #957 ] | 0x7  | 0x00010260e000 | 0xfffbd000 | 0x1000
[#958 , #958 ] | 0x7  | 0x00010260d000 | 0xfffbe000 | 0x1000
[#959 , #959 ] | 0x7  | 0x00010260b000 | 0xfffbf000 | 0x1000
[#960 , #992 ] | 0x7  | 0x0001025ea000 | 0xfffc0000 | 0x21000
}
}
Total PDEs: 1, total PTEs: 52

Note that the example above was output after I locally enabled
IOMMU_DOMAIN_DMA, which is not merged to mainline yet due to a
known framebuffer issue.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 143 +
 1 file changed, 143 insertions(+)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 1fbb93215463..575e82076270 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -46,6 +46,7 @@ struct tegra_smmu {
struct list_head list;
 
struct dentry *debugfs;
+   struct dentry *debugfs_mappings;
 
struct iommu_device iommu;  /* IOMMU Core code handle */
 };
@@ -153,6 +154,9 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, 
unsigned long offset)
 
 #define SMMU_PDE_ATTR  (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
 SMMU_PDE_NONSECURE)
+#define SMMU_PTE_ATTR  (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
+SMMU_PTE_NONSECURE)
+#define SMMU_PTE_ATTR_SHIFT29
 
 static unsigned int iova_pd_index(unsigned long iova)
 {
@@ -164,6 +168,12 @@ static unsigned int iova_pt_index(unsigned long iova)
return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1);
 }
 
+static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int 
pt_index)
+{
+   return (pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
+  (pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
+}
+
 static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr)
 {
addr >>= 12;
@@ -496,6 +506,125 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu 
*smmu,
mutex_unlock(>lock);
 }
 
+static int tegra_smmu_debugfs_mappings_show(struct seq_file *s, void *data)
+{
+   struct tegra_smmu_group *group = s->private;
+   const struct tegra_smmu_swgroup *swgrp;
+   struct tegra_smmu_as *as;
+   struct tegra_smmu *smmu;
+   unsigned int pd_index;
+   unsigned int pt_index;
+   unsigned long flags;
+   u64 pte_count = 0;
+   u32 pde_count = 0;
+   u32 *pd, val;
+
+   if (!group || !group->as || !group->swgrp)
+   return 0;
+
+   swgrp = group->swgrp;
+   smmu = group->smmu;
+   as = group->as;
+
+   mutex_lock(>lock);
+
+   val = smmu_readl(smmu, swgrp->reg) & SMMU_ASID_ENABLE;
+   if (!val)
+   goto unlock;
+
+   pd = page_address(as->pd);
+   if (!pd)
+   goto unlock;
+
+   seq_printf(s, "[SWGROUP: %s] ", swgrp->name);
+   seq_printf(s, "[as: (id: %d), ", as->id);
+   seq_printf(s, "(attr: %c|%c|%c), &qu

[PATCH v7 3/6] iommu/tegra-smmu: Rename struct tegra_smmu_swgroup *group to *swgrp

2021-12-08 Thread Nicolin Chen via iommu
Both struct tegra_smmu_swgroup and struct tegra_smmu_group use
"group" for their pointer instances, which sometimes makes the
driver confusing to read.

So this patch renames "group" of struct tegra_smmu_swgroup to
"swgrp" as a cleanup. It also renames its "find" function.

Note that "swgroup" is already used for an unsigned int variable
inside struct tegra_smmu_swgroup, so the pointer cannot be named
"swgroup" and has to be something like "swgrp" instead.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 9e9f37e4894c..42af216ef65c 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -334,35 +334,35 @@ static void tegra_smmu_domain_free(struct iommu_domain 
*domain)
 }
 
 static const struct tegra_smmu_swgroup *
-tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup)
+tegra_smmu_find_swgrp(struct tegra_smmu *smmu, unsigned int swgroup)
 {
-   const struct tegra_smmu_swgroup *group = NULL;
+   const struct tegra_smmu_swgroup *swgrp = NULL;
unsigned int i;
 
for (i = 0; i < smmu->soc->num_swgroups; i++) {
if (smmu->soc->swgroups[i].swgroup == swgroup) {
-   group = >soc->swgroups[i];
+   swgrp = >soc->swgroups[i];
break;
}
}
 
-   return group;
+   return swgrp;
 }
 
 static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup,
  unsigned int asid)
 {
-   const struct tegra_smmu_swgroup *group;
+   const struct tegra_smmu_swgroup *swgrp;
unsigned int i;
u32 value;
 
-   group = tegra_smmu_find_swgroup(smmu, swgroup);
-   if (group) {
-   value = smmu_readl(smmu, group->reg);
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+   if (swgrp) {
+   value = smmu_readl(smmu, swgrp->reg);
value &= ~SMMU_ASID_MASK;
value |= SMMU_ASID_VALUE(asid);
value |= SMMU_ASID_ENABLE;
-   smmu_writel(smmu, value, group->reg);
+   smmu_writel(smmu, value, swgrp->reg);
} else {
pr_warn("%s group from swgroup %u not found\n", __func__,
swgroup);
@@ -385,17 +385,17 @@ static void tegra_smmu_enable(struct tegra_smmu *smmu, 
unsigned int swgroup,
 static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup,
   unsigned int asid)
 {
-   const struct tegra_smmu_swgroup *group;
+   const struct tegra_smmu_swgroup *swgrp;
unsigned int i;
u32 value;
 
-   group = tegra_smmu_find_swgroup(smmu, swgroup);
-   if (group) {
-   value = smmu_readl(smmu, group->reg);
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+   if (swgrp) {
+   value = smmu_readl(smmu, swgrp->reg);
value &= ~SMMU_ASID_MASK;
value |= SMMU_ASID_VALUE(asid);
value &= ~SMMU_ASID_ENABLE;
-   smmu_writel(smmu, value, group->reg);
+   smmu_writel(smmu, value, swgrp->reg);
}
 
for (i = 0; i < smmu->soc->num_clients; i++) {
@@ -1007,11 +1007,11 @@ static int tegra_smmu_swgroups_show(struct seq_file *s, 
void *data)
seq_printf(s, "\n");
 
for (i = 0; i < smmu->soc->num_swgroups; i++) {
-   const struct tegra_smmu_swgroup *group = 
>soc->swgroups[i];
+   const struct tegra_smmu_swgroup *swgrp = 
>soc->swgroups[i];
const char *status;
unsigned int asid;
 
-   value = smmu_readl(smmu, group->reg);
+   value = smmu_readl(smmu, swgrp->reg);
 
if (value & SMMU_ASID_ENABLE)
status = "yes";
@@ -1020,7 +1020,7 @@ static int tegra_smmu_swgroups_show(struct seq_file *s, 
void *data)
 
asid = value & SMMU_ASID_MASK;
 
-   seq_printf(s, "%-9s  %-7s  %#04x\n", group->name, status,
+   seq_printf(s, "%-9s  %-7s  %#04x\n", swgrp->name, status,
   asid);
}
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 4/6] iommu/tegra-smmu: Use swgrp pointer instead of swgroup id

2021-12-08 Thread Nicolin Chen via iommu
This patch changes struct tegra_smmu_group to hold a swgrp
pointer instead of the swgroup id, as a preparatory change for
the "mappings" debugfs feature.

Acked-by: Thierry Reding 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 42af216ef65c..b0a04cc8f560 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -23,8 +23,8 @@ struct tegra_smmu_group {
struct list_head list;
struct tegra_smmu *smmu;
const struct tegra_smmu_group_soc *soc;
+   const struct tegra_smmu_swgroup *swgrp;
struct iommu_group *grp;
-   unsigned int swgroup;
 };
 
 struct tegra_smmu {
@@ -897,18 +897,22 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
const struct tegra_smmu_group_soc *soc;
+   const struct tegra_smmu_swgroup *swgrp;
unsigned int swgroup = fwspec->ids[0];
struct tegra_smmu_group *group;
struct iommu_group *grp;
 
+   /* Find swgrp according to the swgroup id */
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+
/* Find group_soc associating with swgroup */
soc = tegra_smmu_find_group_soc(smmu, swgroup);
 
mutex_lock(>lock);
 
-   /* Find existing iommu_group associating with swgroup or group_soc */
+   /* Find existing iommu_group associating with swgrp or group_soc */
list_for_each_entry(group, >groups, list)
-   if ((group->swgroup == swgroup) || (soc && group->soc == soc)) {
+   if ((swgrp && group->swgrp == swgrp) || (soc && group->soc == 
soc)) {
grp = iommu_group_ref_get(group->grp);
mutex_unlock(>lock);
return grp;
@@ -921,7 +925,7 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
}
 
INIT_LIST_HEAD(>list);
-   group->swgroup = swgroup;
+   group->swgrp = swgrp;
group->smmu = smmu;
group->soc = soc;
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 2/6] iommu/tegra-smmu: Rename tegra_smmu_find_group to tegra_smmu_find_group_soc

2021-12-08 Thread Nicolin Chen via iommu
The existing function tegra_smmu_find_group really finds the group->soc
pointer, so the name "find_group" might not be clear when looking at
it alone. This patch renames it to tegra_smmu_find_group_soc in order
to disambiguate the use of "group" in this driver.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index f874ee2600e5..9e9f37e4894c 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -870,7 +870,7 @@ static struct iommu_device *tegra_smmu_probe_device(struct 
device *dev)
 static void tegra_smmu_release_device(struct device *dev) {}
 
 static const struct tegra_smmu_group_soc *
-tegra_smmu_find_group(struct tegra_smmu *smmu, unsigned int swgroup)
+tegra_smmu_find_group_soc(struct tegra_smmu *smmu, unsigned int swgroup)
 {
unsigned int i, j;
 
@@ -902,7 +902,7 @@ static struct iommu_group *tegra_smmu_device_group(struct 
device *dev)
struct iommu_group *grp;
 
/* Find group_soc associating with swgroup */
-   soc = tegra_smmu_find_group(smmu, swgroup);
+   soc = tegra_smmu_find_group_soc(smmu, swgroup);
 
mutex_lock(>lock);
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 5/6] iommu/tegra-smmu: Attach as pointer to tegra_smmu_group

2021-12-08 Thread Nicolin Chen via iommu
This makes it easier for the driver to reach the corresponding "as"
pointer when it only has a tegra_smmu_group pointer, which helps
the new mappings debugfs nodes.

Also move tegra_smmu_find_group_soc() upward, for use in the
new tegra_smmu_attach_as(); it is also better to keep all
tegra_smmu_find_* functions together.

Acked-by: Thierry Reding 
Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 94 +++---
 1 file changed, 78 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index b0a04cc8f560..1fbb93215463 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -24,6 +24,7 @@ struct tegra_smmu_group {
struct tegra_smmu *smmu;
const struct tegra_smmu_group_soc *soc;
const struct tegra_smmu_swgroup *swgrp;
+   struct tegra_smmu_as *as;
struct iommu_group *grp;
 };
 
@@ -349,6 +350,19 @@ tegra_smmu_find_swgrp(struct tegra_smmu *smmu, unsigned 
int swgroup)
return swgrp;
 }
 
+static const struct tegra_smmu_group_soc *
+tegra_smmu_find_group_soc(struct tegra_smmu *smmu, unsigned int swgroup)
+{
+   unsigned int i, j;
+
+   for (i = 0; i < smmu->soc->num_groups; i++)
+   for (j = 0; j < smmu->soc->groups[i].num_swgroups; j++)
+   if (smmu->soc->groups[i].swgroups[j] == swgroup)
+   return >soc->groups[i];
+
+   return NULL;
+}
+
 static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup,
  unsigned int asid)
 {
@@ -482,6 +496,57 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu 
*smmu,
mutex_unlock(>lock);
 }
 
+static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
+struct tegra_smmu_as *as,
+unsigned int swgroup)
+{
+   const struct tegra_smmu_swgroup *swgrp;
+   struct tegra_smmu_group *group;
+
+   /* Find swgrp according to the swgroup id */
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+   if (!swgrp)
+   return;
+
+   mutex_lock(>lock);
+
+   list_for_each_entry(group, >groups, list) {
+   if (group->swgrp != swgrp)
+   continue;
+   if (group->as)
+   dev_warn(smmu->dev,
+"overwriting group->as for swgroup: %s\n", 
swgrp->name);
+   group->as = as;
+   break;
+   }
+
+   mutex_unlock(>lock);
+}
+
+static void tegra_smmu_detach_as(struct tegra_smmu *smmu,
+unsigned int swgroup)
+{
+   const struct tegra_smmu_swgroup *swgrp;
+   struct tegra_smmu_group *group;
+   struct dentry *d;
+
+   /* Find swgrp according to the swgroup id */
+   swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+   if (!swgrp)
+   return;
+
+   mutex_lock(>lock);
+
+   list_for_each_entry(group, >groups, list) {
+   if (group->swgrp != swgrp)
+   continue;
+   group->as = NULL;
+   break;
+   }
+
+   mutex_unlock(>lock);
+}
+
 static int tegra_smmu_attach_dev(struct iommu_domain *domain,
 struct device *dev)
 {
@@ -495,11 +560,15 @@ static int tegra_smmu_attach_dev(struct iommu_domain 
*domain,
return -ENOENT;
 
for (index = 0; index < fwspec->num_ids; index++) {
+   unsigned int swgroup = fwspec->ids[index];
+
err = tegra_smmu_as_prepare(smmu, as);
if (err)
goto disable;
 
-   tegra_smmu_enable(smmu, fwspec->ids[index], as->id);
+   tegra_smmu_attach_as(smmu, as, swgroup);
+
+   tegra_smmu_enable(smmu, swgroup, as->id);
}
 
if (index == 0)
@@ -509,7 +578,10 @@ static int tegra_smmu_attach_dev(struct iommu_domain 
*domain,
 
 disable:
while (index--) {
-   tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
+   unsigned int swgroup = fwspec->ids[index];
+
+   tegra_smmu_disable(smmu, swgroup, as->id);
+   tegra_smmu_detach_as(smmu, swgroup);
tegra_smmu_as_unprepare(smmu, as);
}
 
@@ -527,7 +599,10 @@ static void tegra_smmu_detach_dev(struct iommu_domain 
*domain, struct device *de
return;
 
for (index = 0; index < fwspec->num_ids; index++) {
-   tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
+   unsigned int swgroup = fwspec->ids[index];
+
+   tegra_smmu_disable(smmu, swgroup, as->id);
+   tegra_smmu_detach_as(smmu, swgroup);
tegra_smmu_as_unprepare(smmu, as);
}
 }
@@ -869,19 +944,6 @@ static struct iommu

[PATCH v3 4/5] iommu/arm-smmu-v3: Add host support for NVIDIA Grace CMDQ-V

2021-11-18 Thread Nicolin Chen via iommu
From: Nate Watterson 

NVIDIA's Grace SoC has CMDQ-Virtualization (CMDQV) hardware,
which extends the standard ARM SMMU v3 IP to support multiple
VCMDQs with virtualization capabilities. In the host OS kernel,
they're used to reduce contention on a single queue. As command
queues, they are very similar to the standard CMDQ/ECMDQs,
but only support CS_NONE in the CS field of the CMD_SYNC command.

This patch adds a new nvidia-grace-cmdqv file and inserts its
structure pointer into the existing arm_smmu_device, and then
adds related function calls in the arm-smmu-v3 driver.

In the CMDQV driver itself, this patch only adds the minimal part
needed for host kernel support. Upon probe(), VINTF0 is reserved
for in-kernel use, and some of the VCMDQs are assigned to VINTF0.
The driver then selects one of the VCMDQs in VINTF0, based on the
CPU currently executing, to issue commands.

Note that for the current plan the CMDQV driver only supports
ACPI configuration.

Signed-off-by: Nate Watterson 
Signed-off-by: Nicolin Chen 
---
Changelog:
v2->v3:
 * Replaced impl design with simpler "nvidia_grace_cmdqv" pointer
 * Aligned all the namings to "nvidia_grace_cmdqv" or "cmdqv"
 * Changed VINTF_ENABLED check in get_cmdq() to VINTF_STATUS
 * Dropped overrides at smmu->features and smmu->options
 * Inlined hw_probe() to acpi_probe() for simplification
 * Added a new CMDQV CONFIG depending on CONFIG_ACPI
 * Removed additional platform_device involvement
 * Switched krealloc to kzalloc for cmdqv Pointer
 * Moved devm_request_irq() out of device_reset()
 * Dropped IRQF_SHARED flag at devm_request_irq()
 * Reused acpi_iort_node pointer from SMMU driver
 * Reused existing smmu functions to init vcmdqs
 * Changed writel_relaxed to writel to be safe
 * Removed pointless comments and prints
 * Updated Copyright lines

 MAINTAINERS   |   1 +
 drivers/iommu/Kconfig |  12 +
 drivers/iommu/arm/arm-smmu-v3/Makefile|   1 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  21 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  41 ++
 .../arm/arm-smmu-v3/nvidia-grace-cmdqv.c  | 418 ++
 6 files changed, 488 insertions(+), 6 deletions(-)
 create mode 100644 drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c

diff --git a/MAINTAINERS b/MAINTAINERS
index f32c7d733255..0314ee1edf62 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18726,6 +18726,7 @@ M:  Thierry Reding 
 R: Krishna Reddy 
 L: linux-te...@vger.kernel.org
 S: Supported
+F: drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c
 F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
 F: drivers/iommu/tegra*
 
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3eb68fa1b8cc..290af9c7b2a5 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -388,6 +388,18 @@ config ARM_SMMU_V3_SVA
  Say Y here if your system supports SVA extensions such as PCIe PASID
  and PRI.
 
+config NVIDIA_GRACE_CMDQV
+   bool "NVIDIA Grace CMDQ-V extension support for ARM SMMUv3"
+   depends on ARM_SMMU_V3
+   depends on ACPI
+   help
+ Support for NVIDIA Grace CMDQ-Virtualization extension for ARM SMMUv3.
+ The CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
+ support, except with virtualization capabilities.
+
+ Say Y here if your system is NVIDIA Grace or it has the same CMDQ-V
+ extension.
+
 config S390_IOMMU
def_bool y if S390 && PCI
depends on S390 && PCI
diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile 
b/drivers/iommu/arm/arm-smmu-v3/Makefile
index 54feb1ecccad..a083019de68a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -2,4 +2,5 @@
 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
 arm_smmu_v3-objs-y += arm-smmu-v3.o
 arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
+arm_smmu_v3-objs-$(CONFIG_NVIDIA_GRACE_CMDQV) += nvidia-grace-cmdqv.o
 arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 188865ec9a33..b1182dd825fd 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -339,6 +339,9 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct 
arm_smmu_cmdq_ent *ent)
 
 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
 {
+   if (smmu->nvidia_grace_cmdqv)
+   return nvidia_grace_cmdqv_get_cmdq(smmu);
+
return >cmdq;
 }
 
@@ -2874,12 +2877,10 @@ static struct iommu_ops arm_smmu_ops = {
 };
 
 /* Probing and initialisation functions */
-static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
-  struct arm_smmu_queue *q,
-  void __iomem *page,
-  u

[PATCH v3 2/5] iommu/arm-smmu-v3: Make arm_smmu_cmdq_init reusable

2021-11-18 Thread Nicolin Chen via iommu
The CMDQV extension in NVIDIA Grace SoC reuses the arm_smmu_cmdq
structure, while the queue location isn't the same as smmu->cmdq. So,
this patch adds a cmdq argument to the arm_smmu_cmdq_init() function
and shares its declaration in the header for the CMDQV driver to use.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 +++---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e6fee69dd79c..6be20e926f63 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2922,10 +2922,10 @@ static void arm_smmu_cmdq_free_bitmap(void *data)
bitmap_free(bitmap);
 }
 
-static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
+int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+  struct arm_smmu_cmdq *cmdq)
 {
int ret = 0;
-   struct arm_smmu_cmdq *cmdq = >cmdq;
unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
atomic_long_t *bitmap;
 
@@ -2955,7 +2955,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device 
*smmu)
if (ret)
return ret;
 
-   ret = arm_smmu_cmdq_init(smmu);
+   ret = arm_smmu_cmdq_init(smmu, >cmdq);
if (ret)
return ret;
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 7a6a6045700d..475f004ccbe4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -751,6 +751,8 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t 
size, int asid,
 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd);
 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
unsigned long iova, size_t size);
+int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+  struct arm_smmu_cmdq *cmdq);
 
 #ifdef CONFIG_ARM_SMMU_V3_SVA
 bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 5/5] iommu/nvidia-grace-cmdqv: Limit CMDs for guest owned VINTF

2021-11-18 Thread Nicolin Chen via iommu
When VCMDQs are assigned to a VINTF that is owned by a guest, not the
hypervisor (HYP_OWN bit is unset), only TLB invalidation commands
are supported. This requires the get_cmdq() function to scan the input
cmds before selecting a cmdq between smmu->cmdq and vintf->vcmdq, so
unsupported commands can still go through the emulated smmu->cmdq.

Also the guest shouldn't have HYP_OWN bit being set regardless of
guest kernel driver writing it or not, i.e. the user space driver
running in the host OS should wire this bit to zero when trapping
a write access to this VINTF_CONFIG register from a guest kernel.
So instead of using the existing regval, this patch reads out the
register value explicitly to cache in vintf->cfg.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  6 ++--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  5 +--
 .../arm/arm-smmu-v3/nvidia-grace-cmdqv.c  | 32 +--
 3 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index b1182dd825fd..73941ccc1a3e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -337,10 +337,10 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct 
arm_smmu_cmdq_ent *ent)
return 0;
 }
 
-static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu, 
u64 *cmds, int n)
 {
if (smmu->nvidia_grace_cmdqv)
-   return nvidia_grace_cmdqv_get_cmdq(smmu);
+   return nvidia_grace_cmdqv_get_cmdq(smmu, cmds, n);
 
return >cmdq;
 }
@@ -747,7 +747,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct 
arm_smmu_device *smmu,
u32 prod;
unsigned long flags;
bool owner;
-   struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
+   struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu, cmds, n);
struct arm_smmu_ll_queue llq, head;
int ret = 0;
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 24f93444aeeb..085c775c2eea 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -832,7 +832,8 @@ struct nvidia_grace_cmdqv *
 nvidia_grace_cmdqv_acpi_probe(struct arm_smmu_device *smmu,
  struct acpi_iort_node *node);
 int nvidia_grace_cmdqv_device_reset(struct arm_smmu_device *smmu);
-struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device 
*smmu);
+struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
+ u64 *cmds, int n);
 #else /* CONFIG_NVIDIA_GRACE_CMDQV */
 static inline struct nvidia_grace_cmdqv *
 nvidia_grace_cmdqv_acpi_probe(struct arm_smmu_device *smmu,
@@ -847,7 +848,7 @@ static inline int nvidia_grace_cmdqv_device_reset(struct 
arm_smmu_device *smmu)
 }
 
 static inline struct arm_smmu_cmdq *
-nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device *smmu)
+nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device *smmu, u64 *cmds, int n)
 {
return NULL;
 }
diff --git a/drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c 
b/drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c
index c0d7351f13e2..71f6bc684e64 100644
--- a/drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c
@@ -166,7 +166,8 @@ static int nvidia_grace_cmdqv_init_one_vcmdq(struct 
nvidia_grace_cmdqv *cmdqv,
return arm_smmu_cmdq_init(cmdqv->smmu, cmdq);
 }
 
-struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device *smmu)
+struct arm_smmu_cmdq *
+nvidia_grace_cmdqv_get_cmdq(struct arm_smmu_device *smmu, u64 *cmds, int n)
 {
struct nvidia_grace_cmdqv *cmdqv = smmu->nvidia_grace_cmdqv;
struct nvidia_grace_cmdqv_vintf *vintf0 = >vintf0;
@@ -176,6 +177,24 @@ struct arm_smmu_cmdq *nvidia_grace_cmdqv_get_cmdq(struct 
arm_smmu_device *smmu)
if (!FIELD_GET(VINTF_STATUS, vintf0->status))
return >cmdq;
 
+   /* Check for supported CMDs if VINTF is owned by guest (not hypervisor) 
*/
+   if (!FIELD_GET(VINTF_HYP_OWN, vintf0->cfg)) {
+   u64 opcode = (n) ? FIELD_GET(CMDQ_0_OP, cmds[0]) : 
CMDQ_OP_CMD_SYNC;
+
+   /* List all supported CMDs for vintf->cmdq pathway */
+   switch (opcode) {
+   case CMDQ_OP_TLBI_NH_ASID:
+   case CMDQ_OP_TLBI_NH_VA:
+   case CMDQ_OP_TLBI_S12_VMALL:
+   case CMDQ_OP_TLBI_S2_IPA:
+   case CMDQ_OP_ATC_INV:
+   break;
+   default:
+   /* Unsupported CMDs go for smmu->cmdq pathway */
+   return >cmdq;
+   }
+   }
+
/*
 * Select a 

[PATCH v3 3/5] iommu/arm-smmu-v3: Pass cmdq pointer in arm_smmu_cmdq_issue_cmdlist()

2021-11-18 Thread Nicolin Chen via iommu
The driver currently calls arm_smmu_get_cmdq() helper internally in
different places, though they are all actually called from the same
source -- arm_smmu_cmdq_issue_cmdlist() function.

This patch changes this to pass the cmdq pointer to these functions
instead of calling arm_smmu_get_cmdq() every time.

This also helps the CMDQV extension in NVIDIA Grace SoC, whose driver
maintains its own cmdq pointers and needs to redirect arm_smmu->cmdq
to one of them upon seeing a supported command by checking its opcode.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 6be20e926f63..188865ec9a33 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -586,11 +586,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct 
arm_smmu_cmdq *cmdq,
 
 /* Wait for the command queue to become non-full */
 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
+struct arm_smmu_cmdq *cmdq,
 struct arm_smmu_ll_queue *llq)
 {
unsigned long flags;
struct arm_smmu_queue_poll qp;
-   struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
int ret = 0;
 
/*
@@ -621,11 +621,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct 
arm_smmu_device *smmu,
  * Must be called with the cmdq lock held in some capacity.
  */
 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
  struct arm_smmu_ll_queue *llq)
 {
int ret = 0;
struct arm_smmu_queue_poll qp;
-   struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 *cmd = (u32 *)(Q_ENT(>q, llq->prod));
 
queue_poll_init(smmu, );
@@ -645,10 +645,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct 
arm_smmu_device *smmu,
  * Must be called with the cmdq lock held in some capacity.
  */
 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
+  struct arm_smmu_cmdq *cmdq,
   struct arm_smmu_ll_queue *llq)
 {
struct arm_smmu_queue_poll qp;
-   struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 prod = llq->prod;
int ret = 0;
 
@@ -695,12 +695,13 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct 
arm_smmu_device *smmu,
 }
 
 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
+struct arm_smmu_cmdq *cmdq,
 struct arm_smmu_ll_queue *llq)
 {
if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
-   return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
+   return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
 
-   return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
+   return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
 }
 
 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
@@ -757,7 +758,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct 
arm_smmu_device *smmu,
 
while (!queue_has_space(, n + sync)) {
local_irq_restore(flags);
-   if (arm_smmu_cmdq_poll_until_not_full(smmu, ))
+   if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, ))
dev_err_ratelimited(smmu->dev, "CMDQ 
timeout\n");
local_irq_save(flags);
}
@@ -833,7 +834,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct 
arm_smmu_device *smmu,
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
if (sync) {
llq.prod = queue_inc_prod_n(, n);
-   ret = arm_smmu_cmdq_poll_until_sync(smmu, );
+   ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, );
if (ret) {
dev_err_ratelimited(smmu->dev,
"CMD_SYNC timeout at 0x%08x [hwprod 
0x%08x, hwcons 0x%08x]\n",
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 1/5] iommu/arm-smmu-v3: Add CS_NONE quirk

2021-11-18 Thread Nicolin Chen via iommu
The CMDQV extension in NVIDIA Grace SoC only supports CS_NONE in the
CS field of CMD_SYNC. So this patch adds a quirk flag to accommodate
that.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 7 ++-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index f5848b351b19..e6fee69dd79c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -319,7 +319,9 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct 
arm_smmu_cmdq_ent *ent)
cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
break;
case CMDQ_OP_CMD_SYNC:
-   if (ent->sync.msiaddr) {
+   if (ent->sync.cs_none) {
+   cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, 
CMDQ_SYNC_0_CS_NONE);
+   } else if (ent->sync.msiaddr) {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, 
CMDQ_SYNC_0_CS_IRQ);
cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
} else {
@@ -356,6 +358,9 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct 
arm_smmu_device *smmu,
   q->ent_dwords * 8;
}
 
+   if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)
+   ent.sync.cs_none = true;
+
arm_smmu_cmdq_build_cmd(cmd, );
 }
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 4cb136f07914..7a6a6045700d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -499,6 +499,7 @@ struct arm_smmu_cmdq_ent {
#define CMDQ_OP_CMD_SYNC0x46
struct {
u64 msiaddr;
+   boolcs_none;
} sync;
};
 };
@@ -531,6 +532,9 @@ struct arm_smmu_queue {
 
u32 __iomem *prod_reg;
u32 __iomem *cons_reg;
+
+#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY   BIT(0)  /* CMD_SYNC CS field supports 
CS_NONE only */
+   u32 quirks;
 };
 
 struct arm_smmu_queue_poll {
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 0/5] iommu/arm-smmu-v3: Add NVIDIA Grace CMDQ-V Support

2021-11-18 Thread Nicolin Chen via iommu
From: Nicolin Chen 

NVIDIA's Grace SoC has a CMDQ-Virtualization (CMDQV) hardware that
extends standard ARM SMMUv3 to support multiple command queues with
virtualization capabilities. Though this is similar to the ECMDQ in
SMMUv3.3, CMDQV provides additional V-Interfaces that allow VMs to
have their own interfaces and command queues, and these queues are
able to execute a limited set of commands, mainly TLB invalidation
commands when running in the guest mode, compared to the standard
SMMUv3 CMDQ.

This patch series extends the SMMUv3 driver to support NVIDIA CMDQV
and implements it first for in-kernel use. Upon kernel boot, some of
the vcmdqs will be set up for the kernel driver to use, by selecting
one of the command queues based on the CPU currently executing, to
avoid lock contention hot spots with a single queue.

Although HW is able to securely expose the additional V-Interfaces
and command queues to guest VMs for fast TLB invalidations without
a hypervisor trap, due to the ongoing proposal of IOMMUFD [0], we
have to postpone the virtualization support that was available in
v2, as suggested by Alex and Jason [1]. We envision that it will
be added back via IOMMUFD in the months ahead.

Thank you!

[0] https://lore.kernel.org/lkml/20210919063848.1476776-1-yi.l@intel.com/
[1] 
https://lore.kernel.org/kvm/20210831101549.237151fa.alex.william...@redhat.com/T/#ma07dcfce69fa3f9d59e8b16579f694a0e10798d9

Changelog (details available in PATCH)
v2->v3:
 * Dropped VMID and mdev patches to redesign later based on IOMMUFD.
 * Separated HYP_OWN part for guest support into a new patch
 * Added new preparational changes
v1->v2:
 * Added mdev interface support for hypervisor and VMs.
 * Added preparational changes for mdev interface implementation.
 * PATCH-12 Changed ->issue_cmdlist() to ->get_cmdq() for a better
   integration with recently merged ECMDQ-related changes.

Nate Watterson (1):
  iommu/arm-smmu-v3: Add host support for NVIDIA Grace CMDQ-V

Nicolin Chen (4):
  iommu/arm-smmu-v3: Add CS_NONE quirk
  iommu/arm-smmu-v3: Make arm_smmu_cmdq_init reusable
  iommu/arm-smmu-v3: Pass cmdq pointer in arm_smmu_cmdq_issue_cmdlist()
  iommu/nvidia-grace-cmdqv: Limit CMDs for guest owned VINTF

 MAINTAINERS   |   1 +
 drivers/iommu/Kconfig |  12 +
 drivers/iommu/arm/arm-smmu-v3/Makefile|   1 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  53 ++-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  48 ++
 .../arm/arm-smmu-v3/nvidia-grace-cmdqv.c  | 446 ++
 6 files changed, 542 insertions(+), 19 deletions(-)
 create mode 100644 drivers/iommu/arm/arm-smmu-v3/nvidia-grace-cmdqv.c

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v6 6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

2021-10-07 Thread Nicolin Chen
On Thu, Oct 07, 2021 at 07:13:25PM +0200, Thierry Reding wrote:
> > @@ -496,6 +506,8 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu 
> > *smmu,
> > mutex_unlock(>lock);
> >  }
> >  
> > +static const struct file_operations tegra_smmu_debugfs_mappings_fops;
> 
> Could the implementation be moved up here to avoid the forward
> declaration?

I thought that keeping all debugfs fops together would be preferable.
But yes, I will move it if you prefer no-additional forward declare.

> > +   seq_printf(s, "\nSWGROUP: %s\n", swgrp->name);
> > +   seq_printf(s, "as->id: %d\nas->attr: %c|%c|%s\nas->pd_dma: %pad\n", 
> > as->id,
> > +  as->attr & SMMU_PD_READABLE ? 'R' : '-',
> > +  as->attr & SMMU_PD_WRITABLE ? 'W' : '-',
> > +  as->attr & SMMU_PD_NONSECURE ? "NS" : "S",
> > +  >pd_dma);
> > +   seq_puts(s, "{\n");
> 
> Maybe this can be more compact by putting the name, ID, attributes and
> base address onto a single line? Maybe also use "'-' : 'S'" for the
> non-secure attribute to keep in line with what you've done for readable
> and writable attributes.

Okay. Will change that.

> Then again, this is going to be very verbose output anyway, so maybe it
> isn't worth it.

Are you saying the whole debugfs thing or just attributes? Yet, for
either case, I don't think so, as mappings info would help for sure
from our past experience while the attributes are just one line...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v6 3/6] iommu/tegra-smmu: Rename struct tegra_smmu_swgroup *group to *swgrp

2021-10-07 Thread Nicolin Chen
On Thu, Oct 07, 2021 at 06:57:31PM +0200, Thierry Reding wrote:
> On Mon, Sep 13, 2021 at 06:38:55PM -0700, Nicolin Chen wrote:
> > There are both tegra_smmu_swgroup and tegra_smmu_group structs
> > using "group" for their pointer instances. This gets confusing
> > to read the driver sometimes.
> > 
> > So this patch renames "group" of struct tegra_smmu_swgroup to
> > "swgrp" as a cleanup. Also renames its "find" function.
> > 
> > Note that we already have "swgroup" being used for an unsigned
> > int type variable that is inside struct tegra_smmu_swgroup, so
> > it's not able to use "swgroup" but only something like "swgrp".
> > 
> > Signed-off-by: Nicolin Chen 
> > ---
> >  drivers/iommu/tegra-smmu.c | 34 +-
> >  1 file changed, 17 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
> > index a32ed347e25d..0f3883045ffa 100644
> > --- a/drivers/iommu/tegra-smmu.c
> > +++ b/drivers/iommu/tegra-smmu.c
> > @@ -334,35 +334,35 @@ static void tegra_smmu_domain_free(struct 
> > iommu_domain *domain)
> >  }
> >  
> >  static const struct tegra_smmu_swgroup *
> > -tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup)
> > +tegra_smmu_find_swgrp(struct tegra_smmu *smmu, unsigned int swgroup)
> 
> This makes things inconsistent now. The tegra_smmu_find_swgroup() name
> indicates that we're looking for some "swgroup" entity within an "smmu"
> object. The entity that we're looking for is a struct tegra_smmu_swgroup
> so I think it makes sense to use that full name in the function name.

This is more of an indirect change, made to keep the function name
consistent with the pointer name.

> >  {
> > -   const struct tegra_smmu_swgroup *group = NULL;
> > +   const struct tegra_smmu_swgroup *swgrp = NULL;
> 
> I don't think the existing naming is confusing. The variable name
> "group" is consistently used for tegra_smmu_swgroup structures and there
> are no cases where we would confuse them with struct tegra_smmu_group
> instances.

If we don't rename it, then PATCH-4 adds to struct tegra_smmu_group
a "struct tegra_smmu_swgroup *group", which results in a confusing
group->group...

> However, I don't feel strongly about it, so I'm fine with changing the
> variable names to "swgrp" if you think that makes things less confusing.

Yeah, I'd like to keep this change. I will respin it in the next version
after addressing the other comments.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v6 2/6] iommu/tegra-smmu: Rename struct tegra_smmu_group_soc *soc to *group_soc

2021-10-07 Thread Nicolin Chen
On Thu, Oct 07, 2021 at 06:50:52PM +0200, Thierry Reding wrote:

> >  static const struct tegra_smmu_group_soc *
> > -tegra_smmu_find_group(struct tegra_smmu *smmu, unsigned int swgroup)
> > +tegra_smmu_find_group_soc(struct tegra_smmu *smmu, unsigned int swgroup)
> 
> This one might be okay to disambiguate, but even here I think this isn't
> really necessary. It's already clear from the return value what's being
> returned.

The point here is to disambiguate "group", as there are quite a few
places using the same naming for different structures. You may argue
that it's clear by looking at the return value/type. But it is still
hard to tell when reading the code of its caller, right?

> > @@ -921,9 +922,9 @@ static struct iommu_group 
> > *tegra_smmu_device_group(struct device *dev)
> > }
> >  
> > > INIT_LIST_HEAD(&group->list);
> > +   group->group_soc = group_soc;
> > group->swgroup = swgroup;
> > group->smmu = smmu;
> > -   group->soc = soc;
> 
> As another example, it's pretty evident that group->soc refers to the
> group SoC data rather than the SMMU SoC data. The latter can be obtained
> from group->smmu->soc, which again is enough context to make it clear
> what this is.
> 
> So I don't think this makes things any clearer. It only makes the names
> more redundant and awkward to write.

Okay. I will drop the s/soc/group_soc part of this patch.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


  1   2   3   4   >