When (smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS) is true,
arm_smmu_atc_inv_to_cmd() and the lock protection in
arm_smmu_atc_inv_domain() are always executed, even if the smmu domain
does not contain any ATS master. This hurts performance, especially in
multi-core and stress scenarios. In my FIO test scenario, performance
dropped by about 8%.

In fact, we can use a struct member to record how many ATS masters the
smmu domain contains, and check that counter instead of taking the lock
and traversing the list to check all masters one by one.

Fixes: 9ce27afc0830 ("iommu/arm-smmu-v3: Add support for PCI ATS")
Signed-off-by: Zhen Lei <thunder.leiz...@huawei.com>
---
 drivers/iommu/arm-smmu-v3.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 29056d9bb12aa01..154334d3310c9b8 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -631,6 +631,7 @@ struct arm_smmu_domain {
 
        struct io_pgtable_ops           *pgtbl_ops;
        bool                            non_strict;
+       int                             nr_ats_masters;
 
        enum arm_smmu_domain_stage      stage;
        union {
@@ -1531,7 +1532,16 @@ static int arm_smmu_atc_inv_domain(struct 
arm_smmu_domain *smmu_domain,
        struct arm_smmu_cmdq_ent cmd;
        struct arm_smmu_master *master;
 
-       if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
+       /*
+        * The protection of spinlock(&smmu_domain->devices_lock) is omitted,
+        * because for a given master, its map/unmap operations should only
+        * happen after it has been attached and before it has been detached.
+        * Therefore, if at least one master needs ATC invalidation, the
+        * value of smmu_domain->nr_ats_masters cannot be zero.
+        *
+        * This can alleviate performance loss in multi-core scenarios.
+        */
+       if (!smmu_domain->nr_ats_masters)
                return 0;
 
        arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
@@ -1913,6 +1923,7 @@ static void arm_smmu_detach_dev(struct arm_smmu_master 
*master)
 
        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
        list_del(&master->domain_head);
+       smmu_domain->nr_ats_masters--;
        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 
        master->domain = NULL;
@@ -1968,6 +1979,7 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
 
        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
        list_add(&master->domain_head, &smmu_domain->devices);
+       smmu_domain->nr_ats_masters++;
        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 out_unlock:
        mutex_unlock(&smmu_domain->init_mutex);
-- 
1.8.3


Reply via email to