dmam_alloc_coherent() will usually allocate memory from the default CMA area.
On a common arm64 defconfig without reserved memory in the device tree, there
is only one CMA area, located close to address 0.
dma_alloc_contiguous() allocates memory with no notion of NUMA, and the SMMU
has no customized per-NUMA cma_area:
struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
{
        size_t count = size >> PAGE_SHIFT;
        struct page *page = NULL;
        struct cma *cma = NULL;

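        /*
         * A per-device CMA area takes precedence; otherwise the single,
         * NUMA-unaware default CMA area is used.
         */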
        if (dev && dev->cma_area)
                cma = dev->cma_area;
        else if (count > 1)
                cma = dma_contiguous_default_area;

        ...
        return page;
}
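
By contrast, allocating the queue memory on the SMMU's local node only needs
alloc_pages_node() plus a streaming DMA mapping, which is what the patch below
does for the SMMU queues. A rough sketch (the helper numa_local_queue_alloc()
is a made-up name for illustration, not part of the patch):

static void *numa_local_queue_alloc(struct device *dev, size_t qsz,
                                    dma_addr_t *dma)
{
        struct page *page;

        /* allocate from the node the device sits on */
        page = alloc_pages_node(dev_to_node(dev), GFP_KERNEL, get_order(qsz));
        if (!page)
                return NULL;

        /* map the pages for DMA instead of using coherent/CMA memory */
        *dma = dma_map_single(dev, page_address(page), qsz, DMA_BIDIRECTIONAL);
        if (dma_mapping_error(dev, *dma)) {
                __free_pages(page, get_order(qsz));
                return NULL;
        }

        return page_address(page);
}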

So if there are N NUMA nodes, the SMMUs on N-1 of them will put their
command/event queues and other data structures in a remote node, the one the
default CMA belongs to (probably node 0). Tests show that, after sending
CMD_SYNC on an empty command queue on node 2, waiting for the completion of
CMD_SYNC takes 550ns without this patch and 250ns with it.

Signed-off-by: Barry Song <song.bao....@hisilicon.com>
---
 drivers/iommu/arm-smmu-v3.c | 63 ++++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 82508730feb7..58295423e1d7 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -3157,21 +3157,23 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
                                   size_t dwords, const char *name)
 {
        size_t qsz;
+       struct page *page;
 
-       do {
-               qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
-               q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
-                                             GFP_KERNEL);
-               if (q->base || qsz < PAGE_SIZE)
-                       break;
-
-               q->llq.max_n_shift--;
-       } while (1);
+       qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
+       page = alloc_pages_node(dev_to_node(smmu->dev), GFP_KERNEL,
+                               get_order(qsz));
+       if (!page) {
+               dev_err(smmu->dev,
+                               "failed to allocate queue (0x%zx bytes) for %s\n",
+                               qsz, name);
+               return -ENOMEM;
+       }
 
-       if (!q->base) {
+       q->base = page_address(page);
+       q->base_dma = dma_map_single(smmu->dev, q->base, qsz, DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(smmu->dev, q->base_dma)) {
                dev_err(smmu->dev,
-                       "failed to allocate queue (0x%zx bytes) for %s\n",
-                       qsz, name);
+                               "failed to dma map for %s\n", name);
                return -ENOMEM;
        }
 
@@ -3192,6 +3194,18 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
        return 0;
 }
 
+static int arm_smmu_deinit_one_queue(struct arm_smmu_device *smmu,
+                                  struct arm_smmu_queue *q,
+                                  size_t dwords)
+{
+       size_t qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
+
+       dma_unmap_single(smmu->dev, q->base_dma, qsz, DMA_BIDIRECTIONAL);
+       free_pages((unsigned long)q->base, get_order(qsz));
+
+       return 0;
+}
+
 static void arm_smmu_cmdq_free_bitmap(void *data)
 {
        unsigned long *bitmap = data;
@@ -3233,22 +3247,40 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 
        ret = arm_smmu_cmdq_init(smmu);
        if (ret)
-               return ret;
+               goto deinit_cmdq;
 
        /* evtq */
        ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
                                      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
                                      "evtq");
        if (ret)
-               return ret;
+               goto deinit_cmdq;
 
        /* priq */
        if (!(smmu->features & ARM_SMMU_FEAT_PRI))
                return 0;
 
-       return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
+       ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
                                       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
                                       "priq");
+       if (ret)
+               goto deinit_evtq;
+
+       return 0;
+
+deinit_evtq:
+       arm_smmu_deinit_one_queue(smmu, &smmu->evtq.q, EVTQ_ENT_DWORDS);
+deinit_cmdq:
+       arm_smmu_deinit_one_queue(smmu, &smmu->cmdq.q, CMDQ_ENT_DWORDS);
+       return ret;
+}
+
+static void arm_smmu_deinit_queues(struct arm_smmu_device *smmu)
+{
+       arm_smmu_deinit_one_queue(smmu, &smmu->cmdq.q, CMDQ_ENT_DWORDS);
+       arm_smmu_deinit_one_queue(smmu, &smmu->evtq.q, EVTQ_ENT_DWORDS);
+       if (smmu->features & ARM_SMMU_FEAT_PRI)
+               arm_smmu_deinit_one_queue(smmu, &smmu->priq.q, PRIQ_ENT_DWORDS);
 }
 
 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
@@ -4121,6 +4153,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
        arm_smmu_set_bus_ops(NULL);
        iommu_device_unregister(&smmu->iommu);
        iommu_device_sysfs_remove(&smmu->iommu);
+       arm_smmu_deinit_queues(smmu);
        arm_smmu_device_disable(smmu);
 
        return 0;
-- 
2.23.0

