[PATCH v19 5/5] iommu/arm-smmu: Add support for qcom,smmu-v2 variant
qcom,smmu-v2 is an arm,smmu-v2 implementation with specific clock and power requirements. On msm8996, multiple cores, viz. mdss, video, etc. use this smmu. On sdm845, this smmu is used with gpu. Add bindings for the same. Signed-off-by: Vivek Gautam Reviewed-by: Rob Herring Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- Changes since v18: None. drivers/iommu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b6b11642b3a9..ba18d89d4732 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -120,6 +120,7 @@ enum arm_smmu_implementation { GENERIC_SMMU, ARM_MMU500, CAVIUM_SMMUV2, + QCOM_SMMUV2, }; struct arm_smmu_s2cr { @@ -2030,6 +2031,7 @@ ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2); static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 }, @@ -2038,6 +2040,7 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,mmu-401", .data = &arm_mmu401 }, { .compatible = "arm,mmu-500", .data = &arm_mmu500 }, { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 }, + { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 }, { }, }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v19 2/5] iommu/arm-smmu: Invoke pm_runtime across the driver
From: Sricharan R Enable pm-runtime on devices that implement a pm domain. Then, add pm runtime hooks to several iommu_ops to power cycle the smmu device for explicit TLB invalidation requests, and register space accesses, etc. We need these hooks when the smmu, linked to its master through device links, has to be powered-up without the master device being in context. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- Changes since v18: None. drivers/iommu/arm-smmu.c | 108 ++- 1 file changed, 98 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 602b67d4f2d6..1917d214c4d9 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -270,6 +270,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + return pm_runtime_get_sync(smmu->dev); + + return 0; +} + +static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_put(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -929,11 +943,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - int irq; + int ret, irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return; + /* * Disable the context bank and free the page tables before freeing * it. 
@@ -948,6 +966,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); + + arm_smmu_rpm_put(smmu); } static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) @@ -1229,10 +1249,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENODEV; smmu = fwspec_smmu(fwspec); + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + /* Ensure that the domain is finalised */ ret = arm_smmu_init_domain_context(domain, smmu); if (ret < 0) - return ret; + goto rpm_put; /* * Sanity check the domain. We don't support domains across @@ -1242,49 +1267,74 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); - return -EINVAL; + ret = -EINVAL; + goto rpm_put; } /* Looks ok, so add the device to the domain */ - return arm_smmu_domain_add_master(smmu_domain, fwspec); + ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + +rpm_put: + arm_smmu_rpm_put(smmu); + return ret; } static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + int ret; if (!ops) return -ENODEV; - return ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_get(smmu); + ret = ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_put(smmu); + + return ret; } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + size_t ret; if (!ops) return 0; - return ops->unmap(ops, iova, size); + arm_smmu_rpm_get(smmu); + ret = 
ops->unmap(ops, iova, size); + arm_smmu_rpm_put(smmu); + + return ret; } static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->tlb_ops) + if (smm
[PATCH v19 0/5] iommu/arm-smmu: Add runtime pm/sleep support
Changes since v18: - Addressing Stephen's comment [5]: Replaced the entire clock bulk data filling and handling with devm_clk_bulk_get_all(). Changes since v17: - Addressing Will's comment to embed Thor's change [2] for pulling clocks information from device tree. This is done by squashing Thor's change [2] in v17's 1/5 patch [3]. - Another minor change is addition of runtime pm hooks to arm_smmu_iova_to_phys_hard(). Previous version of this patch series is @ [1]. Also refer to [4] for change logs for previous versions. [1] https://lore.kernel.org/patchwork/cover/1017699/ [2] https://lore.kernel.org/patchwork/patch/996143/ [3] https://lore.kernel.org/patchwork/patch/1013167/ [4] https://lore.kernel.org/patchwork/cover/979429/ [5] https://lore.kernel.org/patchwork/patch/1017700/ Sricharan R (3): iommu/arm-smmu: Add pm_runtime/sleep ops iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device iommu/arm-smmu: Add the device_link between masters and smmu Vivek Gautam (2): dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2 iommu/arm-smmu: Add support for qcom,smmu-v2 variant .../devicetree/bindings/iommu/arm,smmu.txt | 39 + drivers/iommu/arm-smmu.c | 170 +++-- 2 files changed, 197 insertions(+), 12 deletions(-) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v19 1/5] iommu/arm-smmu: Add pm_runtime/sleep ops
From: Sricharan R The smmu needs to be functional only when the respective masters using it are active. The device_link feature helps to track such functional dependencies, so that the iommu gets powered when the master device enables itself using pm_runtime. So by adapting the smmu driver for runtime pm, the above-mentioned dependency can be addressed. This patch adds the pm runtime/sleep callbacks to the driver and the corresponding bulk clock handling for all the clocks needed by smmu. Also, while we enable the runtime pm, add a pm sleep suspend callback that pushes devices to low power state by turning the clocks off in a system sleep. Add corresponding clock enable path in resume callback as well. Signed-off-by: Sricharan R Signed-off-by: Archit Taneja [Thor: Rework to get clocks from device tree] Signed-off-by: Thor Thayer [vivek: rework for clock and pm ops] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy Tested-by: Thor Thayer --- Changes since v18: - Replaced the entire clock bulk data filling and handling with devm_clk_bulk_get_all(). 
drivers/iommu/arm-smmu.c | 58 +--- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5a28ae892504..602b67d4f2d6 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -206,6 +207,8 @@ struct arm_smmu_device { u32 num_global_irqs; u32 num_context_irqs; unsigned int*irqs; + struct clk_bulk_data*clks; + int num_clks; u32 cavium_id_base; /* Specific to Cavium */ @@ -1947,7 +1950,7 @@ struct arm_smmu_match_data { }; #define ARM_SMMU_MATCH_DATA(name, ver, imp)\ -static struct arm_smmu_match_data name = { .version = ver, .model = imp } +static const struct arm_smmu_match_data name = { .version = ver, .model = imp } ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); @@ -2150,6 +2153,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->irqs[i] = irq; } + err = devm_clk_bulk_get_all(dev, &smmu->clks); + if (err < 0) { + dev_err(dev, "failed to get clocks %d\n", err); + return err; + } + smmu->num_clks = err; + + err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks); + if (err) + return err; + err = arm_smmu_device_cfg_probe(smmu); if (err) return err; @@ -2236,6 +2250,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + + clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks); + return 0; } @@ -2244,15 +2261,50 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev) arm_smmu_device_remove(pdev); } -static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) { struct arm_smmu_device *smmu = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_enable(smmu->num_clks, smmu->clks); + if (ret) + return ret; 
arm_smmu_device_reset(smmu); + return 0; } -static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume); +static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + + clk_bulk_disable(smmu->num_clks, smmu->clks); + + return 0; +} + +static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +{ + if (pm_runtime_suspended(dev)) + return 0; + + return arm_smmu_runtime_resume(dev); +} + +static int __maybe_unused arm_smmu_pm_suspend(struct device *dev) +{ + if (pm_runtime_suspended(dev)) + return 0; + + return arm_smmu_runtime_suspend(dev); +} + +static const struct dev_pm_ops arm_smmu_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume) + SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend, + arm_smmu_runtime_resume, NULL) +}; static struct platform_driver arm_smmu_driver = { .driver = { -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v18 1/5] iommu/arm-smmu: Add pm_runtime/sleep ops
On Fri, Nov 30, 2018 at 11:45 PM Will Deacon wrote: > > On Thu, Nov 29, 2018 at 08:25:20PM +0530, Vivek Gautam wrote: > > On Wed, Nov 28, 2018 at 10:07 PM Robin Murphy wrote: > > > > > > On 28/11/2018 16:24, Stephen Boyd wrote: > > > > Quoting Vivek Gautam (2018-11-27 02:11:41) > > > >> @@ -1966,6 +1970,23 @@ static const struct of_device_id > > > >> arm_smmu_of_match[] = { > > > >> }; > > > >> MODULE_DEVICE_TABLE(of, arm_smmu_of_match); > > > >> > > > >> +static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu, > > > >> + const char * const *clks) > > > >> +{ > > > >> + int i; > > > >> + > > > >> + if (smmu->num_clks < 1) > > > >> + return; > > > >> + > > > >> + smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks, > > > >> + sizeof(*smmu->clks), GFP_KERNEL); > > > >> + if (!smmu->clks) > > > >> + return; > > > >> + > > > >> + for (i = 0; i < smmu->num_clks; i++) > > > >> + smmu->clks[i].id = clks[i]; > > > > > > > > Is this clk_bulk_get_all()? > > > > From what I remember, and now I could go back to v7 and check [1], we parked > > clk_bulk_get out of OF's sole purview as we also have > > arm_smmu_device_acpi_probe() besides arm_smmu_device_dt_probe(). > > > > arm_smmu_device_dt_probe() could get the clocks from dt and fill in > > the clock bulk data, and > > similarly, arm_smmu_device_acpi_probe() could fill the clock bulk data > > by getting it from ACPI. > > > > clk_bulk_get_all() seems like going only the OF way. > > Is there another way here to have something common between ACPI > > and OF, and then do the clk_bulk_get? > > I'd say just go with clk_bulk_get_all() and if somebody really wants to > mess with the SMMU clocks on a system booted via ACPI, then it's their > problem to solve. My understanding is that the design of IORT makes this > next to impossible to solve anyway, because a static table is used and > therefore we're unable to run whatever ASL methods need to be invoked to > mess with the clocks. Sure then. I will respin this patch-series. 
> > Will -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v18 1/5] iommu/arm-smmu: Add pm_runtime/sleep ops
On Wed, Nov 28, 2018 at 10:07 PM Robin Murphy wrote: > > On 28/11/2018 16:24, Stephen Boyd wrote: > > Quoting Vivek Gautam (2018-11-27 02:11:41) > >> @@ -1966,6 +1970,23 @@ static const struct of_device_id > >> arm_smmu_of_match[] = { > >> }; > >> MODULE_DEVICE_TABLE(of, arm_smmu_of_match); > >> > >> +static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu, > >> + const char * const *clks) > >> +{ > >> + int i; > >> + > >> + if (smmu->num_clks < 1) > >> + return; > >> + > >> + smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks, > >> + sizeof(*smmu->clks), GFP_KERNEL); > >> + if (!smmu->clks) > >> + return; > >> + > >> + for (i = 0; i < smmu->num_clks; i++) > >> + smmu->clks[i].id = clks[i]; > > > > Is this clk_bulk_get_all()? From what I remember, and now I could go back to v7 and check [1], we parked clk_bulk_get out of OF's sole purview as we also have arm_smmu_device_acpi_probe() besides arm_smmu_device_dt_probe(). arm_smmu_device_dt_probe() could get the clocks from dt and fill in the clock bulk data, and similarly, arm_smmu_device_acpi_probe() could fill the clock bulk data by getting it from ACPI. clk_bulk_get_all() seems like going only the OF way. Is there another way here to have something common between ACPI and OF, and then do the clk_bulk_get? [1] https://lore.kernel.org/patchwork/patch/881365/ Thanks & regards Vivek > > Ooh, did that finally get merged while we weren't looking? Great! > > Much as I don't want to drag this series out to a v19, it *would* be > neat if we no longer need to open-code that bit... > > Robin. > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v18 4/5] dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2
Add bindings doc for Qcom's smmu-v2 implementation. Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Rob Herring Reviewed-by: Robin Murphy --- .../devicetree/bindings/iommu/arm,smmu.txt | 39 ++ 1 file changed, 39 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 8a6ffce12af5..a6504b37cc21 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,10 +17,16 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" +"qcom,smmu-v2" depending on the particular implementation and/or the version of the architecture implemented. + Qcom SoCs must contain, as below, SoC-specific compatibles + along with "qcom,smmu-v2": + "qcom,msm8996-smmu-v2", "qcom,smmu-v2", + "qcom,sdm845-smmu-v2", "qcom,smmu-v2". + - reg : Base address and size of the SMMU. - #global-interrupts : The number of global interrupts exposed by the @@ -71,6 +77,22 @@ conditions. or using stream matching with #iommu-cells = <2>, and may be ignored if present in such cases. +- clock-names:List of the names of clocks input to the device. The + required list depends on particular implementation and + is as follows: + - for "qcom,smmu-v2": +- "bus": clock required for downstream bus access and + for the smmu ptw, +- "iface": clock required to access smmu's registers + through the TCU's programming interface. + - unspecified for other implementations. + +- clocks: Specifiers for all clocks listed in the clock-names property, + as per generic clock bindings. + +- power-domains: Specifiers for power domains required to be powered on for + the SMMU to operate, as per generic power domain bindings. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -137,3 +159,20 @@ conditions. iommu-map = <0 0 0x400>; ... 
}; + + /* Qcom's arm,smmu-v2 implementation */ + smmu4: iommu@d0 { + compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2"; + reg = <0xd0 0x1>; + + #global-interrupts = <1>; + interrupts = , +, +; + #iommu-cells = <1>; + power-domains = < MDSS_GDSC>; + + clocks = < SMMU_MDP_AXI_CLK>, +< SMMU_MDP_AHB_CLK>; + clock-names = "bus", "iface"; + }; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v18 5/5] iommu/arm-smmu: Add support for qcom,smmu-v2 variant
qcom,smmu-v2 is an arm,smmu-v2 implementation with specific clock and power requirements. On msm8996, multiple cores, viz. mdss, video, etc. use this smmu. On sdm845, this smmu is used with gpu. Add bindings for the same. Signed-off-by: Vivek Gautam Reviewed-by: Rob Herring Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index f02e0f58e696..29ce9e10abd4 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -121,6 +121,7 @@ enum arm_smmu_implementation { GENERIC_SMMU, ARM_MMU500, CAVIUM_SMMUV2, + QCOM_SMMUV2, }; struct arm_smmu_s2cr { @@ -2031,6 +2032,7 @@ ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2); static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 }, @@ -2039,6 +2041,7 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,mmu-401", .data = &arm_mmu401 }, { .compatible = "arm,mmu-500", .data = &arm_mmu500 }, { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 }, + { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 }, { }, }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v18 3/5] iommu/arm-smmu: Add the device_link between masters and smmu
From: Sricharan R Finally add the device link between the master device and smmu, so that the smmu gets runtime enabled/disabled only when the master needs it. This is done from add_device callback which gets called once when the master is added to the smmu. Signed-off-by: Sricharan R Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5610cc736f9d..f02e0f58e696 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1501,6 +1501,9 @@ static int arm_smmu_add_device(struct device *dev) iommu_device_link(&smmu->iommu, dev); + device_link_add(dev, smmu->dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); + return 0; out_cfg_free: -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v18 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
From: Sricharan R The smmu device probe/remove and add/remove master device callbacks get called when the smmu is not linked to its master, that is without the context of the master device. So call the runtime PM APIs in those places separately. Global locks are also initialized before enabling runtime pm as the runtime_resume() calls device_reset() which does tlb_sync_global() that ultimately requires locks to be initialized. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 108 ++- 1 file changed, 98 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e47c840fc6a8..5610cc736f9d 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -271,6 +271,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + return pm_runtime_get_sync(smmu->dev); + + return 0; +} + +static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_put(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -930,11 +944,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - int irq; + int ret, irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return; + /* * Disable the context bank and free the page tables before freeing * it. 
@@ -949,6 +967,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); + + arm_smmu_rpm_put(smmu); } static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) @@ -1230,10 +1250,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENODEV; smmu = fwspec_smmu(fwspec); + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + /* Ensure that the domain is finalised */ ret = arm_smmu_init_domain_context(domain, smmu); if (ret < 0) - return ret; + goto rpm_put; /* * Sanity check the domain. We don't support domains across @@ -1243,49 +1268,74 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); - return -EINVAL; + ret = -EINVAL; + goto rpm_put; } /* Looks ok, so add the device to the domain */ - return arm_smmu_domain_add_master(smmu_domain, fwspec); + ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + +rpm_put: + arm_smmu_rpm_put(smmu); + return ret; } static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + int ret; if (!ops) return -ENODEV; - return ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_get(smmu); + ret = ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_put(smmu); + + return ret; } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + size_t ret; if (!ops) return 0; - return ops->unmap(ops, iova, size); + arm_smmu_rpm_get(smmu); + ret = 
ops->unmap(ops, iova, size); + arm_smmu_rpm_put(smmu); + + return ret; } static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domai
[PATCH v18 1/5] iommu/arm-smmu: Add pm_runtime/sleep ops
From: Sricharan R The smmu needs to be functional only when the respective master's using it are active. The device_link feature helps to track such functional dependencies, so that the iommu gets powered when the master device enables itself using pm_runtime. So by adapting the smmu driver for runtime pm, above said dependency can be addressed. This patch adds the pm runtime/sleep callbacks to the driver and also the functions to parse the smmu clocks from DT and enable them in resume/suspend. We pull all the information about clocks from device tree. Also, while we enable the runtime pm add a pm sleep suspend callback that pushes devices to low power state by turning the clocks off in a system sleep. Also add corresponding clock enable path in resume callback. Signed-off-by: Sricharan R Signed-off-by: Archit Taneja [Thor: Rework to get clocks from device tree] Signed-off-by: Thor Thayer [vivek: rework for clock and pm ops] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 100 +-- 1 file changed, 97 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5a28ae892504..e47c840fc6a8 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -44,10 +44,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -206,6 +208,8 @@ struct arm_smmu_device { u32 num_global_irqs; u32 num_context_irqs; unsigned int*irqs; + struct clk_bulk_data*clks; + int num_clks; u32 cavium_id_base; /* Specific to Cavium */ @@ -1947,7 +1951,7 @@ struct arm_smmu_match_data { }; #define ARM_SMMU_MATCH_DATA(name, ver, imp)\ -static struct arm_smmu_match_data name = { .version = ver, .model = imp } +static const struct arm_smmu_match_data name = { .version = ver, .model = imp } ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, 
GENERIC_SMMU); @@ -1966,6 +1970,23 @@ static const struct of_device_id arm_smmu_of_match[] = { }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); +static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu, + const char * const *clks) +{ + int i; + + if (smmu->num_clks < 1) + return; + + smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks, + sizeof(*smmu->clks), GFP_KERNEL); + if (!smmu->clks) + return; + + for (i = 0; i < smmu->num_clks; i++) + smmu->clks[i].id = clks[i]; +} + #ifdef CONFIG_ACPI static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) { @@ -2038,6 +2059,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, const struct arm_smmu_match_data *data; struct device *dev = &pdev->dev; bool legacy_binding; + const char **parent_names; if (of_property_read_u32(dev->of_node, "#global-interrupts", &smmu->num_global_irqs)) { @@ -2048,6 +2070,26 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, data = of_device_get_match_data(dev); smmu->version = data->version; smmu->model = data->model; + smmu->num_clks = of_clk_get_parent_count(dev->of_node); + /* check to see if clocks were specified in DT */ + if (smmu->num_clks) { + unsigned int i; + + parent_names = kmalloc_array(smmu->num_clks, +sizeof(*parent_names), +GFP_KERNEL); + if (!parent_names) + return -ENOMEM; + + for (i = 0; i < smmu->num_clks; i++) { + if (of_property_read_string_index(dev->of_node, + "clock-names", i, + &parent_names[i])) + goto fail_clk_name; + } + arm_smmu_fill_clk_data(smmu, parent_names); + kfree(parent_names); + } parse_driver_options(smmu); @@ -2067,6 +2109,12 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK; return 0; + +fail_clk_name: + kfree(parent_names); + /* clock-names required for clocks in devm_clk_bulk_get() */ + dev_err(dev, "clock-names required in device tree\n"); + return -ENODEV;
[PATCH v18 0/5] iommu/arm-smmu: Add runtime pm/sleep support
Looks like this is going to be the final respin before this support finally lands. The change isn't much from v17_resend series [1] besides taking Thor's change [2] for clocks. Changes since v17: - Addressing Will's comment to embed Thor's change [2] for pulling clocks information from device tree. This is done by squashing Thor's change [2] in v17's 1/5 patch [3]. - Another minor change is addition of runtime pm hooks to arm_smmu_iova_to_phys_hard(). Previous version of this patch series is @ [1]. Also refer to [4] for change logs for previous versions. [1] https://lore.kernel.org/patchwork/cover/1013166/ [2] https://lore.kernel.org/patchwork/patch/996143/ [3] https://lore.kernel.org/patchwork/patch/1013167/ [4] https://lore.kernel.org/patchwork/cover/979429/ Sricharan R (3): iommu/arm-smmu: Add pm_runtime/sleep ops iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device iommu/arm-smmu: Add the device_link between masters and smmu Vivek Gautam (2): dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2 iommu/arm-smmu: Add support for qcom,smmu-v2 variant .../devicetree/bindings/iommu/arm,smmu.txt | 39 drivers/iommu/arm-smmu.c | 212 +++-- 2 files changed, 239 insertions(+), 12 deletions(-) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH v17 5/5] iommu/arm-smmu: Add support for qcom,smmu-v2 variant
Hi Thor, On 11/26/2018 8:11 PM, Thor Thayer wrote: Hi Vivek, On 11/26/18 4:55 AM, Vivek Gautam wrote: On 11/24/2018 12:04 AM, Will Deacon wrote: On Fri, Nov 23, 2018 at 03:06:29PM +0530, Vivek Gautam wrote: On Fri, Nov 23, 2018 at 2:52 PM Tomasz Figa wrote: On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam wrote: On Wed, Nov 21, 2018 at 11:09 PM Will Deacon wrote: On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote: @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +static const char * const qcom_smmuv2_clks[] = { + "bus", "iface", +}; + +static const struct arm_smmu_match_data qcom_smmuv2 = { + .version = ARM_SMMU_V2, + .model = QCOM_SMMUV2, + .clks = qcom_smmuv2_clks, + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks), +}; These seems redundant if we go down the route proposed by Thor, where we just pull all of the clocks out of the device-tree. In which case, why do we need this match_data at all? Which is better? Driver relying solely on the device tree to tell which all clocks are required to be enabled, or, the driver deciding itself based on the platform's match data, that it should have X, Y, & Z clocks that should be supplied from the device tree. The former would simplify the driver, but would also make it impossible to spot mistakes in DT, which would ultimately surface out as very hard to debug bugs (likely complete system lockups). Thanks. Yea, this is how I understand things presently. Relying on device tree puts the things out of driver's control. But it also has the undesirable effect of having to update the driver code whenever we want to add support for a new SMMU implementation. If we do this all in the DT, as Thor is trying to do, then older kernels will work well with new hardware. Hi Will, Am I unable to understand the intentions here for Thor's clock-fetch design change? 
I'm having trouble parsing your question, sorry. Please work with Thor so that we have a single way to get the clock information. My preference is to take it from the firmware, for the reason I stated above. Hi Will, Sure, thanks. I will work with Thor to get this going. Hi Thor, Does it sound okay to you to squash your patch [1] into my patch [2] with your 'Signed-off-by' tag? I will update the commit log to include the information about getting clock details from device tree. [1] https://patchwork.kernel.org/patch/10628725/ [2] https://patchwork.kernel.org/patch/10686061/ Yes, that would be great and easier to understand than my patch on top of yours. Additionally, can you remove the "Error:" as Will requested as part of the squash? Thanks for your consent. I have reworked the patch today, and have addressed Will's comment. I will give a try on the board and post it by tomorrow. Best regards Vivek Thank you! Thor Best regards Vivek Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH v17 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
On 11/26/2018 11:33 AM, Vivek Gautam wrote: On 11/24/2018 12:06 AM, Will Deacon wrote: On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote: Hi Will, On Wed, Nov 21, 2018 at 11:09 PM Will Deacon wrote: On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote: From: Sricharan R The smmu device probe/remove and add/remove master device callbacks gets called when the smmu is not linked to its master, that is without the context of the master device. So calling runtime apis in those places separately. Global locks are also initialized before enabling runtime pm as the runtime_resume() calls device_reset() which does tlb_sync_global() that ultimately requires locks to be initialized. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 101 ++- 1 file changed, 91 insertions(+), 10 deletions(-) Given that you're doing the get/put in the TLBI ops unconditionally: static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->tlb_ops) + if (smmu_domain->tlb_ops) { + arm_smmu_rpm_get(smmu); smmu_domain->tlb_ops->tlb_flush_all(smmu_domain); + arm_smmu_rpm_put(smmu); + } } static void arm_smmu_iotlb_sync(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->tlb_ops) + if (smmu_domain->tlb_ops) { + arm_smmu_rpm_get(smmu); smmu_domain->tlb_ops->tlb_sync(smmu_domain); + arm_smmu_rpm_put(smmu); + } Why do you need them around the map/unmap calls as well? We still have .tlb_add_flush path? Ok, so we could add the ops around that as well. Right now, we've got the runtime pm hooks crossing two parts of the API. Sure, will do that then, and remove the runtime pm hooks from map/unmap. 
I missed this earlier - We are adding runtime pm hooks in the 'iommu_ops' callbacks and not really to 'tlb_ops'. So how are the runtime pm hooks crossing paths? '.map/.unmap' iommu_ops don't call '.flush_iotlb_all' or '.iotlb_sync' iommu_ops anywhere. E.g., only callers to domain->ops->flush_iotlb_all() are: iommu_dma_flush_iotlb_all(), or iommu_flush_tlb_all() which are not in map/unmap paths. Regards Vivek Thanks Vivek Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH v17 5/5] iommu/arm-smmu: Add support for qcom,smmu-v2 variant
On 11/24/2018 12:04 AM, Will Deacon wrote: On Fri, Nov 23, 2018 at 03:06:29PM +0530, Vivek Gautam wrote: On Fri, Nov 23, 2018 at 2:52 PM Tomasz Figa wrote: On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam wrote: On Wed, Nov 21, 2018 at 11:09 PM Will Deacon wrote: On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote: @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +static const char * const qcom_smmuv2_clks[] = { + "bus", "iface", +}; + +static const struct arm_smmu_match_data qcom_smmuv2 = { + .version = ARM_SMMU_V2, + .model = QCOM_SMMUV2, + .clks = qcom_smmuv2_clks, + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks), +}; These seems redundant if we go down the route proposed by Thor, where we just pull all of the clocks out of the device-tree. In which case, why do we need this match_data at all? Which is better? Driver relying solely on the device tree to tell which all clocks are required to be enabled, or, the driver deciding itself based on the platform's match data, that it should have X, Y, & Z clocks that should be supplied from the device tree. The former would simplify the driver, but would also make it impossible to spot mistakes in DT, which would ultimately surface out as very hard to debug bugs (likely complete system lockups). Thanks. Yea, this is how I understand things presently. Relying on device tree puts the things out of driver's control. But it also has the undesirable effect of having to update the driver code whenever we want to add support for a new SMMU implementation. If we do this all in the DT, as Thor is trying to do, then older kernels will work well with new hardware. Hi Will, Am I unable to understand the intentions here for Thor's clock-fetch design change? I'm having trouble parsing your question, sorry. 
Please work with Thor so that we have a single way to get the clock information. My preference is to take it from the firmware, for the reason I stated above. Hi Will, Sure, thanks. I will work with Thor to get this going. Hi Thor, Does it sound okay to you to squash your patch [1] into my patch [2] with your 'Signed-off-by' tag? I will update the commit log to include the information about getting clock details from device tree. [1] https://patchwork.kernel.org/patch/10628725/ [2] https://patchwork.kernel.org/patch/10686061/ Best regards Vivek Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH v17 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
On 11/24/2018 12:06 AM, Will Deacon wrote: On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote: Hi Will, On Wed, Nov 21, 2018 at 11:09 PM Will Deacon wrote: On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote: From: Sricharan R The smmu device probe/remove and add/remove master device callbacks gets called when the smmu is not linked to its master, that is without the context of the master device. So calling runtime apis in those places separately. Global locks are also initialized before enabling runtime pm as the runtime_resume() calls device_reset() which does tlb_sync_global() that ultimately requires locks to be initialized. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 101 ++- 1 file changed, 91 insertions(+), 10 deletions(-) Given that you're doing the get/put in the TLBI ops unconditionally: static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->tlb_ops) + if (smmu_domain->tlb_ops) { + arm_smmu_rpm_get(smmu); smmu_domain->tlb_ops->tlb_flush_all(smmu_domain); + arm_smmu_rpm_put(smmu); + } } static void arm_smmu_iotlb_sync(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->tlb_ops) + if (smmu_domain->tlb_ops) { + arm_smmu_rpm_get(smmu); smmu_domain->tlb_ops->tlb_sync(smmu_domain); + arm_smmu_rpm_put(smmu); + } Why do you need them around the map/unmap calls as well? We still have .tlb_add_flush path? Ok, so we could add the ops around that as well. Right now, we've got the runtime pm hooks crossing two parts of the API. Sure, will do that then, and remove the runtime pm hooks from map/unmap. 
Thanks Vivek Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH v17 5/5] iommu/arm-smmu: Add support for qcom,smmu-v2 variant
Hi Tomasz, On Fri, Nov 23, 2018 at 2:52 PM Tomasz Figa wrote: > > Hi Vivek, Will, > > On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam > wrote: > > > > Hi Will, > > > > On Wed, Nov 21, 2018 at 11:09 PM Will Deacon wrote: > > > > > > [+Thor] > > > > > > On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote: > > > > qcom,smmu-v2 is an arm,smmu-v2 implementation with specific > > > > clock and power requirements. > > > > On msm8996, multiple cores, viz. mdss, video, etc. use this > > > > smmu. On sdm845, this smmu is used with gpu. > > > > Add bindings for the same. > > > > > > > > Signed-off-by: Vivek Gautam > > > > Reviewed-by: Rob Herring > > > > Reviewed-by: Tomasz Figa > > > > Tested-by: Srinivas Kandagatla > > > > Reviewed-by: Robin Murphy > > > > --- > > > > drivers/iommu/arm-smmu.c | 13 + > > > > 1 file changed, 13 insertions(+) > > > > > > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > > > index 2098c3141f5f..d315ca637097 100644 > > > > --- a/drivers/iommu/arm-smmu.c > > > > +++ b/drivers/iommu/arm-smmu.c > > > > @@ -120,6 +120,7 @@ enum arm_smmu_implementation { > > > > GENERIC_SMMU, > > > > ARM_MMU500, > > > > CAVIUM_SMMUV2, > > > > + QCOM_SMMUV2, > > > > }; > > > > > > > > struct arm_smmu_s2cr { > > > > @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, > > > > GENERIC_SMMU); > > > > ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); > > > > ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); > > > > > > > > +static const char * const qcom_smmuv2_clks[] = { > > > > + "bus", "iface", > > > > +}; > > > > + > > > > +static const struct arm_smmu_match_data qcom_smmuv2 = { > > > > + .version = ARM_SMMU_V2, > > > > + .model = QCOM_SMMUV2, > > > > + .clks = qcom_smmuv2_clks, > > > > + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks), > > > > +}; > > > > > > These seems redundant if we go down the route proposed by Thor, where we > > > just pull all of the clocks out of the device-tree. 
In which case, why > > > do we need this match_data at all? > > > > Which is better? Driver relying solely on the device tree to tell > > which all clocks > > are required to be enabled, > > or, the driver deciding itself based on the platform's match data, > > that it should > > have X, Y, & Z clocks that should be supplied from the device tree. > > The former would simplify the driver, but would also make it > impossible to spot mistakes in DT, which would ultimately surface out > as very hard to debug bugs (likely complete system lockups). Thanks. Yea, this is how I understand things presently. Relying on device tree puts the things out of driver's control. Hi Will, Am I unable to understand the intentions here for Thor's clock-fetch design change? > > For qcom_smmuv2, I believe we're eventually going to end up with > platform-specific quirks anyway, so specifying the clocks too wouldn't > hurt. Given that, I'd recommend sticking to the latter, i.e. what this > patch does. > > Best regards, > Tomasz Best regards Vivek > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH v17 5/5] iommu/arm-smmu: Add support for qcom,smmu-v2 variant
Hi Will, On Wed, Nov 21, 2018 at 11:09 PM Will Deacon wrote: > > [+Thor] > > On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote: > > qcom,smmu-v2 is an arm,smmu-v2 implementation with specific > > clock and power requirements. > > On msm8996, multiple cores, viz. mdss, video, etc. use this > > smmu. On sdm845, this smmu is used with gpu. > > Add bindings for the same. > > > > Signed-off-by: Vivek Gautam > > Reviewed-by: Rob Herring > > Reviewed-by: Tomasz Figa > > Tested-by: Srinivas Kandagatla > > Reviewed-by: Robin Murphy > > --- > > drivers/iommu/arm-smmu.c | 13 + > > 1 file changed, 13 insertions(+) > > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > index 2098c3141f5f..d315ca637097 100644 > > --- a/drivers/iommu/arm-smmu.c > > +++ b/drivers/iommu/arm-smmu.c > > @@ -120,6 +120,7 @@ enum arm_smmu_implementation { > > GENERIC_SMMU, > > ARM_MMU500, > > CAVIUM_SMMUV2, > > + QCOM_SMMUV2, > > }; > > > > struct arm_smmu_s2cr { > > @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, > > GENERIC_SMMU); > > ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); > > ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); > > > > +static const char * const qcom_smmuv2_clks[] = { > > + "bus", "iface", > > +}; > > + > > +static const struct arm_smmu_match_data qcom_smmuv2 = { > > + .version = ARM_SMMU_V2, > > + .model = QCOM_SMMUV2, > > + .clks = qcom_smmuv2_clks, > > + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks), > > +}; > > These seems redundant if we go down the route proposed by Thor, where we > just pull all of the clocks out of the device-tree. In which case, why > do we need this match_data at all? Which is better? Driver relying solely on the device tree to tell which all clocks are required to be enabled, or, the driver deciding itself based on the platform's match data, that it should have X, Y, & Z clocks that should be supplied from the device tree. 
Thanks Vivek > > Will > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH v17 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
Hi Will, On Wed, Nov 21, 2018 at 11:09 PM Will Deacon wrote: > > On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote: > > From: Sricharan R > > > > The smmu device probe/remove and add/remove master device callbacks > > gets called when the smmu is not linked to its master, that is without > > the context of the master device. So calling runtime apis in those places > > separately. > > Global locks are also initialized before enabling runtime pm as the > > runtime_resume() calls device_reset() which does tlb_sync_global() > > that ultimately requires locks to be initialized. > > > > Signed-off-by: Sricharan R > > [vivek: Cleanup pm runtime calls] > > Signed-off-by: Vivek Gautam > > Reviewed-by: Tomasz Figa > > Tested-by: Srinivas Kandagatla > > Reviewed-by: Robin Murphy > > --- > > drivers/iommu/arm-smmu.c | 101 > > ++- > > 1 file changed, 91 insertions(+), 10 deletions(-) > > Given that you're doing the get/put in the TLBI ops unconditionally: > > > static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) > > { > > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > > + struct arm_smmu_device *smmu = smmu_domain->smmu; > > > > - if (smmu_domain->tlb_ops) > > + if (smmu_domain->tlb_ops) { > > + arm_smmu_rpm_get(smmu); > > smmu_domain->tlb_ops->tlb_flush_all(smmu_domain); > > + arm_smmu_rpm_put(smmu); > > + } > > } > > > > static void arm_smmu_iotlb_sync(struct iommu_domain *domain) > > { > > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > > + struct arm_smmu_device *smmu = smmu_domain->smmu; > > > > - if (smmu_domain->tlb_ops) > > + if (smmu_domain->tlb_ops) { > > + arm_smmu_rpm_get(smmu); > > smmu_domain->tlb_ops->tlb_sync(smmu_domain); > > + arm_smmu_rpm_put(smmu); > > + } > > Why do you need them around the map/unmap calls as well? We still have .tlb_add_flush path? 
Thanks Vivek > > Will > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 0/2] Enable smmu support on sdm845
Hi Will, On 11/21/2018 9:22 PM, Will Deacon wrote: Hi Vivek, On Thu, Oct 11, 2018 at 03:19:28PM +0530, Vivek Gautam wrote: This series enables apps-smmu, the "arm,mmu-500" instance on sdm845. Series tested on SDM845 MTP device with related smmu patch series [1], and necessary config change, besides one hack to keep LDO14 in LPM mode to boot things up (see the commit in the test branch). The tested branch is available at [2]. [...] .../devicetree/bindings/iommu/arm,smmu.txt | 4 ++ arch/arm64/boot/dts/qcom/sdm845.dtsi | 72 ++ 2 files changed, 76 insertions(+) Given that this doesn't touch any of the driver code, please take this via the Andy and arm-soc. Yea, once the driver changes are pulled in your tree, I can ask Andy to pick these. Thanks Best regards Vivek Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v17 5/5] iommu/arm-smmu: Add support for qcom, smmu-v2 variant
qcom,smmu-v2 is an arm,smmu-v2 implementation with specific clock and power requirements. On msm8996, multiple cores, viz. mdss, video, etc. use this smmu. On sdm845, this smmu is used with gpu. Add bindings for the same. Signed-off-by: Vivek Gautam Reviewed-by: Rob Herring Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 2098c3141f5f..d315ca637097 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -120,6 +120,7 @@ enum arm_smmu_implementation { GENERIC_SMMU, ARM_MMU500, CAVIUM_SMMUV2, + QCOM_SMMUV2, }; struct arm_smmu_s2cr { @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +static const char * const qcom_smmuv2_clks[] = { + "bus", "iface", +}; + +static const struct arm_smmu_match_data qcom_smmuv2 = { + .version = ARM_SMMU_V2, + .model = QCOM_SMMUV2, + .clks = qcom_smmuv2_clks, + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks), +}; + static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 }, { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 }, @@ -2033,6 +2045,7 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,mmu-401", .data = &arm_mmu401 }, { .compatible = "arm,mmu-500", .data = &arm_mmu500 }, { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 }, + { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 }, { }, }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v17 1/5] iommu/arm-smmu: Add pm_runtime/sleep ops
From: Sricharan R The smmu needs to be functional only when the respective master's using it are active. The device_link feature helps to track such functional dependencies, so that the iommu gets powered when the master device enables itself using pm_runtime. So by adapting the smmu driver for runtime pm, above said dependency can be addressed. This patch adds the pm runtime/sleep callbacks to the driver and also the functions to parse the smmu clocks from DT and enable them in resume/suspend. Also, while we enable the runtime pm add a pm sleep suspend callback that pushes devices to low power state by turning the clocks off in a system sleep. Also add corresponding clock enable path in resume callback. Signed-off-by: Sricharan R Signed-off-by: Archit Taneja [vivek: rework for clock and pm ops] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 77 ++-- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5a28ae892504..f7ab7ce87a94 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -206,6 +207,8 @@ struct arm_smmu_device { u32 num_global_irqs; u32 num_context_irqs; unsigned int*irqs; + struct clk_bulk_data*clks; + int num_clks; u32 cavium_id_base; /* Specific to Cavium */ @@ -1944,10 +1947,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) struct arm_smmu_match_data { enum arm_smmu_arch_version version; enum arm_smmu_implementation model; + const char * const *clks; + int num_clks; }; #define ARM_SMMU_MATCH_DATA(name, ver, imp)\ -static struct arm_smmu_match_data name = { .version = ver, .model = imp } +static const struct arm_smmu_match_data name = { .version = ver, .model = imp } ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, 
GENERIC_SMMU); @@ -1966,6 +1971,23 @@ static const struct of_device_id arm_smmu_of_match[] = { }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); +static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu, + const char * const *clks) +{ + int i; + + if (smmu->num_clks < 1) + return; + + smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks, + sizeof(*smmu->clks), GFP_KERNEL); + if (!smmu->clks) + return; + + for (i = 0; i < smmu->num_clks; i++) + smmu->clks[i].id = clks[i]; +} + #ifdef CONFIG_ACPI static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) { @@ -2048,6 +2070,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, data = of_device_get_match_data(dev); smmu->version = data->version; smmu->model = data->model; + smmu->num_clks = data->num_clks; + + arm_smmu_fill_clk_data(smmu, data->clks); parse_driver_options(smmu); @@ -2150,6 +2175,14 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->irqs[i] = irq; } + err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks); + if (err) + return err; + + err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks); + if (err) + return err; + err = arm_smmu_device_cfg_probe(smmu); if (err) return err; @@ -2236,6 +2269,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + + clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks); + return 0; } @@ -2244,15 +2280,50 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev) arm_smmu_device_remove(pdev); } -static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) { struct arm_smmu_device *smmu = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_enable(smmu->num_clks, smmu->clks); + if (ret) + return ret; arm_smmu_device_reset(smmu); + return 0; } -static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume); +static 
int __maybe_unused arm_smmu_runtime_suspend(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + + clk_bulk_disable(smmu->num_clks, smmu->clks); +
[RESEND PATCH v17 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
From: Sricharan R The smmu device probe/remove and add/remove master device callbacks gets called when the smmu is not linked to its master, that is without the context of the master device. So calling runtime apis in those places separately. Global locks are also initialized before enabling runtime pm as the runtime_resume() calls device_reset() which does tlb_sync_global() that ultimately requires locks to be initialized. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 101 ++- 1 file changed, 91 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index f7ab7ce87a94..cae88c9f83ca 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -270,6 +270,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + return pm_runtime_get_sync(smmu->dev); + + return 0; +} + +static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_put(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -929,11 +943,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - int irq; + int ret, irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return; + /* * Disable the context bank and free the page tables before freeing * it.
@@ -948,6 +966,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); + + arm_smmu_rpm_put(smmu); } static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) @@ -1229,10 +1249,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENODEV; smmu = fwspec_smmu(fwspec); + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + /* Ensure that the domain is finalised */ ret = arm_smmu_init_domain_context(domain, smmu); if (ret < 0) - return ret; + goto rpm_put; /* * Sanity check the domain. We don't support domains across @@ -1242,49 +1267,74 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); - return -EINVAL; + ret = -EINVAL; + goto rpm_put; } /* Looks ok, so add the device to the domain */ - return arm_smmu_domain_add_master(smmu_domain, fwspec); + ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + +rpm_put: + arm_smmu_rpm_put(smmu); + return ret; } static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + int ret; if (!ops) return -ENODEV; - return ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_get(smmu); + ret = ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_put(smmu); + + return ret; } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + size_t ret; if (!ops) return 0; - return ops->unmap(ops, iova, size); + arm_smmu_rpm_get(smmu); + ret = 
ops->unmap(ops, iova, size); + arm_smmu_rpm_put(smmu); + + return ret; } static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domai
[RESEND PATCH v17 3/5] iommu/arm-smmu: Add the device_link between masters and smmu
From: Sricharan R Finally add the device link between the master device and smmu, so that the smmu gets runtime enabled/disabled only when the master needs it. This is done from add_device callback which gets called once when the master is added to the smmu. Signed-off-by: Sricharan R Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- drivers/iommu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index cae88c9f83ca..2098c3141f5f 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1493,6 +1493,9 @@ static int arm_smmu_add_device(struct device *dev) iommu_device_link(&smmu->iommu, dev); + device_link_add(dev, smmu->dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); + return 0; out_cfg_free: -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v17 4/5] dt-bindings: arm-smmu: Add bindings for qcom, smmu-v2
Add bindings doc for Qcom's smmu-v2 implementation. Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Rob Herring Reviewed-by: Robin Murphy --- .../devicetree/bindings/iommu/arm,smmu.txt | 39 ++ 1 file changed, 39 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 8a6ffce12af5..a6504b37cc21 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,10 +17,16 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" +"qcom,smmu-v2" depending on the particular implementation and/or the version of the architecture implemented. + Qcom SoCs must contain, as below, SoC-specific compatibles + along with "qcom,smmu-v2": + "qcom,msm8996-smmu-v2", "qcom,smmu-v2", + "qcom,sdm845-smmu-v2", "qcom,smmu-v2". + - reg : Base address and size of the SMMU. - #global-interrupts : The number of global interrupts exposed by the @@ -71,6 +77,22 @@ conditions. or using stream matching with #iommu-cells = <2>, and may be ignored if present in such cases. +- clock-names:List of the names of clocks input to the device. The + required list depends on particular implementation and + is as follows: + - for "qcom,smmu-v2": +- "bus": clock required for downstream bus access and + for the smmu ptw, +- "iface": clock required to access smmu's registers + through the TCU's programming interface. + - unspecified for other implementations. + +- clocks: Specifiers for all clocks listed in the clock-names property, + as per generic clock bindings. + +- power-domains: Specifiers for power domains required to be powered on for + the SMMU to operate, as per generic power domain bindings. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -137,3 +159,20 @@ conditions. iommu-map = <0 0 0x400>; ... 
}; + + /* Qcom's arm,smmu-v2 implementation */ + smmu4: iommu@d0 { + compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2"; + reg = <0xd0 0x1>; + + #global-interrupts = <1>; + interrupts = , +, +; + #iommu-cells = <1>; + power-domains = < MDSS_GDSC>; + + clocks = < SMMU_MDP_AXI_CLK>, +< SMMU_MDP_AHB_CLK>; + clock-names = "bus", "iface"; + }; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH v17 0/5] iommu/arm-smmu: Add runtime pm/sleep support
Hi Will, I am resending this series after we concluded on comments [1,2] on v16 of this patch-series, and the subsequent patch [3] was posted. Kindly merge this series. Thanks Vivek Previous version of this patch series is @ [4]. Also refer to [4] for change logs for previous versions. [1] https://lore.kernel.org/patchwork/patch/979430/ [2] https://lore.kernel.org/patchwork/patch/979433/ [3] https://lore.kernel.org/patchwork/patch/994194/ [4] https://lore.kernel.org/patchwork/cover/979429/ Sricharan R (3): iommu/arm-smmu: Add pm_runtime/sleep ops iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device iommu/arm-smmu: Add the device_link between masters and smmu Vivek Gautam (2): dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2 iommu/arm-smmu: Add support for qcom,smmu-v2 variant .../devicetree/bindings/iommu/arm,smmu.txt | 39 + drivers/iommu/arm-smmu.c | 192 +++-- 2 files changed, 219 insertions(+), 12 deletions(-) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/arm-smmu: Add support to use Last level cache
Hi Tomasz, On Tue, Oct 23, 2018 at 9:45 AM Tomasz Figa wrote: > > Hi Vivek, > > On Fri, Jun 15, 2018 at 7:53 PM Vivek Gautam > wrote: > > > > Qualcomm SoCs have an additional level of cache called as > > System cache or Last level cache[1]. This cache sits right > > before the DDR, and is tightly coupled with the memory > > controller. > > The cache is available to all the clients present in the > > SoC system. The clients request their slices from this system > > cache, make it active, and can then start using it. For these > > clients with smmu, to start using the system cache for > > dma buffers and related page tables [2], few of the memory > > attributes need to be set accordingly. > > This change makes the related memory Outer-Shareable, and > > updates the MAIR with necessary protection. > > > > The MAIR attribute requirements are: > > Inner Cacheablity = 0 > > Outer Cacheablity = 1, Write-Back Write Allocate > > Outer Shareablity = 1 > > > > This change is a realisation of following changes > > from downstream msm-4.9: > > iommu: io-pgtable-arm: Support DOMAIN_ATTRIBUTE_USE_UPSTREAM_HINT > > iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT > > Would you be able to provide links to those 2 downstream changes? Thanks for the review. 
Here are the links for the changes: [1] -- iommu: io-pgtable-arm: Support DOMAIN_ATTRIBUTE_USE_UPSTREAM_HINT [2] -- iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT [1] https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=bf762276796e79ca90014992f4d9da5593fa7d51 [2] https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=d4c72c413ea27c43f60825193d4de9cb8ffd9602 > > > > > [1] https://patchwork.kernel.org/patch/10422531/ > > [2] https://patchwork.kernel.org/patch/10302791/ > > > > Signed-off-by: Vivek Gautam > > --- > > drivers/iommu/arm-smmu.c | 14 ++ > > drivers/iommu/io-pgtable-arm.c | 24 +++- > > drivers/iommu/io-pgtable.h | 4 > > include/linux/iommu.h | 4 > > 4 files changed, 41 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > index f7a96bcf94a6..8058e7205034 100644 > > --- a/drivers/iommu/arm-smmu.c > > +++ b/drivers/iommu/arm-smmu.c > > @@ -249,6 +249,7 @@ struct arm_smmu_domain { > > struct mutexinit_mutex; /* Protects smmu > > pointer */ > > spinlock_t cb_lock; /* Serialises ATS1* ops > > and TLB syncs */ > > struct iommu_domain domain; > > + boolhas_sys_cache; > > }; > > > > struct arm_smmu_option_prop { > > @@ -862,6 +863,8 @@ static int arm_smmu_init_domain_context(struct > > iommu_domain *domain, > > > > if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) > > pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; > > + if (smmu_domain->has_sys_cache) > > + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_SYS_CACHE; > > > > smmu_domain->smmu = smmu; > > pgtbl_ops = alloc_io_pgtable_ops(fmt, _cfg, smmu_domain); > > @@ -1477,6 +1480,9 @@ static int arm_smmu_domain_get_attr(struct > > iommu_domain *domain, > > case DOMAIN_ATTR_NESTING: > > *(int *)data = (smmu_domain->stage == > > ARM_SMMU_DOMAIN_NESTED); > > return 0; > > + case DOMAIN_ATTR_USE_SYS_CACHE: > > + *((int *)data) = smmu_domain->has_sys_cache; > > + return 0; > > default: > > return -ENODEV; > > } > > @@ -1506,6 +1512,14 
@@ static int arm_smmu_domain_set_attr(struct > > iommu_domain *domain, > > smmu_domain->stage = ARM_SMMU_DOMAIN_S1; > > > > break; > > + case DOMAIN_ATTR_USE_SYS_CACHE: > > + if (smmu_domain->smmu) { > > + ret = -EPERM; > > + goto out_unlock; > > + } > > + if (*((int *)data)) > > + smmu_domain->has_sys_cache = true; > > + break; > > default: > > ret = -ENODEV; > > } > > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c > > index 010a254305dd..b2aee1828524 100644 > > --- a/drivers/iommu/io-pgtable-arm.c > > +++ b/drivers/iomm
Re: [PATCH v2 4/4] iommu/arm-smmu: Add support to handle Qcom's TLBI serialization errata
Hi Robin, On Tue, Sep 25, 2018 at 6:01 PM Robin Murphy wrote: > > On 10/09/18 07:25, Vivek Gautam wrote: > > Qcom's implementation of arm,mmu-500 require to serialize all > > TLB invalidations for context banks. > > What does "serialize all TLB invalidations" actually mean, because it's > not entirely clear from context, and furthermore this patch appears to > behave subtly differently to the downstream code so I'm really > struggling to figure out whether it's actually doing what it's intended > to do. Adding Pratik Patel from downstream team. Thanks for taking a look at this. We want to space out the TLB invalidation and then the workaround to toggle wait-safe logic would let the safe checks in HW work and only allow invalidation to occur when device is expected to not run into underruns. > > In case the TLB invalidation requests don't go through the first > > time, there's a way to disable/enable the wait for safe logic. > > Disabling this logic expedites the TLBIs. > > > > Different bootloaders with their access control policies allow this > > register access differently. With one, we should be able to directly > > make qcom-scm call to do io read/write, while with other we should > > use the specific SCM command to send request to do the complete > > register configuration. > > A separate device tree flag for arm-smmu will allow to identify > > which firmware configuration of the two mentioned above we use. 
> > > > Signed-off-by: Vivek Gautam > > --- > > drivers/iommu/arm-smmu-regs.h | 2 + > > drivers/iommu/arm-smmu.c | 133 > > +- > > 2 files changed, 133 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h > > index a1226e4ab5f8..71662cae9806 100644 > > --- a/drivers/iommu/arm-smmu-regs.h > > +++ b/drivers/iommu/arm-smmu-regs.h > > @@ -177,6 +177,8 @@ enum arm_smmu_s2cr_privcfg { > > #define ARM_SMMU_CB_ATS1PR 0x800 > > #define ARM_SMMU_CB_ATSR0x8f0 > > > > +#define ARM_SMMU_GID_QCOM_CUSTOM_CFG 0x300 > > + > > #define SCTLR_S1_ASIDPNE(1 << 12) > > #define SCTLR_CFCFG (1 << 7) > > #define SCTLR_CFIE (1 << 6) > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > index 411e5ac57c64..de9c4a5bf686 100644 > > --- a/drivers/iommu/arm-smmu.c > > +++ b/drivers/iommu/arm-smmu.c > > @@ -49,6 +49,7 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > > > @@ -181,7 +182,8 @@ struct arm_smmu_device { > > #define ARM_SMMU_FEAT_EXIDS (1 << 12) > > u32 features; > > > > -#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) > > +#define ARM_SMMU_OPT_SECURE_CFG_ACCESS(1 << 0) > > +#define ARM_SMMU_OPT_QCOM_FW_IMPL_ERRATA (1 << 1) > > u32 options; > > enum arm_smmu_arch_version version; > > enum arm_smmu_implementationmodel; > > @@ -266,6 +268,7 @@ static bool using_legacy_binding, using_generic_binding; > > > > static struct arm_smmu_option_prop arm_smmu_options[] = { > > { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" > > }, > > + { ARM_SMMU_OPT_QCOM_FW_IMPL_ERRATA, "qcom,smmu-500-fw-impl-errata" }, > > { 0, NULL}, > > }; > > > > @@ -531,12 +534,134 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned > > long iova, size_t size, > > writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); > > } > > > > +#define CUSTOM_CFG_MDP_SAFE_ENABLE BIT(15) > > +#define CUSTOM_CFG_IFE1_SAFE_ENABLE BIT(14) > > +#define CUSTOM_CFG_IFE0_SAFE_ENABLE BIT(13) > > 
+ > > +static int __qsmmu500_wait_safe_toggle(struct arm_smmu_device *smmu, int > > en) > > +{ > > + int ret; > > + u32 val, gid_phys_base; > > + phys_addr_t reg; > > + struct vm_struct *vm; > > + > > + /* We want physical address of SMMU, so the vm_area */ > > + vm = find_vm_area(smmu->base); > > + > > + /* > > + * GID (implementation defined address space) is located at > > + * SMMU_BASE + (2 × PAGESIZE). > > + */ > > + gid_phys_base = vm->phys_addr + (2 << (smmu)->pgshift); > > + reg = gid_phys_base + ARM_SMMU_GID_QCO
Re: [PATCH v4 1/2] dt-bindings: arm-smmu: Add binding doc for Qcom smmu-500
On 10/12/2018 3:46 AM, Rob Herring wrote: On Thu, 11 Oct 2018 15:19:29 +0530, Vivek Gautam wrote: Qcom's implementation of arm,mmu-500 works well with current arm-smmu driver implementation. Adding a soc specific compatible along with arm,mmu-500 makes the bindings future safe. Signed-off-by: Vivek Gautam --- Changes since v3: - Refined language more to state things directly for the bindings description. Documentation/devicetree/bindings/iommu/arm,smmu.txt | 4 1 file changed, 4 insertions(+) Reviewed-by: Rob Herring Thank you Rob. Best regards Vivek ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v4 2/2] dts: arm64/sdm845: Add node for arm,mmu-500
Add device node for arm,mmu-500 available on sdm845. This MMU-500 with single TCU and multiple TBU architecture is shared among all the peripherals except gpu. Signed-off-by: Vivek Gautam --- Changes since v3: - none. arch/arm64/boot/dts/qcom/sdm845.dtsi | 72 1 file changed, 72 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index b72bdb0a31a5..0aace729643d 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -1297,6 +1297,78 @@ cell-index = <0>; }; + apps_smmu: iommu@1500 { + compatible = "qcom,sdm845-smmu-500", "arm,mmu-500"; + reg = <0x1500 0x8>; + #iommu-cells = <2>; + #global-interrupts = <1>; + interrupts = , +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +; + }; + apss_shared: mailbox@1799 { compatible = "qcom,sdm845-apss-shared"; reg = <0x1799 0x1000>; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v4 1/2] dt-bindings: arm-smmu: Add binding doc for Qcom smmu-500
Qcom's implementation of arm,mmu-500 works well with current arm-smmu driver implementation. Adding a soc specific compatible along with arm,mmu-500 makes the bindings future safe. Signed-off-by: Vivek Gautam --- Changes since v3: - Refined language more to state things directly for the bindings description. Documentation/devicetree/bindings/iommu/arm,smmu.txt | 4 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index a6504b37cc21..3133f3ba7567 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -27,6 +27,10 @@ conditions. "qcom,msm8996-smmu-v2", "qcom,smmu-v2", "qcom,sdm845-smmu-v2", "qcom,smmu-v2". + Qcom SoCs implementing "arm,mmu-500" must also include, + as below, SoC-specific compatibles: + "qcom,sdm845-smmu-500", "arm,mmu-500" + - reg : Base address and size of the SMMU. - #global-interrupts : The number of global interrupts exposed by the -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v4 0/2] Enable smmu support on sdm845
This series enables apps-smmu, the "arm,mmu-500" instance on sdm845. Series tested on SDM845 MTP device with related smmu patch series [1], and necessary config change, besides one hack to keep LDO14 in LPM mode to boot things up (see the commit in the test branch). The tested branch is available at [2]. Changes since v3: - Refined the dt binding documentation more to align with Rob's comments for qcom,smmu-v2 binding documentation [3] on SoC specific compatibles and state things similar to qcom,smmu-v2 [4]. Changes since v2: - Dropped patch for gpu-smmu [5] as the same has been pushed by Jordan [6] after incorporating the dt binding update. - Updated binding doc to include valid value for '' in "qcom,-smmu-500" compatible string. Changes since v1: - Addressed Rob's review comments by adding a SoC specific compatible. Have added a new dt-bindings patch for this. - Updated node name to 'iommu'. - Addressed Doug's review comment about removing status property from smmu's nodes, as smmu is either present on the soc or not. Enabling it is not a board-level decision. [1] https://patchwork.kernel.org/cover/10581891/ [2] https://github.com/vivekgautam1/linux/commits/v4.19-rc7/sdm845-smmu-dt [3] https://patchwork.kernel.org/patch/10576937/ [4] https://patchwork.kernel.org/patch/10581911/ [5] https://patchwork.kernel.org/patch/10565295/ [6] https://patchwork.kernel.org/patch/10577297/ Vivek Gautam (2): dt-bindings: arm-smmu: Add binding doc for Qcom smmu-500 dts: arm64/sdm845: Add node for arm,mmu-500 .../devicetree/bindings/iommu/arm,smmu.txt | 4 ++ arch/arm64/boot/dts/qcom/sdm845.dtsi | 72 ++ 2 files changed, 76 insertions(+) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v4 0/2] Enable smmu support on sdm845
This series enables apps-smmu, the "arm,mmu-500" instance on sdm845. Changes since v3: - Refined the dt binding documentation more to align with Rob's comments for qcom,smmu-v2 binding documentation [1] on SoC specific compatibles and state things similar to qcom,smmu-v2 [2]. Changes since v2: - Dropped patch for gpu-smmu [3] as the same has been pushed by Jordan [4] after incorporating the dt binding update. - Updated binding doc to include valid value for '' in "qcom,-smmu-500" compatible string. Changes since v1: - Addressed Rob's review comments by adding a SoC specific compatible. Have added a new dt-bindings patch for this. - Updated node name to 'iommu'. - Addressed Doug's review comment about removing status property from smmu's nodes, as smmu is either present on the soc or not. Enabling it is not a board-level decision. [1] https://patchwork.kernel.org/patch/10576937/ [2] https://patchwork.kernel.org/patch/10581911/ [3] https://patchwork.kernel.org/patch/10565295/ [4] https://patchwork.kernel.org/patch/10577297/ Vivek Gautam (2): dt-bindings: arm-smmu: Add binding doc for Qcom smmu-500 dts: arm64/sdm845: Add node for arm,mmu-500 .../devicetree/bindings/iommu/arm,smmu.txt | 4 ++ arch/arm64/boot/dts/qcom/sdm845.dtsi | 72 ++ 2 files changed, 76 insertions(+) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/arm-smmu: Add support to use Last level cache
Hi Will, On Fri, Sep 28, 2018 at 6:49 PM Will Deacon wrote: > > Hi Vivek, > > On Thu, Sep 20, 2018 at 05:11:53PM +0530, Vivek Gautam wrote: > > On Wed, Jun 27, 2018 at 10:07 PM Will Deacon wrote: > > > On Tue, Jun 19, 2018 at 02:04:44PM +0530, Vivek Gautam wrote: > > > > On Fri, Jun 15, 2018 at 10:22 PM, Will Deacon > > > > wrote: > > > > > On Fri, Jun 15, 2018 at 04:23:29PM +0530, Vivek Gautam wrote: > > > > >> Qualcomm SoCs have an additional level of cache called as > > > > >> System cache or Last level cache[1]. This cache sits right > > > > >> before the DDR, and is tightly coupled with the memory > > > > >> controller. > > > > >> The cache is available to all the clients present in the > > > > >> SoC system. The clients request their slices from this system > > > > >> cache, make it active, and can then start using it. For these > > > > >> clients with smmu, to start using the system cache for > > > > >> dma buffers and related page tables [2], few of the memory > > > > >> attributes need to be set accordingly. > > > > >> This change makes the related memory Outer-Shareable, and > > > > >> updates the MAIR with necessary protection. > > > > >> > > > > >> The MAIR attribute requirements are: > > > > >> Inner Cacheablity = 0 > > > > >> Outer Cacheablity = 1, Write-Back Write Allocate > > > > >> Outer Shareablity = 1 > > > > > > > > > > Hmm, so is this cache coherent with the CPU or not? > > > > > > > > Thanks for reviewing. > > > > Yes, this LLC is cache coherent with CPU, so we mark for > > > > Outer-cacheable. > > > > The different masters such as GPU as able to allocated and activate a > > > > slice > > > > in this Last Level Cache. 
> > > > > > What I mean is, for example, if the CPU writes some data using Normal, > > > Inner > > > Shareable, Inner/Outer Cacheable, Inner/Outer Write-back, Non-transient > > > Read/Write-allocate and a device reads that data using your MAIR encoding > > > above, is the device guaranteed to see the CPU writes after the CPU has > > > executed a DSB instruction? > > > > No, these MAIR configurations don't guarantee that devices will have > > coherent view > > of what CPU writes. Not all devices can snoop into CPU caches (only > > IO-Coherent > > devices can). > > So a normal cached memory configuration in CPU MMU tables, and SMMU page > > tables > > is valid only for few devices that are IO-coherent. > > > > Moreover, CPU can lookup in system cache, and so do all devices; > > allocation will depend on h/w configurations and memory attributes. > > So anything that CPU caches in system cache will be coherently visible > > to devices. > > > > > > > > I don't think so, because the ARM ARM would say that there's a mismatch on > > > the Inner Cacheability attribute. > > > > > > > > Why don't normal > > > > > non-cacheable mappings allocated in the LLC by default? > > > > > > > > Sorry, I couldn't fully understand your question here. > > > > Few of the masters on qcom socs are not io-coherent, so for them > > > > the IC has to be marked as 0. > > > > > > By IC you mean Inner Cacheability? In your MAIR encoding above, it is zero > > > so I don't understand the problem. What goes wrong if non-coherent devices > > > use your MAIR encoding for their DMA buffers? > > > > > > > But they are able to use the LLC with OC marked as 1. > > > > > > The issue here is that whatever attributes we put in the SMMU need to > > > align > > > with the attributes used by the CPU in order to avoid introducing > > > mismatched > > > aliases. > > > > Not really, right? 
> > Devices can use Inner non-Cacheable, Outer-cacheable (IC=0, OC=1) to > > allocate > > into the system cache (as these devices don't want to allocate in > > their inner caches), > > and the CPU will have a coherent view of these buffers/page-tables. > > This should be > > a normal cached non-IO-Coherent memory. > > > > But anything that CPU writes using Normal, Inner Shareable, > > Inner/Outer Cacheable, > > Inner/Outer Write-back, Non-transient Read/Write-allocate, may n
Re: [PATCHv3 2/2] iommu/arm-smmu: Add SMMU clock
Hi Thor, On 10/4/2018 3:58 AM, thor.tha...@linux.intel.com wrote: From: Thor Thayer Add a clock to the SMMU structure. In the device tree case, check for a clock node and enable the clock if found. This patch is dependent upon the following patches that add a device tree bulk clock function. "[V6, 1/4] clk: bulk: add of_clk_bulk_get()" https://patchwork.kernel.org/patch/10583133/ "[V6, 2/4] clk: add new APIs to operation on all available clocks" https://patchwork.kernel.org/patch/10583131/ "[V6, 3/4] clk: add managerged version of clk_bulk_get_all" https://patchwork.kernel.org/patch/10583139/ Signed-off-by: Thor Thayer --- drivers/iommu/arm-smmu.c | 20 1 file changed, 20 insertions(+) The clock part is already handled by one of the patches in the runtime pm series of arm-smmu [1]. This patch should not be needed. [1] https://patchwork.kernel.org/patch/10581899/ [...] Regards Vivek ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v17 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
From: Sricharan R The smmu device probe/remove and add/remove master device callbacks gets called when the smmu is not linked to its master, that is without the context of the master device. So calling runtime apis in those places separately. Global locks are also initialized before enabling runtime pm as the runtime_resume() calls device_reset() which does tlb_sync_global() that ultimately requires locks to be initialized. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla Reviewed-by: Robin Murphy --- Changes since v16: - Rebased on Will's tree for arm-smmu updates [1]. - Added rpm{get|put} calls to .flush_iotlb_all, and .iotlb_sync callbacks to take care of non-strict tlb flush queue updates. drivers/iommu/arm-smmu.c | 101 ++- 1 file changed, 91 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 59d1430a9f2d..bf106688ab7a 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -269,6 +269,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + return pm_runtime_get_sync(smmu->dev); + + return 0; +} + +static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_put(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -928,11 +942,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = _domain->cfg; - int irq; + int ret, irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return; + /* * Disable the 
context bank and free the page tables before freeing * it. @@ -947,6 +965,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); + + arm_smmu_rpm_put(smmu); } static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) @@ -1228,10 +1248,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENODEV; smmu = fwspec_smmu(fwspec); + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + /* Ensure that the domain is finalised */ ret = arm_smmu_init_domain_context(domain, smmu); if (ret < 0) - return ret; + goto rpm_put; /* * Sanity check the domain. We don't support domains across @@ -1241,49 +1266,74 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); - return -EINVAL; + ret = -EINVAL; + goto rpm_put; } /* Looks ok, so add the device to the domain */ - return arm_smmu_domain_add_master(smmu_domain, fwspec); + ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + +rpm_put: + arm_smmu_rpm_put(smmu); + return ret; } static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + int ret; if (!ops) return -ENODEV; - return ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_get(smmu); + ret = ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_put(smmu); + + return ret; } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + size_t ret; if (!ops) return 0; - return 
ops->unmap(ops, iova, size); + arm_smmu_rpm_get(smmu); + ret = ops->unmap(ops, iova, size); + arm_smmu_rpm_put(smmu); + + return ret; } static void arm_s
Re: [PATCH v16 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
On Tue, Oct 2, 2018 at 9:44 AM Vivek Gautam wrote: > > Hi Will, > > On Mon, Oct 1, 2018 at 6:29 PM Will Deacon wrote: > > > > Hi Vivek, > > > > On Thu, Aug 30, 2018 at 08:15:38PM +0530, Vivek Gautam wrote: > > > From: Sricharan R > > > > > > The smmu device probe/remove and add/remove master device callbacks > > > gets called when the smmu is not linked to its master, that is without > > > the context of the master device. So calling runtime apis in those places > > > separately. > > > Global locks are also initialized before enabling runtime pm as the > > > runtime_resume() calls device_reset() which does tlb_sync_global() > > > that ultimately requires locks to be initialized. > > > > > > Signed-off-by: Sricharan R > > > [vivek: Cleanup pm runtime calls] > > > Signed-off-by: Vivek Gautam > > > Reviewed-by: Tomasz Figa > > > Tested-by: Srinivas Kandagatla > > > --- > > > drivers/iommu/arm-smmu.c | 89 > > > +++- > > > 1 file changed, 81 insertions(+), 8 deletions(-) > > > > This doesn't apply on my tree[1], possibly because I've got Robin's > > non-strict > > invalidation queued there. However, that got me thinking -- how does this > > work in conjunction with the timer-based TLB invalidation? Do we need to > > rpm_{get,put} around flush_iotlb_all()? If so, do we still need the calls > > in map/unmap when non-strict mode is in use? For map/unmap(), i think there would be no harm in having additional power.usage_count even for the non-strict mode. So, I will just add rpm{get,put} in arm_smmu_flush_iotlb_all(), and arm_smmu_iotlb_sync(). Regards Vivek > > I haven't tested things with flush queues, but from what it looks like > both .flush_iotlb_all, and .iotlb_sync callbacks need rpm_get/put(). > I will respin the patches. > > Thanks > Vivek > > > > Will > > > > [1] > > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-joerg/arm-smmu/updates > > > > -- > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. 
is a member > of Code Aurora Forum, hosted by The Linux Foundation -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v16 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
Hi Will, On Mon, Oct 1, 2018 at 6:29 PM Will Deacon wrote: > > Hi Vivek, > > On Thu, Aug 30, 2018 at 08:15:38PM +0530, Vivek Gautam wrote: > > From: Sricharan R > > > > The smmu device probe/remove and add/remove master device callbacks > > gets called when the smmu is not linked to its master, that is without > > the context of the master device. So calling runtime apis in those places > > separately. > > Global locks are also initialized before enabling runtime pm as the > > runtime_resume() calls device_reset() which does tlb_sync_global() > > that ultimately requires locks to be initialized. > > > > Signed-off-by: Sricharan R > > [vivek: Cleanup pm runtime calls] > > Signed-off-by: Vivek Gautam > > Reviewed-by: Tomasz Figa > > Tested-by: Srinivas Kandagatla > > --- > > drivers/iommu/arm-smmu.c | 89 > > +++- > > 1 file changed, 81 insertions(+), 8 deletions(-) > > This doesn't apply on my tree[1], possibly because I've got Robin's non-strict > invalidation queued there. However, that got me thinking -- how does this > work in conjunction with the timer-based TLB invalidation? Do we need to > rpm_{get,put} around flush_iotlb_all()? If so, do we still need the calls > in map/unmap when non-strict mode is in use? I haven't tested things with flush queues, but from what it looks like both .flush_iotlb_all, and .iotlb_sync callbacks need rpm_get/put(). I will respin the patches. Thanks Vivek > > Will > > [1] > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-joerg/arm-smmu/updates -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v16 1/5] iommu/arm-smmu: Add pm_runtime/sleep ops
On Wed, Sep 26, 2018 at 8:57 PM Robin Murphy wrote: > > On 30/08/18 15:45, Vivek Gautam wrote: > > From: Sricharan R > > > > The smmu needs to be functional only when the respective > > master's using it are active. The device_link feature > > helps to track such functional dependencies, so that the > > iommu gets powered when the master device enables itself > > using pm_runtime. So by adapting the smmu driver for > > runtime pm, above said dependency can be addressed. > > > > This patch adds the pm runtime/sleep callbacks to the > > driver and also the functions to parse the smmu clocks > > from DT and enable them in resume/suspend. > > > > Also, while we enable the runtime pm add a pm sleep suspend > > callback that pushes devices to low power state by turning > > the clocks off in a system sleep. > > Also add corresponding clock enable path in resume callback. > > > > Signed-off-by: Sricharan R > > Signed-off-by: Archit Taneja > > [vivek: rework for clock and pm ops] > > Signed-off-by: Vivek Gautam > > Reviewed-by: Tomasz Figa > > Tested-by: Srinivas Kandagatla > > --- > > drivers/iommu/arm-smmu.c | 77 > > ++-- > > 1 file changed, 74 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > index fd1b80ef9490..d900e007c3c9 100644 > > --- a/drivers/iommu/arm-smmu.c > > +++ b/drivers/iommu/arm-smmu.c > > @@ -48,6 +48,7 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > > > @@ -205,6 +206,8 @@ struct arm_smmu_device { > > u32 num_global_irqs; > > u32 num_context_irqs; > > unsigned int*irqs; > > + struct clk_bulk_data*clks; > > + int num_clks; > > > > u32 cavium_id_base; /* Specific to Cavium > > */ > > > > @@ -1896,10 +1899,12 @@ static int arm_smmu_device_cfg_probe(struct > > arm_smmu_device *smmu) > > struct arm_smmu_match_data { > > enum arm_smmu_arch_version version; > > enum arm_smmu_implementation model; > > + const char * const *clks; > > + int num_clks; > > }; > > > > 
#define ARM_SMMU_MATCH_DATA(name, ver, imp) \ > > -static struct arm_smmu_match_data name = { .version = ver, .model = imp } > > +static const struct arm_smmu_match_data name = { .version = ver, .model = > > imp } > > > > ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); > > ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); > > @@ -1918,6 +1923,23 @@ static const struct of_device_id arm_smmu_of_match[] > > = { > > }; > > MODULE_DEVICE_TABLE(of, arm_smmu_of_match); > > > > +static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu, > > +const char * const *clks) > > +{ > > + int i; > > + > > + if (smmu->num_clks < 1) > > + return; > > + > > + smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks, > > + sizeof(*smmu->clks), GFP_KERNEL); > > + if (!smmu->clks) > > + return; > > + > > + for (i = 0; i < smmu->num_clks; i++) > > + smmu->clks[i].id = clks[i]; > > +} > > + > > #ifdef CONFIG_ACPI > > static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) > > { > > @@ -2000,6 +2022,9 @@ static int arm_smmu_device_dt_probe(struct > > platform_device *pdev, > > data = of_device_get_match_data(dev); > > smmu->version = data->version; > > smmu->model = data->model; > > + smmu->num_clks = data->num_clks; > > + > > + arm_smmu_fill_clk_data(smmu, data->clks); > > > > parse_driver_options(smmu); > > > > @@ -2098,6 +2123,14 @@ static int arm_smmu_device_probe(struct > > platform_device *pdev) > > smmu->irqs[i] = irq; > > } > > > > + err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks); > > + if (err) > > + return err; > > + > > + err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks); > > + if (err) > > + return err; > > + > > Hmm, if we error out
Re: [PATCH v16 5/5] iommu/arm-smmu: Add support for qcom, smmu-v2 variant
Hi Robin, On Wed, Sep 26, 2018 at 9:29 PM Robin Murphy wrote: > > On 30/08/18 15:45, Vivek Gautam wrote: > > qcom,smmu-v2 is an arm,smmu-v2 implementation with specific > > clock and power requirements. > > On msm8996, multiple cores, viz. mdss, video, etc. use this > > smmu. On sdm845, this smmu is used with gpu. > > Add bindings for the same. > > > > Signed-off-by: Vivek Gautam > > Reviewed-by: Rob Herring > > Reviewed-by: Tomasz Figa > > Tested-by: Srinivas Kandagatla > > --- > > drivers/iommu/arm-smmu.c | 13 + > > 1 file changed, 13 insertions(+) > > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > index 166c8c6da24f..411e5ac57c64 100644 > > --- a/drivers/iommu/arm-smmu.c > > +++ b/drivers/iommu/arm-smmu.c > > @@ -119,6 +119,7 @@ enum arm_smmu_implementation { > > GENERIC_SMMU, > > ARM_MMU500, > > CAVIUM_SMMUV2, > > + QCOM_SMMUV2, > > Hmm, it seems we don't actually need this right now, but maybe that just > means there's more imp-def registers and/or errata to come ;) > > Either way I guess there's no real harm in having it. Thanks for the review. 
Best regards Vivek > > Reviewed-by: Robin Murphy > > > }; > > > > struct arm_smmu_s2cr { > > @@ -1970,6 +1971,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, > > GENERIC_SMMU); > > ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); > > ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); > > > > +static const char * const qcom_smmuv2_clks[] = { > > + "bus", "iface", > > +}; > > + > > +static const struct arm_smmu_match_data qcom_smmuv2 = { > > + .version = ARM_SMMU_V2, > > + .model = QCOM_SMMUV2, > > + .clks = qcom_smmuv2_clks, > > + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks), > > +}; > > + > > static const struct of_device_id arm_smmu_of_match[] = { > > { .compatible = "arm,smmu-v1", .data = _generic_v1 }, > > { .compatible = "arm,smmu-v2", .data = _generic_v2 }, > > @@ -1977,6 +1989,7 @@ static const struct of_device_id arm_smmu_of_match[] > > = { > > { .compatible = "arm,mmu-401", .data = _mmu401 }, > > { .compatible = "arm,mmu-500", .data = _mmu500 }, > > { .compatible = "cavium,smmu-v2", .data = _smmuv2 }, > > + { .compatible = "qcom,smmu-v2", .data = _smmuv2 }, > > { }, > > }; > > MODULE_DEVICE_TABLE(of, arm_smmu_of_match); > > > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 0/4] Qcom smmu-500 TLB invalidation errata for sdm845
On Tue, Sep 25, 2018 at 10:09 PM Will Deacon wrote: > > On Tue, Sep 25, 2018 at 02:09:34PM +0200, Joerg Roedel wrote: > > On Mon, Sep 10, 2018 at 11:55:47AM +0530, Vivek Gautam wrote: > > > Vivek Gautam (4): > > > firmware: qcom_scm-64: Add atomic version of qcom_scm_call > > > firmware/qcom_scm: Add atomic version of io read/write APIs > > > firmware/qcom_scm: Add scm call to handle smmu errata > > > iommu/arm-smmu: Add support to handle Qcom's TLBI serialization errata > > > > > > drivers/firmware/qcom_scm-32.c | 17 > > > drivers/firmware/qcom_scm-64.c | 181 > > > +++-- > > > drivers/firmware/qcom_scm.c| 18 > > > drivers/firmware/qcom_scm.h| 9 ++ > > > drivers/iommu/arm-smmu-regs.h | 2 + > > > drivers/iommu/arm-smmu.c | 133 +- > > > include/linux/qcom_scm.h | 6 ++ > > > 7 files changed, 320 insertions(+), 46 deletions(-) > > > > Should this go through the iommu-tree? In that case I need Acked-by's > > for the firmware code changes. > > Yup, and I'd also like Robin's ack on the arm-smmu*.c changes (I see he has > some comments on the code as it stands). Thanks Joerg, and Will for taking a look at it. I will answer and address Robin's comments for smmu part. Best regards Vivek > > Will > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v16 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
Hi Robin, On Wed, Sep 26, 2018 at 12:25 AM Robin Murphy wrote: > > Hi Vivek, > > On 2018-09-25 6:56 AM, Vivek Gautam wrote: > > Hi Robin, Will, > > > > On Tue, Sep 18, 2018 at 8:41 AM Vivek Gautam > > wrote: > >> > >> Hi Robin, > >> > >> On Fri, Sep 7, 2018 at 3:52 PM Vivek Gautam > >> wrote: > >>> > >>> On Fri, Sep 7, 2018 at 3:22 PM Tomasz Figa wrote: > >>>> > >>>> On Fri, Sep 7, 2018 at 6:38 PM Vivek Gautam > >>>> wrote: > >>>>> > >>>>> Hi Tomasz, > >>>>> > >>>>> > >>>>> On 9/7/2018 2:46 PM, Tomasz Figa wrote: > >>>>>> Hi Vivek, > >>>>>> > >>>>>> On Thu, Aug 30, 2018 at 11:46 PM Vivek Gautam > >>>>>> wrote: > >>>>>>> From: Sricharan R > >>>>>>> > >>>>>>> The smmu device probe/remove and add/remove master device callbacks > >>>>>>> gets called when the smmu is not linked to its master, that is without > >>>>>>> the context of the master device. So calling runtime apis in those > >>>>>>> places > >>>>>>> separately. > >>>>>>> Global locks are also initialized before enabling runtime pm as the > >>>>>>> runtime_resume() calls device_reset() which does tlb_sync_global() > >>>>>>> that ultimately requires locks to be initialized. 
> >>>>>>> > >>>>>>> Signed-off-by: Sricharan R > >>>>>>> [vivek: Cleanup pm runtime calls] > >>>>>>> Signed-off-by: Vivek Gautam > >>>>>>> Reviewed-by: Tomasz Figa > >>>>>>> Tested-by: Srinivas Kandagatla > >>>>>>> --- > >>>>>>>drivers/iommu/arm-smmu.c | 89 > >>>>>>> +++- > >>>>>>>1 file changed, 81 insertions(+), 8 deletions(-) > >>>>>> [snip] > >>>>>>> @@ -2215,10 +2281,17 @@ static int arm_smmu_device_remove(struct > >>>>>>> platform_device *pdev) > >>>>>>> if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) > >>>>>>> dev_err(>dev, "removing device with active > >>>>>>> domains!\n"); > >>>>>>> > >>>>>>> + arm_smmu_rpm_get(smmu); > >>>>>>> /* Turn the thing off */ > >>>>>>> writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + > >>>>>>> ARM_SMMU_GR0_sCR0); > >>>>>>> + arm_smmu_rpm_put(smmu); > >>>>>>> + > >>>>>>> + if (pm_runtime_enabled(smmu->dev)) > >>>>>>> + pm_runtime_force_suspend(smmu->dev); > >>>>>>> + else > >>>>>>> + clk_bulk_disable(smmu->num_clks, smmu->clks); > >>>>>>> > >>>>>>> - clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks); > >>>>>>> + clk_bulk_unprepare(smmu->num_clks, smmu->clks); > >>>>>> Aren't we missing pm_runtime_disable() here? We'll have the enable > >>>>>> count unbalanced if the driver is removed and probed again. > >>>>> > >>>>> pm_runtime_force_suspend() does a pm_runtime_disable() also if i am not > >>>>> wrong. > >>>>> And, as mentioned in a previous thread [1], we were seeing a warning > >>>>> which we avoided > >>>>> by keeping force_suspend(). > >>>>> > >>>>> [1] https://lkml.org/lkml/2018/7/8/124 > >>>> > >>>> I see, thanks. I didn't realize that pm_runtime_force_suspend() > >>>> already disables runtime PM indeed. Sorry for the noise. > >>> > >>> Hi Tomasz, > >>> No problem. Thanks for looking back at it. > >>> > >>> Hi Robin, > >>> If you are fine with this series, then can you please consider giving > >>> Reviewed-by, so that we are certain that this series will go in the next > >>> merge > >>> window. 
> >>> Thanks > >> > >> Gentle ping. > >> You ack will be very helpful in letting Will pull this series for 4.20. > >> Thanks. > > > > I would really appreciate if you could provide your ack for this series. > > Or if there are any concerns, I am willing to address them. > > Apologies, I thought I'd replied to say I'd be getting to this shortly, > but apparently not :( > > FWIW, "shortly" is now tomorrow - I don't *think* there's anything > outstanding, but given the number of subtleties we've turned up so far I > do just want one last thorough double-check to make sure. Cool. TIA for the review. I hope we have something that we can land :), and then work further to take care of addressing other needs of this driver. Thanks. Best regards Vivek > > Thanks, > Robin. > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 0/4] Qcom smmu-500 TLB invalidation errata for sdm845
Hi Bjorn, On Mon, Sep 10, 2018 at 4:08 PM Vivek Gautam wrote: > > +linux-arm-msm > > > On 09/10/2018 11:55 AM, Vivek Gautam wrote: > > Qcom's implementation of arm,mmu-500 on sdm845 has a functional/performance > > errata [1] because of which the TCU cache look ups are stalled during > > invalidation cycle. This is mitigated by serializing all the invalidation > > requests coming to the smmu. > > > > This patch series addresses this errata by adding new tlb_ops for > > qcom,sdm845-smmu-500 [2]. These ops take context bank locks for all the > > tlb_ops that queue and sync the TLB invalidation requests. > > > > Besides adding locks, there's a way to expadite these TLB invalidations > > for display and camera devices by turning off the 'wait-for-safe' logic > > in hardware that holds the tlb invalidations until a safe level. > > This 'wait-for-safe' logic is controlled by toggling a chicken bit > > through a secure register. This secure register is accessed by making an > > explicit SCM call into the EL3 firmware. > > There are two ways of handling this logic - > > * Firmware, such as tz present on sdm845-mtp devices has a handler to do > > all the register access and bit set/clear. So is the handling in > > downstream arm-smmu driver [3]. > > * Other firmwares can have handlers to just read/write this secure > > register. In such cases the kernel make io_read/writel scm calls to > > modify the register. > > This patch series adds APIs in qcom-scm driver to handle both of these > > cases. > > > > Lastly, since these TLB invalidations can happen in atomic contexts > > there's a need to add atomic versions of qcom_scm_io_readl/writel() and > > qcom_scm_call() APIs. The traditional scm calls take mutex and we therefore > > can't use these calls in atomic contexts. > > > > This patch series is adapted version of how the errata is handled in > > downstream [1]. Gentle ping. Please let me know if you have comments on the SCM pieces in this series. 
Thanks & Regards Vivek > > > > Changes since v1: > > * Addressed Will and Robin's comments: > > - Dropped the patch[4] that forked out > > __arm_smmu_tlb_inv_range_nosync(), > >and __arm_smmu_tlb_sync(). > > - Cleaned up the errata patch further to use downstream polling > > mechanism > >for tlb sync. > > * No change in SCM call patches - patches 1 to 3. > > > > [1] > > https://source.codeaurora.org/quic/la/kernel/msm-4.9/tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4842 > > [2] https://lore.kernel.org/patchwork/patch/974114/ > > [3] > > https://source.codeaurora.org/quic/la/kernel/msm-4.9/tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4864 > > [4] https://patchwork.kernel.org/patch/10565349/ > > > > Vivek Gautam (4): > >firmware: qcom_scm-64: Add atomic version of qcom_scm_call > >firmware/qcom_scm: Add atomic version of io read/write APIs > >firmware/qcom_scm: Add scm call to handle smmu errata > >iommu/arm-smmu: Add support to handle Qcom's TLBI serialization errata > > > > drivers/firmware/qcom_scm-32.c | 17 > > drivers/firmware/qcom_scm-64.c | 181 > > +++-- > > drivers/firmware/qcom_scm.c| 18 > > drivers/firmware/qcom_scm.h| 9 ++ > > drivers/iommu/arm-smmu-regs.h | 2 + > > drivers/iommu/arm-smmu.c | 133 +- > > include/linux/qcom_scm.h | 6 ++ > > 7 files changed, 320 insertions(+), 46 deletions(-) > > > > -- > The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, > a Linux Foundation Collaborative Project > > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v16 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
Hi Robin, Will, On Tue, Sep 18, 2018 at 8:41 AM Vivek Gautam wrote: > > Hi Robin, > > On Fri, Sep 7, 2018 at 3:52 PM Vivek Gautam > wrote: > > > > On Fri, Sep 7, 2018 at 3:22 PM Tomasz Figa wrote: > > > > > > On Fri, Sep 7, 2018 at 6:38 PM Vivek Gautam > > > wrote: > > > > > > > > Hi Tomasz, > > > > > > > > > > > > On 9/7/2018 2:46 PM, Tomasz Figa wrote: > > > > > Hi Vivek, > > > > > > > > > > On Thu, Aug 30, 2018 at 11:46 PM Vivek Gautam > > > > > wrote: > > > > >> From: Sricharan R > > > > >> > > > > >> The smmu device probe/remove and add/remove master device callbacks > > > > >> gets called when the smmu is not linked to its master, that is > > > > >> without > > > > >> the context of the master device. So calling runtime apis in those > > > > >> places > > > > >> separately. > > > > >> Global locks are also initialized before enabling runtime pm as the > > > > >> runtime_resume() calls device_reset() which does tlb_sync_global() > > > > >> that ultimately requires locks to be initialized. 
> > > > >> > > > > >> Signed-off-by: Sricharan R > > > > >> [vivek: Cleanup pm runtime calls] > > > > >> Signed-off-by: Vivek Gautam > > > > >> Reviewed-by: Tomasz Figa > > > > >> Tested-by: Srinivas Kandagatla > > > > >> --- > > > > >> drivers/iommu/arm-smmu.c | 89 > > > > >> +++- > > > > >> 1 file changed, 81 insertions(+), 8 deletions(-) > > > > > [snip] > > > > >> @@ -2215,10 +2281,17 @@ static int arm_smmu_device_remove(struct > > > > >> platform_device *pdev) > > > > >> if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) > > > > >> dev_err(>dev, "removing device with active > > > > >> domains!\n"); > > > > >> > > > > >> + arm_smmu_rpm_get(smmu); > > > > >> /* Turn the thing off */ > > > > >> writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + > > > > >> ARM_SMMU_GR0_sCR0); > > > > >> + arm_smmu_rpm_put(smmu); > > > > >> + > > > > >> + if (pm_runtime_enabled(smmu->dev)) > > > > >> + pm_runtime_force_suspend(smmu->dev); > > > > >> + else > > > > >> + clk_bulk_disable(smmu->num_clks, smmu->clks); > > > > >> > > > > >> - clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks); > > > > >> + clk_bulk_unprepare(smmu->num_clks, smmu->clks); > > > > > Aren't we missing pm_runtime_disable() here? We'll have the enable > > > > > count unbalanced if the driver is removed and probed again. > > > > > > > > pm_runtime_force_suspend() does a pm_runtime_disable() also if i am not > > > > wrong. > > > > And, as mentioned in a previous thread [1], we were seeing a warning > > > > which we avoided > > > > by keeping force_suspend(). > > > > > > > > [1] https://lkml.org/lkml/2018/7/8/124 > > > > > > I see, thanks. I didn't realize that pm_runtime_force_suspend() > > > already disables runtime PM indeed. Sorry for the noise. > > > > Hi Tomasz, > > No problem. Thanks for looking back at it. > > > > Hi Robin, > > If you are fine with this series, then can you please consider giving > > Reviewed-by, so that we are certain that this series will go in the next > > merge > > window. 
> > Thanks > > Gentle ping. > You ack will be very helpful in letting Will pull this series for 4.20. > Thanks. I would really appreciate if you could provide your ack for this series. Or if there are any concerns, I am willing to address them. Thanks. Best regards Vivek > > -- > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member > of Code Aurora Forum, hosted by The Linux Foundation -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/arm-smmu: Add support to use Last level cache
Hi Will, On Wed, Jun 27, 2018 at 10:07 PM Will Deacon wrote: > > Hi Vivek, > > On Tue, Jun 19, 2018 at 02:04:44PM +0530, Vivek Gautam wrote: > > On Fri, Jun 15, 2018 at 10:22 PM, Will Deacon wrote: > > > On Fri, Jun 15, 2018 at 04:23:29PM +0530, Vivek Gautam wrote: > > >> Qualcomm SoCs have an additional level of cache called as > > >> System cache or Last level cache[1]. This cache sits right > > >> before the DDR, and is tightly coupled with the memory > > >> controller. > > >> The cache is available to all the clients present in the > > >> SoC system. The clients request their slices from this system > > >> cache, make it active, and can then start using it. For these > > >> clients with smmu, to start using the system cache for > > >> dma buffers and related page tables [2], few of the memory > > >> attributes need to be set accordingly. > > >> This change makes the related memory Outer-Shareable, and > > >> updates the MAIR with necessary protection. > > >> > > >> The MAIR attribute requirements are: > > >> Inner Cacheablity = 0 > > >> Outer Cacheablity = 1, Write-Back Write Allocate > > >> Outer Shareablity = 1 > > > > > > Hmm, so is this cache coherent with the CPU or not? > > > > Thanks for reviewing. > > Yes, this LLC is cache coherent with CPU, so we mark for Outer-cacheable. > > The different masters such as GPU as able to allocated and activate a slice > > in this Last Level Cache. > > What I mean is, for example, if the CPU writes some data using Normal, Inner > Shareable, Inner/Outer Cacheable, Inner/Outer Write-back, Non-transient > Read/Write-allocate and a device reads that data using your MAIR encoding > above, is the device guaranteed to see the CPU writes after the CPU has > executed a DSB instruction? No, these MAIR configurations don't guarantee that devices will have coherent view of what CPU writes. Not all devices can snoop into CPU caches (only IO-Coherent devices can). 
So a normal cached memory configuration in the CPU MMU tables and SMMU page tables is valid only for the few devices that are IO-coherent. Moreover, the CPU can look up in the system cache, and so can all devices; allocation will depend on h/w configurations and memory attributes. So anything that the CPU caches in the system cache will be coherently visible to devices. > > I don't think so, because the ARM ARM would say that there's a mismatch on > the Inner Cacheability attribute. > > > > Why don't normal > > > non-cacheable mappings allocated in the LLC by default? > > > > Sorry, I couldn't fully understand your question here. > > Few of the masters on qcom socs are not io-coherent, so for them > > the IC has to be marked as 0. > > By IC you mean Inner Cacheability? In your MAIR encoding above, it is zero > so I don't understand the problem. What goes wrong if non-coherent devices > use your MAIR encoding for their DMA buffers? > > > But they are able to use the LLC with OC marked as 1. > > The issue here is that whatever attributes we put in the SMMU need to align > with the attributes used by the CPU in order to avoid introducing mismatched > aliases. Not really, right? Devices can use Inner non-Cacheable, Outer-cacheable (IC=0, OC=1) to allocate into the system cache (as these devices don't want to allocate in their inner caches), and the CPU will have a coherent view of these buffers/page-tables. This should be a normal cached non-IO-Coherent memory. But anything that the CPU writes using Normal, Inner Shareable, Inner/Outer Cacheable, Inner/Outer Write-back, Non-transient Read/Write-allocate, may not be visible to the device. Also added Jordan and Pratik to this thread. Thanks & Regards Vivek > Currently, we support three types of mapping in the SMMU: > > 1. DMA non-coherent (e.g. "dma-coherent" is not set on the device) > Normal, Inner Shareable, Inner/Outer Non-Cacheable > > 2. DMA coherent (e.g. 
"dma-coherent" is set on the device) [IOMMU_CACHE] > Normal, Inner Shareable, Inner/Outer Cacheable, Inner/Outer > Write-back, Non-transient Read/Write-allocate > > 3. MMIO (e.g. MSI doorbell) [IOMMU_MMIO] > Device-nGnRE (Outer Shareable) > > So either you override one of these types (I was suggesting (1)) or you need > to create a new memory type, along with the infrastructure for it to be > recognised on a per-device basis and used by the DMA API so that we don't > get mismatched aliases on the CPU. > > Will > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/arm-smmu: Add support to use Last level cache
On Thu, Sep 20, 2018 at 1:05 AM Jordan Crouse wrote: > > On Tue, Jul 24, 2018 at 03:13:37PM +0530, Vivek Gautam wrote: > > Hi Will, > > > > > > On Wed, Jun 27, 2018 at 10:07 PM, Will Deacon wrote: > > > Hi Vivek, > > > > > > On Tue, Jun 19, 2018 at 02:04:44PM +0530, Vivek Gautam wrote: > > >> On Fri, Jun 15, 2018 at 10:22 PM, Will Deacon > > >> wrote: > > >> > On Fri, Jun 15, 2018 at 04:23:29PM +0530, Vivek Gautam wrote: > > >> >> Qualcomm SoCs have an additional level of cache called as > > >> >> System cache or Last level cache[1]. This cache sits right > > >> >> before the DDR, and is tightly coupled with the memory > > >> >> controller. > > >> >> The cache is available to all the clients present in the > > >> >> SoC system. The clients request their slices from this system > > >> >> cache, make it active, and can then start using it. For these > > >> >> clients with smmu, to start using the system cache for > > >> >> dma buffers and related page tables [2], few of the memory > > >> >> attributes need to be set accordingly. > > >> >> This change makes the related memory Outer-Shareable, and > > >> >> updates the MAIR with necessary protection. > > >> >> > > >> >> The MAIR attribute requirements are: > > >> >> Inner Cacheablity = 0 > > >> >> Outer Cacheablity = 1, Write-Back Write Allocate > > >> >> Outer Shareablity = 1 > > >> > > > >> > Hmm, so is this cache coherent with the CPU or not? > > >> > > >> Thanks for reviewing. > > >> Yes, this LLC is cache coherent with CPU, so we mark for Outer-cacheable. > > >> The different masters such as GPU as able to allocated and activate a > > >> slice > > >> in this Last Level Cache. 
> > > > > > What I mean is, for example, if the CPU writes some data using Normal, > > > Inner > > > Shareable, Inner/Outer Cacheable, Inner/Outer Write-back, Non-transient > > > Read/Write-allocate and a device reads that data using your MAIR encoding > > > above, is the device guaranteed to see the CPU writes after the CPU has > > > executed a DSB instruction? > > > > > > I don't think so, because the ARM ARM would say that there's a mismatch on > > > the Inner Cacheability attribute. > > > > > >> > Why don't normal > > >> > non-cacheable mappings allocated in the LLC by default? > > >> > > >> Sorry, I couldn't fully understand your question here. > > >> Few of the masters on qcom socs are not io-coherent, so for them > > >> the IC has to be marked as 0. > > > > > > By IC you mean Inner Cacheability? In your MAIR encoding above, it is zero > > > so I don't understand the problem. What goes wrong if non-coherent devices > > > use your MAIR encoding for their DMA buffers? > > > > > >> But they are able to use the LLC with OC marked as 1. > > > > > > The issue here is that whatever attributes we put in the SMMU need to > > > align > > > with the attributes used by the CPU in order to avoid introducing > > > mismatched > > > aliases. Currently, we support three types of mapping in the SMMU: > > > > > > 1. DMA non-coherent (e.g. "dma-coherent" is not set on the device) > > > Normal, Inner Shareable, Inner/Outer Non-Cacheable > > > > > > 2. DMA coherent (e.g. "dma-coherent" is set on the device) [IOMMU_CACHE] > > > Normal, Inner Shareable, Inner/Outer Cacheable, Inner/Outer > > > Write-back, Non-transient Read/Write-allocate > > > > > > 3. MMIO (e.g. 
MSI doorbell) [IOMMU_MMIO] > > > Device-nGnRE (Outer Shareable) > > > > > > So either you override one of these types (I was suggesting (1)) or you > > > need > > > to create a new memory type, along with the infrastructure for it to be > > > recognised on a per-device basis and used by the DMA API so that we don't > > > get mismatched aliases on the CPU. > > > > My apologies for delay in responding to this thread. > > I have been digging and getting in touch with internal tech teams > > to get more information on this. I will update as soon as I have enough > > details. > > Thanks. > > Hi Vivek. I want to revive this discussion. I believe that Andy has pulled > in the base LLCC support so this is the remaining dependency we need to implement > the LLCC in the GPU driver. Hi Jordan, yes, I was in the process of gathering information about the system cache usage and the attribute configurations required when devices use the system cache. Let me respond to Will's questions now. Thanks Vivek > > Thanks, > Jordan > > -- > The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, > a Linux Foundation Collaborative Project > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v16 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
Hi Robin, On Fri, Sep 7, 2018 at 3:52 PM Vivek Gautam wrote: > > On Fri, Sep 7, 2018 at 3:22 PM Tomasz Figa wrote: > > > > On Fri, Sep 7, 2018 at 6:38 PM Vivek Gautam > > wrote: > > > > > > Hi Tomasz, > > > > > > > > > On 9/7/2018 2:46 PM, Tomasz Figa wrote: > > > > Hi Vivek, > > > > > > > > On Thu, Aug 30, 2018 at 11:46 PM Vivek Gautam > > > > wrote: > > > >> From: Sricharan R > > > >> > > > >> The smmu device probe/remove and add/remove master device callbacks > > > >> gets called when the smmu is not linked to its master, that is without > > > >> the context of the master device. So calling runtime apis in those > > > >> places > > > >> separately. > > > >> Global locks are also initialized before enabling runtime pm as the > > > >> runtime_resume() calls device_reset() which does tlb_sync_global() > > > >> that ultimately requires locks to be initialized. > > > >> > > > >> Signed-off-by: Sricharan R > > > >> [vivek: Cleanup pm runtime calls] > > > >> Signed-off-by: Vivek Gautam > > > >> Reviewed-by: Tomasz Figa > > > >> Tested-by: Srinivas Kandagatla > > > >> --- > > > >> drivers/iommu/arm-smmu.c | 89 > > > >> +++- > > > >> 1 file changed, 81 insertions(+), 8 deletions(-) > > > > [snip] > > > >> @@ -2215,10 +2281,17 @@ static int arm_smmu_device_remove(struct > > > >> platform_device *pdev) > > > >> if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) > > > >> dev_err(>dev, "removing device with active > > > >> domains!\n"); > > > >> > > > >> + arm_smmu_rpm_get(smmu); > > > >> /* Turn the thing off */ > > > >> writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + > > > >> ARM_SMMU_GR0_sCR0); > > > >> + arm_smmu_rpm_put(smmu); > > > >> + > > > >> + if (pm_runtime_enabled(smmu->dev)) > > > >> + pm_runtime_force_suspend(smmu->dev); > > > >> + else > > > >> + clk_bulk_disable(smmu->num_clks, smmu->clks); > > > >> > > > >> - clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks); > > > >> + clk_bulk_unprepare(smmu->num_clks, smmu->clks); > > > > Aren't we missing 
pm_runtime_disable() here? We'll have the enable > > > > count unbalanced if the driver is removed and probed again. > > > > > > pm_runtime_force_suspend() does a pm_runtime_disable() also if i am not > > > wrong. > > > And, as mentioned in a previous thread [1], we were seeing a warning > > > which we avoided > > > by keeping force_suspend(). > > > > > > [1] https://lkml.org/lkml/2018/7/8/124 > > > > I see, thanks. I didn't realize that pm_runtime_force_suspend() > > already disables runtime PM indeed. Sorry for the noise. > > Hi Tomasz, > No problem. Thanks for looking back at it. > > Hi Robin, > If you are fine with this series, then can you please consider giving > Reviewed-by, so that we are certain that this series will go in the next merge > window. > Thanks Gentle ping. Your ack will be very helpful in letting Will pull this series for 4.20. Thanks. Best regards Vivek -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v16 4/5] dt-bindings: arm-smmu: Add bindings for qcom, smmu-v2
On Mon, Sep 10, 2018 at 11:32 PM Rob Herring wrote: > > On Thu, 30 Aug 2018 20:15:40 +0530, Vivek Gautam wrote: > > Add bindings doc for Qcom's smmu-v2 implementation. > > > > Signed-off-by: Vivek Gautam > > Reviewed-by: Tomasz Figa > > Tested-by: Srinivas Kandagatla > > --- > > .../devicetree/bindings/iommu/arm,smmu.txt | 39 > > ++ > > 1 file changed, 39 insertions(+) > > > > Reviewed-by: Rob Herring Thanks Rob. Best regards Vivek -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 0/4] Qcom smmu-500 TLB invalidation errata for sdm845
+linux-arm-msm On 09/10/2018 11:55 AM, Vivek Gautam wrote: Qcom's implementation of arm,mmu-500 on sdm845 has a functional/performance errata [1] because of which the TCU cache lookups are stalled during invalidation cycle. This is mitigated by serializing all the invalidation requests coming to the smmu. This patch series addresses this errata by adding new tlb_ops for qcom,sdm845-smmu-500 [2]. These ops take context bank locks for all the tlb_ops that queue and sync the TLB invalidation requests. Besides adding locks, there's a way to expedite these TLB invalidations for display and camera devices by turning off the 'wait-for-safe' logic in hardware that holds the tlb invalidations until a safe level. This 'wait-for-safe' logic is controlled by toggling a chicken bit through a secure register. This secure register is accessed by making an explicit SCM call into the EL3 firmware. There are two ways of handling this logic - * Firmware, such as tz present on sdm845-mtp devices has a handler to do all the register access and bit set/clear. So is the handling in downstream arm-smmu driver [3]. * Other firmwares can have handlers to just read/write this secure register. In such cases the kernel makes io_read/writel scm calls to modify the register. This patch series adds APIs in qcom-scm driver to handle both of these cases. Lastly, since these TLB invalidations can happen in atomic contexts there's a need to add atomic versions of qcom_scm_io_readl/writel() and qcom_scm_call() APIs. The traditional scm calls take mutex and we therefore can't use these calls in atomic contexts. This patch series is an adapted version of how the errata is handled in downstream [1]. Changes since v1: * Addressed Will and Robin's comments: - Dropped the patch[4] that forked out __arm_smmu_tlb_inv_range_nosync(), and __arm_smmu_tlb_sync(). - Cleaned up the errata patch further to use downstream polling mechanism for tlb sync. * No change in SCM call patches - patches 1 to 3. 
[1] https://source.codeaurora.org/quic/la/kernel/msm-4.9/tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4842 [2] https://lore.kernel.org/patchwork/patch/974114/ [3] https://source.codeaurora.org/quic/la/kernel/msm-4.9/tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4864 [4] https://patchwork.kernel.org/patch/10565349/ Vivek Gautam (4): firmware: qcom_scm-64: Add atomic version of qcom_scm_call firmware/qcom_scm: Add atomic version of io read/write APIs firmware/qcom_scm: Add scm call to handle smmu errata iommu/arm-smmu: Add support to handle Qcom's TLBI serialization errata drivers/firmware/qcom_scm-32.c | 17 drivers/firmware/qcom_scm-64.c | 181 +++-- drivers/firmware/qcom_scm.c| 18 drivers/firmware/qcom_scm.h| 9 ++ drivers/iommu/arm-smmu-regs.h | 2 + drivers/iommu/arm-smmu.c | 133 +- include/linux/qcom_scm.h | 6 ++ 7 files changed, 320 insertions(+), 46 deletions(-) -- The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 0/4] Qcom smmu-500 TLB invalidation errata for sdm845
Qcom's implementation of arm,mmu-500 on sdm845 has a functional/performance errata [1] because of which the TCU cache look ups are stalled during the invalidation cycle. This is mitigated by serializing all the invalidation requests coming to the smmu. This patch series addresses this errata by adding new tlb_ops for qcom,sdm845-smmu-500 [2]. These ops take context bank locks for all the tlb_ops that queue and sync the TLB invalidation requests. Besides adding locks, there's a way to expedite these TLB invalidations for display and camera devices by turning off the 'wait-for-safe' logic in hardware that holds the tlb invalidations until a safe level. This 'wait-for-safe' logic is controlled by toggling a chicken bit through a secure register. This secure register is accessed by making an explicit SCM call into the EL3 firmware. There are two ways of handling this logic - * Firmware, such as tz present on sdm845-mtp devices has a handler to do all the register access and bit set/clear. So is the handling in downstream arm-smmu driver [3]. * Other firmwares can have handlers to just read/write this secure register. In such cases the kernel makes io_read/writel scm calls to modify the register. This patch series adds APIs in qcom-scm driver to handle both of these cases. Lastly, since these TLB invalidations can happen in atomic contexts there's a need to add atomic versions of qcom_scm_io_readl/writel() and qcom_scm_call() APIs. The traditional scm calls take mutex and we therefore can't use these calls in atomic contexts. This patch series is an adapted version of how the errata is handled in downstream [1]. Changes since v1: * Addressed Will and Robin's comments: - Dropped the patch[4] that forked out __arm_smmu_tlb_inv_range_nosync(), and __arm_smmu_tlb_sync(). - Cleaned up the errata patch further to use downstream polling mechanism for tlb sync. * No change in SCM call patches - patches 1 to 3. 
[1] https://source.codeaurora.org/quic/la/kernel/msm-4.9/tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4842 [2] https://lore.kernel.org/patchwork/patch/974114/ [3] https://source.codeaurora.org/quic/la/kernel/msm-4.9/tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4864 [4] https://patchwork.kernel.org/patch/10565349/ Vivek Gautam (4): firmware: qcom_scm-64: Add atomic version of qcom_scm_call firmware/qcom_scm: Add atomic version of io read/write APIs firmware/qcom_scm: Add scm call to handle smmu errata iommu/arm-smmu: Add support to handle Qcom's TLBI serialization errata drivers/firmware/qcom_scm-32.c | 17 drivers/firmware/qcom_scm-64.c | 181 +++-- drivers/firmware/qcom_scm.c| 18 drivers/firmware/qcom_scm.h| 9 ++ drivers/iommu/arm-smmu-regs.h | 2 + drivers/iommu/arm-smmu.c | 133 +- include/linux/qcom_scm.h | 6 ++ 7 files changed, 320 insertions(+), 46 deletions(-) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 2/4] firmware/qcom_scm: Add atomic version of io read/write APIs
Add atomic versions of qcom_scm_io_readl/writel to enable reading/writing secure registers from atomic context. Signed-off-by: Vivek Gautam --- drivers/firmware/qcom_scm-32.c | 12 drivers/firmware/qcom_scm-64.c | 32 drivers/firmware/qcom_scm.c| 12 drivers/firmware/qcom_scm.h| 4 include/linux/qcom_scm.h | 4 5 files changed, 64 insertions(+) diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c index 4e24e591ae74..7293e5efad69 100644 --- a/drivers/firmware/qcom_scm-32.c +++ b/drivers/firmware/qcom_scm-32.c @@ -627,3 +627,15 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val) return qcom_scm_call_atomic2(QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE, addr, val); } + +int __qcom_scm_io_readl_atomic(struct device *dev, phys_addr_t addr, + unsigned int *val) +{ + return -ENODEV; +} + +int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, + unsigned int val) +{ + return -ENODEV; +} diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c index 3a8c867cdf51..6bf55403f6e3 100644 --- a/drivers/firmware/qcom_scm-64.c +++ b/drivers/firmware/qcom_scm-64.c @@ -558,3 +558,35 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val) return qcom_scm_call(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE, , ); } + +int __qcom_scm_io_readl_atomic(struct device *dev, phys_addr_t addr, + unsigned int *val) +{ + struct qcom_scm_desc desc = {0}; + struct arm_smccc_res res; + int ret; + + desc.args[0] = addr; + desc.arginfo = QCOM_SCM_ARGS(1); + + ret = qcom_scm_call_atomic(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_READ, + , ); + if (ret >= 0) + *val = res.a1; + + return ret < 0 ? 
ret : 0; +} + +int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, + unsigned int val) +{ + struct qcom_scm_desc desc = {0}; + struct arm_smccc_res res; + + desc.args[0] = addr; + desc.args[1] = val; + desc.arginfo = QCOM_SCM_ARGS(2); + + return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE, + , ); +} diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index e778af766fae..36dab37f 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -365,6 +365,18 @@ int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) } EXPORT_SYMBOL(qcom_scm_io_writel); +int qcom_scm_io_readl_atomic(phys_addr_t addr, unsigned int *val) +{ + return __qcom_scm_io_readl_atomic(__scm->dev, addr, val); +} +EXPORT_SYMBOL(qcom_scm_io_readl_atomic); + +int qcom_scm_io_writel_atomic(phys_addr_t addr, unsigned int val) +{ + return __qcom_scm_io_writel_atomic(__scm->dev, addr, val); +} +EXPORT_SYMBOL(qcom_scm_io_writel_atomic); + static void qcom_scm_set_download_mode(bool enable) { bool avail; diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h index dcd7f7917fc7..bb176107f51e 100644 --- a/drivers/firmware/qcom_scm.h +++ b/drivers/firmware/qcom_scm.h @@ -37,6 +37,10 @@ extern void __qcom_scm_cpu_power_down(u32 flags); #define QCOM_SCM_IO_WRITE 0x2 extern int __qcom_scm_io_readl(struct device *dev, phys_addr_t addr, unsigned int *val); extern int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val); +extern int __qcom_scm_io_readl_atomic(struct device *dev, phys_addr_t addr, + unsigned int *val); +extern int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, + unsigned int val); #define QCOM_SCM_SVC_INFO 0x6 #define QCOM_IS_CALL_AVAIL_CMD 0x1 diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 5d65521260b3..6a5d0c98b328 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -64,6 +64,8 @@ extern int 
qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); +extern int qcom_scm_io_readl_atomic(phys_addr_t addr, unsigned int *val); +extern int qcom_scm_io_writel_atomic(phys_addr_t addr, unsigned int val); #else static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) @@ -100,5 +102,7 @@ static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { ret static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; } stati
[PATCH v2 4/4] iommu/arm-smmu: Add support to handle Qcom's TLBI serialization errata
Qcom's implementation of arm,mmu-500 requires serializing all TLB invalidations for context banks. In case the TLB invalidation requests don't go through the first time, there's a way to disable/enable the wait for safe logic. Disabling this logic expedites the TLBIs. Different bootloaders with their access control policies allow this register access differently. With one, we should be able to directly make qcom-scm call to do io read/write, while with the other we should use the specific SCM command to send request to do the complete register configuration. A separate device tree flag for arm-smmu will allow to identify which firmware configuration of the two mentioned above we use. Signed-off-by: Vivek Gautam --- drivers/iommu/arm-smmu-regs.h | 2 + drivers/iommu/arm-smmu.c | 133 +- 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h index a1226e4ab5f8..71662cae9806 100644 --- a/drivers/iommu/arm-smmu-regs.h +++ b/drivers/iommu/arm-smmu-regs.h @@ -177,6 +177,8 @@ enum arm_smmu_s2cr_privcfg { #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 +#define ARM_SMMU_GID_QCOM_CUSTOM_CFG 0x300 + #define SCTLR_S1_ASIDPNE (1 << 12) #define SCTLR_CFCFG(1 << 7) #define SCTLR_CFIE (1 << 6) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 411e5ac57c64..de9c4a5bf686 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -181,7 +182,8 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_EXIDS(1 << 12) u32 features; -#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) +#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) +#define ARM_SMMU_OPT_QCOM_FW_IMPL_ERRATA (1 << 1) u32 options; enum arm_smmu_arch_version version; enum arm_smmu_implementationmodel; @@ -266,6 +268,7 @@ static bool using_legacy_binding, using_generic_binding; static struct arm_smmu_option_prop arm_smmu_options[] = { { 
ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" }, + { ARM_SMMU_OPT_QCOM_FW_IMPL_ERRATA, "qcom,smmu-500-fw-impl-errata" }, { 0, NULL}, }; @@ -531,12 +534,134 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); } +#define CUSTOM_CFG_MDP_SAFE_ENABLE BIT(15) +#define CUSTOM_CFG_IFE1_SAFE_ENABLEBIT(14) +#define CUSTOM_CFG_IFE0_SAFE_ENABLEBIT(13) + +static int __qsmmu500_wait_safe_toggle(struct arm_smmu_device *smmu, int en) +{ + int ret; + u32 val, gid_phys_base; + phys_addr_t reg; + struct vm_struct *vm; + + /* We want physical address of SMMU, so the vm_area */ + vm = find_vm_area(smmu->base); + + /* +* GID (implementation defined address space) is located at +* SMMU_BASE + (2 × PAGESIZE). +*/ + gid_phys_base = vm->phys_addr + (2 << (smmu)->pgshift); + reg = gid_phys_base + ARM_SMMU_GID_QCOM_CUSTOM_CFG; + + ret = qcom_scm_io_readl_atomic(reg, ); + if (ret) + return ret; + + if (en) + val |= CUSTOM_CFG_MDP_SAFE_ENABLE | + CUSTOM_CFG_IFE0_SAFE_ENABLE | + CUSTOM_CFG_IFE1_SAFE_ENABLE; + else + val &= ~(CUSTOM_CFG_MDP_SAFE_ENABLE | +CUSTOM_CFG_IFE0_SAFE_ENABLE | +CUSTOM_CFG_IFE1_SAFE_ENABLE); + + ret = qcom_scm_io_writel_atomic(reg, val); + + return ret; +} + +static int qsmmu500_wait_safe_toggle(struct arm_smmu_device *smmu, +int en, bool is_fw_impl) +{ + if (is_fw_impl) + return qcom_scm_qsmmu500_wait_safe_toggle(en); + else + return __qsmmu500_wait_safe_toggle(smmu, en); +} + +static void qcom_errata_tlb_sync(struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx); + bool is_fw_impl; + u32 val; + + writel_relaxed(0, base + ARM_SMMU_CB_TLBSYNC); + + if (!readl_poll_timeout_atomic(base + ARM_SMMU_CB_TLBSTATUS, val, + !(val & sTLBGSTATUS_GSACTIVE), 0, 100)) + return; + + is_fw_impl = smmu->options & ARM_SMMU_OPT_QCOM_FW_IMPL_ERRATA ? 
+ true : false; + + /* SCM call here to disable the wait-for-safe logic. */ + if (WARN(qsmmu500_wait_safe_toggle(smmu, false, is_fw
[PATCH v2 3/4] firmware/qcom_scm: Add scm call to handle smmu errata
Qcom's smmu-500 needs to toggle wait-for-safe sequence to handle TLB invalidation sync's. Few firmwares allow doing that through SCM interface. Add API to toggle wait for safe from firmware through a SCM call. Signed-off-by: Vivek Gautam --- drivers/firmware/qcom_scm-32.c | 5 + drivers/firmware/qcom_scm-64.c | 13 + drivers/firmware/qcom_scm.c| 6 ++ drivers/firmware/qcom_scm.h| 5 + include/linux/qcom_scm.h | 2 ++ 5 files changed, 31 insertions(+) diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c index 7293e5efad69..2d301ad053f8 100644 --- a/drivers/firmware/qcom_scm-32.c +++ b/drivers/firmware/qcom_scm-32.c @@ -639,3 +639,8 @@ int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, { return -ENODEV; } + +int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool enable) +{ + return -ENODEV; +} diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c index 6bf55403f6e3..f13bcabc5d78 100644 --- a/drivers/firmware/qcom_scm-64.c +++ b/drivers/firmware/qcom_scm-64.c @@ -590,3 +590,16 @@ int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE, , ); } + +int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool en) +{ + struct qcom_scm_desc desc = {0}; + struct arm_smccc_res res; + + desc.args[0] = QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL; + desc.args[1] = en; + desc.arginfo = QCOM_SCM_ARGS(2); + + return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_SMMU_PROGRAM, + QCOM_SCM_CONFIG_ERRATA1, , ); +} diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index 36dab37f..5f15cc2e9f69 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -353,6 +353,12 @@ int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) } EXPORT_SYMBOL(qcom_scm_iommu_secure_ptbl_init); +int qcom_scm_qsmmu500_wait_safe_toggle(bool en) +{ + return __qcom_scm_qsmmu500_wait_safe_toggle(__scm->dev, en); +} 
+EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle); + int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) { return __qcom_scm_io_readl(__scm->dev, addr, val); diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h index bb176107f51e..89a822c23e33 100644 --- a/drivers/firmware/qcom_scm.h +++ b/drivers/firmware/qcom_scm.h @@ -103,10 +103,15 @@ extern int __qcom_scm_restore_sec_cfg(struct device *dev, u32 device_id, u32 spare); #define QCOM_SCM_IOMMU_SECURE_PTBL_SIZE3 #define QCOM_SCM_IOMMU_SECURE_PTBL_INIT4 +#define QCOM_SCM_SVC_SMMU_PROGRAM 0x15 +#define QCOM_SCM_CONFIG_ERRATA10x3 +#define QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL 0x2 extern int __qcom_scm_iommu_secure_ptbl_size(struct device *dev, u32 spare, size_t *size); extern int __qcom_scm_iommu_secure_ptbl_init(struct device *dev, u64 addr, u32 size, u32 spare); +extern int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, + bool enable); #define QCOM_MEM_PROT_ASSIGN_ID0x16 extern int __qcom_scm_assign_mem(struct device *dev, phys_addr_t mem_region, size_t mem_sz, diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 6a5d0c98b328..46e6b1692998 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -62,6 +62,7 @@ extern int qcom_scm_set_remote_state(u32 state, u32 id); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); extern int qcom_scm_io_readl_atomic(phys_addr_t addr, unsigned int *val); @@ -100,6 +101,7 @@ qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; } static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return -ENODEV; } static inline int qcom_scm_iommu_secure_ptbl_size(u32 
spare, size_t *size) { return -ENODEV; } static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; } +static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) { return -ENODEV; } static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) { return -ENODEV; } static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) { return -ENODEV; } static inline int qcom_scm_io_readl_atomic(phys_addr_t addr, unsigned int *val) { return -
[PATCH v2 1/4] firmware: qcom_scm-64: Add atomic version of qcom_scm_call
There are scenarios where drivers are required to make a scm call in atomic context, such as in one of the qcom's arm-smmu-500 errata [1]. [1] ("https://source.codeaurora.org/quic/la/kernel/msm-4.9/ tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4842") Signed-off-by: Vivek Gautam --- drivers/firmware/qcom_scm-64.c | 136 - 1 file changed, 92 insertions(+), 44 deletions(-) diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c index 688525dd4aee..3a8c867cdf51 100644 --- a/drivers/firmware/qcom_scm-64.c +++ b/drivers/firmware/qcom_scm-64.c @@ -70,32 +70,71 @@ static DEFINE_MUTEX(qcom_scm_lock); #define FIRST_EXT_ARG_IDX 3 #define N_REGISTER_ARGS (MAX_QCOM_SCM_ARGS - N_EXT_QCOM_SCM_ARGS + 1) -/** - * qcom_scm_call() - Invoke a syscall in the secure world - * @dev: device - * @svc_id:service identifier - * @cmd_id:command identifier - * @desc: Descriptor structure containing arguments and return values - * - * Sends a command to the SCM and waits for the command to finish processing. - * This should *only* be called in pre-emptible context. 
-*/ -static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id, -const struct qcom_scm_desc *desc, -struct arm_smccc_res *res) +static void __qcom_scm_call_do(const struct qcom_scm_desc *desc, + struct arm_smccc_res *res, u32 fn_id, + u64 x5, u32 type) +{ + u64 cmd; + struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6}; + + cmd = ARM_SMCCC_CALL_VAL(type, qcom_smccc_convention, +ARM_SMCCC_OWNER_SIP, fn_id); + + quirk.state.a6 = 0; + + do { + arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0], + desc->args[1], desc->args[2], x5, + quirk.state.a6, 0, res, ); + + if (res->a0 == QCOM_SCM_INTERRUPTED) + cmd = res->a0; + + } while (res->a0 == QCOM_SCM_INTERRUPTED); +} + +static void qcom_scm_call_do(const struct qcom_scm_desc *desc, +struct arm_smccc_res *res, u32 fn_id, +u64 x5, bool atomic) +{ + int retry_count = 0; + + if (!atomic) { + do { + mutex_lock(_scm_lock); + + __qcom_scm_call_do(desc, res, fn_id, x5, + ARM_SMCCC_STD_CALL); + + mutex_unlock(_scm_lock); + + if (res->a0 == QCOM_SCM_V2_EBUSY) { + if (retry_count++ > QCOM_SCM_EBUSY_MAX_RETRY) + break; + msleep(QCOM_SCM_EBUSY_WAIT_MS); + } + } while (res->a0 == QCOM_SCM_V2_EBUSY); + } else { + __qcom_scm_call_do(desc, res, fn_id, x5, ARM_SMCCC_FAST_CALL); + } +} + +static int ___qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id, + const struct qcom_scm_desc *desc, + struct arm_smccc_res *res, bool atomic) { int arglen = desc->arginfo & 0xf; - int retry_count = 0, i; + int i; u32 fn_id = QCOM_SCM_FNID(svc_id, cmd_id); - u64 cmd, x5 = desc->args[FIRST_EXT_ARG_IDX]; + u64 x5 = desc->args[FIRST_EXT_ARG_IDX]; dma_addr_t args_phys = 0; void *args_virt = NULL; size_t alloc_len; - struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6}; + gfp_t flag = atomic ? 
GFP_ATOMIC : GFP_KERNEL; if (unlikely(arglen > N_REGISTER_ARGS)) { alloc_len = N_EXT_QCOM_SCM_ARGS * sizeof(u64); - args_virt = kzalloc(PAGE_ALIGN(alloc_len), GFP_KERNEL); + args_virt = kzalloc(PAGE_ALIGN(alloc_len), flag); if (!args_virt) return -ENOMEM; @@ -125,33 +164,7 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id, x5 = args_phys; } - do { - mutex_lock(_scm_lock); - - cmd = ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, -qcom_smccc_convention, -ARM_SMCCC_OWNER_SIP, fn_id); - - quirk.state.a6 = 0; - - do { - arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0], - desc->args[1], desc->args[2], x5, - quirk.state.a6, 0, res, ); - - if (res->a0 == QCOM_SCM_INTERRUPTED) - cmd = res->a0; - - } while (res->a0 == QCOM_SCM_INTERRUPTED); - -
Re: [PATCH v16 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
On Fri, Sep 7, 2018 at 3:22 PM Tomasz Figa wrote: > > On Fri, Sep 7, 2018 at 6:38 PM Vivek Gautam > wrote: > > > > Hi Tomasz, > > > > > > On 9/7/2018 2:46 PM, Tomasz Figa wrote: > > > Hi Vivek, > > > > > > On Thu, Aug 30, 2018 at 11:46 PM Vivek Gautam > > > wrote: > > >> From: Sricharan R > > >> > > >> The smmu device probe/remove and add/remove master device callbacks > > >> gets called when the smmu is not linked to its master, that is without > > >> the context of the master device. So calling runtime apis in those places > > >> separately. > > >> Global locks are also initialized before enabling runtime pm as the > > >> runtime_resume() calls device_reset() which does tlb_sync_global() > > >> that ultimately requires locks to be initialized. > > >> > > >> Signed-off-by: Sricharan R > > >> [vivek: Cleanup pm runtime calls] > > >> Signed-off-by: Vivek Gautam > > >> Reviewed-by: Tomasz Figa > > >> Tested-by: Srinivas Kandagatla > > >> --- > > >> drivers/iommu/arm-smmu.c | 89 > > >> +++- > > >> 1 file changed, 81 insertions(+), 8 deletions(-) > > > [snip] > > >> @@ -2215,10 +2281,17 @@ static int arm_smmu_device_remove(struct > > >> platform_device *pdev) > > >> if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) > > >> dev_err(>dev, "removing device with active > > >> domains!\n"); > > >> > > >> + arm_smmu_rpm_get(smmu); > > >> /* Turn the thing off */ > > >> writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + > > >> ARM_SMMU_GR0_sCR0); > > >> + arm_smmu_rpm_put(smmu); > > >> + > > >> + if (pm_runtime_enabled(smmu->dev)) > > >> + pm_runtime_force_suspend(smmu->dev); > > >> + else > > >> + clk_bulk_disable(smmu->num_clks, smmu->clks); > > >> > > >> - clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks); > > >> + clk_bulk_unprepare(smmu->num_clks, smmu->clks); > > > Aren't we missing pm_runtime_disable() here? We'll have the enable > > > count unbalanced if the driver is removed and probed again. 
> > > > pm_runtime_force_suspend() does a pm_runtime_disable() also if i am not > > wrong. > > And, as mentioned in a previous thread [1], we were seeing a warning > > which we avoided > > by keeping force_suspend(). > > > > [1] https://lkml.org/lkml/2018/7/8/124 > > I see, thanks. I didn't realize that pm_runtime_force_suspend() > already disables runtime PM indeed. Sorry for the noise. Hi Tomasz, No problem. Thanks for looking back at it. Hi Robin, If you are fine with this series, then can you please consider giving Reviewed-by, so that we are certain that this series will go in the next merge window. Thanks Best regards Vivek -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v16 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
Hi Tomasz, On 9/7/2018 2:46 PM, Tomasz Figa wrote: Hi Vivek, On Thu, Aug 30, 2018 at 11:46 PM Vivek Gautam wrote: From: Sricharan R The smmu device probe/remove and add/remove master device callbacks gets called when the smmu is not linked to its master, that is without the context of the master device. So calling runtime apis in those places separately. Global locks are also initialized before enabling runtime pm as the runtime_resume() calls device_reset() which does tlb_sync_global() that ultimately requires locks to be initialized. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- drivers/iommu/arm-smmu.c | 89 +++- 1 file changed, 81 insertions(+), 8 deletions(-) [snip] @@ -2215,10 +2281,17 @@ static int arm_smmu_device_remove(struct platform_device *pdev) if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_err(>dev, "removing device with active domains!\n"); + arm_smmu_rpm_get(smmu); /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + arm_smmu_rpm_put(smmu); + + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_force_suspend(smmu->dev); + else + clk_bulk_disable(smmu->num_clks, smmu->clks); - clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks); + clk_bulk_unprepare(smmu->num_clks, smmu->clks); Aren't we missing pm_runtime_disable() here? We'll have the enable count unbalanced if the driver is removed and probed again. pm_runtime_force_suspend() does a pm_runtime_disable() also if i am not wrong. And, as mentioned in a previous thread [1], we were seeing a warning which we avoided by keeping force_suspend(). 
[1] https://lkml.org/lkml/2018/7/8/124 Thanks Vivek Also, if we add pm_runtime_disable(), we can reorder things a bit and simplify into: arm_smmu_rpm_get(smmu); /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); if (pm_runtime_enabled()) pm_runtime_disable(); arm_smmu_rpm_put(smmu); clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks); Best regards, Tomasz ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v16 4/5] dt-bindings: arm-smmu: Add bindings for qcom, smmu-v2
Hi Rob, On Thu, Aug 30, 2018 at 8:16 PM Vivek Gautam wrote: > > Add bindings doc for Qcom's smmu-v2 implementation. > > Signed-off-by: Vivek Gautam > Reviewed-by: Tomasz Figa > Tested-by: Srinivas Kandagatla > --- I removed your reviewed-by for this particular patch. Can you please consider giving your review if you find the changes okay now. Thanks. Best regards Vivek > .../devicetree/bindings/iommu/arm,smmu.txt | 39 > ++ > 1 file changed, 39 insertions(+) > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt > b/Documentation/devicetree/bindings/iommu/arm,smmu.txt > index 8a6ffce12af5..a6504b37cc21 100644 > --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt > @@ -17,10 +17,16 @@ conditions. > "arm,mmu-401" > "arm,mmu-500" > "cavium,smmu-v2" > +"qcom,smmu-v2" > >depending on the particular implementation and/or the >version of the architecture implemented. > > + Qcom SoCs must contain, as below, SoC-specific compatibles > + along with "qcom,smmu-v2": > + "qcom,msm8996-smmu-v2", "qcom,smmu-v2", > + "qcom,sdm845-smmu-v2", "qcom,smmu-v2". > + > - reg : Base address and size of the SMMU. > > - #global-interrupts : The number of global interrupts exposed by the > @@ -71,6 +77,22 @@ conditions. >or using stream matching with #iommu-cells = <2>, and >may be ignored if present in such cases. > > +- clock-names:List of the names of clocks input to the device. The > + required list depends on particular implementation and > + is as follows: > + - for "qcom,smmu-v2": > +- "bus": clock required for downstream bus access and > + for the smmu ptw, > +- "iface": clock required to access smmu's registers > + through the TCU's programming interface. > + - unspecified for other implementations. > + > +- clocks: Specifiers for all clocks listed in the clock-names > property, > + as per generic clock bindings. 
> + > +- power-domains: Specifiers for power domains required to be powered on for > + the SMMU to operate, as per generic power domain bindings. > + > ** Deprecated properties: > > - mmu-masters (deprecated in favour of the generic "iommus" binding) : > @@ -137,3 +159,20 @@ conditions. > iommu-map = <0 0 0x400>; > ... > }; > + > + /* Qcom's arm,smmu-v2 implementation */ > + smmu4: iommu@d0 { > + compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2"; > + reg = <0xd0 0x1>; > + > + #global-interrupts = <1>; > + interrupts = , > +, > +; > + #iommu-cells = <1>; > + power-domains = < MDSS_GDSC>; > + > + clocks = < SMMU_MDP_AXI_CLK>, > +< SMMU_MDP_AHB_CLK>; > + clock-names = "bus", "iface"; > + }; > -- > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member > of Code Aurora Forum, hosted by The Linux Foundation > > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 0/5] Qcom smmu-500 TLB invalidation errata for sdm845
On 9/5/2018 3:34 PM, Rob Clark wrote: On Wed, Sep 5, 2018 at 5:22 AM Vivek Gautam wrote: On 8/14/2018 5:54 PM, Vivek Gautam wrote: Hi Will, On 8/14/2018 5:10 PM, Will Deacon wrote: Hi Vivek, On Tue, Aug 14, 2018 at 04:25:23PM +0530, Vivek Gautam wrote: Qcom's implementation of arm,mmu-500 on sdm845 has a functional/performance errata [1] because of which the TCU cache look ups are stalled during invalidation cycle. This is mitigated by serializing all the invalidation requests coming to the smmu. How does this implementation differ from the one supported by qcom_iommu.c? I notice you're adding firmware hooks here, which we avoided by having the extra driver. Please help me understand which devices exist, how they differ, and which drivers are intended to support them! IIRC, the qcom_iommu driver was intended to support the static context bank - SID mapping, and is very specific to the smmu-v2 version present on msm8916 soc. However, this is the qcom's mmu-500 implementation specific errata. qcom_iommu will not be able to support mmu-500 configurations. Rob Clark can add more. Let you know what you suggest. Rob, can you please comment about how qcom-smmu driver has different implementation from arm-smmu driver? sorry, I missed this thread earlier. But yeah, as you mentioned, the purpose for qcom_iommu.c was to deal with the static context/SID mapping. (I guess it is all just software, and we could make qcom_iommu.c support dynamic mapping as well, but I think then it starts to duplicate most of arm_smmu.c, so that doesn't seem like the right direction) Thanks Rob for the response. I will wait for Will's response on how would he like this support be implemented. Best regards Vivek BR, -R Will, in case we would want to use arm-smmu driver, what would you suggest for having the firmware hooks? Thanks. Best regards Vivek ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 0/5] Qcom smmu-500 TLB invalidation errata for sdm845
On 8/14/2018 5:54 PM, Vivek Gautam wrote: Hi Will, On 8/14/2018 5:10 PM, Will Deacon wrote: Hi Vivek, On Tue, Aug 14, 2018 at 04:25:23PM +0530, Vivek Gautam wrote: Qcom's implementation of arm,mmu-500 on sdm845 has a functional/performance errata [1] because of which the TCU cache look ups are stalled during invalidation cycle. This is mitigated by serializing all the invalidation requests coming to the smmu. How does this implementation differ from the one supported by qcom_iommu.c? I notice you're adding firmware hooks here, which we avoided by having the extra driver. Please help me understand which devices exist, how they differ, and which drivers are intended to support them! IIRC, the qcom_iommu driver was intended to support the static context bank - SID mapping, and is very specific to the smmu-v2 version present on msm8916 soc. However, this is the qcom's mmu-500 implementation specific errata. qcom_iommu will not be able to support mmu-500 configurations. Rob Clark can add more. Let you know what you suggest. Rob, can you please comment about how qcom-smmu driver has different implementation from arm-smmu driver? Will, in case we would want to use arm-smmu driver, what would you suggest for having the firmware hooks? Thanks. Best regards Vivek Also -- you didn't CC all the maintainers for the firmware bits, so adding Andy here for that, and Rob for the previous question. I added Andy to the series, would you want me to add Rob H also? Best regards Vivek Thanks, Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [Patch v15 4/5] dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2
Hi Rob, On 8/30/2018 6:13 AM, Rob Herring wrote: On Wed, Aug 29, 2018 at 6:23 AM Vivek Gautam wrote: On Wed, Aug 29, 2018 at 2:05 PM Vivek Gautam wrote: Hi Rob, On 8/29/2018 2:04 AM, Rob Herring wrote: On Mon, Aug 27, 2018 at 04:25:50PM +0530, Vivek Gautam wrote: Add bindings doc for Qcom's smmu-v2 implementation. Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- Changes since v14: - This is a new patch added in v15 after noticing the new checkpatch warning for separate dt-bindings doc. - This patch also addresses comments given by Rob and Robin to add a list of valid values of '' in "qcom,-smmu-v2" compatible string. .../devicetree/bindings/iommu/arm,smmu.txt | 47 ++ 1 file changed, 47 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 8a6ffce12af5..52198a539606 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,10 +17,24 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" +"qcom,-smmu-v2", "qcom,smmu-v2" The v2 in the compatible string is kind of redundant unless the SoC has other SMMU types. sdm845 has smmu-v2, and smmu-500 [1]. depending on the particular implementation and/or the version of the architecture implemented. + A number of Qcom SoCs use qcom,smmu-v2 version of the IP. + "qcom,-smmu-v2" represents a soc specific compatible + string that should be present along with the "qcom,smmu-v2" + to facilitate SoC specific clocks/power connections and to + address specific bug fixes. + '' string in "qcom,-smmu-v2" should be one of the + following: + msm8996 - for msm8996 Qcom SoC. + sdm845 - for sdm845 Qcom Soc. Rather than all this prose, it would be simpler to just add 2 lines with the full compatibles rather than . The thing is not going to work when/if we move bindings to json-schema also. 
then we keep adding "qcom,msm8996-smmu-v2", "qcom,smmu-v2" "qcom,msm8998-smmu-v2", "qcom,smmu-v2" "qcom,sdm845-smmu-v2", "qcom,smmu-v2", and from [1] "qcom,sdm845-smmu-500", "arm,mmu-500", etc. for each SoCs? How about following diff on top of this patch? diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 52198a539606..5e6c04876533 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,23 +17,18 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" -"qcom,-smmu-v2", "qcom,smmu-v2" +"qcom,smmu-v2" depending on the particular implementation and/or the version of the architecture implemented. - A number of Qcom SoCs use qcom,smmu-v2 version of the IP. - "qcom,-smmu-v2" represents a soc specific compatible - string that should be present along with the "qcom,smmu-v2" - to facilitate SoC specific clocks/power connections and to - address specific bug fixes. - '' string in "qcom,-smmu-v2" should be one of the - following: - msm8996 - for msm8996 Qcom SoC. - sdm845 - for sdm845 Qcom Soc. - - An example string would be - - "qcom,msm8996-smmu-v2", "qcom,smmu-v2". + Qcom SoCs using qcom,smmu-v2 must have soc specific + compatible string attached to "qcom,smmu-v2" to take care + of SoC specific clocks/power connections and to address + specific bug fixes. + Precisely, it should be one of the following: + "qcom,msm8996-smmu-v2", "qcom,smmu-v2", + "qcom,sdm845-smmu-v2", "qcom,smmu-v2". We don't need an explanation of why we need specific compatibles in each binding document (though maybe we need a better explanation somewhere). We just need to know what are valid values for compatibles and this includes any combinations. Generally, this is just a list of combinations. [snip] Fixed this in v16. Thanks. 
Best regards Vivek ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v16 5/5] iommu/arm-smmu: Add support for qcom,smmu-v2 variant
qcom,smmu-v2 is an arm,smmu-v2 implementation with specific clock and power requirements. On msm8996, multiple cores, viz. mdss, video, etc. use this smmu. On sdm845, this smmu is used with gpu. Add bindings for the same. Signed-off-by: Vivek Gautam Reviewed-by: Rob Herring Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- drivers/iommu/arm-smmu.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 166c8c6da24f..411e5ac57c64 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -119,6 +119,7 @@ enum arm_smmu_implementation { GENERIC_SMMU, ARM_MMU500, CAVIUM_SMMUV2, + QCOM_SMMUV2, }; struct arm_smmu_s2cr { @@ -1970,6 +1971,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +static const char * const qcom_smmuv2_clks[] = { + "bus", "iface", +}; + +static const struct arm_smmu_match_data qcom_smmuv2 = { + .version = ARM_SMMU_V2, + .model = QCOM_SMMUV2, + .clks = qcom_smmuv2_clks, + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks), +}; + static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", .data = _generic_v1 }, { .compatible = "arm,smmu-v2", .data = _generic_v2 }, @@ -1977,6 +1989,7 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,mmu-401", .data = _mmu401 }, { .compatible = "arm,mmu-500", .data = _mmu500 }, { .compatible = "cavium,smmu-v2", .data = _smmuv2 }, + { .compatible = "qcom,smmu-v2", .data = _smmuv2 }, { }, }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v16 3/5] iommu/arm-smmu: Add the device_link between masters and smmu
From: Sricharan R Finally add the device link between the master device and smmu, so that the smmu gets runtime enabled/disabled only when the master needs it. This is done from add_device callback which gets called once when the master is added to the smmu. Signed-off-by: Sricharan R Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- drivers/iommu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 1bf542010be7..166c8c6da24f 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1461,6 +1461,9 @@ static int arm_smmu_add_device(struct device *dev) iommu_device_link(>iommu, dev); + device_link_add(dev, smmu->dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); + return 0; out_cfg_free: -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v16 4/5] dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2
Add bindings doc for Qcom's smmu-v2 implementation. Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- .../devicetree/bindings/iommu/arm,smmu.txt | 39 ++ 1 file changed, 39 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 8a6ffce12af5..a6504b37cc21 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,10 +17,16 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" +"qcom,smmu-v2" depending on the particular implementation and/or the version of the architecture implemented. + Qcom SoCs must contain, as below, SoC-specific compatibles + along with "qcom,smmu-v2": + "qcom,msm8996-smmu-v2", "qcom,smmu-v2", + "qcom,sdm845-smmu-v2", "qcom,smmu-v2". + - reg : Base address and size of the SMMU. - #global-interrupts : The number of global interrupts exposed by the @@ -71,6 +77,22 @@ conditions. or using stream matching with #iommu-cells = <2>, and may be ignored if present in such cases. +- clock-names:List of the names of clocks input to the device. The + required list depends on particular implementation and + is as follows: + - for "qcom,smmu-v2": +- "bus": clock required for downstream bus access and + for the smmu ptw, +- "iface": clock required to access smmu's registers + through the TCU's programming interface. + - unspecified for other implementations. + +- clocks: Specifiers for all clocks listed in the clock-names property, + as per generic clock bindings. + +- power-domains: Specifiers for power domains required to be powered on for + the SMMU to operate, as per generic power domain bindings. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -137,3 +159,20 @@ conditions. iommu-map = <0 0 0x400>; ... 
}; + + /* Qcom's arm,smmu-v2 implementation */ + smmu4: iommu@d0 { + compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2"; + reg = <0xd0 0x1>; + + #global-interrupts = <1>; + interrupts = , +, +; + #iommu-cells = <1>; + power-domains = < MDSS_GDSC>; + + clocks = < SMMU_MDP_AXI_CLK>, +< SMMU_MDP_AHB_CLK>; + clock-names = "bus", "iface"; + }; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v16 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
From: Sricharan R The smmu device probe/remove and add/remove master device callbacks gets called when the smmu is not linked to its master, that is without the context of the master device. So calling runtime apis in those places separately. Global locks are also initialized before enabling runtime pm as the runtime_resume() calls device_reset() which does tlb_sync_global() that ultimately requires locks to be initialized. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- drivers/iommu/arm-smmu.c | 89 +++- 1 file changed, 81 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index d900e007c3c9..1bf542010be7 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -268,6 +268,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + return pm_runtime_get_sync(smmu->dev); + + return 0; +} + +static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_put(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -913,11 +927,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = _domain->cfg; - int irq; + int ret, irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return; + /* * Disable the context bank and free the page tables before freeing * it. 
@@ -932,6 +950,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); + + arm_smmu_rpm_put(smmu); } static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) @@ -1213,10 +1233,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENODEV; smmu = fwspec_smmu(fwspec); + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + /* Ensure that the domain is finalised */ ret = arm_smmu_init_domain_context(domain, smmu); if (ret < 0) - return ret; + goto rpm_put; /* * Sanity check the domain. We don't support domains across @@ -1226,33 +1251,50 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); - return -EINVAL; + ret = -EINVAL; + goto rpm_put; } /* Looks ok, so add the device to the domain */ - return arm_smmu_domain_add_master(smmu_domain, fwspec); + ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + +rpm_put: + arm_smmu_rpm_put(smmu); + return ret; } static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + int ret; if (!ops) return -ENODEV; - return ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_get(smmu); + ret = ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_put(smmu); + + return ret; } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + size_t ret; if (!ops) return 0; - return ops->unmap(ops, iova, size); + arm_smmu_rpm_get(smmu); + ret = 
ops->unmap(ops, iova, size); + arm_smmu_rpm_put(smmu); + + return ret; } static void arm_smmu_iotlb_sync(struct iommu_domain *domain) @@ -1407,7 +1449,13 @@ static int arm_smmu_add_device(struct device *dev) while (i--) cfg->smendx[i] = INVALID_SMENDX; + ret = arm_smmu
[PATCH v16 0/5] iommu/arm-smmu: Add runtime pm/sleep support
* Dropped the patch [5] that was adding device_link_find() API to device core layer. device_link_del_dev() serves the purpose to directly delete the link between two given devices. [v9] * Removed 'rpm_supported' flag, instead checking on pm_domain to enable runtime pm. * Creating device link only when the runtime pm is enabled, as we don't need a device link besides managing the power dependency between supplier and consumer devices. * Introducing a patch to add device_link_find() API that finds an existing link between supplier and consumer devices. Also, made necessary change to device_link_add() to use this API. * arm_smmu_remove_device() now uses this device_link_find() to find the device link between smmu device and the master device, and then delete this link. * Dropped the destroy_domain_context() fix [4] as it was, rather, introducing a catastrophically bad problem by destroying 'good dev's domain context. * Added 'Reviewed-by' tag for Tomasz's review. [v8] * Major change - - Added a flag 'rpm_supported' which each platform that supports runtime pm, can enable, and we enable runtime_pm over arm-smmu only when this flag is set. - Adding the conditional pm_runtime_get/put() calls to .map, .unmap and .attach_dev ops. - Dropped the patch [2] that exported pm_runtime_get/put_suppliers(), and also dropped the user driver patch [3] for these APIs. * Clock code further cleanup - doing only clk_bulk_enable() and clk_bulk_disable() in runtime pm callbacks. We shouldn't be taking a slow path (clk_prepare/unprepare()) from these runtime pm callbacks. Thereby, moved clk_bulk_prepare() to arm_smmu_device_probe(), and clk_bulk_unprepare() to arm_smmu_device_remove(). - clk data filling to a common method arm_smmu_fill_clk_data() that fills the clock ids and number of clocks. * Addressed other nits and comments - device_link_add() error path fixed. - Fix for checking negative error value from pm_runtime_get_sync(). - Documentation redo.
* Added another patch fixing the error path in arm_smmu_attach_dev() to destroy allocated domain context. ** Change logs for previous versions is available in previous series [9]. [1] https://patchwork.kernel.org/cover/10576921/ [2] https://patchwork.kernel.org/patch/10204945/ [3] https://patchwork.kernel.org/patch/10204925/ [4] https://patchwork.kernel.org/patch/10254105/ [5] https://patchwork.kernel.org/patch/10277975/ [6] https://patchwork.kernel.org/patch/10281613/ [7] https://patchwork.kernel.org/patch/10491481/ [8] https://lore.kernel.org/patchwork/patch/974116/ [9] https://lkml.org/lkml/2018/7/8/124 Sricharan R (3): iommu/arm-smmu: Add pm_runtime/sleep ops iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device iommu/arm-smmu: Add the device_link between masters and smmu Vivek Gautam (2): dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2 iommu/arm-smmu: Add support for qcom,smmu-v2 variant .../devicetree/bindings/iommu/arm,smmu.txt | 39 + drivers/iommu/arm-smmu.c | 180 +++-- 2 files changed, 209 insertions(+), 10 deletions(-) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v16 1/5] iommu/arm-smmu: Add pm_runtime/sleep ops
From: Sricharan R The smmu needs to be functional only when the respective master's using it are active. The device_link feature helps to track such functional dependencies, so that the iommu gets powered when the master device enables itself using pm_runtime. So by adapting the smmu driver for runtime pm, above said dependency can be addressed. This patch adds the pm runtime/sleep callbacks to the driver and also the functions to parse the smmu clocks from DT and enable them in resume/suspend. Also, while we enable the runtime pm add a pm sleep suspend callback that pushes devices to low power state by turning the clocks off in a system sleep. Also add corresponding clock enable path in resume callback. Signed-off-by: Sricharan R Signed-off-by: Archit Taneja [vivek: rework for clock and pm ops] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- drivers/iommu/arm-smmu.c | 77 ++-- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index fd1b80ef9490..d900e007c3c9 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -205,6 +206,8 @@ struct arm_smmu_device { u32 num_global_irqs; u32 num_context_irqs; unsigned int*irqs; + struct clk_bulk_data*clks; + int num_clks; u32 cavium_id_base; /* Specific to Cavium */ @@ -1896,10 +1899,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) struct arm_smmu_match_data { enum arm_smmu_arch_version version; enum arm_smmu_implementation model; + const char * const *clks; + int num_clks; }; #define ARM_SMMU_MATCH_DATA(name, ver, imp)\ -static struct arm_smmu_match_data name = { .version = ver, .model = imp } +static const struct arm_smmu_match_data name = { .version = ver, .model = imp } ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); @@ -1918,6 
+1923,23 @@ static const struct of_device_id arm_smmu_of_match[] = { }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); +static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu, + const char * const *clks) +{ + int i; + + if (smmu->num_clks < 1) + return; + + smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks, + sizeof(*smmu->clks), GFP_KERNEL); + if (!smmu->clks) + return; + + for (i = 0; i < smmu->num_clks; i++) + smmu->clks[i].id = clks[i]; +} + #ifdef CONFIG_ACPI static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) { @@ -2000,6 +2022,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, data = of_device_get_match_data(dev); smmu->version = data->version; smmu->model = data->model; + smmu->num_clks = data->num_clks; + + arm_smmu_fill_clk_data(smmu, data->clks); parse_driver_options(smmu); @@ -2098,6 +2123,14 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->irqs[i] = irq; } + err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks); + if (err) + return err; + + err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks); + if (err) + return err; + err = arm_smmu_device_cfg_probe(smmu); if (err) return err; @@ -2184,6 +2217,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + + clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks); + return 0; } @@ -2192,15 +2228,50 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev) arm_smmu_device_remove(pdev); } -static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) { struct arm_smmu_device *smmu = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_enable(smmu->num_clks, smmu->clks); + if (ret) + return ret; arm_smmu_device_reset(smmu); + return 0; } -static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume); +static int __maybe_unused 
arm_smmu_runtime_suspend(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + + clk_bulk_disable(smmu->num_clks, smmu->clks); + + return 0; +} + +static int __
Re: [Patch v15 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
On Thu, Aug 30, 2018 at 3:04 PM Tomasz Figa wrote: > > On Thu, Aug 30, 2018 at 6:22 PM Vivek Gautam > wrote: > > > > On Mon, Aug 27, 2018 at 4:27 PM Vivek Gautam > > wrote: > > > > > > From: Sricharan R > > > > > > The smmu device probe/remove and add/remove master device callbacks > > > gets called when the smmu is not linked to its master, that is without > > > the context of the master device. So calling runtime apis in those places > > > separately. > > > > > > Signed-off-by: Sricharan R > > > [vivek: Cleanup pm runtime calls] > > > Signed-off-by: Vivek Gautam > > > Reviewed-by: Tomasz Figa > > > Tested-by: Srinivas Kandagatla > > > --- > > > > > > Changes since v14: > > > - none. > > > > > > drivers/iommu/arm-smmu.c | 101 > > > +++ > > > 1 file changed, 93 insertions(+), 8 deletions(-) > > > > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > > index a81224bc6637..23b4a60149b6 100644 > > > --- a/drivers/iommu/arm-smmu.c > > > +++ b/drivers/iommu/arm-smmu.c > > > > [snip] > > > > > @@ -2131,6 +2188,26 @@ static int arm_smmu_device_probe(struct > > > platform_device *pdev) > > > if (err) > > > return err; > > > > > > + /* > > > +* We want to avoid touching dev->power.lock in fastpaths unless > > > +* it's really going to do something useful - pm_runtime_enabled() > > > +* can serve as an ideal proxy for that decision. So, > > > conditionally > > > +* enable pm_runtime. > > > +*/ > > > + if (dev->pm_domain) > > > + pm_runtime_enable(dev); > > > + > > > + err = arm_smmu_rpm_get(smmu); > > > > We shouldn't be doing a runtime_get() yet, as this eventually calls > > arm_smmu_device_reset(). > > arm_smmu_device_reset() should be called only after > > arm_smmu_device_cfg_probe(). 
> > So, I plan to replace the pm_runtime_get/put() calls in probe() with > > simple clk_bulk_enable() > > to let the driver initialize smmu, and at the end of the probe we can > > disable the clocks and > > enable runtime pm over the device to let it take care of the device > > further-on. > > > > We can avoid the explicit clock disable by just calling > pm_runtime_set_active() before pm_runtime_enable(), assuming that what > probe does is symmetrical with the suspend callback, which would be > called after the latter. Sure, that sounds reasonable. Will use pm_runtime_set_active() instead of explicitly disabling the clocks. Thanks. Best regards Vivek > > Best regards, > Tomasz > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [Patch v15 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
On Mon, Aug 27, 2018 at 4:27 PM Vivek Gautam wrote: > > From: Sricharan R > > The smmu device probe/remove and add/remove master device callbacks > gets called when the smmu is not linked to its master, that is without > the context of the master device. So calling runtime apis in those places > separately. > > Signed-off-by: Sricharan R > [vivek: Cleanup pm runtime calls] > Signed-off-by: Vivek Gautam > Reviewed-by: Tomasz Figa > Tested-by: Srinivas Kandagatla > --- > > Changes since v14: > - none. > > drivers/iommu/arm-smmu.c | 101 > +++ > 1 file changed, 93 insertions(+), 8 deletions(-) > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > index a81224bc6637..23b4a60149b6 100644 > --- a/drivers/iommu/arm-smmu.c > +++ b/drivers/iommu/arm-smmu.c [snip] > @@ -2131,6 +2188,26 @@ static int arm_smmu_device_probe(struct > platform_device *pdev) > if (err) > return err; > > + /* > +* We want to avoid touching dev->power.lock in fastpaths unless > +* it's really going to do something useful - pm_runtime_enabled() > +* can serve as an ideal proxy for that decision. So, conditionally > +* enable pm_runtime. > +*/ > + if (dev->pm_domain) > + pm_runtime_enable(dev); > + > + err = arm_smmu_rpm_get(smmu); We shouldn't be doing a runtime_get() yet, as this eventually calls arm_smmu_device_reset(). arm_smmu_device_reset() should be called only after arm_smmu_device_cfg_probe(). So, I plan to replace the pm_runtime_get/put() calls in probe() with simple clk_bulk_enable() to let the driver initialize smmu, and at the end of the probe we can disable the clocks and enable runtime pm over the device to let it take care of the device further-on. 
> + if (err < 0) > + return err; > + > + /* Enable clocks explicitly if runtime PM is disabled */ > + if (!pm_runtime_enabled(dev)) { > + err = clk_bulk_enable(smmu->num_clks, smmu->clks); > + if (err) > + return err; > + } > + > err = arm_smmu_device_cfg_probe(smmu); > if (err) > return err; [snip] Best regards Vivek -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [Patch v15 4/5] dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2
On Wed, Aug 29, 2018 at 2:05 PM Vivek Gautam wrote: > > Hi Rob, > > > On 8/29/2018 2:04 AM, Rob Herring wrote: > > On Mon, Aug 27, 2018 at 04:25:50PM +0530, Vivek Gautam wrote: > >> Add bindings doc for Qcom's smmu-v2 implementation. > >> > >> Signed-off-by: Vivek Gautam > >> Reviewed-by: Tomasz Figa > >> Tested-by: Srinivas Kandagatla > >> --- > >> > >> Changes since v14: > >> - This is a new patch added in v15 after noticing the new > >> checkpatch warning for separate dt-bindings doc. > >> - This patch also addresses comments given by Rob and Robin to add > >> a list of valid values of '' in "qcom,-smmu-v2" > >> compatible string. > >> > >> .../devicetree/bindings/iommu/arm,smmu.txt | 47 > >> ++ > >> 1 file changed, 47 insertions(+) > >> > >> diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt > >> b/Documentation/devicetree/bindings/iommu/arm,smmu.txt > >> index 8a6ffce12af5..52198a539606 100644 > >> --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt > >> +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt > >> @@ -17,10 +17,24 @@ conditions. > >> "arm,mmu-401" > >> "arm,mmu-500" > >> "cavium,smmu-v2" > >> +"qcom,-smmu-v2", "qcom,smmu-v2" > > The v2 in the compatible string is kind of redundant unless the SoC has > > other SMMU types. > > sdm845 has smmu-v2, and smmu-500 [1]. > > >> > >> depending on the particular implementation and/or the > >> version of the architecture implemented. > >> > >> + A number of Qcom SoCs use qcom,smmu-v2 version of the > >> IP. > >> + "qcom,-smmu-v2" represents a soc specific > >> compatible > >> + string that should be present along with the > >> "qcom,smmu-v2" > >> + to facilitate SoC specific clocks/power connections and > >> to > >> + address specific bug fixes. > >> + '' string in "qcom,-smmu-v2" should be one of > >> the > >> + following: > >> + msm8996 - for msm8996 Qcom SoC. > >> + sdm845 - for sdm845 Qcom Soc. 
> > Rather than all this prose, it would be simpler to just add 2 lines with > > the full compatibles rather than . The thing is not going to > > work when/if we move bindings to json-schema also. > > then we keep adding > "qcom,msm8996-smmu-v2", "qcom,smmu-v2" > "qcom,msm8998-smmu-v2", "qcom,smmu-v2" > "qcom,sdm845-smmu-v2", "qcom,smmu-v2", > and from [1] > "qcom,sdm845-smmu-500", "arm,mmu-500", etc. > for each SoCs? How about following diff on top of this patch? diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 52198a539606..5e6c04876533 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,23 +17,18 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" -"qcom,-smmu-v2", "qcom,smmu-v2" +"qcom,smmu-v2" depending on the particular implementation and/or the version of the architecture implemented. - A number of Qcom SoCs use qcom,smmu-v2 version of the IP. - "qcom,-smmu-v2" represents a soc specific compatible - string that should be present along with the "qcom,smmu-v2" - to facilitate SoC specific clocks/power connections and to - address specific bug fixes. - '' string in "qcom,-smmu-v2" should be one of the - following: - msm8996 - for msm8996 Qcom SoC. - sdm845 - for sdm845 Qcom Soc. - - An example string would be - - "qcom,msm8996-smmu-v2", "qcom,smmu-v2". + Qcom SoCs using qcom,smmu-v2 must have soc specific + compatible string attached to "qcom,smmu-v2" to take care + of SoC specific clocks/power connections and to address + specific bug fixes. + Precisely, it should be one of the following: + "qcom,msm8996-smmu-v2", "qcom,smmu-v2", + "qcom,sdm845-smmu-v2", "qcom,smmu-v2". Thanks! Best regards Vivek -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. 
is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [Patch v15 4/5] dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2
Hi Rob, On 8/29/2018 2:04 AM, Rob Herring wrote: On Mon, Aug 27, 2018 at 04:25:50PM +0530, Vivek Gautam wrote: Add bindings doc for Qcom's smmu-v2 implementation. Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- Changes since v14: - This is a new patch added in v15 after noticing the new checkpatch warning for separate dt-bindings doc. - This patch also addresses comments given by Rob and Robin to add a list of valid values of '' in "qcom,-smmu-v2" compatible string. .../devicetree/bindings/iommu/arm,smmu.txt | 47 ++ 1 file changed, 47 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 8a6ffce12af5..52198a539606 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,10 +17,24 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" +"qcom,-smmu-v2", "qcom,smmu-v2" The v2 in the compatible string is kind of redundant unless the SoC has other SMMU types. sdm845 has smmu-v2, and smmu-500 [1]. depending on the particular implementation and/or the version of the architecture implemented. + A number of Qcom SoCs use qcom,smmu-v2 version of the IP. + "qcom,-smmu-v2" represents a soc specific compatible + string that should be present along with the "qcom,smmu-v2" + to facilitate SoC specific clocks/power connections and to + address specific bug fixes. + '' string in "qcom,-smmu-v2" should be one of the + following: + msm8996 - for msm8996 Qcom SoC. + sdm845 - for sdm845 Qcom Soc. Rather than all this prose, it would be simpler to just add 2 lines with the full compatibles rather than . The thing is not going to work when/if we move bindings to json-schema also. then we keep adding "qcom,msm8996-smmu-v2", "qcom,smmu-v2" "qcom,msm8998-smmu-v2", "qcom,smmu-v2" "qcom,sdm845-smmu-v2", "qcom,smmu-v2", and from [1] "qcom,sdm845-smmu-500", "arm,mmu-500", etc. for each SoCs? 
+ + An example string would be - + "qcom,msm8996-smmu-v2", "qcom,smmu-v2". + - reg : Base address and size of the SMMU. - #global-interrupts : The number of global interrupts exposed by the @@ -71,6 +85,22 @@ conditions. or using stream matching with #iommu-cells = <2>, and may be ignored if present in such cases. +- clock-names:List of the names of clocks input to the device. The + required list depends on particular implementation and + is as follows: + - for "qcom,smmu-v2": +- "bus": clock required for downstream bus access and + for the smmu ptw, +- "iface": clock required to access smmu's registers + through the TCU's programming interface. + - unspecified for other implementations. + +- clocks: Specifiers for all clocks listed in the clock-names property, + as per generic clock bindings. + +- power-domains: Specifiers for power domains required to be powered on for + the SMMU to operate, as per generic power domain bindings. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -137,3 +167,20 @@ conditions. iommu-map = <0 0 0x400>; ... }; + + /* Qcom's arm,smmu-v2 implementation */ + smmu4: iommu { Needs a unit-address. I went in symmetry with another example in this file for 'smmu1'. I will add the address here. And if you would like, I can squash a change for 'smmu1' too in this patch, although that will be trivial. [1] https://patchwork.kernel.org/patch/10565291/ Best regards Vivek [snip] ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/5] iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling
Hi Robin, On 8/14/2018 10:29 PM, Robin Murphy wrote: On 14/08/18 11:55, Vivek Gautam wrote: Cleanup to re-use some of the stuff Signed-off-by: Vivek Gautam --- drivers/iommu/arm-smmu.c | 32 +--- 1 file changed, 25 insertions(+), 7 deletions(-) I think the overall diffstat would be an awful lot smaller if the erratum workaround just has its own readl_poll_timeout() as it does in the vendor kernel. The burst-polling loop is for minimising latency in high-throughput situations, and if you're in a workaround which has to lock *every* register write and issue two firmware calls around each sync I think you're already well out of that game. Sorry for the delayed response. I was on vacation. I will fix this in my next version by adding the separate read_poll_timeout() for the erratum WA. diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 32e86df80428..75c146751c87 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -391,21 +391,31 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) clear_bit(idx, map); } -/* Wait for any pending TLB invalidations to complete */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, - void __iomem *sync, void __iomem *status) +static int __arm_smmu_tlb_sync_wait(void __iomem *status) { unsigned int spin_cnt, delay; - writel_relaxed(0, sync); for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE)) - return; + return 0; cpu_relax(); } udelay(delay); } + + return -EBUSY; +} + +/* Wait for any pending TLB invalidations to complete */ +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, + void __iomem *sync, void __iomem *status) +{ + writel_relaxed(0, sync); + + if (!__arm_smmu_tlb_sync_wait(status)) + return; + dev_err_ratelimited(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n"); } @@ -461,8 +471,9 @@ static void arm_smmu_tlb_inv_context_s2(void 
*cookie) arm_smmu_tlb_sync_global(smmu); } -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void __arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = _domain->cfg; @@ -498,6 +509,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } } +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) +{ + __arm_smmu_tlb_inv_range_nosync(iova, size, granule, leaf, cookie); +} + AFAICS even after patch #5 this does absolutely nothing except make the code needlessly harder to read :( Sure, I will rather call arm_smmu_tlb_inv_range_nosync() from qcom_errata_tlb_inv_range_nosync() then make this change. Thanks for the review. Best regards Vivek Robin. /* * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears * almost negligible, but the benefit of getting the first one in as far ahead ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 3/3] dts: arm64/sdm845: Add node for qcom,smmu-v2
On 8/27/2018 2:26 PM, Vivek Gautam wrote: Hi Rob, Robin, On 8/15/2018 4:27 AM, Rob Herring wrote: On Wed, Aug 15, 2018 at 01:09:43AM +0530, Vivek Gautam wrote: Adding Jordan here. On Tue, Aug 14, 2018 at 4:19 PM, Robin Murphy wrote: Hi Vivek, On 14/08/18 11:27, Vivek Gautam wrote: Add device node for qcom,smmu-v2 available on sdm845. This smmu is available only to GPU device. Signed-off-by: Vivek Gautam --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 23 +++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 1c2be2082f33..bd1ec5fa5146 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -989,6 +990,28 @@ cell-index = <0>; }; + gpu_smmu: iommu@504 { + compatible = "qcom,sdm845-smmu-v2", "qcom,smmu-v2"; Which of "sdm845" or "msm8996"[1] is the actual SoC name here? Well, the bindings use the SoC prefix with smmu-v2, so it should be sdm845 for this SoC. This is same as I posted in my v1 of the series [2]. Using 8996 based string in sdm845 makes things look awful. You need to list valid values of '' in the binding. Otherwise we get this confusion. Sorry for delayed response, I was away on vacation. I will list down the valid values for '' as suggested, and respin this series, and smmu bindings patch that comes as part of the runtime pm series [3]. [3] https://lore.kernel.org/patchwork/patch/968017/ I have updated the binding doc with valid values for '' string [4]. Kindly review this based on [4]. [4] https://lore.kernel.org/patchwork/patch/977888/ Best regards Vivek ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[Patch v15 5/5] iommu/arm-smmu: Add support for qcom,smmu-v2 variant
qcom,smmu-v2 is an arm,smmu-v2 implementation with specific clock and power requirements. On msm8996, multiple cores, viz. mdss, video, etc. use this smmu. On sdm845, this smmu is used with gpu. Add bindings for the same. Signed-off-by: Vivek Gautam Reviewed-by: Rob Herring Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- Changes since v14: - Moved out dt-bindings change to separate patch. drivers/iommu/arm-smmu.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b5e7f72d418c..c0177ea32678 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -119,6 +119,7 @@ enum arm_smmu_implementation { GENERIC_SMMU, ARM_MMU500, CAVIUM_SMMUV2, + QCOM_SMMUV2, }; struct arm_smmu_s2cr { @@ -1970,6 +1971,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +static const char * const qcom_smmuv2_clks[] = { + "bus", "iface", +}; + +static const struct arm_smmu_match_data qcom_smmuv2 = { + .version = ARM_SMMU_V2, + .model = QCOM_SMMUV2, + .clks = qcom_smmuv2_clks, + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks), +}; + static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", .data = _generic_v1 }, { .compatible = "arm,smmu-v2", .data = _generic_v2 }, @@ -1977,6 +1989,7 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,mmu-401", .data = _mmu401 }, { .compatible = "arm,mmu-500", .data = _mmu500 }, { .compatible = "cavium,smmu-v2", .data = _smmuv2 }, + { .compatible = "qcom,smmu-v2", .data = _smmuv2 }, { }, }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[Patch v15 4/5] dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2
Add bindings doc for Qcom's smmu-v2 implementation. Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- Changes since v14: - This is a new patch added in v15 after noticing the new checkpatch warning for separate dt-bindings doc. - This patch also addresses comments given by Rob and Robin to add a list of valid values of '<soc>' in "qcom,<soc>-smmu-v2" compatible string. .../devicetree/bindings/iommu/arm,smmu.txt | 47 ++ 1 file changed, 47 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 8a6ffce12af5..52198a539606 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,10 +17,24 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" +"qcom,<soc>-smmu-v2", "qcom,smmu-v2" depending on the particular implementation and/or the version of the architecture implemented. + A number of Qcom SoCs use qcom,smmu-v2 version of the IP. + "qcom,<soc>-smmu-v2" represents a soc specific compatible + string that should be present along with the "qcom,smmu-v2" + to facilitate SoC specific clocks/power connections and to + address specific bug fixes. + '<soc>' string in "qcom,<soc>-smmu-v2" should be one of the + following: + msm8996 - for msm8996 Qcom SoC. + sdm845 - for sdm845 Qcom Soc. + + An example string would be - + "qcom,msm8996-smmu-v2", "qcom,smmu-v2". + - reg : Base address and size of the SMMU. - #global-interrupts : The number of global interrupts exposed by the @@ -71,6 +85,22 @@ conditions. or using stream matching with #iommu-cells = <2>, and may be ignored if present in such cases. +- clock-names:List of the names of clocks input to the device.
The + required list depends on particular implementation and + is as follows: + - for "qcom,smmu-v2": +- "bus": clock required for downstream bus access and + for the smmu ptw, +- "iface": clock required to access smmu's registers + through the TCU's programming interface. + - unspecified for other implementations. + +- clocks: Specifiers for all clocks listed in the clock-names property, + as per generic clock bindings. + +- power-domains: Specifiers for power domains required to be powered on for + the SMMU to operate, as per generic power domain bindings. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -137,3 +167,20 @@ conditions. iommu-map = <0 0 0x400>; ... }; + + /* Qcom's arm,smmu-v2 implementation */ + smmu4: iommu { + compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2"; + reg = <0xd0 0x1>; + + #global-interrupts = <1>; + interrupts = , +, +; + #iommu-cells = <1>; + power-domains = < MDSS_GDSC>; + + clocks = < SMMU_MDP_AXI_CLK>, +< SMMU_MDP_AHB_CLK>; + clock-names = "bus", "iface"; + }; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[Patch v15 2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
From: Sricharan R The smmu device probe/remove and add/remove master device callbacks gets called when the smmu is not linked to its master, that is without the context of the master device. So calling runtime apis in those places separately. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- Changes since v14: - none. drivers/iommu/arm-smmu.c | 101 +++ 1 file changed, 93 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index a81224bc6637..23b4a60149b6 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -268,6 +268,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + return pm_runtime_get_sync(smmu->dev); + + return 0; +} + +static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_put(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -913,11 +927,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = _domain->cfg; - int irq; + int ret, irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return; + /* * Disable the context bank and free the page tables before freeing * it. 
@@ -932,6 +950,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); + + arm_smmu_rpm_put(smmu); } static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) @@ -1213,10 +1233,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENODEV; smmu = fwspec_smmu(fwspec); + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + /* Ensure that the domain is finalised */ ret = arm_smmu_init_domain_context(domain, smmu); if (ret < 0) - return ret; + goto rpm_put; /* * Sanity check the domain. We don't support domains across @@ -1226,33 +1251,50 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); - return -EINVAL; + ret = -EINVAL; + goto rpm_put; } /* Looks ok, so add the device to the domain */ - return arm_smmu_domain_add_master(smmu_domain, fwspec); + ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + +rpm_put: + arm_smmu_rpm_put(smmu); + return ret; } static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + int ret; if (!ops) return -ENODEV; - return ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_get(smmu); + ret = ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_put(smmu); + + return ret; } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + size_t ret; if (!ops) return 0; - return ops->unmap(ops, iova, size); + arm_smmu_rpm_get(smmu); + ret = 
ops->unmap(ops, iova, size); + arm_smmu_rpm_put(smmu); + + return ret; } static void arm_smmu_iotlb_sync(struct iommu_domain *domain) @@ -1407,7 +1449,13 @@ static int arm_smmu_add_device(struct device *dev) while (i--) cfg->smendx[i] = INVALID_SMENDX; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + goto out_cfg_free; + ret = arm_smmu_master_alloc_smes(dev); + arm_smmu_rpm_put(smmu); + if (ret)
[Patch v15 3/5] iommu/arm-smmu: Add the device_link between masters and smmu
From: Sricharan R Finally add the device link between the master device and smmu, so that the smmu gets runtime enabled/disabled only when the master needs it. This is done from add_device callback which gets called once when the master is added to the smmu. Signed-off-by: Sricharan R Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- Changes since v14: - none. drivers/iommu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 23b4a60149b6..b5e7f72d418c 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1461,6 +1461,9 @@ static int arm_smmu_add_device(struct device *dev) iommu_device_link(>iommu, dev); + device_link_add(dev, smmu->dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); + return 0; out_cfg_free: -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[Patch v15 1/5] iommu/arm-smmu: Add pm_runtime/sleep ops
From: Sricharan R The smmu needs to be functional only when the respective master's using it are active. The device_link feature helps to track such functional dependencies, so that the iommu gets powered when the master device enables itself using pm_runtime. So by adapting the smmu driver for runtime pm, above said dependency can be addressed. This patch adds the pm runtime/sleep callbacks to the driver and also the functions to parse the smmu clocks from DT and enable them in resume/suspend. Also, while we enable the runtime pm add a pm sleep suspend callback that pushes devices to low power state by turning the clocks off in a system sleep. Also add corresponding clock enable path in resume callback. Signed-off-by: Sricharan R Signed-off-by: Archit Taneja [vivek: rework for clock and pm ops] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa Tested-by: Srinivas Kandagatla --- Changes since v14: - none. drivers/iommu/arm-smmu.c | 77 ++-- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index fd1b80ef9490..a81224bc6637 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -205,6 +206,8 @@ struct arm_smmu_device { u32 num_global_irqs; u32 num_context_irqs; unsigned int*irqs; + struct clk_bulk_data*clks; + int num_clks; u32 cavium_id_base; /* Specific to Cavium */ @@ -1896,10 +1899,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) struct arm_smmu_match_data { enum arm_smmu_arch_version version; enum arm_smmu_implementation model; + const char * const *clks; + int num_clks; }; #define ARM_SMMU_MATCH_DATA(name, ver, imp)\ -static struct arm_smmu_match_data name = { .version = ver, .model = imp } +static const struct arm_smmu_match_data name = { .version = ver, .model = imp } ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, 
GENERIC_SMMU); @@ -1918,6 +1923,23 @@ static const struct of_device_id arm_smmu_of_match[] = { }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); +static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu, + const char * const *clks) +{ + int i; + + if (smmu->num_clks < 1) + return; + + smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks, + sizeof(*smmu->clks), GFP_KERNEL); + if (!smmu->clks) + return; + + for (i = 0; i < smmu->num_clks; i++) + smmu->clks[i].id = clks[i]; +} + #ifdef CONFIG_ACPI static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) { @@ -2000,6 +2022,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, data = of_device_get_match_data(dev); smmu->version = data->version; smmu->model = data->model; + smmu->num_clks = data->num_clks; + + arm_smmu_fill_clk_data(smmu, data->clks); parse_driver_options(smmu); @@ -2098,6 +2123,14 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->irqs[i] = irq; } + err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks); + if (err) + return err; + + err = clk_bulk_prepare(smmu->num_clks, smmu->clks); + if (err) + return err; + err = arm_smmu_device_cfg_probe(smmu); if (err) return err; @@ -2184,6 +2217,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + + clk_bulk_unprepare(smmu->num_clks, smmu->clks); + return 0; } @@ -2192,15 +2228,50 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev) arm_smmu_device_remove(pdev); } -static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) { struct arm_smmu_device *smmu = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_enable(smmu->num_clks, smmu->clks); + if (ret) + return ret; arm_smmu_device_reset(smmu); + return 0; } -static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume); +static int 
__maybe_unused arm_smmu_runtime_suspend(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + + clk_bulk_disable(smmu->num_clks, smmu->clks); +
[Patch v15 0/5] iommu/arm-smmu: Add runtime pm/sleep support
Introducing a patch to add device_link_find() API that finds an existing link between supplier and consumer devices. Also, made necessary change to device_link_add() to use this API. * arm_smmu_remove_device() now uses this device_link_find() to find the device link between smmu device and the master device, and then delete this link. * Dropped the destroy_domain_context() fix [5] as it was, rather, introducing a catastrophically bad problem by destroying the 'good dev's domain context. * Added 'Reviewed-by' tag for Tomasz's review. [v8] * Major change - - Added a flag 'rpm_supported' which each platform that supports runtime pm, can enable, and we enable runtime_pm over arm-smmu only when this flag is set. - Adding the conditional pm_runtime_get/put() calls to .map, .unmap and .attach_dev ops. - Dropped the patch [3] that exported pm_runtime_get/put_suppliers(), and also dropped the user driver patch [4] for these APIs. * Clock code further cleanup - doing only clk_bulk_enable() and clk_bulk_disable() in runtime pm callbacks. We shouldn't be taking a slow path (clk_prepare/unprepare()) from these runtime pm callbacks. Thereby, moved clk_bulk_prepare() to arm_smmu_device_probe(), and clk_bulk_unprepare() to arm_smmu_device_remove(). - clk data filling to a common method arm_smmu_fill_clk_data() that fills the clock ids and number of clocks. * Addressed other nits and comments - device_link_add() error path fixed. - Fix for checking negative error value from pm_runtime_get_sync(). - Documentation redo. * Added another patch fixing the error path in arm_smmu_attach_dev() to destroy allocated domain context. [v7] * Addressed review comments given by Robin Murphy - - Added device_link_del() in .remove_device path. - Error path cleanup in arm_smmu_add_device(). - Added pm_runtime_get/put_sync() in .remove path, and replaced pm_runtime_force_suspend() with pm_runtime_disable(). - clk_names cleanup in arm_smmu_init_clks() * Added 'Reviewed-by' given by Rob H.
** Change logs for previous versions is available in previous series [4]. [1] https://patchwork.kernel.org/patch/10204925/ [2] https://lore.kernel.org/patchwork/cover/968013/ [3] https://patchwork.kernel.org/patch/10204945/ [4] https://patchwork.kernel.org/patch/10204925/ [5] https://patchwork.kernel.org/patch/10254105/ [6] https://patchwork.kernel.org/patch/10277975/ [7] https://patchwork.kernel.org/patch/10281613/ [8] https://patchwork.kernel.org/patch/10491481/ [9] https://lore.kernel.org/patchwork/patch/974116/ Sricharan R (3): iommu/arm-smmu: Add pm_runtime/sleep ops iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device iommu/arm-smmu: Add the device_link between masters and smmu Vivek Gautam (2): dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2 iommu/arm-smmu: Add support for qcom,smmu-v2 variant .../devicetree/bindings/iommu/arm,smmu.txt | 47 + drivers/iommu/arm-smmu.c | 194 +++-- 2 files changed, 230 insertions(+), 11 deletions(-) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 3/3] dts: arm64/sdm845: Add node for qcom,smmu-v2
Hi Rob, Robin, On 8/15/2018 4:27 AM, Rob Herring wrote: On Wed, Aug 15, 2018 at 01:09:43AM +0530, Vivek Gautam wrote: Adding Jordan here. On Tue, Aug 14, 2018 at 4:19 PM, Robin Murphy wrote: Hi Vivek, On 14/08/18 11:27, Vivek Gautam wrote: Add device node for qcom,smmu-v2 available on sdm845. This smmu is available only to GPU device. Signed-off-by: Vivek Gautam --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 23 +++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 1c2be2082f33..bd1ec5fa5146 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -989,6 +990,28 @@ cell-index = <0>; }; + gpu_smmu: iommu@504 { + compatible = "qcom,sdm845-smmu-v2", "qcom,smmu-v2"; Which of "sdm845" or "msm8996"[1] is the actual SoC name here? Well, the bindings use the SoC prefix with smmu-v2, so it should be sdm845 for this SoC. This is same as I posted in my v1 of the series [2]. Using 8996 based string in sdm845 makes things look awful. You need to list valid values of '' in the binding. Otherwise we get this confusion. Sorry for delayed response, I was away on vacation. I will list down the valid values for '' as suggested, and respin this series, and smmu bindings patch that comes as part of the runtime pm series [1]. [1] https://lore.kernel.org/patchwork/patch/968017/ Best regards Vivek Rob ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 3/3] dts: arm64/sdm845: Add node for qcom,smmu-v2
Adding Jordan here. On Tue, Aug 14, 2018 at 4:19 PM, Robin Murphy wrote: > Hi Vivek, > > On 14/08/18 11:27, Vivek Gautam wrote: >> >> Add device node for qcom,smmu-v2 available on sdm845. >> This smmu is available only to GPU device. >> >> Signed-off-by: Vivek Gautam >> --- >> arch/arm64/boot/dts/qcom/sdm845.dtsi | 23 +++ >> 1 file changed, 23 insertions(+) >> >> diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi >> b/arch/arm64/boot/dts/qcom/sdm845.dtsi >> index 1c2be2082f33..bd1ec5fa5146 100644 >> --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi >> +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi >> @@ -6,6 +6,7 @@ >>*/ >> #include >> +#include >> #include >> #include >> #include >> @@ -989,6 +990,28 @@ >> cell-index = <0>; >> }; >> + gpu_smmu: iommu@504 { >> + compatible = "qcom,sdm845-smmu-v2", >> "qcom,smmu-v2"; > > > Which of "sdm845" or "msm8996"[1] is the actual SoC name here? Well, the bindings use the SoC prefix with smmu-v2, so it should be sdm845 for this SoC. This is same as I posted in my v1 of the series [2]. Using 8996 based string in sdm845 makes things look awful. Thanks Vivek [2] https://patchwork.kernel.org/patch/10534989/ > > Robin. > > [1] > https://www.mail-archive.com/freedreno@lists.freedesktop.org/msg02659.html > >> + reg = <0x504 0x1>; >> + #iommu-cells = <1>; >> + #global-interrupts = <2>; >> + interrupts = , >> +, >> +, >> +, >> +, >> +, >> +, >> +, >> +, >> +; >> + clock-names = "bus", "iface"; >> + clocks = < GCC_GPU_MEMNOC_GFX_CLK>, >> +< GCC_GPU_CFG_AHB_CLK>; >> + >> + /*power-domains = < GPU_CX_GDSC>;*/ >> + }; >> + >> apps_smmu: iommu@1500 { >> compatible = "qcom,sdm845-smmu-500", >> "arm,mmu-500"; >> reg = <0x1500 0x8>; >> > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. 
is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/5] iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling
On 8/14/2018 5:10 PM, Will Deacon wrote: On Tue, Aug 14, 2018 at 04:25:27PM +0530, Vivek Gautam wrote: Cleanup to re-use some of the stuff Maybe we should factor a few of the other bits whilst we're here. Sure, do you want me to refactor anything besides this change? Or just write a proper commit message ;) My bad. I should have written a more descriptive commit message. :| Will change this. Best regards Vivek Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 0/5] Qcom smmu-500 TLB invalidation errata for sdm845
Hi Will, On 8/14/2018 5:10 PM, Will Deacon wrote: Hi Vivek, On Tue, Aug 14, 2018 at 04:25:23PM +0530, Vivek Gautam wrote: Qcom's implementation of arm,mmu-500 on sdm845 has a functional/performance errata [1] because of which the TCU cache look ups are stalled during invalidation cycle. This is mitigated by serializing all the invalidation requests coming to the smmu. How does this implementation differ from the one supported by qcom_iommu.c? I notice you're adding firmware hooks here, which we avoided by having the extra driver. Please help me understand which devices exist, how they differ, and which drivers are intended to support them! IIRC, the qcom_iommu driver was intended to support the static context bank - SID mapping, and is very specific to the smmu-v2 version present on msm8916 soc. However, this is the qcom's mmu-500 implementation specific errata. qcom_iommu will not be able to support mmu-500 configurations. Rob Clark can add more. Let me know what you suggest. Also -- you didn't CC all the maintainers for the firmware bits, so adding Andy here for that, and Rob for the previous question. I added Andy to the series, would you want me to add Rob H also? Best regards Vivek Thanks, Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 4/5] iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling
Cleanup to re-use some of the stuff Signed-off-by: Vivek Gautam --- drivers/iommu/arm-smmu.c | 32 +--- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 32e86df80428..75c146751c87 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -391,21 +391,31 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) clear_bit(idx, map); } -/* Wait for any pending TLB invalidations to complete */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, - void __iomem *sync, void __iomem *status) +static int __arm_smmu_tlb_sync_wait(void __iomem *status) { unsigned int spin_cnt, delay; - writel_relaxed(0, sync); for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE)) - return; + return 0; cpu_relax(); } udelay(delay); } + + return -EBUSY; +} + +/* Wait for any pending TLB invalidations to complete */ +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, + void __iomem *sync, void __iomem *status) +{ + writel_relaxed(0, sync); + + if (!__arm_smmu_tlb_sync_wait(status)) + return; + dev_err_ratelimited(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n"); } @@ -461,8 +471,9 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) arm_smmu_tlb_sync_global(smmu); } -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void __arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = _domain->cfg; @@ -498,6 +509,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } } +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) +{ + __arm_smmu_tlb_inv_range_nosync(iova, 
size, granule, leaf, cookie); +} + /* * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears * almost negligible, but the benefit of getting the first one in as far ahead -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 5/5] iommu/arm-smmu: Add support to handle Qcom's TLBI serialization errata
Qcom's implementation of arm,mmu-500 require to serialize all TLB invalidations for context banks. In case the TLB invalidation requests don't go through the first time, there's a way to disable/enable the wait for safe logic. Disabling this logic expadites the TLBIs. Different bootloaders with their access control policies allow this register access differntly. With one, we should be able to directly make qcom-scm call to do io read/write, while with other we should use the specific SCM command to send request to do the complete register configuration. A separate device tree flag for arm-smmu will allow to identify which firmware configuration of the two mentioned above we use. Signed-off-by: Vivek Gautam --- drivers/iommu/arm-smmu-regs.h | 2 + drivers/iommu/arm-smmu.c | 136 +- 2 files changed, 136 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h index a1226e4ab5f8..71662cae9806 100644 --- a/drivers/iommu/arm-smmu-regs.h +++ b/drivers/iommu/arm-smmu-regs.h @@ -177,6 +177,8 @@ enum arm_smmu_s2cr_privcfg { #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 +#define ARM_SMMU_GID_QCOM_CUSTOM_CFG 0x300 + #define SCTLR_S1_ASIDPNE (1 << 12) #define SCTLR_CFCFG(1 << 7) #define SCTLR_CFIE (1 << 6) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 75c146751c87..fafdaeb4d097 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -179,7 +180,8 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_EXIDS(1 << 12) u32 features; -#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) +#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) +#define ARM_SMMU_OPT_QCOM_FW_IMPL_ERRATA (1 << 1) u32 options; enum arm_smmu_arch_version version; enum arm_smmu_implementationmodel; @@ -262,6 +264,7 @@ static bool using_legacy_binding, using_generic_binding; static struct arm_smmu_option_prop arm_smmu_options[] = { { 
ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" }, + { ARM_SMMU_OPT_QCOM_FW_IMPL_ERRATA, "qcom,smmu-500-fw-impl-errata" }, { 0, NULL}, }; @@ -531,12 +534,137 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); } +#define CUSTOM_CFG_MDP_SAFE_ENABLE BIT(15) +#define CUSTOM_CFG_IFE1_SAFE_ENABLEBIT(14) +#define CUSTOM_CFG_IFE0_SAFE_ENABLEBIT(13) + +static int __qsmmu500_wait_safe_toggle(struct arm_smmu_device *smmu, int en) +{ + int ret; + u32 val, gid_phys_base; + phys_addr_t reg; + struct vm_struct *vm; + + /* We want physical address of SMMU, so the vm_area */ + vm = find_vm_area(smmu->base); + + /* +* GID (implementation defined address space) is located at +* SMMU_BASE + (2 × PAGESIZE). +*/ + gid_phys_base = vm->phys_addr + (2 << (smmu)->pgshift); + reg = gid_phys_base + ARM_SMMU_GID_QCOM_CUSTOM_CFG; + + ret = qcom_scm_io_readl_atomic(reg, ); + if (ret) + return ret; + + if (en) + val |= CUSTOM_CFG_MDP_SAFE_ENABLE | + CUSTOM_CFG_IFE0_SAFE_ENABLE | + CUSTOM_CFG_IFE1_SAFE_ENABLE; + else + val &= ~(CUSTOM_CFG_MDP_SAFE_ENABLE | +CUSTOM_CFG_IFE0_SAFE_ENABLE | +CUSTOM_CFG_IFE1_SAFE_ENABLE); + + ret = qcom_scm_io_writel_atomic(reg, val); + + return ret; +} + +static int qsmmu500_wait_safe_toggle(struct arm_smmu_device *smmu, +int en, bool is_fw_impl) +{ + if (is_fw_impl) + return qcom_scm_qsmmu500_wait_safe_toggle(en); + else + return __qsmmu500_wait_safe_toggle(smmu, en); +} + +static void qcom_errata_tlb_sync(struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx); + void __iomem *status = base + ARM_SMMU_CB_TLBSTATUS; + bool is_fw_impl; + + writel_relaxed(0, base + ARM_SMMU_CB_TLBSYNC); + + if (!__arm_smmu_tlb_sync_wait(status)) + return; + + is_fw_impl = smmu->options & ARM_SMMU_OPT_QCOM_FW_IMPL_ERRATA ? 
+ true : false; + + /* SCM call here to disable the wait-for-safe logic. */ + if (WARN(qsmmu500_wait_safe_toggle(smmu, false, is_fw_impl), +"Failed to disable wait-safe logic, bad hw stat
[PATCH 0/5] Qcom smmu-500 TLB invalidation errata for sdm845
Qcom's implementation of arm,mmu-500 on sdm845 has a functional/performance errata [1] because of which the TCU cache lookups are stalled during the invalidation cycle. This is mitigated by serializing all the invalidation requests coming to the smmu. This patch series addresses this errata by adding new tlb_ops for qcom,sdm845-smmu-500 [2]. These ops take context bank locks for all the tlb_ops that queue and sync the TLB invalidation requests. Besides adding locks, there's a way to expedite these TLB invalidations for display and camera devices by turning off the 'wait-for-safe' logic in hardware that holds the tlb invalidations until a safe level. This 'wait-for-safe' logic is controlled by toggling a chicken bit through a secure register. This secure register is accessed by making an explicit SCM call into the EL3 firmware. There are two ways of handling this logic - * Firmware, such as tz present on sdm845-mtp devices has a handler to do all the register access and bit set/clear. So is the handling in downstream arm-smmu driver [3]. * Other firmwares can have handlers to just read/write this secure register. In such cases the kernel makes io_read/writel scm calls to modify the register. This patch series adds APIs in qcom-scm driver to handle both of these cases. Lastly, since these TLB invalidations can happen in atomic contexts there's a need to add atomic versions of qcom_scm_io_readl/writel() and qcom_scm_call() APIs. The traditional scm calls take a mutex and we therefore can't use these calls in atomic contexts. This patch series is an adapted version of how the errata is handled in downstream [1]. 
[1] https://source.codeaurora.org/quic/la/kernel/msm-4.9/tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4842 [2] https://lore.kernel.org/patchwork/patch/974114/ [3] https://source.codeaurora.org/quic/la/kernel/msm-4.9/tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4864 Vivek Gautam (5): firmware: qcom_scm-64: Add atomic version of qcom_scm_call firmware/qcom_scm: Add atomic version of io read/write APIs firmware/qcom_scm: Add scm call to handle smmu errata iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling iommu/arm-smmu: Add support to handle Qcom's TLBI serialization errata drivers/firmware/qcom_scm-32.c | 17 drivers/firmware/qcom_scm-64.c | 181 +++-- drivers/firmware/qcom_scm.c| 18 drivers/firmware/qcom_scm.h| 9 ++ drivers/iommu/arm-smmu-regs.h | 2 + drivers/iommu/arm-smmu.c | 168 -- include/linux/qcom_scm.h | 6 ++ 7 files changed, 348 insertions(+), 53 deletions(-) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 3/5] firmware/qcom_scm: Add scm call to handle smmu errata
Qcom's smmu-500 needs to toggle wait-for-safe sequence to handle TLB invalidation sync's. Few firmwares allow doing that through SCM interface. Add API to toggle wait for safe from firmware through a SCM call. Signed-off-by: Vivek Gautam --- drivers/firmware/qcom_scm-32.c | 5 + drivers/firmware/qcom_scm-64.c | 13 + drivers/firmware/qcom_scm.c| 6 ++ drivers/firmware/qcom_scm.h| 5 + include/linux/qcom_scm.h | 2 ++ 5 files changed, 31 insertions(+) diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c index 7293e5efad69..2d301ad053f8 100644 --- a/drivers/firmware/qcom_scm-32.c +++ b/drivers/firmware/qcom_scm-32.c @@ -639,3 +639,8 @@ int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, { return -ENODEV; } + +int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool enable) +{ + return -ENODEV; +} diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c index 6bf55403f6e3..f13bcabc5d78 100644 --- a/drivers/firmware/qcom_scm-64.c +++ b/drivers/firmware/qcom_scm-64.c @@ -590,3 +590,16 @@ int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE, , ); } + +int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool en) +{ + struct qcom_scm_desc desc = {0}; + struct arm_smccc_res res; + + desc.args[0] = QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL; + desc.args[1] = en; + desc.arginfo = QCOM_SCM_ARGS(2); + + return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_SMMU_PROGRAM, + QCOM_SCM_CONFIG_ERRATA1, , ); +} diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index 36dab37f..5f15cc2e9f69 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -353,6 +353,12 @@ int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) } EXPORT_SYMBOL(qcom_scm_iommu_secure_ptbl_init); +int qcom_scm_qsmmu500_wait_safe_toggle(bool en) +{ + return __qcom_scm_qsmmu500_wait_safe_toggle(__scm->dev, en); +} 
+EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle); + int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) { return __qcom_scm_io_readl(__scm->dev, addr, val); diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h index bb176107f51e..89a822c23e33 100644 --- a/drivers/firmware/qcom_scm.h +++ b/drivers/firmware/qcom_scm.h @@ -103,10 +103,15 @@ extern int __qcom_scm_restore_sec_cfg(struct device *dev, u32 device_id, u32 spare); #define QCOM_SCM_IOMMU_SECURE_PTBL_SIZE3 #define QCOM_SCM_IOMMU_SECURE_PTBL_INIT4 +#define QCOM_SCM_SVC_SMMU_PROGRAM 0x15 +#define QCOM_SCM_CONFIG_ERRATA10x3 +#define QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL 0x2 extern int __qcom_scm_iommu_secure_ptbl_size(struct device *dev, u32 spare, size_t *size); extern int __qcom_scm_iommu_secure_ptbl_init(struct device *dev, u64 addr, u32 size, u32 spare); +extern int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, + bool enable); #define QCOM_MEM_PROT_ASSIGN_ID0x16 extern int __qcom_scm_assign_mem(struct device *dev, phys_addr_t mem_region, size_t mem_sz, diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 6a5d0c98b328..46e6b1692998 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -62,6 +62,7 @@ extern int qcom_scm_set_remote_state(u32 state, u32 id); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); extern int qcom_scm_io_readl_atomic(phys_addr_t addr, unsigned int *val); @@ -100,6 +101,7 @@ qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; } static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return -ENODEV; } static inline int qcom_scm_iommu_secure_ptbl_size(u32 
spare, size_t *size) { return -ENODEV; } static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; } +static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) { return -ENODEV; } static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) { return -ENODEV; } static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) { return -ENODEV; } static inline int qcom_scm_io_readl_atomic(phys_addr_t addr, unsigned int *val) { return -
[PATCH 2/5] firmware/qcom_scm: Add atomic version of io read/write APIs
Add atomic versions of qcom_scm_io_readl/writel to enable reading/writing secure registers from atomic context. Signed-off-by: Vivek Gautam --- drivers/firmware/qcom_scm-32.c | 12 drivers/firmware/qcom_scm-64.c | 32 drivers/firmware/qcom_scm.c| 12 drivers/firmware/qcom_scm.h| 4 include/linux/qcom_scm.h | 4 5 files changed, 64 insertions(+) diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c index 4e24e591ae74..7293e5efad69 100644 --- a/drivers/firmware/qcom_scm-32.c +++ b/drivers/firmware/qcom_scm-32.c @@ -627,3 +627,15 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val) return qcom_scm_call_atomic2(QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE, addr, val); } + +int __qcom_scm_io_readl_atomic(struct device *dev, phys_addr_t addr, + unsigned int *val) +{ + return -ENODEV; +} + +int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, + unsigned int val) +{ + return -ENODEV; +} diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c index 3a8c867cdf51..6bf55403f6e3 100644 --- a/drivers/firmware/qcom_scm-64.c +++ b/drivers/firmware/qcom_scm-64.c @@ -558,3 +558,35 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val) return qcom_scm_call(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE, , ); } + +int __qcom_scm_io_readl_atomic(struct device *dev, phys_addr_t addr, + unsigned int *val) +{ + struct qcom_scm_desc desc = {0}; + struct arm_smccc_res res; + int ret; + + desc.args[0] = addr; + desc.arginfo = QCOM_SCM_ARGS(1); + + ret = qcom_scm_call_atomic(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_READ, + , ); + if (ret >= 0) + *val = res.a1; + + return ret < 0 ? 
ret : 0; +} + +int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, + unsigned int val) +{ + struct qcom_scm_desc desc = {0}; + struct arm_smccc_res res; + + desc.args[0] = addr; + desc.args[1] = val; + desc.arginfo = QCOM_SCM_ARGS(2); + + return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE, + , ); +} diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index e778af766fae..36dab37f 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -365,6 +365,18 @@ int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) } EXPORT_SYMBOL(qcom_scm_io_writel); +int qcom_scm_io_readl_atomic(phys_addr_t addr, unsigned int *val) +{ + return __qcom_scm_io_readl_atomic(__scm->dev, addr, val); +} +EXPORT_SYMBOL(qcom_scm_io_readl_atomic); + +int qcom_scm_io_writel_atomic(phys_addr_t addr, unsigned int val) +{ + return __qcom_scm_io_writel_atomic(__scm->dev, addr, val); +} +EXPORT_SYMBOL(qcom_scm_io_writel_atomic); + static void qcom_scm_set_download_mode(bool enable) { bool avail; diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h index dcd7f7917fc7..bb176107f51e 100644 --- a/drivers/firmware/qcom_scm.h +++ b/drivers/firmware/qcom_scm.h @@ -37,6 +37,10 @@ extern void __qcom_scm_cpu_power_down(u32 flags); #define QCOM_SCM_IO_WRITE 0x2 extern int __qcom_scm_io_readl(struct device *dev, phys_addr_t addr, unsigned int *val); extern int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val); +extern int __qcom_scm_io_readl_atomic(struct device *dev, phys_addr_t addr, + unsigned int *val); +extern int __qcom_scm_io_writel_atomic(struct device *dev, phys_addr_t addr, + unsigned int val); #define QCOM_SCM_SVC_INFO 0x6 #define QCOM_IS_CALL_AVAIL_CMD 0x1 diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 5d65521260b3..6a5d0c98b328 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -64,6 +64,8 @@ extern int 
qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); +extern int qcom_scm_io_readl_atomic(phys_addr_t addr, unsigned int *val); +extern int qcom_scm_io_writel_atomic(phys_addr_t addr, unsigned int val); #else static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) @@ -100,5 +102,7 @@ static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { ret static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; } stati
[PATCH v2 3/3] dts: arm64/sdm845: Add node for qcom,smmu-v2
Add device node for qcom,smmu-v2 available on sdm845. This smmu is available only to GPU device. Signed-off-by: Vivek Gautam --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 23 +++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 1c2be2082f33..bd1ec5fa5146 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -989,6 +990,28 @@ cell-index = <0>; }; + gpu_smmu: iommu@504 { + compatible = "qcom,sdm845-smmu-v2", "qcom,smmu-v2"; + reg = <0x504 0x1>; + #iommu-cells = <1>; + #global-interrupts = <2>; + interrupts = , +, +, +, +, +, +, +, +, +; + clock-names = "bus", "iface"; + clocks = < GCC_GPU_MEMNOC_GFX_CLK>, +< GCC_GPU_CFG_AHB_CLK>; + + /*power-domains = < GPU_CX_GDSC>;*/ + }; + apps_smmu: iommu@1500 { compatible = "qcom,sdm845-smmu-500", "arm,mmu-500"; reg = <0x1500 0x8>; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 2/3] dts: arm64/sdm845: Add node for arm,mmu-500
Add device node for arm,mmu-500 available on sdm845. This MMU-500 with single TCU and multiple TBU architecture is shared among all the peripherals except gpu on sdm845. Signed-off-by: Vivek Gautam --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 72 1 file changed, 72 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index eb4ab33bf6f4..1c2be2082f33 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -989,6 +989,78 @@ cell-index = <0>; }; + apps_smmu: iommu@1500 { + compatible = "qcom,sdm845-smmu-500", "arm,mmu-500"; + reg = <0x1500 0x8>; + #iommu-cells = <2>; + #global-interrupts = <1>; + interrupts = , +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +, +; + }; + apss_shared: mailbox@1799 { compatible = "qcom,sdm845-apss-shared"; reg = <0x1799 0x1000>; -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 0/3] Enable smmu support on sdm845
This series enables apps-smmu (arm,mmu-500) and gpu-smmu (qcom,smmu-v2) on sdm845. gpu-smmu needs one power domain from gpu clock controller whose driver was sent by Amit [1]. Changes since v1: - Addressed Rob's review comments by adding a SoC specific compatible. Have added a new dt-bindings patch for this. - Updated node name to 'iommu'. - Addressed Doug's review comment about removing status property from smmu's nodes, as smmu is either present on the soc or not. Enabling it is not a board-level decision. [1] https://lore.kernel.org/patchwork/patch/973839/ Vivek Gautam (3): dt-bindings: arm-smmu: Add binding doc for Qcom smmu-500 dts: arm64/sdm845: Add node for arm,mmu-500 dts: arm64/sdm845: Add node for qcom,smmu-v2 .../devicetree/bindings/iommu/arm,smmu.txt | 5 ++ arch/arm64/boot/dts/qcom/sdm845.dtsi | 95 ++ 2 files changed, 100 insertions(+) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 1/3] dt-bindings: arm-smmu: Add binding doc for Qcom smmu-500
Qcom's implementation of arm,mmu-500 works well with current arm-smmu driver implementation. Adding a soc specific compatible along with arm,mmu-500 makes the bindings future safe. Signed-off-by: Vivek Gautam --- Documentation/devicetree/bindings/iommu/arm,smmu.txt | 5 + 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 7c71a6ed465a..7d73b2a259fc 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -18,6 +18,7 @@ conditions. "arm,mmu-500" "cavium,smmu-v2" "qcom,<soc>-smmu-v2", "qcom,smmu-v2" +"qcom,<soc>-smmu-500", "arm,mmu-500" depending on the particular implementation and/or the version of the architecture implemented. @@ -30,6 +31,10 @@ conditions. An example string would be - "qcom,msm8996-smmu-v2", "qcom,smmu-v2". + "qcom,<soc>-smmu-500" compatible string represents qcom's soc + specific implementation of arm,mmu-500, and should be present + along with "arm,mmu-500". + - reg : Base address and size of the SMMU. - #global-interrupts : The number of global interrupts exposed by the -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/3] dts: arm64/sdm845: Add node for arm,mmu-500
Hi Doug, On 8/11/2018 4:00 AM, Doug Anderson wrote: Hi, On Fri, Aug 10, 2018 at 3:18 PM, Doug Anderson wrote: Hi, On Thu, Jul 19, 2018 at 10:53 AM, Vivek Gautam wrote: Add device node for arm,mmu-500 available on sdm845. This MMU-500 with single TCU and multiple TBU architecture is shared among all the peripherals except gpu on sdm845. Signed-off-by: Vivek Gautam --- arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 4 ++ arch/arm64/boot/dts/qcom/sdm845.dtsi| 73 + 2 files changed, 77 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts index 6d651f314193..13b50dff440f 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts @@ -58,3 +58,7 @@ bias-pull-up; }; }; + +_smmu { + status = "okay"; +}; When you spin this patch please put the above in the correct place. Since "a" sorts alphabetically before "i" then this should be just before the line: { Sorry--one more thing I thought of after I sent this out... Possibly you can drop this part of the patch completely and get rid of the 'status = "disabled";' in sdm845.dtsi. As I understand it you really only want to mark things as disabled in the SoC dtsi file if some boards might use this device and other boards wouldn't. For instance not all boards will have the SD card controller hooked up / enabled so having that set to "disabled" in the SoC device tree file makes sense. ...but it's not a board-level question about whether the SMMU is present--it's always there. You don't gain anything by forcing all boards to set status to "okay". Thanks for reviewing the patches. Will sort the node as per alphabetical order. Also as you pointed, it makes sense to not have the 'status' property in SMMU. Will remove that. Thanks. Best regards Vivek -Doug ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 3/3] iommu/arm-smmu: Error out only if not enough context interrupts
On Wed, Jul 25, 2018 at 5:27 PM, Will Deacon wrote: > On Tue, Jul 24, 2018 at 03:09:41PM +0530, Vivek Gautam wrote: >> On 7/24/2018 2:06 PM, Will Deacon wrote: >> >On Thu, Jul 19, 2018 at 11:23:56PM +0530, Vivek Gautam wrote: >> >>diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c >> >>index 7c69736a30f8..4cb53bf4f423 100644 >> >>--- a/drivers/iommu/arm-smmu.c >> >>+++ b/drivers/iommu/arm-smmu.c >> >>@@ -2229,12 +2229,19 @@ static int arm_smmu_device_probe(struct >> >>platform_device *pdev) >> >>if (err) >> >>return err; >> >>- if (smmu->version == ARM_SMMU_V2 && >> >>- smmu->num_context_banks != smmu->num_context_irqs) { >> >>- dev_err(dev, >> >>- "found only %d context interrupt(s) but %d required\n", >> >>- smmu->num_context_irqs, smmu->num_context_banks); >> >>- return -ENODEV; >> >>+ if (smmu->version == ARM_SMMU_V2) { >> >>+ if (smmu->num_context_banks > smmu->num_context_irqs) { >> >>+ dev_err(dev, >> >>+ "found only %d context irq(s) but %d required\n", >> >>+ smmu->num_context_irqs, smmu->num_context_banks); >> >>+ return -ENODEV; >> >>+ } else if (smmu->num_context_banks < smmu->num_context_irqs) { >> >>+ /* loose extra context interrupts */ >> >>+ dev_notice(dev, >> >>+ "found %d context irq(s) but only %d required\n", >> >>+ smmu->num_context_irqs, smmu->num_context_banks); >> >>+ smmu->num_context_irqs = smmu->num_context_banks; >> >>+ } >> >I don't see the utility in the new message. Can you simplify with the patch >> >below on top? It's a bit weird that we only decide to ignore the extra irqs >> >after calling platform_get_irq() on them, but that seems to be harmless. >> >> Thanks. I will modify as suggested below and respin. > > It's ok, I can make the change locally. Thanks Will for making the changes, and picking this. Best regards Vivek -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. 
is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v14 4/4] iommu/arm-smmu: Add support for qcom,smmu-v2 variant
qcom,smmu-v2 is an arm,smmu-v2 implementation with specific clock and power requirements. This smmu core is used with multiple masters on msm8996, viz. mdss, video, etc. Add bindings for the same. Signed-off-by: Vivek Gautam Reviewed-by: Rob Herring Reviewed-by: Tomasz Figa --- Change since v13: - No change. .../devicetree/bindings/iommu/arm,smmu.txt | 42 ++ drivers/iommu/arm-smmu.c | 13 +++ 2 files changed, 55 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 8a6ffce12af5..7c71a6ed465a 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -17,10 +17,19 @@ conditions. "arm,mmu-401" "arm,mmu-500" "cavium,smmu-v2" +"qcom,-smmu-v2", "qcom,smmu-v2" depending on the particular implementation and/or the version of the architecture implemented. + A number of Qcom SoCs use qcom,smmu-v2 version of the IP. + "qcom,-smmu-v2" represents a soc specific compatible + string that should be present along with the "qcom,smmu-v2" + to facilitate SoC specific clocks/power connections and to + address specific bug fixes. + An example string would be - + "qcom,msm8996-smmu-v2", "qcom,smmu-v2". + - reg : Base address and size of the SMMU. - #global-interrupts : The number of global interrupts exposed by the @@ -71,6 +80,22 @@ conditions. or using stream matching with #iommu-cells = <2>, and may be ignored if present in such cases. +- clock-names:List of the names of clocks input to the device. The + required list depends on particular implementation and + is as follows: + - for "qcom,smmu-v2": +- "bus": clock required for downstream bus access and + for the smmu ptw, +- "iface": clock required to access smmu's registers + through the TCU's programming interface. + - unspecified for other implementations. + +- clocks: Specifiers for all clocks listed in the clock-names property, + as per generic clock bindings. 
+ +- power-domains: Specifiers for power domains required to be powered on for + the SMMU to operate, as per generic power domain bindings. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -137,3 +162,20 @@ conditions. iommu-map = <0 0 0x400>; ... }; + + /* Qcom's arm,smmu-v2 implementation */ + smmu4: iommu { + compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2"; + reg = <0xd0 0x1>; + + #global-interrupts = <1>; + interrupts = , +, +; + #iommu-cells = <1>; + power-domains = < MDSS_GDSC>; + + clocks = < SMMU_MDP_AXI_CLK>, +< SMMU_MDP_AHB_CLK>; + clock-names = "bus", "iface"; + }; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e558abf1ecfc..2b4edba188a5 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -119,6 +119,7 @@ enum arm_smmu_implementation { GENERIC_SMMU, ARM_MMU500, CAVIUM_SMMUV2, + QCOM_SMMUV2, }; struct arm_smmu_s2cr { @@ -1971,6 +1972,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); +static const char * const qcom_smmuv2_clks[] = { + "bus", "iface", +}; + +static const struct arm_smmu_match_data qcom_smmuv2 = { + .version = ARM_SMMU_V2, + .model = QCOM_SMMUV2, + .clks = qcom_smmuv2_clks, + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks), +}; + static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", .data = _generic_v1 }, { .compatible = "arm,smmu-v2", .data = _generic_v2 }, @@ -1978,6 +1990,7 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,mmu-401", .data = _mmu401 }, { .compatible = "arm,mmu-500", .data = _mmu500 }, {
[PATCH v14 3/4] iommu/arm-smmu: Add the device_link between masters and smmu
From: Sricharan R Finally add the device link between the master device and smmu, so that the smmu gets runtime enabled/disabled only when the master needs it. This is done from add_device callback which gets called once when the master is added to the smmu. Signed-off-by: Sricharan R Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa --- Change since v13: - No change. drivers/iommu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 1efa5681b905..e558abf1ecfc 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1461,6 +1461,9 @@ static int arm_smmu_add_device(struct device *dev) iommu_device_link(>iommu, dev); + device_link_add(dev, smmu->dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); + return 0; out_cfg_free: -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v14 2/4] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
From: Sricharan R The smmu device probe/remove and add/remove master device callbacks gets called when the smmu is not linked to its master, that is without the context of the master device. So calling runtime apis in those places separately. Signed-off-by: Sricharan R [vivek: Cleanup pm runtime calls] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa --- Change since v13: - No change. drivers/iommu/arm-smmu.c | 101 +++ 1 file changed, 93 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5f6a9e3c0079..1efa5681b905 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -268,6 +268,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + return pm_runtime_get_sync(smmu->dev); + + return 0; +} + +static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) +{ + if (pm_runtime_enabled(smmu->dev)) + pm_runtime_put(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -913,11 +927,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = _domain->cfg; - int irq; + int ret, irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return; + /* * Disable the context bank and free the page tables before freeing * it. 
@@ -932,6 +950,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); + + arm_smmu_rpm_put(smmu); } static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) @@ -1213,10 +1233,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENODEV; smmu = fwspec_smmu(fwspec); + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + /* Ensure that the domain is finalised */ ret = arm_smmu_init_domain_context(domain, smmu); if (ret < 0) - return ret; + goto rpm_put; /* * Sanity check the domain. We don't support domains across @@ -1226,33 +1251,50 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); - return -EINVAL; + ret = -EINVAL; + goto rpm_put; } /* Looks ok, so add the device to the domain */ - return arm_smmu_domain_add_master(smmu_domain, fwspec); + ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + +rpm_put: + arm_smmu_rpm_put(smmu); + return ret; } static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + int ret; if (!ops) return -ENODEV; - return ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_get(smmu); + ret = ops->map(ops, iova, paddr, size, prot); + arm_smmu_rpm_put(smmu); + + return ret; } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + size_t ret; if (!ops) return 0; - return ops->unmap(ops, iova, size); + arm_smmu_rpm_get(smmu); + ret = 
ops->unmap(ops, iova, size); + arm_smmu_rpm_put(smmu); + + return ret; } static void arm_smmu_iotlb_sync(struct iommu_domain *domain) @@ -1407,7 +1449,13 @@ static int arm_smmu_add_device(struct device *dev) while (i--) cfg->smendx[i] = INVALID_SMENDX; + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + goto out_cfg_free; + ret = arm_smmu_master_alloc_smes(dev); + arm_smmu_rpm_put(smmu); + if (ret) goto out
[PATCH v14 0/4] iommu/arm-smmu: Add runtime pm/sleep support
ink_add() to use this API. * arm_smmu_remove_device() now uses this device_link_find() to find the device link between smmu device and the master device, and then delete this link. * Dropped the destroy_domain_context() fix [5] as it was, rather, introducing a catastrophically bad problem by destroying a 'good' dev's domain context. * Added 'Reviewed-by' tag for Tomasz's review. [v8] * Major change - - Added a flag 'rpm_supported' which each platform that supports runtime pm, can enable, and we enable runtime_pm over arm-smmu only when this flag is set. - Adding the conditional pm_runtime_get/put() calls to .map, .unmap and .attach_dev ops. - Dropped the patch [3] that exported pm_runtime_get/put_suppliers(), and also dropped the user driver patch [4] for these APIs. * Clock code further cleanup - doing only clk_bulk_enable() and clk_bulk_disable() in runtime pm callbacks. We shouldn't be taking a slow path (clk_prepare/unprepare()) from these runtime pm callbacks. Thereby, moved clk_bulk_prepare() to arm_smmu_device_probe(), and clk_bulk_unprepare() to arm_smmu_device_remove(). - clk data filling to a common method arm_smmu_fill_clk_data() that fills the clock ids and number of clocks. * Addressed other nits and comments - device_link_add() error path fixed. - Fix for checking negative error value from pm_runtime_get_sync(). - Documentation redo. * Added another patch fixing the error path in arm_smmu_attach_dev() to destroy allocated domain context. [v7] * Addressed review comments given by Robin Murphy - - Added device_link_del() in .remove_device path. - Error path cleanup in arm_smmu_add_device(). - Added pm_runtime_get/put_sync() in .remove path, and replaced pm_runtime_force_suspend() with pm_runtime_disable(). - clk_names cleanup in arm_smmu_init_clks() * Added 'Reviewed-by' given by Rob H. ** Change logs for previous versions are available in the last series [4]. 
[1] https://patchwork.kernel.org/patch/10204925/ [2] https://lore.kernel.org/patchwork/cover/964655/ [3] https://patchwork.kernel.org/patch/10204945/ [4] https://patchwork.kernel.org/patch/10204925/ [5] https://patchwork.kernel.org/patch/10254105/ [6] https://patchwork.kernel.org/patch/10277975/ [7] https://patchwork.kernel.org/patch/10281613/ [8] https://patchwork.kernel.org/patch/10491481/ [9] https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git/log/?h=linux-next [10] https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/log/?h=next Sricharan R (3): iommu/arm-smmu: Add pm_runtime/sleep ops iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device iommu/arm-smmu: Add the device_link between masters and smmu Vivek Gautam (1): iommu/arm-smmu: Add support for qcom,smmu-v2 variant .../devicetree/bindings/iommu/arm,smmu.txt | 42 + drivers/iommu/arm-smmu.c | 194 +++-- 2 files changed, 225 insertions(+), 11 deletions(-) -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v14 1/4] iommu/arm-smmu: Add pm_runtime/sleep ops
From: Sricharan R The smmu needs to be functional only when the respective masters using it are active. The device_link feature helps to track such functional dependencies, so that the iommu gets powered when the master device enables itself using pm_runtime. So by adapting the smmu driver for runtime pm, the above-said dependency can be addressed. This patch adds the pm runtime/sleep callbacks to the driver and also the functions to parse the smmu clocks from DT and enable them in resume/suspend. Also, while we enable the runtime pm add a pm sleep suspend callback that pushes devices to low power state by turning the clocks off in a system sleep. Also add corresponding clock enable path in resume callback. Signed-off-by: Sricharan R Signed-off-by: Archit Taneja [vivek: rework for clock and pm ops] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa --- Changes since v13: - Moved arm_smmu_device_reset() from arm_smmu_pm_resume() to arm_smmu_runtime_resume(). arm_smmu_pm_resume() calls just runtime_resume() now. 
drivers/iommu/arm-smmu.c | 77 ++-- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index c73cfce1ccc0..5f6a9e3c0079 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -205,6 +206,8 @@ struct arm_smmu_device { u32 num_global_irqs; u32 num_context_irqs; unsigned int*irqs; + struct clk_bulk_data*clks; + int num_clks; u32 cavium_id_base; /* Specific to Cavium */ @@ -1897,10 +1900,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) struct arm_smmu_match_data { enum arm_smmu_arch_version version; enum arm_smmu_implementation model; + const char * const *clks; + int num_clks; }; #define ARM_SMMU_MATCH_DATA(name, ver, imp)\ -static struct arm_smmu_match_data name = { .version = ver, .model = imp } +static const struct arm_smmu_match_data name = { .version = ver, .model = imp } ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); @@ -1919,6 +1924,23 @@ static const struct of_device_id arm_smmu_of_match[] = { }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); +static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu, + const char * const *clks) +{ + int i; + + if (smmu->num_clks < 1) + return; + + smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks, + sizeof(*smmu->clks), GFP_KERNEL); + if (!smmu->clks) + return; + + for (i = 0; i < smmu->num_clks; i++) + smmu->clks[i].id = clks[i]; +} + #ifdef CONFIG_ACPI static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) { @@ -2001,6 +2023,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, data = of_device_get_match_data(dev); smmu->version = data->version; smmu->model = data->model; + smmu->num_clks = data->num_clks; + + arm_smmu_fill_clk_data(smmu, data->clks); parse_driver_options(smmu); @@ -2099,6 +2124,14 @@ static int 
arm_smmu_device_probe(struct platform_device *pdev) smmu->irqs[i] = irq; } + err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks); + if (err) + return err; + + err = clk_bulk_prepare(smmu->num_clks, smmu->clks); + if (err) + return err; + err = arm_smmu_device_cfg_probe(smmu); if (err) return err; @@ -2181,6 +2214,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + + clk_bulk_unprepare(smmu->num_clks, smmu->clks); + return 0; } @@ -2189,15 +2225,50 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev) arm_smmu_device_remove(pdev); } -static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) { struct arm_smmu_device *smmu = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_enable(smmu->num_clks, smmu->clks); + if (ret) + return ret; arm_smmu_device_reset(smmu); + return 0; } -static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume); +static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev) +{ + struct arm_smmu_device *s
Re: [PATCH v13 1/4] iommu/arm-smmu: Add pm_runtime/sleep ops
On 7/26/2018 9:00 PM, Robin Murphy wrote: On 26/07/18 08:12, Vivek Gautam wrote: On Wed, Jul 25, 2018 at 11:46 PM, Vivek Gautam wrote: On Tue, Jul 24, 2018 at 8:51 PM, Robin Murphy wrote: On 19/07/18 11:15, Vivek Gautam wrote: From: Sricharan R The smmu needs to be functional only when the respective master's using it are active. The device_link feature helps to track such functional dependencies, so that the iommu gets powered when the master device enables itself using pm_runtime. So by adapting the smmu driver for runtime pm, above said dependency can be addressed. This patch adds the pm runtime/sleep callbacks to the driver and also the functions to parse the smmu clocks from DT and enable them in resume/suspend. Also, while we enable the runtime pm add a pm sleep suspend callback that pushes devices to low power state by turning the clocks off in a system sleep. Also add corresponding clock enable path in resume callback. Signed-off-by: Sricharan R Signed-off-by: Archit Taneja [vivek: rework for clock and pm ops] Signed-off-by: Vivek Gautam Reviewed-by: Tomasz Figa --- Changes since v12: - Added pm sleep .suspend callback. This disables the clocks. - Added corresponding change to enable clocks in .resume pm sleep callback. drivers/iommu/arm-smmu.c | 75 ++-- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index c73cfce1ccc0..9138a6fffe04 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c [snip] platform_device *pdev) arm_smmu_device_remove(pdev); } +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + + return clk_bulk_enable(smmu->num_clks, smmu->clks); If there's a power domain being automatically switched by genpd then we need a reset here because we may have lost state entirely. 
Since I remembered the otherwise-useless GPU SMMU on Juno is in a separate power domain, I gave it a poking via sysfs with some debug stuff to dump sCR0 in these callbacks, and the problem is clear: ... [ 4.625551] arm-smmu 2b40.iommu: genpd_runtime_suspend() [ 4.631163] arm-smmu 2b40.iommu: arm_smmu_runtime_suspend: 0x00201936 [ 4.637897] arm-smmu 2b40.iommu: suspend latency exceeded, 6733980 ns [ 21.566983] arm-smmu 2b40.iommu: genpd_runtime_resume() [ 21.584796] arm-smmu 2b40.iommu: arm_smmu_runtime_resume: 0x00220101 [ 21.591452] arm-smmu 2b40.iommu: resume latency exceeded, 6658020 ns ... Qualcomm SoCs have retention enabled for SMMU registers so they don't lose state. ... [ 256.013367] arm-smmu b4.arm,smmu: arm_smmu_runtime_suspend SCR0 = 0x201e36 [ 256.013367] [ 256.019160] arm-smmu b4.arm,smmu: arm_smmu_runtime_resume SCR0 = 0x201e36 [ 256.019160] [ 256.027368] arm-smmu b4.arm,smmu: arm_smmu_runtime_suspend SCR0 = 0x201e36 [ 256.027368] [ 256.036786] arm-smmu b4.arm,smmu: arm_smmu_runtime_resume SCR0 = 0x201e36 ... However after adding arm_smmu_device_reset() in runtime_resume() I observe some performance degradation when kill an instance of 'kmscube' and start it again. The launch time with arm_smmu_device_reset() in runtime_resume() change is more. Could this be because of frequent TLB invalidation and sync? Probably. Plus the reset procedure is a big chunk of MMIO accesses, which for a non-trivial SMMU configuration probably isn't negligible in itself. Unfortunately, unless you know for absolute certain that you don't need to do that, you do. Some more information that i gathered. On Qcom SoCs besides the registers retention, TCU invalidates TLB cache on a CX power collapse exit, which is the system wide suspend case. The arm-smmu software is not aware of this CX power collapse / auto-invalidation. So wouldn't doing an explicit TLB invalidations during runtime resume be detrimental to performance? 
Indeed it would be, but resuming with TLBs full of random valid-looking junk is even more so. I have one more doubt here - We do runtime power cycle around arm_smmu_map/unmap() too. Now during map/unmap we selectively do TLB maintenance (either tlb_sync or tlb_add_flush). But with runtime pm we want to do TLBIALL*. Is that a problem? It's technically redundant to do both, true, but as we've covered in previous rounds of discussion it's very difficult to know *which* one is sufficient at any given time, so in order to make progress for now I think we have to settle with doing both. Thanks Robin. I will respin the patches as Tomasz also suggested; arm_smmu_pm_resume() will look like: if (pm_runtime_suspended(dev)) return 0; return arm_smmu_runtime_resume(dev); and, arm_smmu_runtime_resume() will have arm_smmu_device_reset(). Best regards Vivek Robin. -- To unsubscribe from this list: send the line "unsubsc
Re: [PATCH v13 1/4] iommu/arm-smmu: Add pm_runtime/sleep ops
On Wed, Jul 25, 2018 at 11:46 PM, Vivek Gautam wrote: > On Tue, Jul 24, 2018 at 8:51 PM, Robin Murphy wrote: >> On 19/07/18 11:15, Vivek Gautam wrote: >>> >>> From: Sricharan R >>> >>> The smmu needs to be functional only when the respective >>> master's using it are active. The device_link feature >>> helps to track such functional dependencies, so that the >>> iommu gets powered when the master device enables itself >>> using pm_runtime. So by adapting the smmu driver for >>> runtime pm, above said dependency can be addressed. >>> >>> This patch adds the pm runtime/sleep callbacks to the >>> driver and also the functions to parse the smmu clocks >>> from DT and enable them in resume/suspend. >>> >>> Also, while we enable the runtime pm add a pm sleep suspend >>> callback that pushes devices to low power state by turning >>> the clocks off in a system sleep. >>> Also add corresponding clock enable path in resume callback. >>> >>> Signed-off-by: Sricharan R >>> Signed-off-by: Archit Taneja >>> [vivek: rework for clock and pm ops] >>> Signed-off-by: Vivek Gautam >>> Reviewed-by: Tomasz Figa >>> --- >>> >>> Changes since v12: >>> - Added pm sleep .suspend callback. This disables the clocks. >>> - Added corresponding change to enable clocks in .resume >>>pm sleep callback. >>> >>> drivers/iommu/arm-smmu.c | 75 >>> ++-- >>> 1 file changed, 73 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c >>> index c73cfce1ccc0..9138a6fffe04 100644 >>> --- a/drivers/iommu/arm-smmu.c >>> +++ b/drivers/iommu/arm-smmu.c [snip] >>> platform_device *pdev) >>> arm_smmu_device_remove(pdev); >>> } >>> +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) >>> +{ >>> + struct arm_smmu_device *smmu = dev_get_drvdata(dev); >>> + >>> + return clk_bulk_enable(smmu->num_clks, smmu->clks); >> >> >> If there's a power domain being automatically switched by genpd then we need >> a reset here because we may have lost state entirely. 
Since I remembered the >> otherwise-useless GPU SMMU on Juno is in a separate power domain, I gave it >> a poking via sysfs with some debug stuff to dump sCR0 in these callbacks, >> and the problem is clear: >> >> ... >> [4.625551] arm-smmu 2b40.iommu: genpd_runtime_suspend() >> [4.631163] arm-smmu 2b40.iommu: arm_smmu_runtime_suspend: 0x00201936 >> [4.637897] arm-smmu 2b40.iommu: suspend latency exceeded, 6733980 ns >> [ 21.566983] arm-smmu 2b40.iommu: genpd_runtime_resume() >> [ 21.584796] arm-smmu 2b40.iommu: arm_smmu_runtime_resume: 0x00220101 >> [ 21.591452] arm-smmu 2b40.iommu: resume latency exceeded, 6658020 ns >> ... > > Qualcomm SoCs have retention enabled for SMMU registers so they don't > lose state. > ... > [ 256.013367] arm-smmu b4.arm,smmu: arm_smmu_runtime_suspend > SCR0 = 0x201e36 > [ 256.013367] > [ 256.019160] arm-smmu b4.arm,smmu: arm_smmu_runtime_resume > SCR0 = 0x201e36 > [ 256.019160] > [ 256.027368] arm-smmu b4.arm,smmu: arm_smmu_runtime_suspend > SCR0 = 0x201e36 > [ 256.027368] > [ 256.036786] arm-smmu b4.arm,smmu: arm_smmu_runtime_resume > SCR0 = 0x201e36 > ... > > However after adding arm_smmu_device_reset() in runtime_resume() I observe > some performance degradation when kill an instance of 'kmscube' and > start it again. > The launch time with arm_smmu_device_reset() in runtime_resume() change is > more. > Could this be because of frequent TLB invalidation and sync? Some more information that i gathered. On Qcom SoCs besides the registers retention, TCU invalidates TLB cache on a CX power collapse exit, which is the system wide suspend case. The arm-smmu software is not aware of this CX power collapse / auto-invalidation. So wouldn't doing an explicit TLB invalidations during runtime resume be detrimental to performance? I have one more doubt here - We do runtime power cycle around arm_smmu_map/unmap() too. Now during map/unmap we selectively do TLB maintenance (either tlb_sync or tlb_add_flush). 
But with runtime pm we want to do TLBIALL*. Is that a problem? Best regards Vivek > > Best regards > Vivek [snip] -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu