Re: [PATCH 4/5] iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling
Hi Robin, On 8/14/2018 10:29 PM, Robin Murphy wrote: On 14/08/18 11:55, Vivek Gautam wrote: Cleanup to re-use some of the stuff Signed-off-by: Vivek Gautam --- drivers/iommu/arm-smmu.c | 32 +--- 1 file changed, 25 insertions(+), 7 deletions(-) I think the overall diffstat would be an awful lot smaller if the erratum workaround just has its own readl_poll_timeout() as it does in the vendor kernel. The burst-polling loop is for minimising latency in high-throughput situations, and if you're in a workaround which has to lock *every* register write and issue two firmware calls around each sync I think you're already well out of that game. Sorry for the delayed response. I was on vacation. I will fix this in my next version by adding the separate read_poll_timeout() for the erratum WA. diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 32e86df80428..75c146751c87 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -391,21 +391,31 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) clear_bit(idx, map); } -/* Wait for any pending TLB invalidations to complete */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, - void __iomem *sync, void __iomem *status) +static int __arm_smmu_tlb_sync_wait(void __iomem *status) { unsigned int spin_cnt, delay; - writel_relaxed(0, sync); for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE)) - return; + return 0; cpu_relax(); } udelay(delay); } + + return -EBUSY; +} + +/* Wait for any pending TLB invalidations to complete */ +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, + void __iomem *sync, void __iomem *status) +{ + writel_relaxed(0, sync); + + if (!__arm_smmu_tlb_sync_wait(status)) + return; + dev_err_ratelimited(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n"); } @@ -461,8 +471,9 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) arm_smmu_tlb_sync_global(smmu); } -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void __arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = _domain->cfg; @@ -498,6 +509,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } } +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) +{ + __arm_smmu_tlb_inv_range_nosync(iova, size, granule, leaf, cookie); +} + AFAICS even after patch #5 this does absolutely nothing except make the code needlessly harder to read :( Sure, I will rather call arm_smmu_tlb_inv_range_nosync() from qcom_errata_tlb_inv_range_nosync() then make this change. Thanks for the review. Best regards Vivek Robin. /* * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears * almost negligible, but the benefit of getting the first one in as far ahead ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/5] iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling
On 14/08/18 11:55, Vivek Gautam wrote: Cleanup to re-use some of the stuff Signed-off-by: Vivek Gautam --- drivers/iommu/arm-smmu.c | 32 +--- 1 file changed, 25 insertions(+), 7 deletions(-) I think the overall diffstat would be an awful lot smaller if the erratum workaround just has its own readl_poll_timeout() as it does in the vendor kernel. The burst-polling loop is for minimising latency in high-throughput situations, and if you're in a workaround which has to lock *every* register write and issue two firmware calls around each sync I think you're already well out of that game. diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 32e86df80428..75c146751c87 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -391,21 +391,31 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) clear_bit(idx, map); } -/* Wait for any pending TLB invalidations to complete */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, - void __iomem *sync, void __iomem *status) +static int __arm_smmu_tlb_sync_wait(void __iomem *status) { unsigned int spin_cnt, delay; - writel_relaxed(0, sync); for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE)) - return; + return 0; cpu_relax(); } udelay(delay); } + + return -EBUSY; +} + +/* Wait for any pending TLB invalidations to complete */ +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, + void __iomem *sync, void __iomem *status) +{ + writel_relaxed(0, sync); + + if (!__arm_smmu_tlb_sync_wait(status)) + return; + dev_err_ratelimited(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n"); } @@ -461,8 +471,9 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) arm_smmu_tlb_sync_global(smmu); } -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void __arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = _domain->cfg; @@ -498,6 +509,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } } +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) +{ + __arm_smmu_tlb_inv_range_nosync(iova, size, granule, leaf, cookie); +} + AFAICS even after patch #5 this does absolutely nothing except make the code needlessly harder to read :( Robin. /* * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears * almost negligible, but the benefit of getting the first one in as far ahead ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/5] iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling
On 8/14/2018 5:10 PM, Will Deacon wrote: On Tue, Aug 14, 2018 at 04:25:27PM +0530, Vivek Gautam wrote: Cleanup to re-use some of the stuff Maybe we should factor a few of the other bits whilst we're here. Sure, do you want me to refactor anything besides this change? Or just write a proper commit message ;) My bad. I should have written a more descriptive commit message. :| Will change this. Best regards Vivek Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/5] iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling
On Tue, Aug 14, 2018 at 04:25:27PM +0530, Vivek Gautam wrote: > Cleanup to re-use some of the stuff Maybe we should factor a few of the other bits whilst we're here. Or just write a proper commit message ;) Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 4/5] iommu/arm-smmu: Make way to add Qcom's smmu-500 errata handling
Cleanup to re-use some of the stuff Signed-off-by: Vivek Gautam --- drivers/iommu/arm-smmu.c | 32 +--- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 32e86df80428..75c146751c87 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -391,21 +391,31 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) clear_bit(idx, map); } -/* Wait for any pending TLB invalidations to complete */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, - void __iomem *sync, void __iomem *status) +static int __arm_smmu_tlb_sync_wait(void __iomem *status) { unsigned int spin_cnt, delay; - writel_relaxed(0, sync); for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE)) - return; + return 0; cpu_relax(); } udelay(delay); } + + return -EBUSY; +} + +/* Wait for any pending TLB invalidations to complete */ +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, + void __iomem *sync, void __iomem *status) +{ + writel_relaxed(0, sync); + + if (!__arm_smmu_tlb_sync_wait(status)) + return; + dev_err_ratelimited(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n"); } @@ -461,8 +471,9 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) arm_smmu_tlb_sync_global(smmu); } -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void __arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = _domain->cfg; @@ -498,6 +509,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } } +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, + void *cookie) +{ + __arm_smmu_tlb_inv_range_nosync(iova, size, granule, leaf, cookie); +} + /* * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears * almost negligible, but the benefit of getting the first one in as far ahead -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu