Re: [PATCH 05/15] iommu/arm-smmu: Split arm_smmu_tlb_inv_range_nosync()

2019-08-15 Thread Robin Murphy

On 15/08/2019 11:56, Will Deacon wrote:
> On Fri, Aug 09, 2019 at 06:07:42PM +0100, Robin Murphy wrote:
> > Since we now use separate iommu_gather_ops for stage 1 and stage 2
> > contexts, we may as well divide up the monolithic callback into its
> > respective stage 1 and stage 2 parts.
> > 
> > Signed-off-by: Robin Murphy 
> > ---
> >   drivers/iommu/arm-smmu.c | 66 ++--
> >   1 file changed, 37 insertions(+), 29 deletions(-)
> 
> This will conflict with my iommu API batching stuff, but I can sort that
> out if/when it gets queued by Joerg.
> 
> > -   if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
> > -   iova &= ~12UL;
> > -   iova |= cfg->asid;
> > -   do {
> > -   writel_relaxed(iova, reg);
> > -   iova += granule;
> > -   } while (size -= granule);
> > -   } else {
> > -   iova >>= 12;
> > -   iova |= (u64)cfg->asid << 48;
> > -   do {
> > -   writeq_relaxed(iova, reg);
> > -   iova += granule >> 12;
> > -   } while (size -= granule);
> > -   }
> > -   } else {
> > -   reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
> > - ARM_SMMU_CB_S2_TLBIIPAS2;
> > -   iova >>= 12;
> > +   if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
> > +   iova &= ~12UL;
> 
> Oh baby. You should move code around more often, so I'm forced to take a
> second look!

Oh dear lord... The worst part is that I do now remember seeing this and
having a similar moment of disbelief, but apparently I was easily
distracted with rebasing and forgot about it too quickly :(

> Can you cook a fix for this that we can route separately, please? I see
> it also made its way into qcom_iommu.c...

Sure, I'll split it out to the front of the series for the moment.

Robin.


Re: [PATCH 05/15] iommu/arm-smmu: Split arm_smmu_tlb_inv_range_nosync()

2019-08-15 Thread Will Deacon
On Fri, Aug 09, 2019 at 06:07:42PM +0100, Robin Murphy wrote:
> Since we now use separate iommu_gather_ops for stage 1 and stage 2
> contexts, we may as well divide up the monolithic callback into its
> respective stage 1 and stage 2 parts.
> 
> Signed-off-by: Robin Murphy 
> ---
>  drivers/iommu/arm-smmu.c | 66 ++--
>  1 file changed, 37 insertions(+), 29 deletions(-)

This will conflict with my iommu API batching stuff, but I can sort that
out if/when it gets queued by Joerg.

> - if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
> - iova &= ~12UL;
> - iova |= cfg->asid;
> - do {
> - writel_relaxed(iova, reg);
> - iova += granule;
> - } while (size -= granule);
> - } else {
> - iova >>= 12;
> - iova |= (u64)cfg->asid << 48;
> - do {
> - writeq_relaxed(iova, reg);
> - iova += granule >> 12;
> - } while (size -= granule);
> - }
> - } else {
> - reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
> -   ARM_SMMU_CB_S2_TLBIIPAS2;
> - iova >>= 12;
> + if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
> + iova &= ~12UL;

Oh baby. You should move code around more often, so I'm forced to take a
second look!
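
To spell out the damage: ~12UL is the complement of the value 12, so it
only clears bits 2 and 3 instead of masking off the page offset. A quick
userspace sketch (the ~0xfffUL mask below is my reading of the intent,
not necessarily the final fix):

	#include <stdio.h>

	int main(void)
	{
		unsigned long iova = 0x12345678UL;

		/* ~12UL == 0x...fff3: clears only bits 2 and 3 */
		printf("iova & ~12UL    = %#lx\n", iova & ~12UL);    /* 0x12345670 */

		/* presumed intent: drop VA[11:0] before OR-ing in the ASID */
		printf("iova & ~0xfffUL = %#lx\n", iova & ~0xfffUL); /* 0x12345000 */
		return 0;
	}

In practice the IOVA is page-aligned on entry, so the bogus mask is a
no-op for the common case, which is presumably how it survived this long.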

Can you cook a fix for this that we can route separately, please? I see
it also made its way into qcom_iommu.c...

Will


[PATCH 05/15] iommu/arm-smmu: Split arm_smmu_tlb_inv_range_nosync()

2019-08-09 Thread Robin Murphy
Since we now use separate iommu_gather_ops for stage 1 and stage 2
contexts, we may as well divide up the monolithic callback into its
respective stage 1 and stage 2 parts.

Signed-off-by: Robin Murphy 
---
 drivers/iommu/arm-smmu.c | 66 ++--
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 463bc8d98adb..a681e000e704 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -490,46 +490,54 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)
arm_smmu_tlb_sync_global(smmu);
 }
 
-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
+ size_t granule, bool leaf, void *cookie)
 {
struct arm_smmu_domain *smmu_domain = cookie;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-   bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
-   void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
+   void __iomem *reg = ARM_SMMU_CB(smmu, cfg->cbndx);
 
-   if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+   if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
wmb();
 
-   if (stage1) {
-   reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
+   reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
 
-   if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
-   iova &= ~12UL;
-   iova |= cfg->asid;
-   do {
-   writel_relaxed(iova, reg);
-   iova += granule;
-   } while (size -= granule);
-   } else {
-   iova >>= 12;
-   iova |= (u64)cfg->asid << 48;
-   do {
-   writeq_relaxed(iova, reg);
-   iova += granule >> 12;
-   } while (size -= granule);
-   }
-   } else {
-   reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
- ARM_SMMU_CB_S2_TLBIIPAS2;
-   iova >>= 12;
+   if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
+   iova &= ~12UL;
+   iova |= cfg->asid;
do {
-   smmu_write_atomic_lq(iova, reg);
+   writel_relaxed(iova, reg);
+   iova += granule;
+   } while (size -= granule);
+   } else {
+   iova >>= 12;
+   iova |= (u64)cfg->asid << 48;
+   do {
+   writeq_relaxed(iova, reg);
iova += granule >> 12;
} while (size -= granule);
}
 }
 
+static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
+ size_t granule, bool leaf, void *cookie)
+{
+   struct arm_smmu_domain *smmu_domain = cookie;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   void __iomem *reg = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
+
+   if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+   wmb();
+
+   reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
+   iova >>= 12;
+   do {
+   smmu_write_atomic_lq(iova, reg);
+   iova += granule >> 12;
+   } while (size -= granule);
+}
+
 /*
  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
  * almost negligible, but the benefit of getting the first one in as far ahead
@@ -550,13 +558,13 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
 
 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
.tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
-   .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
+   .tlb_add_flush  = arm_smmu_tlb_inv_range_s1,
.tlb_sync   = arm_smmu_tlb_sync_context,
 };
 
 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
.tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
-   .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
+   .tlb_add_flush  = arm_smmu_tlb_inv_range_s2,
.tlb_sync   = arm_smmu_tlb_sync_context,
 };
 
-- 
2.21.0.dirty
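
For context, nothing here changes how a domain ends up with one ops table
or the other: they are hooked up per-domain when the context bank is
initialised. A simplified sketch of that selection (smmu_domain->tlb_ops
matches the driver's existing field, but this standalone helper is
illustrative rather than part of the patch):

	static void arm_smmu_select_tlb_ops(struct arm_smmu_domain *smmu_domain)
	{
		switch (smmu_domain->stage) {
		case ARM_SMMU_DOMAIN_S1:
			/* stage 1: per-ASID invalidation via TLBIVA(L) */
			smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
			break;
		default:
			/*
			 * stage 2 and nested: per-IPA invalidation via
			 * TLBIIPAS2(L); SMMUv1 instead uses the TLBIVMID-based
			 * ops mentioned in the comment above.
			 */
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
			break;
		}
	}

The chosen table is then handed to io-pgtable through io_pgtable_cfg, so
the stage-specific range callback fires on every unmap.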

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu