RE: [PATCH 3/3] iommu/ipmmu-vmsa: Add utlb_offset_base

2019-10-14 Thread Yoshihiro Shimoda
Hi Geert-san,

> From: Geert Uytterhoeven, Sent: Friday, October 11, 2019 9:32 PM
> 
> Hi Shimoda-san,
> 
> On Wed, Oct 9, 2019 at 10:27 AM Yoshihiro Shimoda
>  wrote:
> > Since we will have changed memory mapping of the IPMMU in the future,
> > this patch adds a utlb_offset_base into struct ipmmu_features
> > for IMUCTR and IMUASID registers.
> > No behavior change.
> >
> > Signed-off-by: Yoshihiro Shimoda 
> 
> Thanks for your patch!
> 
> > --- a/drivers/iommu/ipmmu-vmsa.c
> > +++ b/drivers/iommu/ipmmu-vmsa.c
> > @@ -52,6 +52,7 @@ struct ipmmu_features {
> > bool cache_snoop;
> > u32 ctx_offset_base;
> > u32 ctx_offset_stride;
> > +   u32 utlb_offset_base;
> >  };
> >
> >  struct ipmmu_vmsa_device {
> > @@ -285,6 +286,11 @@ static void ipmmu_ctx_write_all(struct 
> > ipmmu_vmsa_domain *domain,
> > ipmmu_ctx_write_root(domain, reg, data);
> >  }
> >
> > +static u32 ipmmu_utlb_reg(struct ipmmu_vmsa_device *mmu, unsigned int reg)
> > +{
> > +   return mmu->features->utlb_offset_base + reg;
> > +}
> > +
> >  /* 
> > -
> >   * TLB and microTLB Management
> >   */
> > @@ -330,9 +336,9 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain 
> > *domain,
> >  */
> >
> > /* TODO: What should we set the ASID to ? */
> > -   ipmmu_write(mmu, IMUASID(utlb), 0);
> > +   ipmmu_write(mmu, ipmmu_utlb_reg(mmu, IMUASID(utlb)), 0);
> > /* TODO: Do we need to flush the microTLB ? */
> > -   ipmmu_write(mmu, IMUCTR(utlb),
> > +   ipmmu_write(mmu, ipmmu_utlb_reg(mmu, IMUCTR(utlb)),
> > IMUCTR_TTSEL_MMU(domain->context_id) | IMUCTR_FLUSH |
> > IMUCTR_MMUEN);
> 
> Like in [PATCH 2/3], I think providing two helpers would make this more
> readable:
> 
> ipmmu_imuasid_write(mmu, utlb, 0);
> ipmmu_imuctr_write(mmu, utlb, data);

I agree. I'll fix it.

Best regards,
Yoshihiro Shimoda

> Gr{oetje,eeting}s,
> 
> Geert
> 
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- 
> ge...@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like 
> that.
> -- Linus Torvalds
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH 2/3] iommu/ipmmu-vmsa: Calculate context registers' offset instead of a macro

2019-10-14 Thread Yoshihiro Shimoda
Hi Geert-san,

> From: Geert Uytterhoeven, Sent: Friday, October 11, 2019 9:29 PM
> 
> Hi Shimoda-san,
> 
> On Wed, Oct 9, 2019 at 10:27 AM Yoshihiro Shimoda
>  wrote:
> > Since we will have changed memory mapping of the IPMMU in the future,
> > this patch uses ipmmu_features values instead of a macro to
> > calculate context registers offset. No behavior change.
> >
> > Signed-off-by: Yoshihiro Shimoda 
> 
> Thanks for your patch!
> 
> > --- a/drivers/iommu/ipmmu-vmsa.c
> > +++ b/drivers/iommu/ipmmu-vmsa.c
> > @@ -50,6 +50,8 @@ struct ipmmu_features {
> > bool twobit_imttbcr_sl0;
> > bool reserved_context;
> > bool cache_snoop;
> > +   u32 ctx_offset_base;
> > +   u32 ctx_offset_stride;
> >  };
> >
> >  struct ipmmu_vmsa_device {
> > @@ -99,8 +101,6 @@ static struct ipmmu_vmsa_device *to_ipmmu(struct device 
> > *dev)
> >
> >  #define IM_NS_ALIAS_OFFSET 0x800
> >
> > -#define IM_CTX_SIZE0x40
> > -
> >  #define IMCTR  0x
> >  #define IMCTR_TRE  (1 << 17)
> >  #define IMCTR_AFE  (1 << 16)
> > @@ -253,18 +253,25 @@ static void ipmmu_write(struct ipmmu_vmsa_device 
> > *mmu, unsigned int offset,
> > iowrite32(data, mmu->base + offset);
> >  }
> >
> > +static u32 ipmmu_ctx_reg(struct ipmmu_vmsa_device *mmu, unsigned int 
> > context_id,
> > +unsigned int reg)
> > +{
> > +   return mmu->features->ctx_offset_base +
> > +  context_id * mmu->features->ctx_offset_stride + reg;
> > +}
> > +
> >  static u32 ipmmu_ctx_read_root(struct ipmmu_vmsa_domain *domain,
> >unsigned int reg)
> >  {
> > return ipmmu_read(domain->mmu->root,
> > - domain->context_id * IM_CTX_SIZE + reg);
> > + ipmmu_ctx_reg(domain->mmu, domain->context_id, 
> > reg));
> 
> For consistency:
> 
> ipmmu_ctx_reg(domain->mmu->root, ...)
> 
> but in practice the features for domain->mmu and domain->mmu->root are
> identical anyway.
> 
> >  }
> >
> >  static void ipmmu_ctx_write_root(struct ipmmu_vmsa_domain *domain,
> >  unsigned int reg, u32 data)
> >  {
> > ipmmu_write(domain->mmu->root,
> > -   domain->context_id * IM_CTX_SIZE + reg, data);
> > +   ipmmu_ctx_reg(domain->mmu, domain->context_id, reg), 
> > data);
> 
> Likewise:
> 
> ipmmu_ctx_reg(domain->mmu->root, ...)?

Thank you for the comments! Yes, we can pass domain->mmu->root to ipmmu_ctx_reg()
because ipmmu_ctx_reg() only uses mmu->features.

> I find these ipmmu_{read,write}() a bit hard too read, with passing the
> mmu to both ipmmu_{read,write}() and ipmmu_ctx_reg().

I completely agree.

> What do you think about providing two helpers ipmmu_ctx_{read,write}(),
> so all users can just use e.g.
> 
> ipmmu_ctx_write(mmu, context_id, reg, data);
> 
> instead of
> 
> ipmmu_write(mmu, ipmmu_ctx_reg(mmu, context_id, reg), data);
> 
> ?

I think so. I'll fix it. Perhaps, I'll make a patch which changes
the function name at first.

Best regards,
Yoshihiro Shimoda

> Gr{oetje,eeting}s,
> 
> Geert
> 
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- 
> ge...@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like 
> that.
> -- Linus Torvalds


Re: [PATCH v3 3/7] iommu/mediatek: Use gather to achieve the tlb range flush

2019-10-14 Thread Yong Wu
On Mon, 2019-10-14 at 15:21 +0100, Robin Murphy wrote:
> On 14/10/2019 07:38, Yong Wu wrote:
> > Use the iommu_gather mechanism to achieve the tlb range flush.
> > Gather the iova range in the "tlb_add_page", then flush the merged iova
> > range in iotlb_sync.
> > 
> > Note: If iotlb_sync comes from iommu_iotlb_gather_add_page, we have to
> > avoid retry the lock since the spinlock have already been acquired.
> 
> I think this could probably be even simpler - once the actual 
> register-poking is all confined to mtk_iommu_tlb_sync(), you should be 
> able get rid of the per-domain locking in map/unmap and just have a 
> single per-IOMMU lock to serialise syncs. The io-pgtable code itself 
> hasn't needed external locking for a while now.

This is much simpler! Thanks very much. I will try this.

The only concern is that there would then be no lock in iova_to_phys; maybe
it should use the new lock instead.

> 
> Robin.
> 
> > Suggested-by: Tomasz Figa 
> > Signed-off-by: Yong Wu 
> > ---
> > 1) This is the special case backtrace:
> > 
> >   mtk_iommu_iotlb_sync+0x50/0xa0
> >   mtk_iommu_tlb_flush_page_nosync+0x5c/0xd0
> >   __arm_v7s_unmap+0x174/0x598
> >   arm_v7s_unmap+0x30/0x48
> >   mtk_iommu_unmap+0x50/0x78
> >   __iommu_unmap+0xa4/0xf8
> > 
> > 2) The checking "if (gather->start == ULONG_MAX) return;" also is
> > necessary. It will happened when unmap only go to _flush_walk, then
> > enter this tlb_sync.
> > ---
> >   drivers/iommu/mtk_iommu.c | 29 +
> >   drivers/iommu/mtk_iommu.h |  1 +
> >   2 files changed, 26 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > index 5f594d6..8712afc 100644
> > --- a/drivers/iommu/mtk_iommu.c
> > +++ b/drivers/iommu/mtk_iommu.c
> > @@ -234,7 +234,12 @@ static void mtk_iommu_tlb_flush_page_nosync(struct 
> > iommu_iotlb_gather *gather,
> > unsigned long iova, size_t granule,
> > void *cookie)
> >   {
> > -   mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
> > +   struct mtk_iommu_data *data = cookie;
> > +   struct iommu_domain *domain = &data->m4u_dom->domain;
> > +
> > +   data->is_in_tlb_gather_add_page = true;
> > +   iommu_iotlb_gather_add_page(domain, gather, iova, granule);
> > +   data->is_in_tlb_gather_add_page = false;
> >   }
> >   
> >   static const struct iommu_flush_ops mtk_iommu_flush_ops = {
> > @@ -453,12 +458,28 @@ static void mtk_iommu_flush_iotlb_all(struct 
> > iommu_domain *domain)
> >   static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
> >  struct iommu_iotlb_gather *gather)
> >   {
> > +   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> > struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> > +   bool is_in_gather = data->is_in_tlb_gather_add_page;
> > +   size_t length = gather->end - gather->start;
> > unsigned long flags;
> >   
> > -   spin_lock_irqsave(&dom->pgtlock, flags);
> > -   mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
> > -   spin_unlock_irqrestore(&dom->pgtlock, flags);
> > +   if (gather->start == ULONG_MAX)
> > +   return;
> > +
> > +   /*
> > +* Avoid acquire the lock when it's in gather_add_page since the lock
> > +* has already been held.
> > +*/
> > +   if (!is_in_gather)
> > +   spin_lock_irqsave(&dom->pgtlock, flags);
> > +
> > +   mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
> > +  false, data);
> > +   mtk_iommu_tlb_sync(data);
> > +
> > +   if (!is_in_gather)
> > +   spin_unlock_irqrestore(&dom->pgtlock, flags);
> >   }
> >   
> >   static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
> > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> > index fc0f16e..d29af1d 100644
> > --- a/drivers/iommu/mtk_iommu.h
> > +++ b/drivers/iommu/mtk_iommu.h
> > @@ -58,6 +58,7 @@ struct mtk_iommu_data {
> > struct iommu_group  *m4u_group;
> > boolenable_4GB;
> > booltlb_flush_active;
> > +   boolis_in_tlb_gather_add_page;
> >   
> > struct iommu_device iommu;
> > const struct mtk_iommu_plat_data *plat_data;
> > 




Re: [PATCH v3 6/7] iommu/mediatek: Use writel for TLB range invalidation

2019-10-14 Thread Yong Wu
On Mon, 2019-10-14 at 15:04 +0100, Robin Murphy wrote:
> On 14/10/2019 07:38, Yong Wu wrote:
> > Use writel for the register F_MMU_INV_RANGE which is for triggering the
> > HW work. We expect all the setting(iova_start/iova_end...) have already
> > been finished before F_MMU_INV_RANGE.
> 
> For Arm CPUs, these registers should be mapped as Device memory, 
> therefore the same-peripheral rule should implicitly enforce that the 
> accesses are made in program order, hence you're unlikely to have seen a 
> problem in reality. However, the logical reasoning for the change seems 
> valid in general, so I'd argue that it's still worth making if only for 
> the sake of good practice:
> 
> Acked-by: Robin Murphy 

Thanks very much for the review. If this patch is not so necessary, I will
remove it this time.

> 
> > Signed-off-by: Anan.Sun 
> > Signed-off-by: Yong Wu 
> > ---
> >   drivers/iommu/mtk_iommu.c | 3 +--
> >   1 file changed, 1 insertion(+), 2 deletions(-)
> > 
> > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > index dbbacc3..d285457 100644
> > --- a/drivers/iommu/mtk_iommu.c
> > +++ b/drivers/iommu/mtk_iommu.c
> > @@ -187,8 +187,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned 
> > long iova, size_t size,
> > writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
> > writel_relaxed(iova + size - 1,
> >data->base + REG_MMU_INVLD_END_A);
> > -   writel_relaxed(F_MMU_INV_RANGE,
> > -  data->base + REG_MMU_INVALIDATE);
> > +   writel(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
> >   
> > /* tlb sync */
> > ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
> > 
> 
> ___
> Linux-mediatek mailing list
> linux-media...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-mediatek




Re: [PATCH v3 4/7] iommu/mediatek: Delete the leaf in the tlb flush

2019-10-14 Thread Yong Wu
On Mon, 2019-10-14 at 15:22 +0100, Robin Murphy wrote:
> On 14/10/2019 07:38, Yong Wu wrote:
> > In our tlb range flush, we don't care the "leaf". Remove it to simplify
> > the code. no functional change.
> 
> Presumably you don't care about the granule either?

Yes. I only keep "granule" to satisfy the signature of "tlb_flush_walk",
so there is no need to add a new helper function.

> 
> Robin.
> 
> > Signed-off-by: Yong Wu 
> > ---
> >   drivers/iommu/mtk_iommu.c | 16 
> >   1 file changed, 4 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > index 8712afc..19f936c 100644
> > --- a/drivers/iommu/mtk_iommu.c
> > +++ b/drivers/iommu/mtk_iommu.c
> > @@ -174,8 +174,7 @@ static void mtk_iommu_tlb_flush_all(void *cookie)
> >   }
> >   
> >   static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t 
> > size,
> > -  size_t granule, bool leaf,
> > -  void *cookie)
> > +  size_t granule, void *cookie)
> >   {
> > struct mtk_iommu_data *data = cookie;
> >   
> > @@ -219,14 +218,7 @@ static void mtk_iommu_tlb_sync(void *cookie)
> >   static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
> >  size_t granule, void *cookie)
> >   {
> > -   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
> > -   mtk_iommu_tlb_sync(cookie);
> > -}
> > -
> > -static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
> > -size_t granule, void *cookie)
> > -{
> > -   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
> > +   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, cookie);
> > mtk_iommu_tlb_sync(cookie);
> >   }
> >   
> > @@ -245,7 +237,7 @@ static void mtk_iommu_tlb_flush_page_nosync(struct 
> > iommu_iotlb_gather *gather,
> >   static const struct iommu_flush_ops mtk_iommu_flush_ops = {
> > .tlb_flush_all = mtk_iommu_tlb_flush_all,
> > .tlb_flush_walk = mtk_iommu_tlb_flush_walk,
> > -   .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
> > +   .tlb_flush_leaf = mtk_iommu_tlb_flush_walk,
> > .tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
> >   };
> >   
> > @@ -475,7 +467,7 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain 
> > *domain,
> > spin_lock_irqsave(&dom->pgtlock, flags);
> >   
> > mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
> > -  false, data);
> > +  data);
> > mtk_iommu_tlb_sync(data);
> >   
> > if (!is_in_gather)
> > 
> 
> ___
> Linux-mediatek mailing list
> linux-media...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-mediatek


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH 1/3] iommu/ipmmu-vmsa: Remove some unused register declarations

2019-10-14 Thread Yoshihiro Shimoda
Hi Geert-san,

> From: Geert Uytterhoeven, Sent: Friday, October 11, 2019 9:11 PM
> 
> Hi Shimoda-san,
> 
> Thanks for your patch!
> 
> On Wed, Oct 9, 2019 at 10:27 AM Yoshihiro Shimoda
>  wrote:
> > To support different registers memory mapping hardware easily
> > in the future, this patch removes some unused register
> > declarations.
> >
> > Signed-off-by: Yoshihiro Shimoda 
> 
> Reviewed-by: Geert Uytterhoeven 

Thank you for your review!

> While I can confirm the removed definitions are unused, they were
> still valid (but see comments below).
> Perhaps it would be better to add comments, to state clearly to which
> SoCs or SoC families they apply?  Or do you think this would be futile,
> and would add too much clutter to the source file in the near future?

I think adding comments to the declarations is better to avoid
incorrect implementations in the future. So, I'll make such an incremental patch.

> > --- a/drivers/iommu/ipmmu-vmsa.c
> > +++ b/drivers/iommu/ipmmu-vmsa.c
> > @@ -104,8 +104,6 @@ static struct ipmmu_vmsa_device *to_ipmmu(struct device 
> > *dev)
> >  #define IMCTR  0x
> >  #define IMCTR_TRE  (1 << 17)
> >  #define IMCTR_AFE  (1 << 16)
> > -#define IMCTR_RTSEL_MASK   (3 << 4)
> 
> FWIW, this is valid for R-Car Gen2 only.  On R-Car Gen3, the field
> contains 3 bits.

That's correct.

Best regards,
Yoshihiro Shimoda

> Gr{oetje,eeting}s,
> 
> Geert
> 
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- 
> ge...@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like 
> that.
> -- Linus Torvalds
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH 6/6] ACPI/IORT: Drop code to set the PMCG software-defined model

2019-10-14 Thread Hanjun Guo
On 2019/9/30 22:33, John Garry wrote:
> Now that we can identify a PMCG implementation from the parent SMMUv3
> IIDR, drop all the code to match based on the ACPI OEM ID.
> 
> Signed-off-by: John Garry 
> ---
>  drivers/acpi/arm64/iort.c | 35 +--
>  include/linux/acpi_iort.h |  8 
>  2 files changed, 1 insertion(+), 42 deletions(-)
> 
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index 0b687520c3e7..d04888cb8cff 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -1377,27 +1377,6 @@ static void __init 
> arm_smmu_v3_pmcg_init_resources(struct resource *res,
>  ACPI_EDGE_SENSITIVE, [2]);
>  }
>  
> -static struct acpi_platform_list pmcg_plat_info[] __initdata = {
> - /* HiSilicon Hip08 Platform */
> - {"HISI  ", "HIP08   ", 0, ACPI_SIG_IORT, greater_than_or_equal,
> -  "Erratum #162001800", IORT_SMMU_V3_PMCG_HISI_HIP08},
> - { }
> -};
> -
> -static int __init arm_smmu_v3_pmcg_add_platdata(struct platform_device *pdev)
> -{
> - u32 model;
> - int idx;
> -
> - idx = acpi_match_platform_list(pmcg_plat_info);
> - if (idx >= 0)
> - model = pmcg_plat_info[idx].data;
> - else
> - model = IORT_SMMU_V3_PMCG_GENERIC;
> -
> > - return platform_device_add_data(pdev, &model, sizeof(model));
> -}
> -
>  struct iort_dev_config {
>   const char *name;
>   int (*dev_init)(struct acpi_iort_node *node);
> @@ -1408,7 +1387,6 @@ struct iort_dev_config {
>struct acpi_iort_node *node);
>   int (*dev_set_proximity)(struct device *dev,
>   struct acpi_iort_node *node);
> - int (*dev_add_platdata)(struct platform_device *pdev);
>  };
>  
>  static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = {
> @@ -1430,7 +1408,6 @@ static const struct iort_dev_config 
> iort_arm_smmu_v3_pmcg_cfg __initconst = {
>   .name = "arm-smmu-v3-pmcg",
>   .dev_count_resources = arm_smmu_v3_pmcg_count_resources,
>   .dev_init_resources = arm_smmu_v3_pmcg_init_resources,
> - .dev_add_platdata = arm_smmu_v3_pmcg_add_platdata,
>  };
>  
>  static __init const struct iort_dev_config *iort_get_dev_cfg(
> @@ -1494,17 +1471,7 @@ static int __init iort_add_platform_device(struct 
> acpi_iort_node *node,
>   if (ret)
>   goto dev_put;
>  
> - /*
> -  * Platform devices based on PMCG nodes uses platform_data to
> -  * pass the hardware model info to the driver. For others, add
> -  * a copy of IORT node pointer to platform_data to be used to
> -  * retrieve IORT data information.
> -  */
> - if (ops->dev_add_platdata)
> - ret = ops->dev_add_platdata(pdev);
> - else
> > - ret = platform_device_add_data(pdev, &node, sizeof(node));
> -
> > + ret = platform_device_add_data(pdev, &node, sizeof(node));
>   if (ret)
>   goto dev_put;
>  
> diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
> index 8e7e2ec37f1b..7a8961e6a8bb 100644
> --- a/include/linux/acpi_iort.h
> +++ b/include/linux/acpi_iort.h
> @@ -14,14 +14,6 @@
 #define IORT_IRQ_MASK(irq)   (irq & 0xffffffffULL)
 #define IORT_IRQ_TRIGGER_MASK(irq)   ((irq >> 32) & 0xffffffffULL)
>  
> -/*
> - * PMCG model identifiers for use in smmu pmu driver. Please note
> - * that this is purely for the use of software and has nothing to
> - * do with hardware or with IORT specification.
> - */
> -#define IORT_SMMU_V3_PMCG_GENERIC0x /* Generic SMMUv3 PMCG */
> -#define IORT_SMMU_V3_PMCG_HISI_HIP08 0x0001 /* HiSilicon HIP08 PMCG 
> */

Since only HiSilicon platform has such erratum, and I think it works with
both old version of firmware, I'm fine with removing this erratum framework.

Acked-by: Hanjun Guo 

Thanks
Hanjun

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH 1/6] ACPI/IORT: Set PMCG device parent

2019-10-14 Thread Hanjun Guo
Hi John,

On 2019/9/30 22:33, John Garry wrote:
> In the IORT, a PMCG node includes a node reference to its associated
> device.
> 
> Set the PMCG platform device parent device for future referencing.
> 
> For now, we only consider setting for when the associated component is an
> SMMUv3.
> 
> Signed-off-by: John Garry 
> ---
>  drivers/acpi/arm64/iort.c | 34 --
>  1 file changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index 8569b79e8b58..0b687520c3e7 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -1455,7 +1455,7 @@ static __init const struct iort_dev_config 
> *iort_get_dev_cfg(
>   * Returns: 0 on success, <0 failure

...

>   */
>  static int __init iort_add_platform_device(struct acpi_iort_node *node,
> -const struct iort_dev_config *ops)
> +const struct iort_dev_config *ops, 
> struct device *parent)

Since you added a input for this function, could you please update
the comments of this function as well?

>  {
>   struct fwnode_handle *fwnode;
>   struct platform_device *pdev;
> @@ -1466,6 +1466,8 @@ static int __init iort_add_platform_device(struct 
> acpi_iort_node *node,
>   if (!pdev)
>   return -ENOMEM;
>  
> + pdev->dev.parent = parent;
> +
>   if (ops->dev_set_proximity) {
>   ret = ops->dev_set_proximity(>dev, node);
>   if (ret)
> @@ -1573,6 +1575,11 @@ static void __init iort_enable_acs(struct 
> acpi_iort_node *iort_node)
>  static inline void iort_enable_acs(struct acpi_iort_node *iort_node) { }
>  #endif
>  
> +static int iort_fwnode_match(struct device *dev, const void *fwnode)
> +{
> + return dev->fwnode == fwnode;
> +}
> +
>  static void __init iort_init_platform_devices(void)
>  {
>   struct acpi_iort_node *iort_node, *iort_end;
> @@ -1594,11 +1601,34 @@ static void __init iort_init_platform_devices(void)
>   iort_table->length);
>  
>   for (i = 0; i < iort->node_count; i++) {
> + struct device *parent = NULL;
> +
>   if (iort_node >= iort_end) {
>   pr_err("iort node pointer overflows, bad table\n");
>   return;
>   }
>  
> + /* Fixme: handle parent declared in IORT after PMCG */
> + if (iort_node->type == ACPI_IORT_NODE_PMCG) {
> + struct acpi_iort_node *iort_assoc_node;
> + struct acpi_iort_pmcg *pmcg;
> + u32 node_reference;
> +
> + pmcg = (struct acpi_iort_pmcg *)iort_node->node_data;
> +
> + node_reference = pmcg->node_reference;
> + iort_assoc_node = ACPI_ADD_PTR(struct acpi_iort_node, 
> iort,
> +  node_reference);
> +
> + if (iort_assoc_node->type == ACPI_IORT_NODE_SMMU_V3) {
> + struct fwnode_handle *assoc_fwnode;
> +
> + assoc_fwnode = iort_get_fwnode(iort_assoc_node);
> +
> > + parent = bus_find_device(&platform_bus_type, 
> NULL,
> +   assoc_fwnode, iort_fwnode_match);
> + }
> + }

How about using a function to include those new added code to make this
function (iort_init_platform_devices()) a bit cleaner?

>   iort_enable_acs(iort_node);
>  
>   ops = iort_get_dev_cfg(iort_node);
> @@ -1609,7 +1639,7 @@ static void __init iort_init_platform_devices(void)
>  
>   iort_set_fwnode(iort_node, fwnode);
>  
> - ret = iort_add_platform_device(iort_node, ops);
> + ret = iort_add_platform_device(iort_node, ops, parent);

This function is called if ops is valid, so retrieve the parent
can be done before this function I think.

Thanks
Hanjun



Re: [PATCH v2 3/4] iommu/mediatek: Use writel for TLB range invalidation

2019-10-14 Thread Yong Wu
On Mon, 2019-10-14 at 22:11 +0100, Will Deacon wrote:
> On Sat, Oct 12, 2019 at 02:23:47PM +0800, Yong Wu wrote:
> > On Fri, 2019-10-11 at 17:29 +0100, Will Deacon wrote:
> > > On Wed, Oct 09, 2019 at 09:19:02PM +0800, Yong Wu wrote:
> > > > Use writel for the register F_MMU_INV_RANGE which is for triggering the
> > > > HW work. We expect all the setting(iova_start/iova_end...) have already
> > > > been finished before F_MMU_INV_RANGE.
> > > > 
> > > > Signed-off-by: Anan.Sun 
> > > > Signed-off-by: Yong Wu 
> > > > ---
> > > > This is a improvement rather than fixing a issue.
> > > > ---
> > > >  drivers/iommu/mtk_iommu.c | 3 +--
> > > >  1 file changed, 1 insertion(+), 2 deletions(-)
> > > > 
> > > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > > > index 24a13a6..607f92c 100644
> > > > --- a/drivers/iommu/mtk_iommu.c
> > > > +++ b/drivers/iommu/mtk_iommu.c
> > > > @@ -187,8 +187,7 @@ static void mtk_iommu_tlb_add_flush(unsigned long 
> > > > iova, size_t size,
> > > > writel_relaxed(iova, data->base + 
> > > > REG_MMU_INVLD_START_A);
> > > > writel_relaxed(iova + size - 1,
> > > >data->base + REG_MMU_INVLD_END_A);
> > > > -   writel_relaxed(F_MMU_INV_RANGE,
> > > > -  data->base + REG_MMU_INVALIDATE);
> > > > +   writel(F_MMU_INV_RANGE, data->base + 
> > > > REG_MMU_INVALIDATE);
> > > 
> > > I don't understand this change.
> > > 
> > > Why is it an "improvement" and which accesses are you ordering with the
> > > writel?
> > 
> > The register(F_MMU_INV_RANGE) will trigger HW to begin flush range. HW
> > expect the other register iova_start/end/flush_type always is ready
> > before trigger. thus I'd like use writel to guarantee the previous
> > register has been finished.
> 
> Given that these are all MMIO writes to the same device, then
> writel_relaxed() should give you the ordering you need. If you look at
> memory_barriers.txt, it says:
> 
>   | they [readX_relaxed() and writeX_relaxed()] are still guaranteed to
>   | be ordered with respect to other accesses from the same CPU thread
>   | to the same peripheral when operating on __iomem pointers mapped
>   | with the default I/O attributes.

Thanks for this info. I see it now. Then I will delete this patch in the
next version.

> 
> > I didn't see the writel_relaxed cause some error in practice, we only
> > think writel is necessary here in theory. so call it "improvement".
> 
> Ok, but I don't think it's needed in this case.
> 
> Will


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 3/4] iommu/mediatek: Use writel for TLB range invalidation

2019-10-14 Thread Will Deacon
On Sat, Oct 12, 2019 at 02:23:47PM +0800, Yong Wu wrote:
> On Fri, 2019-10-11 at 17:29 +0100, Will Deacon wrote:
> > On Wed, Oct 09, 2019 at 09:19:02PM +0800, Yong Wu wrote:
> > > Use writel for the register F_MMU_INV_RANGE which is for triggering the
> > > HW work. We expect all the setting(iova_start/iova_end...) have already
> > > been finished before F_MMU_INV_RANGE.
> > > 
> > > Signed-off-by: Anan.Sun 
> > > Signed-off-by: Yong Wu 
> > > ---
> > > This is a improvement rather than fixing a issue.
> > > ---
> > >  drivers/iommu/mtk_iommu.c | 3 +--
> > >  1 file changed, 1 insertion(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > > index 24a13a6..607f92c 100644
> > > --- a/drivers/iommu/mtk_iommu.c
> > > +++ b/drivers/iommu/mtk_iommu.c
> > > @@ -187,8 +187,7 @@ static void mtk_iommu_tlb_add_flush(unsigned long 
> > > iova, size_t size,
> > >   writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
> > >   writel_relaxed(iova + size - 1,
> > >  data->base + REG_MMU_INVLD_END_A);
> > > - writel_relaxed(F_MMU_INV_RANGE,
> > > -data->base + REG_MMU_INVALIDATE);
> > > + writel(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
> > 
> > I don't understand this change.
> > 
> > Why is it an "improvement" and which accesses are you ordering with the
> > writel?
> 
> The register(F_MMU_INV_RANGE) will trigger HW to begin flush range. HW
> expect the other register iova_start/end/flush_type always is ready
> before trigger. thus I'd like use writel to guarantee the previous
> register has been finished.

Given that these are all MMIO writes to the same device, then
writel_relaxed() should give you the ordering you need. If you look at
memory_barriers.txt, it says:

  | they [readX_relaxed() and writeX_relaxed()] are still guaranteed to
  | be ordered with respect to other accesses from the same CPU thread
  | to the same peripheral when operating on __iomem pointers mapped
  | with the default I/O attributes.

> I didn't see the writel_relaxed cause some error in practice, we only
> think writel is necessary here in theory. so call it "improvement".

Ok, but I don't think it's needed in this case.

Will


Re: [PATCH RFC 0/5] ARM: Raspberry Pi 4 DMA support

2019-10-14 Thread Catalin Marinas
On Mon, Oct 14, 2019 at 08:31:02PM +0200, Nicolas Saenz Julienne wrote:
> the Raspberry Pi 4 offers up to 4GB of memory, of which only the first
> is DMA capable device wide. This forces us to use of bounce buffers,
> which are currently not very well supported by ARM's custom DMA ops.
> Among other things the current mechanism (see dmabounce.c) isn't
> suitable for high memory. Instead of fixing it, this series introduces a
> way of selecting dma-direct as the default DMA ops provider which allows
> for the Raspberry Pi to make use of swiotlb.

I presume these patches go on top of this series:

http://lkml.kernel.org/r/20190911182546.17094-1-nsaenzjulie...@suse.de

which I queued here:

https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/log/?h=for-next/zone-dma

-- 
Catalin


iommu: amd: Simpify decoding logic for INVALID_PPR_REQUEST event

2019-10-14 Thread Suthikulpanit, Suravee
Reuse existing macro to simplify the code and improve readability.

Cc: Joerg Roedel 
Cc: Gary R Hook 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd_iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index c1cb759..b249aa7 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -617,8 +617,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void 
*__evt)
pasid, address, flags);
break;
case EVENT_TYPE_INV_PPR_REQ:
> -   pasid = ((event[0] >> 16) & 0xFFFF)
> -   | ((event[1] << 6) & 0xF0000);
+   pasid = PPR_PASID(*((u64 *)__evt));
tag = event[1] & 0x03FF;
dev_err(dev, "Event logged [INVALID_PPR_REQUEST 
device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-- 
1.8.3.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


iommu: amd: Fix incorrect PASID decoding from event log

2019-10-14 Thread Suthikulpanit, Suravee
IOMMU Event Log encodes 20-bit PASID for events:
ILLEGAL_DEV_TABLE_ENTRY
IO_PAGE_FAULT
PAGE_TAB_HARDWARE_ERROR
INVALID_DEVICE_REQUEST
as:
PASID[15:0]  = bit 47:32
PASID[19:16] = bit 19:16

Note that INVALID_PPR_REQUEST event has different encoding
from the rest of the events as the following:
PASID[15:0]  = bit 31:16
PASID[19:16] = bit 45:42

So, fixes the decoding logic.

Fixes: d64c0486ed50 ("iommu/amd: Update the PASID information printed to the 
system log")
Cc: Joerg Roedel 
Cc: Gary R Hook 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd_iommu.c   | 5 +++--
 drivers/iommu/amd_iommu_types.h | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 61de819..c1cb759 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -560,7 +560,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void 
*__evt)
 retry:
type= (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
> -   pasid   = PPR_PASID(*(u64 *)&event[0]);
+   pasid   = (event[0] & EVENT_DOMID_MASK_HI) |
+ (event[1] & EVENT_DOMID_MASK_LO);
flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
address = (u64)(((u64)event[3]) << 32) | event[2];
 
@@ -593,7 +594,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void 
*__evt)
address, flags);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
-   dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR 
device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+   dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR 
device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 64edd5a..5a698ad 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -130,8 +130,8 @@
 #define EVENT_TYPE_INV_PPR_REQ 0x9
 #define EVENT_DEVID_MASK   0x
 #define EVENT_DEVID_SHIFT  0
-#define EVENT_DOMID_MASK   0x
-#define EVENT_DOMID_SHIFT  0
+#define EVENT_DOMID_MASK_LO0x
+#define EVENT_DOMID_MASK_HI0xf
 #define EVENT_FLAGS_MASK   0xfff
 #define EVENT_FLAGS_SHIFT  0x10
 
-- 
1.8.3.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2] dt-bindings: iommu: Convert Arm SMMUv3 to DT schema

2019-10-14 Thread Rob Herring
Convert the Arm SMMUv3 binding to the DT schema format.

Cc: Joerg Roedel 
Cc: Mark Rutland 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: iommu@lists.linux-foundation.org
Signed-off-by: Rob Herring 
---
v2:
- Refine interrupt definition based on Robin's comments

 .../devicetree/bindings/iommu/arm,smmu-v3.txt |  77 --
 .../bindings/iommu/arm,smmu-v3.yaml   | 100 ++
 2 files changed, 100 insertions(+), 77 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
 create mode 100644 Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt 
b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
deleted file mode 100644
index c9abbf3e4f68..
--- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-* ARM SMMUv3 Architecture Implementation
-
-The SMMUv3 architecture is a significant departure from previous
-revisions, replacing the MMIO register interface with in-memory command
-and event queues and adding support for the ATS and PRI components of
-the PCIe specification.
-
-** SMMUv3 required properties:
-
-- compatible: Should include:
-
-  * "arm,smmu-v3" for any SMMUv3 compliant
-implementation. This entry should be last in the
-compatible list.
-
-- reg   : Base address and size of the SMMU.
-
-- interrupts: Non-secure interrupt list describing the wired
-  interrupt sources corresponding to entries in
-  interrupt-names. If no wired interrupts are
-  present then this property may be omitted.
-
-- interrupt-names   : When the interrupts property is present, should
-  include the following:
-  * "eventq"- Event Queue not empty
-  * "priq"  - PRI Queue not empty
-  * "cmdq-sync" - CMD_SYNC complete
-  * "gerror"- Global Error activated
-  * "combined"  - The combined interrupt is optional,
- and should only be provided if the
- hardware supports just a single,
- combined interrupt line.
- If provided, then the combined interrupt
- will be used in preference to any others.
-
-- #iommu-cells  : See the generic IOMMU binding described in
-devicetree/bindings/pci/pci-iommu.txt
-  for details. For SMMUv3, must be 1, with each cell
-  describing a single stream ID. All possible stream
-  IDs which a device may emit must be described.
-
-** SMMUv3 optional properties:
-
-- dma-coherent  : Present if DMA operations made by the SMMU (page
-  table walks, stream table accesses etc) are cache
-  coherent with the CPU.
-
-  NOTE: this only applies to the SMMU itself, not
-  masters connected upstream of the SMMU.
-
-- msi-parent: See the generic MSI binding described in
-devicetree/bindings/interrupt-controller/msi.txt
-  for a description of the msi-parent property.
-
-- hisilicon,broken-prefetch-cmd
-: Avoid sending CMD_PREFETCH_* commands to the SMMU.
-
-- cavium,cn9900-broken-page1-regspace
-: Replaces all page 1 offsets used for EVTQ_PROD/CONS,
- PRIQ_PROD/CONS register access with page 0 offsets.
- Set for Cavium ThunderX2 silicon that doesn't support
- SMMU page1 register space.
-
-** Example
-
-smmu@2b40 {
-compatible = "arm,smmu-v3";
-reg = <0x0 0x2b40 0x0 0x2>;
-interrupts = ,
- ,
- ,
- ;
-interrupt-names = "eventq", "priq", "cmdq-sync", "gerror";
-dma-coherent;
-#iommu-cells = <1>;
-msi-parent = < 0xff>;
-};
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml
new file mode 100644
index ..662cbc4592c9
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/arm,smmu-v3.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM SMMUv3 Architecture Implementation
+
+maintainers:
+  - Will Deacon 
+  - Robin Murphy 
+
+description: |+
+  The SMMUv3 architecture is a significant departure from 

[PATCH RFC 4/5] dma/direct: check for overflows in ARM's dma_capable()

2019-10-14 Thread Nicolas Saenz Julienne
The Raspberry Pi 4 has a 1GB ZONE_DMA area starting at address
0x00000000 and a mapping between physical and DMA memory offset by
0xc0000000.  It transpires that, on non LPAE systems, any attempt to
translate physical addresses outside of ZONE_DMA will result in an
overflow. The resulting DMA addresses will not be detected by arm's
dma_capable() as they still fit in the device's DMA mask.

Fix this by failing to validate a DMA address smaller than the lowest
possible DMA address.

Signed-off-by: Nicolas Saenz Julienne 
---
 arch/arm/include/asm/dma-direct.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/include/asm/dma-direct.h 
b/arch/arm/include/asm/dma-direct.h
index b67e5fc1fe43..ee8ad47a14e3 100644
--- a/arch/arm/include/asm/dma-direct.h
+++ b/arch/arm/include/asm/dma-direct.h
@@ -2,6 +2,8 @@
 #ifndef ASM_ARM_DMA_DIRECT_H
 #define ASM_ARM_DMA_DIRECT_H 1
 
+#include 
+
 static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
unsigned int offset = paddr & ~PAGE_MASK;
@@ -21,6 +23,10 @@ static inline bool dma_capable(struct device *dev, 
dma_addr_t addr, size_t size)
if (!dev->dma_mask)
return 0;
 
+   /* Check if address overflowed */
+   if (addr < __phys_to_dma(dev, PFN_UP(min_low_pfn)))
+   return 0;
+
mask = *dev->dma_mask;
 
limit = (mask + 1) & ~mask;
-- 
2.23.0



[PATCH RFC 2/5] ARM: introduce arm_dma_direct

2019-10-14 Thread Nicolas Saenz Julienne
ARM devices might use the arch's custom dma-mapping implementation or
dma-direct/swiotlb depending on how the kernel is built. This is not
good enough as we need to be able to control the device's DMA ops based
on the specific machine configuration.

Centralise control over DMA ops with arm_dma_direct, a global variable
which will be set accordingly during init.

Signed-off-by: Nicolas Saenz Julienne 
---
 arch/arm/include/asm/dma-mapping.h |  3 ++-
 arch/arm/include/asm/dma.h |  2 ++
 arch/arm/mm/dma-mapping.c  | 10 ++
 arch/arm/mm/init.c | 13 +
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/dma-mapping.h 
b/arch/arm/include/asm/dma-mapping.h
index bdd80ddbca34..b19af5c55bee 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -8,6 +8,7 @@
 #include 
 #include 
 
+#include 
 #include 
 
 #include 
@@ -18,7 +19,7 @@ extern const struct dma_map_ops arm_coherent_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-   if (IS_ENABLED(CONFIG_MMU) && !IS_ENABLED(CONFIG_ARM_LPAE))
+   if (IS_ENABLED(CONFIG_MMU) && !arm_dma_direct)
return _dma_ops;
return NULL;
 }
diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
index a81dda65c576..d386719c53cd 100644
--- a/arch/arm/include/asm/dma.h
+++ b/arch/arm/include/asm/dma.h
@@ -14,6 +14,8 @@
(PAGE_OFFSET + arm_dma_zone_size) : 0xUL; })
 #endif
 
+extern bool arm_dma_direct __ro_after_init;
+
 #ifdef CONFIG_ISA_DMA_API
 /*
  * This is used to support drivers written for the x86 ISA DMA API.
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 13ef9f131975..172eea707cf7 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -1100,14 +1101,7 @@ int arm_dma_supported(struct device *dev, u64 mask)
 
 static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
 {
-   /*
-* When CONFIG_ARM_LPAE is set, physical address can extend above
-* 32-bits, which then can't be addressed by devices that only support
-* 32-bit DMA.
-* Use the generic dma-direct / swiotlb ops code in that case, as that
-* handles bounce buffering for us.
-*/
-   if (IS_ENABLED(CONFIG_ARM_LPAE))
+   if (arm_dma_direct)
return NULL;
return coherent ? _coherent_dma_ops : _dma_ops;
 }
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b4be3baa83d4..0a63379a4d1a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -105,8 +105,21 @@ static void __init arm_adjust_dma_zone(unsigned long 
*size, unsigned long *hole,
 }
 #endif
 
+bool arm_dma_direct __ro_after_init;
+EXPORT_SYMBOL(arm_dma_direct);
+
 void __init setup_dma_zone(const struct machine_desc *mdesc)
 {
+   /*
+* When CONFIG_ARM_LPAE is set, physical address can extend above
+* 32-bits, which then can't be addressed by devices that only support
+* 32-bit DMA.
+* Use the generic dma-direct / swiotlb ops code in that case, as that
+* handles bounce buffering for us.
+*/
+   if (IS_ENABLED(CONFIG_ARM_LPAE))
+   arm_dma_direct = true;
+
 #ifdef CONFIG_ZONE_DMA
if (mdesc->dma_zone_size) {
arm_dma_zone_size = mdesc->dma_zone_size;
-- 
2.23.0



[PATCH RFC 3/5] ARM: let machines select dma-direct over arch's DMA implementation

2019-10-14 Thread Nicolas Saenz Julienne
A bounce buffering feature is already available in ARM, dmabounce.c, yet
it doesn't support high memory which some devices need. Instead of
fixing it, provide a means for devices to enable dma-direct, which is the
preferred way of doing DMA nowadays.

Signed-off-by: Nicolas Saenz Julienne 
---
 arch/arm/include/asm/mach/arch.h | 1 +
 arch/arm/mm/init.c   | 8 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index e7df5a822cab..3542bf502573 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -33,6 +33,7 @@ struct machine_desc {
 #ifdef CONFIG_ZONE_DMA
phys_addr_t dma_zone_size;  /* size of DMA-able area */
 #endif
+   booldma_direct;
 
unsigned intvideo_start;/* start of video RAM   */
unsigned intvideo_end;  /* end of video RAM */
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 0a63379a4d1a..556f70665353 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -119,6 +120,8 @@ void __init setup_dma_zone(const struct machine_desc *mdesc)
 */
if (IS_ENABLED(CONFIG_ARM_LPAE))
arm_dma_direct = true;
+   else
+   arm_dma_direct = mdesc->dma_direct;
 
 #ifdef CONFIG_ZONE_DMA
if (mdesc->dma_zone_size) {
@@ -126,7 +129,10 @@ void __init setup_dma_zone(const struct machine_desc 
*mdesc)
arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1;
} else
arm_dma_limit = 0x;
+
arm_dma_pfn_limit = arm_dma_limit >> PAGE_SHIFT;
+
+   zone_dma_bits = ilog2(arm_dma_limit) + 1;
 #endif
 }
 
@@ -482,7 +488,7 @@ static void __init free_highpages(void)
  */
 void __init mem_init(void)
 {
-#ifdef CONFIG_ARM_LPAE
+#ifdef CONFIG_SWIOTLB
swiotlb_init(1);
 #endif
 
-- 
2.23.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC 0/5] ARM: Raspberry Pi 4 DMA support

2019-10-14 Thread Nicolas Saenz Julienne
Hi all,
the Raspberry Pi 4 offers up to 4GB of memory, of which only the first
GB is DMA capable device wide. This forces us to use bounce buffers,
which are currently not very well supported by ARM's custom DMA ops.
Among other things the current mechanism (see dmabounce.c) isn't
suitable for high memory. Instead of fixing it, this series introduces a
way of selecting dma-direct as the default DMA ops provider which allows
for the Raspberry Pi to make use of swiotlb.

Regards,
Nicolas

---

Nicolas Saenz Julienne (5):
  dma/direct: turn ARCH_ZONE_DMA_BITS into a variable
  ARM: introduce arm_dma_direct
  ARM: let machines select dma-direct over arch's DMA implementation
  dma/direct: check for overflows in ARM's dma_capable()
  ARM: bcm2711: use dma-direct

 arch/arm/include/asm/dma-direct.h  |  6 ++
 arch/arm/include/asm/dma-mapping.h |  3 ++-
 arch/arm/include/asm/dma.h |  2 ++
 arch/arm/include/asm/mach/arch.h   |  1 +
 arch/arm/mach-bcm/Kconfig  |  1 +
 arch/arm/mach-bcm/bcm2711.c|  1 +
 arch/arm/mm/dma-mapping.c  | 10 ++
 arch/arm/mm/init.c | 21 -
 arch/arm64/include/asm/page.h  |  2 --
 arch/arm64/mm/init.c   |  9 +++--
 arch/powerpc/include/asm/page.h|  9 -
 arch/powerpc/mm/mem.c  | 20 +++-
 arch/s390/include/asm/page.h   |  2 --
 arch/s390/mm/init.c|  1 +
 include/linux/dma-direct.h |  2 ++
 kernel/dma/direct.c| 13 ++---
 16 files changed, 66 insertions(+), 37 deletions(-)

-- 
2.23.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC 1/5] dma/direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-10-14 Thread Nicolas Saenz Julienne
Some architectures, notably ARM, are interested in tweaking this
depending on their runtime DMA addressing limitations.

Signed-off-by: Nicolas Saenz Julienne 
---
 arch/arm64/include/asm/page.h   |  2 --
 arch/arm64/mm/init.c|  9 +++--
 arch/powerpc/include/asm/page.h |  9 -
 arch/powerpc/mm/mem.c   | 20 +++-
 arch/s390/include/asm/page.h|  2 --
 arch/s390/mm/init.c |  1 +
 include/linux/dma-direct.h  |  2 ++
 kernel/dma/direct.c | 13 ++---
 8 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7b8c98830101..d39ddb258a04 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -38,6 +38,4 @@ extern int pfn_valid(unsigned long);
 
 #include 
 
-#define ARCH_ZONE_DMA_BITS 30
-
 #endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 44f07fdf7a59..ddd6a6ce158e 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -41,6 +42,8 @@
 #include 
 #include 
 
+#define ARM64_ZONE_DMA_BITS30
+
 /*
  * We need to be able to catch inadvertent references to memstart_addr
  * that occur (potentially in generic code) before arm64_memblock_init()
@@ -440,8 +443,10 @@ void __init arm64_memblock_init(void)
 
early_init_fdt_scan_reserved_mem();
 
-   if (IS_ENABLED(CONFIG_ZONE_DMA))
-   arm64_dma_phys_limit = max_zone_phys(ARCH_ZONE_DMA_BITS);
+   if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+   zone_dma_bits = ARM64_ZONE_DMA_BITS;
+   arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
+   }
 
if (IS_ENABLED(CONFIG_ZONE_DMA32))
arm64_dma32_phys_limit = max_zone_phys(32);
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index c8bb14ff4713..f6c562acc3f8 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -329,13 +329,4 @@ struct vm_area_struct;
 #endif /* __ASSEMBLY__ */
 #include 
 
-/*
- * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
- */
-#ifdef CONFIG_PPC32
-#define ARCH_ZONE_DMA_BITS 30
-#else
-#define ARCH_ZONE_DMA_BITS 31
-#endif
-
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 97e5922cb52e..8bab4e8b6bae 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -200,10 +201,10 @@ static int __init mark_nonram_nosave(void)
  * everything else. GFP_DMA32 page allocations automatically fall back to
  * ZONE_DMA.
  *
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
- * inform the generic DMA mapping code.  32-bit only devices (if not handled
- * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
- * otherwise served by ZONE_DMA.
+ * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * generic DMA mapping code.  32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
  */
 static unsigned long max_zone_pfns[MAX_NR_ZONES];
 
@@ -236,9 +237,18 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
   (long int)((top_of_ram - total_ram) >> 20));
 
+   /*
+* Allow 30-bit DMA for very limited Broadcom wifi chips on many
+* powerbooks.
+*/
+   if (IS_ENABLED(CONFIG_PPC32))
+   zone_dma_bits = 30;
+   else
+   zone_dma_bits = 31;
+
 #ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+ 1UL << (zone_dma_bits - PAGE_SHIFT));
 #endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..a4d38092530a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #define VM_DATA_DEFAULT_FLAGS  (VM_READ | VM_WRITE | \
 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#define ARCH_ZONE_DMA_BITS 31
-
 #include 
 #include 
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c1d96e588152..ac44bd76db4b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -118,6 +118,7 @@ void __init paging_init(void)
 
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+   zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
diff 

[PATCH RFC 5/5] ARM: bcm2711: use dma-direct

2019-10-14 Thread Nicolas Saenz Julienne
The Raspberry Pi 4 supports up to 4GB of memory yet most of its devices
are only able to address the first GB. Enable dma-direct for that board
in order to benefit from swiotlb's bounce buffering mechanism.

Signed-off-by: Nicolas Saenz Julienne 
---
 arch/arm/mach-bcm/Kconfig   | 1 +
 arch/arm/mach-bcm/bcm2711.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index e4e25f287ad7..fd7d725d596c 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -163,6 +163,7 @@ config ARCH_BCM2835
select ARM_ERRATA_411920 if ARCH_MULTI_V6
select ARM_GIC if ARCH_MULTI_V7
select ZONE_DMA if ARCH_MULTI_V7
+   select SWIOTLB if ARCH_MULTI_V7
select ARM_TIMER_SP804
select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7
select TIMER_OF
diff --git a/arch/arm/mach-bcm/bcm2711.c b/arch/arm/mach-bcm/bcm2711.c
index dbe296798647..67d98cb0533f 100644
--- a/arch/arm/mach-bcm/bcm2711.c
+++ b/arch/arm/mach-bcm/bcm2711.c
@@ -19,6 +19,7 @@ DT_MACHINE_START(BCM2711, "BCM2711")
 #ifdef CONFIG_ZONE_DMA
.dma_zone_size  = SZ_1G,
 #endif
+   .dma_direct = true,
.dt_compat = bcm2711_compat,
.smp = smp_ops(bcm2836_smp_ops),
 MACHINE_END
-- 
2.23.0



Re: [PATCH v4 0/4] User API for nested shared virtual address (SVA)

2019-10-14 Thread Jacob Pan
Hi Joerg,

Just another gentle reminder. I think we have reached consensus in this
common code. Jean and Eric can confirm.

Thanks,

Jacob

On Mon, 7 Oct 2019 12:39:12 -0700
Jacob Pan  wrote:

> Hi Joerg and all,
> 
> Just wondering if you have more comments on this series.
> 
> Thanks,
> 
> Jacob
> 
> On Wed,  2 Oct 2019 12:42:39 -0700
> Jacob Pan  wrote:
> 
> > This set consists of IOMMU APIs to support SVA in the guest, a.k.a
> > nested SVA. As the complete SVA support is complex, we break down
> > the enabling effort into three stages:
> > 1. PCI device direct assignment
> > 2. Fault handling, especially page request service support
> > 3. Mediated device assignment
> > 
> > Each stage includes common API and vendor specific IOMMU driver
> > changes. This series is the common uAPI for stage #1. It is intended
> > to build consensus on the interface which all vendors reply on.
> > 
> > This series is extracted from the complete stage1 set which includes
> > VT-d code. https://lkml.org/lkml/2019/8/15/951
> > 
> > Changes:
> >  - Use spinlock instead of mutex to protect ioasid custom
> > allocators. This is to support callers in atomic context
> >  - Added more padding to guest PASID bind data for future
> > extensions, suggested by Joerg.
> > After much thinking, I did not do name change from PASID to IOASID
> > in the uAPI, considering we have been using PASID in the rest of
> > uAPIs. IOASID will remain used within the kernel.
> > 
> > For more discussions lead to this series, checkout LPC 2019
> > VFIO/IOMMU/PCI microconference materials.
> > https://linuxplumbersconf.org/event/4/sessions/66/#20190909
> > 
> > 
> > Change log:
> > v4:- minor patch regroup and fixes based on review from Jean
> > v3:- include errno.h in ioasid.h to fix compile error
> >- rebased to v5.4-rc1, no change
> >  
> > v2:
> > - Addressed review comments by Jean on IOASID custom
> > allocators, locking fix, misc control flow fix.
> > - Fixed a compile error with missing header errno.h
> > - Updated Jean-Philiippe's new email and updateded
> > reviewed-by tag
> > 
> > Jacob Pan (2):
> >   iommu/ioasid: Add custom allocators
> >   iommu: Introduce guest PASID bind function
> > 
> > Jean-Philippe Brucker (1):
> >   iommu: Add I/O ASID allocator
> > 
> > Yi L Liu (1):
> >   iommu: Introduce cache_invalidate API
> > 
> >  drivers/iommu/Kconfig  |   4 +
> >  drivers/iommu/Makefile |   1 +
> >  drivers/iommu/ioasid.c | 422
> > +
> > drivers/iommu/iommu.c  |  30  include/linux/ioasid.h |
> > 76  include/linux/iommu.h  |  36 
> >  include/uapi/linux/iommu.h | 169 ++
> >  7 files changed, 738 insertions(+)
> >  create mode 100644 drivers/iommu/ioasid.c
> >  create mode 100644 include/linux/ioasid.h
> >   
> 
> [Jacob Pan]

[Jacob Pan]
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [BUG] dma-ranges, reserved memory regions, dma_alloc_coherent: possible bug?

2019-10-14 Thread Vladimir Murzin
On 10/14/19 2:54 PM, Robin Murphy wrote:
> On 13/10/2019 15:28, Daniele Alessandrelli wrote:
>> Hi,
>>
>> It looks like dma_alloc_coherent() is setting the dma_handle output
>> parameter to the memory physical address and not the device bus
>> address when the device is using reserved memory regions for DMA
>> allocation. This is despite using 'dma_ranges' in the device tree to
>> describe the DMA memory mapping. Is this expected behavior or a bug?
> 
> That does sound like a bug :(
> 
>> Here is a reduced version of the device tree I'm using:
>> \ {
>>  reserved-memory {
>>  #address-cells = <2>;
>>  #size-cells = <2>;
>>  ranges;
>>  mydev_rsvd: rsvd_mem@49480 {
>>  compatible = "shared-dma-pool";
>>  reg = <0x4 0x9480 0x0 0x20>;
>>  no-map;
>>  };
>>  };
>>  soc {
>>  compatible = "simple-bus";
>>  #address-cells = <2>;
>>  #size-cells = <2>;
>>  ranges;
>>  dma_ranges;
>>
>>  mybus {
>>  ranges = <>;
>>  dma-ranges = <>;
>>  compatible = "simple-bus";
>>  #address-cells = <2>;
>>  #size-cells = <2>;
>>  ranges = <0x0 0x0  0x0 0x0  0x0 0x8000>;
>>  dma-ranges = <0x0 0x8000  0x4 0x8000
>> 0x0 0x8000>;
>>
>>  mydevice {
>>  compatible = "my-compatible-string";
>>  memory-region = <_rsvd>;
>>  }
>>  }
>>  }
>> };
>>
>> It looks like this issue was previously fixed by commit c41f9ea998f3
>> ("drivers: dma-coherent: Account dma_pfn_offset when used with device
>> tree") which introduced a new function ('dma_get_device_base()') to
>> return the reserved memory address as seen by the device. However,
>> such a function, even if still there, is not used anymore in latest
>> code (as of v5.4-rc2). Was that done for a specific reason? Or is it
>> just a mistake?
> 
> Hmm, it looks like 43fc509c3efb ("dma-coherent: introduce interface for 
> default DMA pool") removed the caller of dma_get_device_base() in the alloc 
> path shortly after it was introduced, which certainly appears as if it may 
> have been unintentional - Vladimir?

I do not remember it was intentional. Looking at history, default DMA pool was 
a response
to another report. However, now I'm wondering why it was not caught by STM32 - 
most of that
work was required to support "dma-ranges" with NOMMU+caches (Cortex-M7).

Alex (or anybody else from ST), maybe you have some input?

Cheers
Vladimir

> 
> Robin.

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] kernel: dma: Make CMA boot parameters __ro_after_init

2019-10-14 Thread Robin Murphy

On 12/10/2019 13:29, Shyam Saini wrote:

These parameters are not changed after early boot.
Making them __ro_after_init will reduce the attack surface of the
kernel.


At a glance, it looks like these are only referenced by a couple of 
__init functions, so couldn't they just be __initdata/__initconst?


Robin.


Link: https://lwn.net/Articles/676145/
Cc: Christoph Hellwig 
Cc: Marek Szyprowski 
Cc: Robin Murphy 
Cc: Matthew Wilcox 
Cc: Christopher Lameter 
Cc: Kees Cook 
Signed-off-by: Shyam Saini 
---
  kernel/dma/contiguous.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 69cfb4345388..1b689b1303cd 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -42,10 +42,10 @@ struct cma *dma_contiguous_default_area;
   * Users, who want to set the size of global CMA area for their system
   * should use cma= kernel parameter.
   */
-static const phys_addr_t size_bytes = (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
-static phys_addr_t size_cmdline = -1;
-static phys_addr_t base_cmdline;
-static phys_addr_t limit_cmdline;
+static const phys_addr_t __ro_after_init size_bytes = 
(phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
+static phys_addr_t __ro_after_init size_cmdline = -1;
+static phys_addr_t __ro_after_init base_cmdline;
+static phys_addr_t __ro_after_init limit_cmdline;
  
  static int __init early_cma(char *p)

  {



Re: [PATCH v3 4/7] iommu/mediatek: Delete the leaf in the tlb flush

2019-10-14 Thread Robin Murphy

On 14/10/2019 07:38, Yong Wu wrote:

In our tlb range flush, we don't care the "leaf". Remove it to simplify
the code. no functional change.


Presumably you don't care about the granule either?

Robin.


Signed-off-by: Yong Wu 
---
  drivers/iommu/mtk_iommu.c | 16 
  1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 8712afc..19f936c 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -174,8 +174,7 @@ static void mtk_iommu_tlb_flush_all(void *cookie)
  }
  
  static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,

-  size_t granule, bool leaf,
-  void *cookie)
+  size_t granule, void *cookie)
  {
struct mtk_iommu_data *data = cookie;
  
@@ -219,14 +218,7 @@ static void mtk_iommu_tlb_sync(void *cookie)

  static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
 size_t granule, void *cookie)
  {
-   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
-   mtk_iommu_tlb_sync(cookie);
-}
-
-static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
-size_t granule, void *cookie)
-{
-   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
+   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, cookie);
mtk_iommu_tlb_sync(cookie);
  }
  
@@ -245,7 +237,7 @@ static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,

  static const struct iommu_flush_ops mtk_iommu_flush_ops = {
.tlb_flush_all = mtk_iommu_tlb_flush_all,
.tlb_flush_walk = mtk_iommu_tlb_flush_walk,
-   .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
+   .tlb_flush_leaf = mtk_iommu_tlb_flush_walk,
.tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
  };
  
@@ -475,7 +467,7 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,

spin_lock_irqsave(>pgtlock, flags);
  
  	mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,

-  false, data);
+  data);
mtk_iommu_tlb_sync(data);
  
  	if (!is_in_gather)




Re: Advice on oops - memory trap on non-memory access instruction (invalid CR2?)

2019-10-14 Thread Thomas Gleixner
On Mon, 14 Oct 2019, Guilherme G. Piccoli wrote:
> Modules linked in: <...>
> CPU: 40 PID: 78274 Comm: qemu-system-x86 Tainted: P W  OE

Tainted: P - Proprietary module loaded ...

Try again without that module

Tainted: W - Warning issued before

Are you sure that that warning is harmless and unrelated?

> 4.4.0-45-generic #66~14.04.1-Ubuntu

Does the same problem happen with a not so dead kernel? CR2 handling got
quite some updates/fixes since then.

Thanks,

tglx


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 3/7] iommu/mediatek: Use gather to achieve the tlb range flush

2019-10-14 Thread Robin Murphy

On 14/10/2019 07:38, Yong Wu wrote:

Use the iommu_gather mechanism to achieve the tlb range flush.
Gather the iova range in the "tlb_add_page", then flush the merged iova
range in iotlb_sync.

Note: If iotlb_sync comes from iommu_iotlb_gather_add_page, we have to
avoid retry the lock since the spinlock have already been acquired.


I think this could probably be even simpler - once the actual 
register-poking is all confined to mtk_iommu_tlb_sync(), you should be 
able get rid of the per-domain locking in map/unmap and just have a 
single per-IOMMU lock to serialise syncs. The io-pgtable code itself 
hasn't needed external locking for a while now.


Robin.


Suggested-by: Tomasz Figa 
Signed-off-by: Yong Wu 
---
1) This is the special case backtrace:

  mtk_iommu_iotlb_sync+0x50/0xa0
  mtk_iommu_tlb_flush_page_nosync+0x5c/0xd0
  __arm_v7s_unmap+0x174/0x598
  arm_v7s_unmap+0x30/0x48
  mtk_iommu_unmap+0x50/0x78
  __iommu_unmap+0xa4/0xf8

2) The checking "if (gather->start == ULONG_MAX) return;" also is
necessary. It will happened when unmap only go to _flush_walk, then
enter this tlb_sync.
---
  drivers/iommu/mtk_iommu.c | 29 +
  drivers/iommu/mtk_iommu.h |  1 +
  2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 5f594d6..8712afc 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -234,7 +234,12 @@ static void mtk_iommu_tlb_flush_page_nosync(struct 
iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
  {
-   mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
+   struct mtk_iommu_data *data = cookie;
+   struct iommu_domain *domain = >m4u_dom->domain;
+
+   data->is_in_tlb_gather_add_page = true;
+   iommu_iotlb_gather_add_page(domain, gather, iova, granule);
+   data->is_in_tlb_gather_add_page = false;
  }
  
  static const struct iommu_flush_ops mtk_iommu_flush_ops = {

@@ -453,12 +458,28 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain 
*domain)
  static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
 struct iommu_iotlb_gather *gather)
  {
+   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+   bool is_in_gather = data->is_in_tlb_gather_add_page;
+   size_t length = gather->end - gather->start;
unsigned long flags;
  
-	spin_lock_irqsave(>pgtlock, flags);

-   mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
-   spin_unlock_irqrestore(>pgtlock, flags);
+   if (gather->start == ULONG_MAX)
+   return;
+
+   /*
+* Avoid acquire the lock when it's in gather_add_page since the lock
+* has already been held.
+*/
+   if (!is_in_gather)
+   spin_lock_irqsave(>pgtlock, flags);
+
+   mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
+  false, data);
+   mtk_iommu_tlb_sync(data);
+
+   if (!is_in_gather)
+   spin_unlock_irqrestore(>pgtlock, flags);
  }
  
  static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,

diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index fc0f16e..d29af1d 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -58,6 +58,7 @@ struct mtk_iommu_data {
struct iommu_group  *m4u_group;
boolenable_4GB;
booltlb_flush_active;
+   boolis_in_tlb_gather_add_page;
  
  	struct iommu_device		iommu;

const struct mtk_iommu_plat_data *plat_data;



Re: [PATCH v3 1/7] iommu/mediatek: Correct the flush_iotlb_all callback

2019-10-14 Thread Robin Murphy

On 14/10/2019 07:38, Yong Wu wrote:

Use the correct tlb_flush_all instead of the original one.


Reviewed-by: Robin Murphy 

Fixes: 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API TLB sync")
Signed-off-by: Yong Wu 
---
  drivers/iommu/mtk_iommu.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 67a483c..76b9388 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -447,7 +447,7 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
  
  static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)

  {
-   mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+   mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data());
  }
  
  static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,




Re: [BUG] dma-ranges, reserved memory regions, dma_alloc_coherent: possible bug?

2019-10-14 Thread Robin Murphy

On 13/10/2019 15:28, Daniele Alessandrelli wrote:

Hi,

It looks like dma_alloc_coherent() is setting the dma_handle output
parameter to the memory physical address and not the device bus
address when the device is using reserved memory regions for DMA
allocation. This is despite using 'dma_ranges' in the device tree to
describe the DMA memory mapping. Is this expected behavior or a bug?


That does sound like a bug :(


Here is a reduced version of the device tree I'm using:
/ {
 reserved-memory {
 #address-cells = <2>;
 #size-cells = <2>;
 ranges;
 mydev_rsvd: rsvd_mem@49480 {
 compatible = "shared-dma-pool";
 reg = <0x4 0x9480 0x0 0x20>;
 no-map;
 };
 };
 soc {
 compatible = "simple-bus";
 #address-cells = <2>;
 #size-cells = <2>;
 ranges;
 dma_ranges;

 mybus {
 ranges = <>;
 dma-ranges = <>;
 compatible = "simple-bus";
 #address-cells = <2>;
 #size-cells = <2>;
 ranges = <0x0 0x0  0x0 0x0  0x0 0x8000>;
 dma-ranges = <0x0 0x8000  0x4 0x8000
0x0 0x8000>;

 mydevice {
 compatible = "my-compatible-string";
 memory-region = <&mydev_rsvd>;
 }
 }
 }
};

It looks like this issue was previously fixed by commit c41f9ea998f3
("drivers: dma-coherent: Account dma_pfn_offset when used with device
tree") which introduced a new function ('dma_get_device_base()') to
return the reserved memory address as seen by the device. However,
such a function, even if still there, is not used anymore in latest
code (as of v5.4-rc2). Was that done for a specific reason? Or is it
just a mistake?


Hmm, it looks like 43fc509c3efb ("dma-coherent: introduce interface for 
default DMA pool") removed the caller of dma_get_device_base() in the 
alloc path shortly after it was introduced, which certainly appears as 
if it may have been unintentional - Vladimir?


Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 6/7] iommu/mediatek: Use writel for TLB range invalidation

2019-10-14 Thread Robin Murphy

On 14/10/2019 07:38, Yong Wu wrote:

Use writel for the register F_MMU_INV_RANGE which is for triggering the
HW work. We expect all the setting(iova_start/iova_end...) have already
been finished before F_MMU_INV_RANGE.


For Arm CPUs, these registers should be mapped as Device memory, 
therefore the same-peripheral rule should implicitly enforce that the 
accesses are made in program order, hence you're unlikely to have seen a 
problem in reality. However, the logical reasoning for the change seems 
valid in general, so I'd argue that it's still worth making if only for 
the sake of good practice:


Acked-by: Robin Murphy 


Signed-off-by: Anan.Sun 
Signed-off-by: Yong Wu 
---
  drivers/iommu/mtk_iommu.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index dbbacc3..d285457 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -187,8 +187,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long 
iova, size_t size,
writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
writel_relaxed(iova + size - 1,
   data->base + REG_MMU_INVLD_END_A);
-   writel_relaxed(F_MMU_INV_RANGE,
-  data->base + REG_MMU_INVALIDATE);
+   writel(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
  
  		/* tlb sync */

ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,



Advice on oops - memory trap on non-memory access instruction (invalid CR2?)

2019-10-14 Thread Guilherme G. Piccoli
Hello kernel community, I'm investigating a recurrent problem, and
hereby I'm seeking some advice - perhaps anybody reading this had
similar issue, for example. I've iterated some mailing-lists I thought
would be of interest, apologize if I miss any or if I shouldn't have
included some.

We have a kernel memory oops due to invalid read/write, but the trap
happens in a non-memory access instruction.

Example in [0] below. We can see a read access to offset 0x458, while it
seems KVM was sending IPI. The "Code" line though (and EIP analysis with
objdump in the vmlinux image) shows the trapping instruction as:

2b:*84 c0 test %al,%al <-- trapping instruction

This instruction clearly shouldn't trap by invalid memory access. Also,
this 0x458 offset seems not present in the code, based on assembly
analysis done [1]. We had 3 or 4 more reports like this, some have
invalid address on write (again #PF), some #GP - in all of them, the
trapping insn is a non-memory related opcode.

We understand x86 (should) have precise exceptions, so some hypothesis
right now are related with:

(a) Invalid CR2 - perhaps due to a System Management Interrupt, firmware
code executed and caused an invalid memory access, polluting CR2.

(b) Error in processor - there are some errata on Xeon processors, which
Intel claims never were observed in commercial systems.

(c) Error in kernel reporting when the oops happens - though we
investigate this deeply, and the exception handlers are quite concise
assembly routines that stacks processor generated data.

(d) Some KVM/vAPIC related failure that may be caused by guest MMAPed
APIC area bad access during interrupt virtualization.

(e) Intel processor do not present precise interrupts.

All of them are unlikely - maybe I'm not seeing something obvious, hence
this advice request. Below there's a more detailed analysis of the
registers of the aforementioned oops splat [2].

We are aware of the old version of kernel, unfortunately the user
reporting this issue is unable to update right now. Any
direction/suggestion/advice to obtain more data or prove/disprove some
of our hypothesis is highly appreciated. Any questions are also
appreciated, feel free to respond with any ideas you might have.

Thanks,


Guilherme
--


[0]
BUG: unable to handle kernel NULL pointer dereference at 0458
IP: [] kvm_irq_delivery_to_apic+0x56/0x220 [kvm]
PGD 0
Oops: 0000 [#1] SMP
Modules linked in: <...>
CPU: 40 PID: 78274 Comm: qemu-system-x86 Tainted: P W  OE
4.4.0-45-generic #66~14.04.1-Ubuntu
Hardware name: Dell Inc. PowerEdge R630/02C2CP, BIOS 2.1.7 06/16/2016
task: 8800594dd280 ti: 880169168000 task.ti: 880169168000
RIP: 0010:[]  []
kvm_irq_delivery_to_apic+0x56/0x220 [kvm]
RSP: 0018:88016916bbe8  EFLAGS: 00010282
RAX: 0001 RBX: 0300 RCX: 0003
RDX: 0040 RSI: 0010 RDI: 88016916bba8
RBP: 88016916bc30 R08: 0004 R09: 
R10: 0001 R11:  R12: 08fd
R13: 0004 R14: 88004d3e8000 R15: 88016916bc40
FS:  7fbd67fff700() GS:881ffeb0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 0458 CR3: 0001961a9000 CR4: 003426e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Stack:
 0001  882194b81400 000194b81410
 0300 08fd 0004 882194b81400
 0001 88016916bc78 c0796d20 08fd
Call Trace:
 [] apic_reg_write+0x110/0x5f0 [kvm]
 [] kvm_apic_write_nodecode+0x4b/0x60 [kvm]
 [] handle_apic_write+0x1e/0x30 [kvm_intel]
 [] vmx_handle_exit+0x288/0xbf0 [kvm_intel]
 [] vcpu_enter_guest+0x8b4/0x10a0 [kvm]
 [] ? kvm_vcpu_block+0x191/0x2d0 [kvm]
 [] ? prepare_to_wait_event+0xf0/0xf0
 [] kvm_arch_vcpu_ioctl_run+0xc4/0x3d0 [kvm]
 [] kvm_vcpu_ioctl+0x2ab/0x640 [kvm]
 [] do_vfs_ioctl+0x2dd/0x4c0
 [] ? __audit_syscall_entry+0xaf/0x100
 [] ? do_audit_syscall_entry+0x66/0x70
 [] SyS_ioctl+0x79/0x90
 [] entry_SYSCALL_64_fastpath+0x16/0x75
Code: d4 ff ff ff ff 75 0d 81 7a 10 ff 00 00 00 0f 84 7d 01 00 00 4c 8b
45 c0 48 8b 75 c8 48 8d 4d d4 4c 89 fa 4c 89 f7 e8 ca be ff ff <84> c0
0f 85 0c 01 00 00 41 8b 86 f0 09 00 00 85 c0 0f 8e fd 00
RIP  [] kvm_irq_delivery_to_apic+0x56/0x220 [kvm]
RSP  CR2: 0458
--


[1] Assembly analysis: https://pastebin.ubuntu.com/p/hdHNmvFtd8/
--


[2] More detailed analysis of registers:

%rax = 1 [return from kvm_irq_delivery_to_apic_fast()]

%rbx = 0x300 [ICR_LO register - this value comes from
kvm_apic_write_nodecode(), in which the offset / register is assigned to
%ebx.

%rdi = 
%rsi = 16 (0x10) from "for_each_set_bit(i, , 16)" in function
kvm_irq_delivery_to_apic_fast().

%rcx = i in above loop
%rdx = 64 (0x40 - BITS_PER_LONG, set inside find_next_bit() in the above
loop)

%r8 = 

[BUG] dma-ranges, reserved memory regions, dma_alloc_coherent: possible bug?

2019-10-14 Thread Daniele Alessandrelli
Hi,

It looks like dma_alloc_coherent() is setting the dma_handle output
parameter to the memory physical address and not the device bus
address when the device is using reserved memory regions for DMA
allocation. This is despite using 'dma_ranges' in the device tree to
describe the DMA memory mapping. Is this expected behavior or a bug?

Here is a reduced version of the device tree I'm using:
/ {
reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
ranges;
mydev_rsvd: rsvd_mem@49480 {
compatible = "shared-dma-pool";
reg = <0x4 0x9480 0x0 0x20>;
no-map;
};
};
soc {
compatible = "simple-bus";
#address-cells = <2>;
#size-cells = <2>;
ranges;
dma_ranges;

mybus {
ranges = <>;
dma-ranges = <>;
compatible = "simple-bus";
#address-cells = <2>;
#size-cells = <2>;
ranges = <0x0 0x0  0x0 0x0  0x0 0x8000>;
dma-ranges = <0x0 0x8000  0x4 0x8000
0x0 0x8000>;

mydevice {
compatible = "my-compatible-string";
memory-region = <&mydev_rsvd>;
}
}
}
};

It looks like this issue was previously fixed by commit c41f9ea998f3
("drivers: dma-coherent: Account dma_pfn_offset when used with device
tree") which introduced a new function ('dma_get_device_base()') to
return the reserved memory address as seen by the device. However,
such a function, even if still there, is not used anymore in latest
code (as of v5.4-rc2). Was that done for a specific reason? Or is it
just a mistake?

Regards,
Daniele
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/2] dma-mapping: Add dma_addr_is_phys_addr()

2019-10-14 Thread Robin Murphy

On 14/10/2019 05:51, David Gibson wrote:

On Fri, Oct 11, 2019 at 06:25:18PM -0700, Ram Pai wrote:

From: Thiago Jung Bauermann 

In order to safely use the DMA API, virtio needs to know whether DMA
addresses are in fact physical addresses and for that purpose,
dma_addr_is_phys_addr() is introduced.

cc: Benjamin Herrenschmidt 
cc: David Gibson 
cc: Michael Ellerman 
cc: Paul Mackerras 
cc: Michael Roth 
cc: Alexey Kardashevskiy 
cc: Paul Burton 
cc: Robin Murphy 
cc: Bartlomiej Zolnierkiewicz 
cc: Marek Szyprowski 
cc: Christoph Hellwig 
Suggested-by: Michael S. Tsirkin 
Signed-off-by: Ram Pai 
Signed-off-by: Thiago Jung Bauermann 


The change itself looks ok, so

Reviewed-by: David Gibson 

However, I would like to see the commit message (and maybe the inline
comments) expanded a bit on what the distinction here is about.  Some
of the text from the next patch would be suitable, about DMA addresses
usually being in a different address space but not in the case of
bounce buffering.


Right, this needs a much tighter definition. "DMA address happens to be 
a valid physical address" is true of various IOMMU setups too, but I 
can't believe it's meaningful in such cases.


If what you actually want is "DMA is direct or SWIOTLB" - i.e. "DMA 
address is physical address of DMA data (not necessarily the original 
buffer)" - wouldn't dma_is_direct() suffice?


Robin.


---
  arch/powerpc/include/asm/dma-mapping.h | 21 +
  arch/powerpc/platforms/pseries/Kconfig |  1 +
  include/linux/dma-mapping.h| 20 
  kernel/dma/Kconfig |  3 +++
  4 files changed, 45 insertions(+)

diff --git a/arch/powerpc/include/asm/dma-mapping.h 
b/arch/powerpc/include/asm/dma-mapping.h
index 565d6f7..f92c0a4b 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -5,6 +5,8 @@
  #ifndef _ASM_DMA_MAPPING_H
  #define _ASM_DMA_MAPPING_H
  
+#include <asm/svm.h>

+
  static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
  {
/* We don't handle the NULL dev case for ISA for now. We could
@@ -15,4 +17,23 @@ static inline const struct dma_map_ops 
*get_arch_dma_ops(struct bus_type *bus)
return NULL;
  }
  
+#ifdef CONFIG_ARCH_HAS_DMA_ADDR_IS_PHYS_ADDR

+/**
+ * dma_addr_is_phys_addr - check whether a device DMA address is a physical
+ * address
+ * @dev:   device to check
+ *
+ * Returns %true if any DMA address for this device happens to also be a valid
+ * physical address (not necessarily of the same page).
+ */
+static inline bool dma_addr_is_phys_addr(struct device *dev)
+{
+   /*
+* Secure guests always use the SWIOTLB, therefore DMA addresses are
+* actually the physical address of the bounce buffer.
+*/
+   return is_secure_guest();
+}
+#endif
+
  #endif/* _ASM_DMA_MAPPING_H */
diff --git a/arch/powerpc/platforms/pseries/Kconfig 
b/arch/powerpc/platforms/pseries/Kconfig
index 9e35cdd..0108150 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -152,6 +152,7 @@ config PPC_SVM
select SWIOTLB
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+   select ARCH_HAS_DMA_ADDR_IS_PHYS_ADDR
help
 There are certain POWER platforms which support secure guests using
 the Protected Execution Facility, with the help of an Ultravisor
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f7d1eea..6df5664 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -693,6 +693,26 @@ static inline bool dma_addressing_limited(struct device 
*dev)
dma_get_required_mask(dev);
  }
  
+#ifndef CONFIG_ARCH_HAS_DMA_ADDR_IS_PHYS_ADDR

+/**
+ * dma_addr_is_phys_addr - check whether a device DMA address is a physical
+ * address
+ * @dev:   device to check
+ *
+ * Returns %true if any DMA address for this device happens to also be a valid
+ * physical address (not necessarily of the same page).
+ */
+static inline bool dma_addr_is_phys_addr(struct device *dev)
+{
+   /*
+* Except in very specific setups, DMA addresses exist in a different
+* address space from CPU physical addresses and cannot be directly used
+* to reference system memory.
+*/
+   return false;
+}
+#endif
+
  #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 9decbba..6209b46 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -51,6 +51,9 @@ config ARCH_HAS_DMA_MMAP_PGPROT
  config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
  
+config ARCH_HAS_DMA_ADDR_IS_PHYS_ADDR

+   bool
+
  config DMA_NONCOHERENT_CACHE_SYNC
bool
  




[PATCH v3 5/7] iommu/mediatek: Move the tlb_sync into tlb_flush

2019-10-14 Thread Yong Wu
Right now, the tlb_add_flush_nosync and tlb_sync always appear together.
we merge the two functions into one. No functional change.

mtk_iommu_tlb_add_flush_nosync
mtk_iommu_tlb_sync

Signed-off-by: Chao Hao 
Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 36 
 drivers/iommu/mtk_iommu.h |  1 -
 2 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 19f936c..dbbacc3 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -173,10 +173,12 @@ static void mtk_iommu_tlb_flush_all(void *cookie)
}
 }
 
-static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
+static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size,
   size_t granule, void *cookie)
 {
struct mtk_iommu_data *data = cookie;
+   int ret;
+   u32 tmp;
 
for_each_m4u(data) {
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
@@ -187,21 +189,8 @@ static void mtk_iommu_tlb_add_flush_nosync(unsigned long 
iova, size_t size,
   data->base + REG_MMU_INVLD_END_A);
writel_relaxed(F_MMU_INV_RANGE,
   data->base + REG_MMU_INVALIDATE);
-   data->tlb_flush_active = true;
-   }
-}
-
-static void mtk_iommu_tlb_sync(void *cookie)
-{
-   struct mtk_iommu_data *data = cookie;
-   int ret;
-   u32 tmp;
-
-   for_each_m4u(data) {
-   /* Avoid timing out if there's nothing to wait for */
-   if (!data->tlb_flush_active)
-   return;
 
+   /* tlb sync */
ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
 tmp, tmp != 0, 10, 100000);
if (ret) {
@@ -211,17 +200,9 @@ static void mtk_iommu_tlb_sync(void *cookie)
}
/* Clear the CPE status */
writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
-   data->tlb_flush_active = false;
}
 }
 
-static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
-size_t granule, void *cookie)
-{
-   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, cookie);
-   mtk_iommu_tlb_sync(cookie);
-}
-
 static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
@@ -236,8 +217,8 @@ static void mtk_iommu_tlb_flush_page_nosync(struct 
iommu_iotlb_gather *gather,
 
 static const struct iommu_flush_ops mtk_iommu_flush_ops = {
.tlb_flush_all = mtk_iommu_tlb_flush_all,
-   .tlb_flush_walk = mtk_iommu_tlb_flush_walk,
-   .tlb_flush_leaf = mtk_iommu_tlb_flush_walk,
+   .tlb_flush_walk = mtk_iommu_tlb_flush_range_sync,
+   .tlb_flush_leaf = mtk_iommu_tlb_flush_range_sync,
.tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
 };
 
@@ -466,9 +447,8 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain 
*domain,
if (!is_in_gather)
 		spin_lock_irqsave(&dom->pgtlock, flags);
 
-   mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
-  data);
-   mtk_iommu_tlb_sync(data);
+   mtk_iommu_tlb_flush_range_sync(gather->start, length,
+  gather->pgsize, data);
 
if (!is_in_gather)
 		spin_unlock_irqrestore(&dom->pgtlock, flags);
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index d29af1d..9056f73 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -57,7 +57,6 @@ struct mtk_iommu_data {
struct mtk_iommu_domain *m4u_dom;
struct iommu_group  *m4u_group;
boolenable_4GB;
-   booltlb_flush_active;
boolis_in_tlb_gather_add_page;
 
struct iommu_device iommu;
-- 
1.9.1



[PATCH v3 6/7] iommu/mediatek: Use writel for TLB range invalidation

2019-10-14 Thread Yong Wu
Use writel for the register F_MMU_INV_RANGE which is for triggering the
HW work. We expect all the setting(iova_start/iova_end...) have already
been finished before F_MMU_INV_RANGE.

Signed-off-by: Anan.Sun 
Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index dbbacc3..d285457 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -187,8 +187,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long 
iova, size_t size,
writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
writel_relaxed(iova + size - 1,
   data->base + REG_MMU_INVLD_END_A);
-   writel_relaxed(F_MMU_INV_RANGE,
-  data->base + REG_MMU_INVALIDATE);
+   writel(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
 
/* tlb sync */
ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
-- 
1.9.1



[PATCH v3 7/7] iommu/mediatek: Reduce the tlb flush timeout value

2019-10-14 Thread Yong Wu
Reduce the tlb timeout value from 100000us to 1000us. the original value
is so long that affect the multimedia performance. This is only a minor
improvement rather than fixing a issue.

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index d285457..c9d49af 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -191,7 +191,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long 
iova, size_t size,
 
/* tlb sync */
ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
-   tmp, tmp != 0, 10, 100000);
+   tmp, tmp != 0, 10, 1000);
if (ret) {
dev_warn(data->dev,
 "Partial TLB flush timed out, falling back to 
full flush\n");
-- 
1.9.1



[PATCH v3 4/7] iommu/mediatek: Delete the leaf in the tlb flush

2019-10-14 Thread Yong Wu
In our tlb range flush, we don't care the "leaf". Remove it to simplify
the code. no functional change.

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 16 
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 8712afc..19f936c 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -174,8 +174,7 @@ static void mtk_iommu_tlb_flush_all(void *cookie)
 }
 
 static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
-  size_t granule, bool leaf,
-  void *cookie)
+  size_t granule, void *cookie)
 {
struct mtk_iommu_data *data = cookie;
 
@@ -219,14 +218,7 @@ static void mtk_iommu_tlb_sync(void *cookie)
 static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
 size_t granule, void *cookie)
 {
-   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
-   mtk_iommu_tlb_sync(cookie);
-}
-
-static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
-size_t granule, void *cookie)
-{
-   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
+   mtk_iommu_tlb_add_flush_nosync(iova, size, granule, cookie);
mtk_iommu_tlb_sync(cookie);
 }
 
@@ -245,7 +237,7 @@ static void mtk_iommu_tlb_flush_page_nosync(struct 
iommu_iotlb_gather *gather,
 static const struct iommu_flush_ops mtk_iommu_flush_ops = {
.tlb_flush_all = mtk_iommu_tlb_flush_all,
.tlb_flush_walk = mtk_iommu_tlb_flush_walk,
-   .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
+   .tlb_flush_leaf = mtk_iommu_tlb_flush_walk,
.tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
 };
 
@@ -475,7 +467,7 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain 
*domain,
	spin_lock_irqsave(&dom->pgtlock, flags);
 
mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
-  false, data);
+  data);
mtk_iommu_tlb_sync(data);
 
if (!is_in_gather)
-- 
1.9.1



[PATCH v3 0/7] Improve tlb range flush

2019-10-14 Thread Yong Wu
This patchset mainly fixes a tlb flush timeout issue and use the new
iommu_gather to re-implement the tlb flush flow. and several clean up
patches about the tlb_flush.

change note:
v3:
   1. Use the gather to implement the tlb_flush suggested from Tomasz.
   2. add some clean up patches.

v2:
https://lore.kernel.org/linux-iommu/1570627143-29441-1-git-send-email-yong...@mediatek.com/T/#t
   1. rebase on v5.4-rc1
   2. only split to several patches.

v1:
https://lore.kernel.org/linux-iommu/CAAFQd5C3U7pZo4SSUJ52Q7E+0FaUoORQFbQC5RhCHBhi=nf...@mail.gmail.com/T/#t

Yong Wu (7):
  iommu/mediatek: Correct the flush_iotlb_all callback
  iommu/mediatek: Add pgtlock in the iotlb_sync
  iommu/mediatek: Use gather to achieve the tlb range flush
  iommu/mediatek: Delete the leaf in the tlb flush
  iommu/mediatek: Move the tlb_sync into tlb_flush
  iommu/mediatek: Use writel for TLB range invalidation
  iommu/mediatek: Reduce the tlb flush timeout value

 drivers/iommu/mtk_iommu.c | 77 +++
 drivers/iommu/mtk_iommu.h |  2 +-
 2 files changed, 38 insertions(+), 41 deletions(-)

-- 
1.9.1



[PATCH v3 3/7] iommu/mediatek: Use gather to achieve the tlb range flush

2019-10-14 Thread Yong Wu
Use the iommu_gather mechanism to achieve the tlb range flush.
Gather the iova range in the "tlb_add_page", then flush the merged iova
range in iotlb_sync.

Note: If iotlb_sync comes from iommu_iotlb_gather_add_page, we have to
avoid retry the lock since the spinlock have already been acquired.

Suggested-by: Tomasz Figa 
Signed-off-by: Yong Wu 
---
1) This is the special case backtrace:

 mtk_iommu_iotlb_sync+0x50/0xa0
 mtk_iommu_tlb_flush_page_nosync+0x5c/0xd0
 __arm_v7s_unmap+0x174/0x598
 arm_v7s_unmap+0x30/0x48
 mtk_iommu_unmap+0x50/0x78
 __iommu_unmap+0xa4/0xf8

2) The checking "if (gather->start == ULONG_MAX) return;" also is
necessary. It will happened when unmap only go to _flush_walk, then
enter this tlb_sync.
---
 drivers/iommu/mtk_iommu.c | 29 +
 drivers/iommu/mtk_iommu.h |  1 +
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 5f594d6..8712afc 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -234,7 +234,12 @@ static void mtk_iommu_tlb_flush_page_nosync(struct 
iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
 {
-   mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
+   struct mtk_iommu_data *data = cookie;
+   struct iommu_domain *domain = &data->m4u_dom->domain;
+
+   data->is_in_tlb_gather_add_page = true;
+   iommu_iotlb_gather_add_page(domain, gather, iova, granule);
+   data->is_in_tlb_gather_add_page = false;
 }
 
 static const struct iommu_flush_ops mtk_iommu_flush_ops = {
@@ -453,12 +458,28 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain 
*domain)
 static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
 struct iommu_iotlb_gather *gather)
 {
+   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+   bool is_in_gather = data->is_in_tlb_gather_add_page;
+   size_t length = gather->end - gather->start;
unsigned long flags;
 
-   spin_lock_irqsave(&dom->pgtlock, flags);
-   mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
-   spin_unlock_irqrestore(&dom->pgtlock, flags);
+   if (gather->start == ULONG_MAX)
+   return;
+
+   /*
+* Avoid acquire the lock when it's in gather_add_page since the lock
+* has already been held.
+*/
+   if (!is_in_gather)
+   spin_lock_irqsave(&dom->pgtlock, flags);
+
+   mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
+  false, data);
+   mtk_iommu_tlb_sync(data);
+
+   if (!is_in_gather)
+   spin_unlock_irqrestore(&dom->pgtlock, flags);
 }
 
 static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index fc0f16e..d29af1d 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -58,6 +58,7 @@ struct mtk_iommu_data {
struct iommu_group  *m4u_group;
boolenable_4GB;
booltlb_flush_active;
+   boolis_in_tlb_gather_add_page;
 
struct iommu_device iommu;
const struct mtk_iommu_plat_data *plat_data;
-- 
1.9.1



[PATCH v3 2/7] iommu/mediatek: Add pgtlock in the iotlb_sync

2019-10-14 Thread Yong Wu
The commit 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API
TLB sync") help move the tlb_sync of unmap from v7s into the iommu
framework. It helps add a new function "mtk_iommu_iotlb_sync", But it
lacked the dom->pgtlock, then it will cause the variable "tlb_flush_active"
may be changed unexpectedly, we could see this warning log randomly:

mtk-iommu 10205000.iommu: Partial TLB flush timed out, falling back to
full flush

This patch adds dom->pgtlock in mtk_iommu_iotlb_sync to fix this issue.

Fixes: 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API TLB sync")
Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 76b9388..5f594d6 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -453,7 +453,12 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain 
*domain)
 static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
 struct iommu_iotlb_gather *gather)
 {
+   struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+   unsigned long flags;
+
+   spin_lock_irqsave(&dom->pgtlock, flags);
mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+   spin_unlock_irqrestore(&dom->pgtlock, flags);
 }
 
 static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
-- 
1.9.1



[PATCH v3 1/7] iommu/mediatek: Correct the flush_iotlb_all callback

2019-10-14 Thread Yong Wu
Use the correct tlb_flush_all instead of the original one.

Fixes: 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API TLB sync")
Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 67a483c..76b9388 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -447,7 +447,7 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
 
 static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
 {
-   mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+   mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data());
 }
 
 static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
-- 
1.9.1