[PATCH] dma-debug: teach add_dma_entry() about DMA_ATTR_SKIP_CPU_SYNC

2021-10-11 Thread Hamza Mahfooz
Mapping something twice should be possible as long as,
DMA_ATTR_SKIP_CPU_SYNC is passed to the strictly speaking second relevant
mapping operation (that attempts to map the same thing). So, don't issue a
warning if the specified condition is met in add_dma_entry().

Signed-off-by: Hamza Mahfooz 
---
 kernel/dma/debug.c   | 24 ++--
 kernel/dma/debug.h   | 24 
 kernel/dma/mapping.c | 12 ++--
 3 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 95445bd6eb72..c4128df3de41 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -552,7 +552,7 @@ static void active_cacheline_remove(struct dma_debug_entry 
*entry)
  * Wrapper function for adding an entry to the hash.
  * This function takes care of locking itself.
  */
-static void add_dma_entry(struct dma_debug_entry *entry)
+static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
 {
struct hash_bucket *bucket;
unsigned long flags;
@@ -566,7 +566,7 @@ static void add_dma_entry(struct dma_debug_entry *entry)
if (rc == -ENOMEM) {
pr_err("cacheline tracking ENOMEM, dma-debug disabled\n");
global_disable = true;
-   } else if (rc == -EEXIST) {
+   } else if ((rc == -EEXIST) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
err_printk(entry->dev, entry,
"cacheline tracking EEXIST, overlapping mappings aren't 
supported\n");
}
@@ -1191,7 +1191,8 @@ void debug_dma_map_single(struct device *dev, const void 
*addr,
 EXPORT_SYMBOL(debug_dma_map_single);
 
 void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
-   size_t size, int direction, dma_addr_t dma_addr)
+   size_t size, int direction, dma_addr_t dma_addr,
+   unsigned long attrs)
 {
struct dma_debug_entry *entry;
 
@@ -1222,7 +1223,7 @@ void debug_dma_map_page(struct device *dev, struct page 
*page, size_t offset,
check_for_illegal_area(dev, addr, size);
}
 
-   add_dma_entry(entry);
+   add_dma_entry(entry, attrs);
 }
 
 void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -1280,7 +1281,8 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t 
addr,
 }
 
 void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nents, int mapped_ents, int direction)
+ int nents, int mapped_ents, int direction,
+ unsigned long attrs)
 {
struct dma_debug_entry *entry;
struct scatterlist *s;
@@ -1312,7 +1314,7 @@ void debug_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
 
check_sg_segment(dev, s);
 
-   add_dma_entry(entry);
+   add_dma_entry(entry, attrs);
}
 }
 
@@ -1368,7 +1370,8 @@ void debug_dma_unmap_sg(struct device *dev, struct 
scatterlist *sglist,
 }
 
 void debug_dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t dma_addr, void *virt)
+ dma_addr_t dma_addr, void *virt,
+ unsigned long attrs)
 {
struct dma_debug_entry *entry;
 
@@ -1398,7 +1401,7 @@ void debug_dma_alloc_coherent(struct device *dev, size_t 
size,
else
entry->pfn = page_to_pfn(virt_to_page(virt));
 
-   add_dma_entry(entry);
+   add_dma_entry(entry, attrs);
 }
 
 void debug_dma_free_coherent(struct device *dev, size_t size,
@@ -1429,7 +1432,8 @@ void debug_dma_free_coherent(struct device *dev, size_t 
size,
 }
 
 void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
-   int direction, dma_addr_t dma_addr)
+   int direction, dma_addr_t dma_addr,
+   unsigned long attrs)
 {
struct dma_debug_entry *entry;
 
@@ -1449,7 +1453,7 @@ void debug_dma_map_resource(struct device *dev, 
phys_addr_t addr, size_t size,
entry->direction= direction;
entry->map_err_type = MAP_ERR_NOT_CHECKED;
 
-   add_dma_entry(entry);
+   add_dma_entry(entry, attrs);
 }
 
 void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h
index 83643b3010b2..f525197d3cae 100644
--- a/kernel/dma/debug.h
+++ b/kernel/dma/debug.h
@@ -11,26 +11,30 @@
 #ifdef CONFIG_DMA_API_DEBUG
 extern void debug_dma_map_page(struct device *dev, struct page *page,
   size_t offset, size_t size,
-  int direction, dma_addr_t dma_addr);
+  int direction, dma_addr_t dma_addr,
+  unsigned long attrs);
 
 extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 size_t size, int direction);
 
 extern void de

Re: [PATCH] iommu: fix ARM_SMMU vs QCOM_SCM compilation

2021-10-11 Thread Dmitry Baryshkov
On Sun, 10 Oct 2021 at 20:42, Arnd Bergmann  wrote:
>
> On Sun, Oct 10, 2021 at 6:17 AM Bjorn Andersson
>  wrote:
> >
> > On Sat 09 Oct 21:33 CDT 2021, Dmitry Baryshkov wrote:
> >
> > > After commit 424953cf3c66 ("qcom_scm: hide Kconfig symbol") arm-smmu got
> > > qcom_smmu_impl_init() call guarded by IS_ENABLED(CONFIG_ARM_SMMU_QCOM).
> > > However the CONFIG_ARM_SMMU_QCOM Kconfig entry does not exist, so the
> > > qcom_smmu_impl_init() is never called.
> > >
> > > So, let's fix this by always calling qcom_smmu_impl_init(). It does not
> > > touch the smmu passed unless the device is a non-Qualcomm one. Make
> > > ARM_SMMU select QCOM_SCM for ARCH_QCOM.
>
> Sorry about this bug. I was sure I had it working, but I lost part of the 
> commit
> during a rebase, and my randconfig builds still succeeded without it, so I
> sent a wrong version.
>
> > Arnd's intention was to not force QCOM_SCM to be built on non-Qualcomm
> > devices. But as Daniel experienced, attempting to boot most Qualcomm
> > boards without this results in a instant reboot.
> >
> > I think it's okay if we tinker with CONFIG_ARM_SMMU_QCOM for v5.16, but
> > we're getting late in v5.15 so I would prefer if we make sure this works
> > out of the box.
>
> Yes, makes sense. For reference, see below for how I would fix this properly,
> this is what I had intended to have in the patch. Feel free to pick
> either version
> as the immediate bugfix. I'll give the below a little more randconfig testing
> overnight though. The pasted version of the patch is probably
> whitespace-damaged,
> let me know if you would like me to send it as a proper patch.
>
>Arnd
>
> 8<-
> Subject: iommu: fix ARM_SMMU_QCOM compilation
>
> My previous bugfix ended up making things worse for the QCOM IOMMU
> driver when it forgot to add the Kconfig symbol that is getting used to
> control the compilation of the SMMU implementation specific code
> for Qualcomm.
>
> Fixes: 424953cf3c66 ("qcom_scm: hide Kconfig symbol")
> Reported-by: Daniel Lezcano 
> Reported-by: Dmitry Baryshkov 
> Signed-off-by: Arnd Bergmann 

Reviewed-by: Dmitry Baryshkov 

Let's get either of them in.

> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index c5c71b7ab7e8..2dfe744ddd97 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -311,6 +311,7 @@ config ARM_SMMU
> select IOMMU_API
> select IOMMU_IO_PGTABLE_LPAE
> select ARM_DMA_USE_IOMMU if ARM
> +   select QCOM_SCM if ARM_SMMU_QCOM
> help
>   Support for implementations of the ARM System MMU architecture
>   versions 1 and 2.
> @@ -355,6 +356,13 @@ config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT
>   'arm-smmu.disable_bypass' will continue to override this
>   config.
>
> +config ARM_SMMU_QCOM
> +   def_bool y
> +   depends on ARM_SMMU && ARCH_QCOM
> +   help
> + When running on a Qualcomm platform that has the custom variant
> + of the ARM SMMU, this needs to be built into the SMMU driver.
> +
>  config ARM_SMMU_V3
> tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
> depends on ARM64



-- 
With best wishes
Dmitry
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu: fix ARM_SMMU vs QCOM_SCM compilation

2021-10-11 Thread Dmitry Baryshkov
On Mon, 11 Oct 2021 at 12:57, Arnd Bergmann  wrote:
>
> On Mon, Oct 11, 2021 at 11:10 AM Dmitry Baryshkov
>  wrote:
> >
> > On Mon, 11 Oct 2021 at 09:09, Arnd Bergmann  wrote:
> > >
> > > On Mon, Oct 11, 2021 at 6:11 AM Dmitry Baryshkov
> > >  wrote:
> > > > On Sun, 10 Oct 2021 at 20:42, Arnd Bergmann  wrote:
> > > >
> > > > The patch seems correct, but it becomes overcomplicated. What about:
> > > > - restoring QCOM_SCM stubs
> > >
> > > The stubs are what has led to the previous bugs in this area to often
> > > go unnoticed for too long, as illustrated by your suggestion
> > >
> > > > - making ARM_SMMU select QCOM_SCM if ARM_SMMU_QCOM
> > >
> > > I assume you meant "select QCOM_SCM if ARCH_QCOM",
> > > after we stop using ARM_SMMU_QCOM?
> > >
> > > > This would have almost the same result as with your patch, but without
> > > > extra ARM_SMMU_QCOM Kconfig symbol.
> > >
> > > The "almost" is the problem: consider the case of
> > >
> > > CONFIG_ARM=y
> > > CONFIG_COMPILE_TEST=y
> > > CONFIG_ARCH_QCOM=n
> > > CONFIG_ARM_SMMU=y
> > > CONFIG_DRM_MSM=m
> > > CONFIG_QCOM_SCM=m (selected by DRM_MSM)
> > >
> > > The stubs here lead to ARM_SMMU linking against the QCOM_SCM
> > > driver from built-in code, which fails because QCOM_SCM itself
> > > is a loadable module.
> >
> > I see. The idealist in me wishes to change my suggestion to
> > 'select QCOM_SCM if ARCH_QCOM || COMPILE_TEST'
> > but I have the subtle feeling that this also might fail somehow.
>
> I think that would actually work, but it has the nasty side-effect
> that simply flipping 'CONFIG_COMPILE_TEST' changes what
> the kernel does, rather than just hiding or unhiding additional
> options.
>
> > > We can move the "select QCOM_SCM" in the ARM_SMMU_QCOM
> > > symbol if we make that a tristate though, if you want to separate it
> > > a little more.
> >
> > This would complicate things a bit, as we would no longer be able to
> > use 'arm-smmu-$(CONFIG_ARM_SMMU_QCOM) +=' construct.
>
> I'm fairly sure we could still use that, Kbuild is smart enough
> to include both 'file-m +=' and 'file-y += ' in 'file.ko', see
> scripts/Makefile.lib:
>
> # If $(foo-objs), $(foo-y), $(foo-m), or $(foo-) exists, foo.o is a
> composite object
> multi-obj-y := $(call multi-search, $(obj-y), .o, -objs -y)
> multi-obj-m := $(call multi-search, $(obj-m), .o, -objs -y -m)
> multi-obj-ym := $(multi-obj-y) $(multi-obj-m)
>
> # Replace multi-part objects by their individual parts,
> # including built-in.a from subdirectories
> real-obj-y := $(call real-search, $(obj-y), .o, -objs -y)
> real-obj-m := $(call real-search, $(obj-m), .o, -objs -y -m)

Ah, I thought Kbuild would accept only  foo-y, please excuse me.

>
> What doesn't work is having a built-in driver in a directory that is
> guarded with a =m symbol, or including a =m object into a =y
> module.
>
> Arnd



-- 
With best wishes
Dmitry
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 2/2] qcom_scm: hide Kconfig symbol

2021-10-11 Thread John Stultz
On Thu, Oct 7, 2021 at 8:10 AM Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> Now that SCM can be a loadable module, we have to add another
> dependency to avoid link failures when ipa or adreno-gpu are
> built-in:
>
> aarch64-linux-ld: drivers/net/ipa/ipa_main.o: in function `ipa_probe':
> ipa_main.c:(.text+0xfc4): undefined reference to `qcom_scm_is_available'
>
> ld.lld: error: undefined symbol: qcom_scm_is_available
> >>> referenced by adreno_gpu.c
> >>>   gpu/drm/msm/adreno/adreno_gpu.o:(adreno_zap_shader_load) in 
> >>> archive drivers/built-in.a
>
> This can happen when CONFIG_ARCH_QCOM is disabled and we don't select
> QCOM_MDT_LOADER, but some other module selects QCOM_SCM. Ideally we'd
> use a similar dependency here to what we have for QCOM_RPROC_COMMON,
> but that causes dependency loops from other things selecting QCOM_SCM.
>
> This appears to be an endless problem, so try something different this
> time:
>
>  - CONFIG_QCOM_SCM becomes a hidden symbol that nothing 'depends on'
>but that is simply selected by all of its users
>
>  - All the stubs in include/linux/qcom_scm.h can go away
>
>  - arm-smccc.h needs to provide a stub for __arm_smccc_smc() to
>allow compile-testing QCOM_SCM on all architectures.
>
>  - To avoid a circular dependency chain involving RESET_CONTROLLER
>and PINCTRL_SUNXI, drop the 'select RESET_CONTROLLER' statement.
>According to my testing this still builds fine, and the QCOM
>platform selects this symbol already.
>
> Acked-by: Kalle Valo 
> Acked-by: Alex Elder 
> Signed-off-by: Arnd Bergmann 
> ---
> Changes in v2:
> - fix the iommu dependencies

Hey Arnd,
   Thanks again so much for working out these details. Also my
apologies, as Bjorn asked for me to test this patch, but I wasn't able
to get to it before it landed.  Unfortunately I've hit an issue that
is keeping the db845c from booting with this.

> diff --git a/drivers/iommu/arm/arm-smmu/Makefile 
> b/drivers/iommu/arm/arm-smmu/Makefile
> index e240a7bcf310..b0cc01aa20c9 100644
> --- a/drivers/iommu/arm/arm-smmu/Makefile
> +++ b/drivers/iommu/arm/arm-smmu/Makefile
> @@ -1,4 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0
>  obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
>  obj-$(CONFIG_ARM_SMMU) += arm_smmu.o
> -arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o arm-smmu-qcom.o
> +arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o
> +arm_smmu-$(CONFIG_ARM_SMMU_QCOM) += arm-smmu-qcom.o
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c 
> b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
> index 9f465e146799..2c25cce38060 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
> @@ -215,7 +215,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct 
> arm_smmu_device *smmu)
> of_device_is_compatible(np, "nvidia,tegra186-smmu"))
> return nvidia_smmu_impl_init(smmu);
>
> -   smmu = qcom_smmu_impl_init(smmu);
> +   if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM))
> +   smmu = qcom_smmu_impl_init(smmu);
>
> if (of_device_is_compatible(np, "marvell,ap806-smmu-500"))
> smmu->impl = &mrvl_mmu500_impl;


The problem with these two chunks is that there is currently no
CONFIG_ARM_SMMU_QCOM option. :)

Was that something you intended to add in the patch?

I'm working up a Kconfig patch to do so, so I'll send that out in a
second here, but let me know if you already have that somewhere (I
suspect you implemented it and just forgot to add the change to the
commit), as I'm sure your Kconfig help text will be better than mine.
:)

Again, I'm so sorry I didn't get over to testing your patch before
seeing this here!

thanks
-john
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC 11/20] iommu/iommufd: Add IOMMU_IOASID_ALLOC/FREE

2021-10-11 Thread Jason Gunthorpe via iommu
On Mon, Oct 11, 2021 at 09:49:57AM +0100, Jean-Philippe Brucker wrote:

> Seems like we don't need the negotiation part?  The host kernel
> communicates available IOVA ranges to userspace including holes (patch
> 17), and userspace can check that the ranges it needs are within the IOVA
> space boundaries. That part is necessary for DPDK as well since it needs
> to know about holes in the IOVA space where DMA wouldn't work as expected
> (MSI doorbells for example). 

I haven't looked super closely at DPDK, but the other simple VFIO app
I am aware of struggled to properly implement this semantic (Indeed it
wasn't even clear to the author this was even needed).

It requires interval tree logic inside the application which is not a
trivial algorithm to implement in C.

I do wonder if the "simple" interface should have an option more like
the DMA API where userspace just asks to DMA map some user memory and
gets back the dma_addr_t to use. Kernel manages the allocation
space/etc.

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/5] iommu/tegra-smmu: Support managed domains

2021-10-11 Thread Dmitry Osipenko
23.04.2021 19:32, Thierry Reding пишет:
> From: Navneet Kumar 
> 
> Allow creating identity and DMA API compatible IOMMU domains. When
> creating a DMA API compatible domain, make sure to also create the
> required cookie.

IOMMU_DOMAIN_DMA should be a disaster. It shouldn't work without
preparing DRM and VDE drivers at first. We discussed this briefly in the
past.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [RFC 11/20] iommu/iommufd: Add IOMMU_IOASID_ALLOC/FREE

2021-10-11 Thread Jason Gunthorpe via iommu
On Mon, Oct 11, 2021 at 05:02:01PM +1100, David Gibson wrote:

> > This means we cannot define an input that has a magic HW specific
> > value.
> 
> I'm not entirely sure what you mean by that.

I mean if you make a general property 'foo' that userspace must
specify correctly then your API isn't general anymore. Userspace must
know if it is A or B HW to set foo=A or foo=B.

Supported IOVA ranges are easially like that as every IOMMU is
different. So DPDK shouldn't provide such specific or binding
information.

> No, I don't think that needs to be a condition.  I think it's
> perfectly reasonable for a constraint to be given, and for the host
> IOMMU to just say "no, I can't do that".  But that does mean that each
> of these values has to have an explicit way of userspace specifying "I
> don't care", so that the kernel will select a suitable value for those
> instead - that's what DPDK or other userspace would use nearly all the
> time.

My feeling is that qemu should be dealing with the host != target
case, not the kernel.

The kernel's job should be to expose the IOMMU HW it has, with all
features accessible, to userspace.

Qemu's job should be to have a userspace driver for each kernel IOMMU
and the internal infrastructure to make accelerated emulations for all
supported target IOMMUs.

In other words, it is not the kernel's job to provide target IOMMU
emulation.

The kernel should provide truely generic "works everywhere" interface
that qemu/etc can rely on to implement the least accelerated emulation
path.

So when I see proposals to have "generic" interfaces that actually
require very HW specific setup, and cannot be used by a generic qemu
userpace driver, I think it breaks this model. If qemu needs to know
it is on PPC (as it does today with VFIO's PPC specific API) then it
may as well speak PPC specific language and forget about pretending to
be generic.

This approach is grounded in 15 years of trying to build these
user/kernel split HW subsystems (particularly RDMA) where it has
become painfully obvious that the kernel is the worst place to try and
wrangle really divergent HW into a "common" uAPI.

This is because the kernel/user boundary is fixed. Introducing
anything generic here requires a lot of time, thought, arguing and
risk. Usually it ends up being done wrong (like the PPC specific
ioctls, for instance) and when this happens we can't learn and adapt,
we are stuck with stable uABI forever.

Exposing a device's native programming interface is much simpler. Each
device is fixed, defined and someone can sit down and figure out how
to expose it. Then that is it, it doesn't need revisiting, it doesn't
need harmonizing with a future slightly different device, it just
stays as is.

The cost, is that there must be a userspace driver component for each
HW piece - which we are already paying here!

> Ideally the host /dev/iommu will say "ok!", since both those ranges
> are within the 0..2^60 translated range of the host IOMMU, and don't
> touch the IO hole.  When the guest calls the IO mapping hypercalls,
> qemu translates those into DMA_MAP operations, and since they're all
> within the previously verified windows, they should work fine.

For instance, we are going to see HW with nested page tables, user
space owned page tables and even kernel-bypass fast IOTLB
invalidation.

In that world does it even make sense for qmeu to use slow DMA_MAP
ioctls for emulation?

A userspace framework in qemu can make these optimizations and is
also necessarily HW specific as the host page table is HW specific..

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu: intel: remove flooding of non-error logs, when new-DMA-PTE is the same as old-DMA-PTE.

2021-10-11 Thread Ajay Garg
Thanks Alex for your time.

I think I may have found the issue. Right now, when doing a
dma-unmapping, we do a "soft-unmapping" only, as the pte-values
themselves are not cleared in the unlinked pagetable-frame.

I have made the (simple) changes, and things are looking good as of
now (almost an hour now).
However, this time I will give it a day ;)

If there is not a single-flooding observed in the next 24 hours, I
would float the v2 patch for review.


Thanks again for your time and patience.


Thanks and Regards,
Ajay


>
> Even this QEMU explanation doesn't make a lot of sense, vfio tracks
> userspace mappings and will return an -EEXIST error for duplicate or
> overlapping IOVA entries.  We expect to have an entirely empty IOMMU
> domain when a device is assigned, but it seems the only way userspace
> can trigger duplicate PTEs would be if mappings already exist, or we
> have a bug somewhere.
>
> If the most recent instance is purely on bare metal, then it seems the
> host itself has conflicting mappings.  I can only speculate with the
> limited data presented, but I'm suspicious there's something happening
> with RMRRs here (but that should also entirely preclude assignment).
> dmesg, lspci -vvv, and VM configuration would be useful.  Thanks,
>
> Alex
>
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC 11/20] iommu/iommufd: Add IOMMU_IOASID_ALLOC/FREE

2021-10-11 Thread Jason Gunthorpe via iommu
On Mon, Oct 11, 2021 at 04:37:38PM +1100, da...@gibson.dropbear.id.au wrote:
> > PASID support will already require that a device can be multi-bound to
> > many IOAS's, couldn't PPC do the same with the windows?
> 
> I don't see how that would make sense.  The device has no awareness of
> multiple windows the way it does of PASIDs.  It just sends
> transactions over the bus with the IOVAs it's told.  If those IOVAs
> lie within one of the windows, the IOMMU picks them up and translates
> them.  If they don't, it doesn't.

To my mind that address centric routing is awareness.

If the HW can attach multiple non-overlapping IOAS's to the same
device then the HW is routing to the correct IOAS by using the address
bits. This is not much different from the prior discussion we had
where we were thinking of the PASID as an 80 bit address

The fact the PPC HW actually has multiple page table roots and those
roots even have different page tables layouts while still connected to
the same device suggests this is not even an unnatural modelling
approach...

Jason  


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH v7 9/9] iommu/dma: Reserve any RMR regions associated with a dev

2021-10-11 Thread Shameerali Kolothum Thodi



> -Original Message-
> From: Robin Murphy [mailto:robin.mur...@arm.com]
> Sent: 11 October 2021 16:01
> To: Jon Nettleton 
> Cc: Shameerali Kolothum Thodi ;
> linux-arm-kernel ; ACPI Devel Maling
> List ; Linux IOMMU
> ; Linuxarm ;
> Lorenzo Pieralisi ; Joerg Roedel
> ; Will Deacon ; wanghuiqiang
> ; Guohanjun (Hanjun Guo)
> ; Steven Price ; Sami
> Mujawar ; Eric Auger ;
> yangyicong 
> Subject: Re: [PATCH v7 9/9] iommu/dma: Reserve any RMR regions associated
> with a dev
> 
> On 2021-10-09 08:07, Jon Nettleton wrote:
> > On Fri, Oct 8, 2021 at 3:10 PM Robin Murphy 
> wrote:
> >>
> >> On 2021-08-05 09:07, Shameer Kolothum wrote:
> >>> Get ACPI IORT RMR regions associated with a dev reserved
> >>> so that there is a unity mapping for them in SMMU.
> >>
> >> This feels like most of it belongs in the IORT code rather than
> >> iommu-dma (which should save the temporary list copy as well).
> >
> > See previous comment.  The original intent was for device-tree to also
> > be able to use these mechanisms to create RMR's and support them
> > in the SMMU.
> 
> Can you clarify how code behind an "if (!is_of_node(...))" check
> alongside other IORT-specific code is expected to be useful for DT?
> 
> Yes, iommu_dma_get_resv_regions() itself wants to end up serving as an
> abstraction layer, but that still doesn't mean it has to do much more
> than dispatch into firmware-specific backends as appropriate.

(Resending as I accidently replied earlier from our internal ML id. Sorry)

The way I thought about is as below,

1.  iommu_dma_get_resv_regions() will invoke the common 
iommu_dma_get_rmr_resv_regions().
Yes, the if (!is_of_node(...)) is not required here.
2.  iommu_dma_get_rmr_resv_regions() calls iommu_dma_get_rmrs().
iommu_dma_get_rmrs() has the  (!is_of_node(...)) check to call into IORT or 
DT specific functions
to retrieve the RMR reserve regions associated with a given iommu_fwnode.
3.  The common iommu_dma_get_rmr_resv_regions() further checks for PCI host 
preserve_config
and whether the returned RMR list actually has any dev specific region to 
reserve or not.

So the only firmware specific backend is handled inside the 
iommu_dma_get_rmrs() and that is also called
from the SMMU driver probe to install bypass SIDs.

Anyway, if the eventual DT implementation or further IORT spec changes makes 
this abstraction
irrelevant I am Ok to move this into the IORT code.

Thanks,
Shameer
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 9/9] iommu/dma: Reserve any RMR regions associated with a dev

2021-10-11 Thread Robin Murphy

On 2021-10-09 08:07, Jon Nettleton wrote:

On Fri, Oct 8, 2021 at 3:10 PM Robin Murphy  wrote:


On 2021-08-05 09:07, Shameer Kolothum wrote:

Get ACPI IORT RMR regions associated with a dev reserved
so that there is a unity mapping for them in SMMU.


This feels like most of it belongs in the IORT code rather than
iommu-dma (which should save the temporary list copy as well).


See previous comment.  The original intent was for device-tree to also
be able to use these mechanisms to create RMR's and support them
in the SMMU.


Can you clarify how code behind an "if (!is_of_node(...))" check 
alongside other IORT-specific code is expected to be useful for DT?


Yes, iommu_dma_get_resv_regions() itself wants to end up serving as an 
abstraction layer, but that still doesn't mean it has to do much more 
than dispatch into firmware-specific backends as appropriate.


Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu: intel: remove flooding of non-error logs, when new-DMA-PTE is the same as old-DMA-PTE.

2021-10-11 Thread Alex Williamson
On Mon, 11 Oct 2021 11:49:33 +0530
Ajay Garg  wrote:

> The flooding was seen today again, after I booted the host-machine in
> the morning.
> Need to look what the heck is going on ...
> 
> On Sun, Oct 10, 2021 at 11:45 AM Ajay Garg  wrote:
> >  
> > > I'll try and backtrack to the userspace process that is sending these 
> > > ioctls.
> > >  
> >
> > The userspace process is qemu.
> >
> > I compiled qemu from latest source, installed via "sudo make install"
> > on host-machine, rebooted the host-machine, and booted up the
> > guest-machine on the host-machine. Now, no kernel-flooding is seen on
> > the host-machine.
> >
> > For me, the issue is thus closed-invalid; admins may take the
> > necessary action to officially mark ;)

Even this QEMU explanation doesn't make a lot of sense, vfio tracks
userspace mappings and will return an -EEXIST error for duplicate or
overlapping IOVA entries.  We expect to have an entirely empty IOMMU
domain when a device is assigned, but it seems the only way userspace
can trigger duplicate PTEs would be if mappings already exist, or we
have a bug somewhere.

If the most recent instance is purely on bare metal, then it seems the
host itself has conflicting mappings.  I can only speculate with the
limited data presented, but I'm suspicious there's something happening
with RMRRs here (but that should also entirely preclude assignment).
dmesg, lspci -vvv, and VM configuration would be useful.  Thanks,

Alex

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 2/9] ACPI/IORT: Add support for RMR node parsing

2021-10-11 Thread Robin Murphy

On 2021-10-09 08:06, Jon Nettleton wrote:
[...]

+ if (rmr->flags & IOMMU_RMR_REMAP_PERMITTED) {
+ type = IOMMU_RESV_DIRECT_RELAXABLE;
+ /*
+  * Set IOMMU_CACHE as IOMMU_RESV_DIRECT_RELAXABLE is
+  * normally used for allocated system memory that is
+  * then used for device specific reserved regions.
+  */
+ prot |= IOMMU_CACHE;
+ } else {
+ type = IOMMU_RESV_DIRECT;
+ /*
+  * Set IOMMU_MMIO as IOMMU_RESV_DIRECT is normally used
+  * for device memory like MSI doorbell.
+  */
+ prot |= IOMMU_MMIO;
+ }


I'm not sure we ever got a definitive answer to this - does DPAA2
actually go wrong if we use IOMMU_MMIO here? I'd still much prefer to
make the fewest possible assumptions, since at this point it's basically
just a stop-gap until we can fix the spec. It's become clear that we
can't reliably rely on guessing attributes, so I'm not too fussed about
theoretical cases that currently don't work (due to complete lack of RMR
support) continuing to not work for the moment, as long as we can make
the real-world cases we actually have work at all. Anything which only
affects performance I'd rather leave until firmware can tell us what to do.


Well it isn't DPAA2, it is FSL_MC_BUS that fails with IOMMU_MMIO
mappings.  DPAA2 is just one connected device.


Apologies if I'm being overly loose with terminology there - my point of 
reference for this hardware is documentation for the old LS2080A, where 
the "DPAA2 Reference Manual" gives a strong impression that the MC is a 
component belonging to the overall DPAA2 architecture. Either way it 
technically stands to reason that the other DPAA2 components would only 
be usable if the MC itself works (unless I've been holding a major 
misconception about that for years as well).


In the context of this discussion, please consider any reference I may 
make to bits of NXP's hardware to be shorthand for "the thing for which 
NXP have a vested interest in IORT RMRs".


Thanks,
Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 1/9] iommu: Introduce a union to struct iommu_resv_region

2021-10-11 Thread Robin Murphy

On 2021-10-11 06:47, Shameerali Kolothum Thodi wrote:




-Original Message-
From: Jon Nettleton [mailto:j...@solid-run.com]
Sent: 09 October 2021 07:58
To: Robin Murphy 
Cc: Shameerali Kolothum Thodi ;
linux-arm-kernel ; ACPI Devel Maling
List ; Linux IOMMU
; Linuxarm ;
Steven Price ; Guohanjun (Hanjun Guo)
; yangyicong ; Sami
Mujawar ; Will Deacon ;
wanghuiqiang 
Subject: Re: [PATCH v7 1/9] iommu: Introduce a union to struct
iommu_resv_region

On Fri, Oct 8, 2021 at 2:14 PM Robin Murphy 
wrote:


On 2021-08-05 09:07, Shameer Kolothum wrote:

A union is introduced to struct iommu_resv_region to hold any
firmware specific data. This is in preparation to add support for
IORT RMR reserve regions and the union now holds the RMR specific
information.

Signed-off-by: Shameer Kolothum

---
   include/linux/iommu.h | 11 +++
   1 file changed, 11 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
32d448050bf7..bd0e4641c569 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -114,6 +114,13 @@ enum iommu_resv_type {
   IOMMU_RESV_SW_MSI,
   };

+struct iommu_iort_rmr_data {
+#define IOMMU_RMR_REMAP_PERMITTED(1 << 0)
+ u32 flags;
+ u32 sid;/* Stream Id associated with RMR entry */
+ void *smmu; /* Associated IORT SMMU node pointer */
+};


Do we really need to duplicate all this data? AFAICS we could just
save the acpi_iort_rmr pointer in the iommu_resv_region (with a
forward declaration here if necessary) and defer parsing its actual
mappings until the point where we can directly consume the results.


 From earlier discussions on this patchset, the original goal was also for
device-tree mechanisms to be able to hook into this code to support similar
RMR's and SMMU initialization, just not through the ACPI / IORT path.


Yes. IIRC, there were some earlier attempts to have DT support for reserved 
regions
and there was a suggestion to provide generic interfaces so that when DT 
solution
comes up it is easier to add the support.


OK, but in that case why is every single part of it IORT-specific in 
either name, description or function?


Regardless, s/acpi_iort_rmr/original firmware descriptor of whatever 
variety/ and my comment still stands. If a firmware-specific structure 
is still going to exist to begin with, then what do we gain from 
interpreting details earlier than needed and wasting memory storing 
copies of them? This isn't something we're looking up hundreds of times 
per second and need to cache in some more efficient format.


Furthermore, it seems unlikely that the eventual DT solution would end 
up being semantically identical to IORT RMRs, so there's every 
possibility that the One True Abstract Structure would need changing to 
work for another firmware implementation anyway. Heck, it might not even 
fit future IORT if it becomes permissible for multiple StreamIDs to 
share a single RMR descriptor.


Thanks,
Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 04/12] iommu/mediatek: Add device_link between the consumer and the larb devices

2021-10-11 Thread Dafna Hirschfeld




On 29.09.21 03:37, Yong Wu wrote:

MediaTek IOMMU-SMI diagram is like below. all the consumer connect with
smi-larb, then connect with smi-common.

 M4U
  |
 smi-common
  |
   -
   | |...
   | |
larb1 larb2
   | |
vdec   venc

When the consumer works, it should enable the smi-larb's power which
also need enable the smi-common's power firstly.

Thus, First of all, use the device link connect the consumer and the
smi-larbs. then add device link between the smi-larb and smi-common.

This patch adds device_link between the consumer and the larbs.

When device_link_add, I add the flag DL_FLAG_STATELESS to avoid calling
pm_runtime_xx to keep the original status of clocks. It can avoid two
issues:
1) Display HW show fastlogo abnormally reported in [1]. At the beggining,
all the clocks are enabled before entering kernel, but the clocks for
display HW(always in larb0) will be gated after clk_enable and clk_disable
called from device_link_add(->pm_runtime_resume) and rpm_idle. The clock
operation happened before display driver probe. At that time, the display
HW will be abnormal.

2) A deadlock issue reported in [2]. Use DL_FLAG_STATELESS to skip
pm_runtime_xx to avoid the deadlock.

Corresponding, DL_FLAG_AUTOREMOVE_CONSUMER can't be added, then
device_link_removed should be added explicitly.

[1] https://lore.kernel.org/linux-mediatek/1564213888.22908.4.camel@mhfsdcap03/
[2] https://lore.kernel.org/patchwork/patch/1086569/

Suggested-by: Tomasz Figa 
Signed-off-by: Yong Wu 
Tested-by: Frank Wunderlich  # BPI-R2/MT7623
---
  drivers/iommu/mtk_iommu.c| 22 ++
  drivers/iommu/mtk_iommu_v1.c | 20 +++-
  2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index d5848f78a677..a2fa55899434 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -560,22 +560,44 @@ static struct iommu_device *mtk_iommu_probe_device(struct 
device *dev)
  {
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
+   struct device_link *link;
+   struct device *larbdev;
+   unsigned int larbid;
  
  	if (!fwspec || fwspec->ops != &mtk_iommu_ops)

return ERR_PTR(-ENODEV); /* Not a iommu client device */
  
  	data = dev_iommu_priv_get(dev);
  
+	/*

+* Link the consumer device with the smi-larb device(supplier)
+* The device in each a larb is a independent HW. thus only link
+* one larb here.
+*/
+   larbid = MTK_M4U_TO_LARB(fwspec->ids[0]);


so larbid is always the same for all the ids of a device? If so
maybe it worth testing it and return error if this is not the case.

Thanks,
Dafna


+   larbdev = data->larb_imu[larbid].dev;
+   link = device_link_add(dev, larbdev,
+  DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
+   if (!link)
+   dev_err(dev, "Unable to link %s\n", dev_name(larbdev));
return &data->iommu;
  }
  
  static void mtk_iommu_release_device(struct device *dev)

  {
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+   struct mtk_iommu_data *data;
+   struct device *larbdev;
+   unsigned int larbid;
  
  	if (!fwspec || fwspec->ops != &mtk_iommu_ops)

return;
  
+	data = dev_iommu_priv_get(dev);

+   larbid = MTK_M4U_TO_LARB(fwspec->ids[0]);
+   larbdev = data->larb_imu[larbid].dev;
+   device_link_remove(dev, larbdev);
+
iommu_fwspec_free(dev);
  }
  
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c

index 4d7809432239..e6f13459470e 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -423,7 +423,9 @@ static struct iommu_device *mtk_iommu_probe_device(struct 
device *dev)
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct of_phandle_args iommu_spec;
struct mtk_iommu_data *data;
-   int err, idx = 0;
+   int err, idx = 0, larbid;
+   struct device_link *link;
+   struct device *larbdev;
  
  	/*

 * In the deferred case, free the existed fwspec.
@@ -453,6 +455,14 @@ static struct iommu_device *mtk_iommu_probe_device(struct 
device *dev)
  
  	data = dev_iommu_priv_get(dev);
  
+	/* Link the consumer device with the smi-larb device(supplier) */

+   larbid = mt2701_m4u_to_larb(fwspec->ids[0]);
+   larbdev = data->larb_imu[larbid].dev;
+   link = device_link_add(dev, larbdev,
+  DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
+   if (!link)
+   dev_err(dev, "Unable to link %s\n", dev_name(larbdev));
+
return &data->iommu;
  }
  
@@ -473,10 +483,18 @@ static void mtk_iommu_probe_finalize(struct device *dev)

  static void mtk_iommu_release_device(struct device *dev)
  {
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+  

Re: [PATCH v2 2/2] qcom_scm: hide Kconfig symbol

2021-10-11 Thread Hans Verkuil
On 07/10/2021 17:10, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> Now that SCM can be a loadable module, we have to add another
> dependency to avoid link failures when ipa or adreno-gpu are
> built-in:
> 
> aarch64-linux-ld: drivers/net/ipa/ipa_main.o: in function `ipa_probe':
> ipa_main.c:(.text+0xfc4): undefined reference to `qcom_scm_is_available'
> 
> ld.lld: error: undefined symbol: qcom_scm_is_available
 referenced by adreno_gpu.c
   gpu/drm/msm/adreno/adreno_gpu.o:(adreno_zap_shader_load) in 
 archive drivers/built-in.a
> 
> This can happen when CONFIG_ARCH_QCOM is disabled and we don't select
> QCOM_MDT_LOADER, but some other module selects QCOM_SCM. Ideally we'd
> use a similar dependency here to what we have for QCOM_RPROC_COMMON,
> but that causes dependency loops from other things selecting QCOM_SCM.
> 
> This appears to be an endless problem, so try something different this
> time:
> 
>  - CONFIG_QCOM_SCM becomes a hidden symbol that nothing 'depends on'
>but that is simply selected by all of its users
> 
>  - All the stubs in include/linux/qcom_scm.h can go away
> 
>  - arm-smccc.h needs to provide a stub for __arm_smccc_smc() to
>allow compile-testing QCOM_SCM on all architectures.
> 
>  - To avoid a circular dependency chain involving RESET_CONTROLLER
>and PINCTRL_SUNXI, drop the 'select RESET_CONTROLLER' statement.
>According to my testing this still builds fine, and the QCOM
>platform selects this symbol already.

Acked-by: Hans Verkuil 

Thanks,

Hans

> 
> Acked-by: Kalle Valo 
> Acked-by: Alex Elder 
> Signed-off-by: Arnd Bergmann 
> ---
> Changes in v2:
> - fix the iommu dependencies
> 
> I've queued this version as a bugfix along with patch 1/2
> in my asm-generic tree.
> 
>  drivers/firmware/Kconfig   |  5 +-
>  drivers/gpu/drm/msm/Kconfig|  4 +-
>  drivers/iommu/Kconfig  |  3 +-
>  drivers/iommu/arm/arm-smmu/Makefile|  3 +-
>  drivers/iommu/arm/arm-smmu/arm-smmu-impl.c |  3 +-
>  drivers/media/platform/Kconfig |  2 +-
>  drivers/mmc/host/Kconfig   |  2 +-
>  drivers/net/ipa/Kconfig|  1 +
>  drivers/net/wireless/ath/ath10k/Kconfig|  2 +-
>  drivers/pinctrl/qcom/Kconfig   |  3 +-
>  include/linux/arm-smccc.h  | 10 +++
>  include/linux/qcom_scm.h   | 71 --
>  12 files changed, 24 insertions(+), 85 deletions(-)
> 
> diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
> index 220a58cf0a44..cda7d7162cbb 100644
> --- a/drivers/firmware/Kconfig
> +++ b/drivers/firmware/Kconfig
> @@ -203,10 +203,7 @@ config INTEL_STRATIX10_RSU
> Say Y here if you want Intel RSU support.
>  
>  config QCOM_SCM
> - tristate "Qcom SCM driver"
> - depends on ARM || ARM64
> - depends on HAVE_ARM_SMCCC
> - select RESET_CONTROLLER
> + tristate
>  
>  config QCOM_SCM_DOWNLOAD_MODE_DEFAULT
>   bool "Qualcomm download mode enabled by default"
> diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
> index e9c6af78b1d7..3ddf739a6f9b 100644
> --- a/drivers/gpu/drm/msm/Kconfig
> +++ b/drivers/gpu/drm/msm/Kconfig
> @@ -17,7 +17,7 @@ config DRM_MSM
>   select DRM_SCHED
>   select SHMEM
>   select TMPFS
> - select QCOM_SCM if ARCH_QCOM
> + select QCOM_SCM
>   select WANT_DEV_COREDUMP
>   select SND_SOC_HDMI_CODEC if SND_SOC
>   select SYNC_FILE
> @@ -55,7 +55,7 @@ config DRM_MSM_GPU_SUDO
>  
>  config DRM_MSM_HDMI_HDCP
>   bool "Enable HDMI HDCP support in MSM DRM driver"
> - depends on DRM_MSM && QCOM_SCM
> + depends on DRM_MSM
>   default y
>   help
> Choose this option to enable HDCP state machine
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 124c41adeca1..c5c71b7ab7e8 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -308,7 +308,6 @@ config APPLE_DART
>  config ARM_SMMU
>   tristate "ARM Ltd. System MMU (SMMU) Support"
>   depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)
> - depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y
>   select IOMMU_API
>   select IOMMU_IO_PGTABLE_LPAE
>   select ARM_DMA_USE_IOMMU if ARM
> @@ -438,7 +437,7 @@ config QCOM_IOMMU
>   # Note: iommu drivers cannot (yet?) be built as modules
>   bool "Qualcomm IOMMU Support"
>   depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
> - depends on QCOM_SCM=y
> + select QCOM_SCM
>   select IOMMU_API
>   select IOMMU_IO_PGTABLE_LPAE
>   select ARM_DMA_USE_IOMMU
> diff --git a/drivers/iommu/arm/arm-smmu/Makefile 
> b/drivers/iommu/arm/arm-smmu/Makefile
> index e240a7bcf310..b0cc01aa20c9 100644
> --- a/drivers/iommu/arm/arm-smmu/Makefile
> +++ b/drivers/iommu/arm/arm-smmu/Makefile
> @@ -1,4 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0
>  obj-$(CONFIG_QCOM_IOMMU) += qcom_io

Re: DPAA2 triggers, [PATCH] dma debug: report -EEXIST errors in add_dma_entry

2021-10-11 Thread Christoph Hellwig
On Thu, Oct 07, 2021 at 12:59:32PM +0200, Karsten Graul wrote:
> In our case its really that a buffer is mapped twice for 2 different devices 
> which we use in SMC to provide failover capabilities. We see that -EEXIST is 
> returned when a buffer is mapped for the second device. Since there is a 
> maximum of 2 parallel mappings we never see the warning shown by 
> active_cacheline_inc_overlap() because we don't exceed 
> ACTIVE_CACHELINE_MAX_OVERLAP.

Mapping something twice is possible, but needs special care.
Basically one device always needs to do the first mapping and the other
one needs to use DMA_ATTR_SKIP_CPU_SYNC to opt out of the coherency
protocol.  So we have two TODO items here: 1) the driver needs to use the
above scheme and 2) this dma-debug check needs to understand
DMA_ATTR_SKIP_CPU_SYNC.  Can I trick you into doing both?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] dma-debug: fix sg checks in debug_dma_map_sg()

2021-10-11 Thread Christoph Hellwig
Thanks,

applied.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] dma-mapping: fix the kerneldoc for dma_map_sgtable()

2021-10-11 Thread Christoph Hellwig
Thanks,

applied.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu: fix ARM_SMMU vs QCOM_SCM compilation

2021-10-11 Thread Arnd Bergmann
On Mon, Oct 11, 2021 at 11:10 AM Dmitry Baryshkov
 wrote:
>
> On Mon, 11 Oct 2021 at 09:09, Arnd Bergmann  wrote:
> >
> > On Mon, Oct 11, 2021 at 6:11 AM Dmitry Baryshkov
> >  wrote:
> > > On Sun, 10 Oct 2021 at 20:42, Arnd Bergmann  wrote:
> > >
> > > The patch seems correct, but it becomes overcomplicated. What about:
> > > - restoring QCOM_SCM stubs
> >
> > The stubs are what has led to the previous bugs in this area to often
> > go unnoticed for too long, as illustrated by your suggestion
> >
> > > - making ARM_SMMU select QCOM_SCM if ARM_SMMU_QCOM
> >
> > I assume you meant "select QCOM_SCM if ARCH_QCOM",
> > after we stop using ARM_SMMU_QCOM?
> >
> > > This would have almost the same result as with your patch, but without
> > > extra ARM_SMMU_QCOM Kconfig symbol.
> >
> > The "almost" is the problem: consider the case of
> >
> > CONFIG_ARM=y
> > CONFIG_COMPILE_TEST=y
> > CONFIG_ARCH_QCOM=n
> > CONFIG_ARM_SMMU=y
> > CONFIG_DRM_MSM=m
> > CONFIG_QCOM_SCM=m (selected by DRM_MSM)
> >
> > The stubs here lead to ARM_SMMU linking against the QCOM_SCM
> > driver from built-in code, which fails because QCOM_SCM itself
> > is a loadable module.
>
> I see. The idealist in me wishes to change my suggestion to
> 'select QCOM_SCM if ARCH_QCOM || COMPILE_TEST'
> but I have the subtle feeling that this also might fail somehow.

I think that would actually work, but it has the nasty side-effect
that simply flipping 'CONFIG_COMPILE_TEST' changes what
the kernel does, rather than just hiding or unhiding additional
options.

> > We can move the "select QCOM_SCM" in the ARM_SMMU_QCOM
> > symbol if we make that a tristate though, if you want to separate it
> > a little more.
>
> This would complicate things a bit, as we would no longer be able to
> use 'arm-smmu-$(CONFIG_ARM_SMMU_QCOM) +=' construct.

I'm fairly sure we could still use that, Kbuild is smart enough
to include both 'file-m +=' and 'file-y += ' in 'file.ko', see
scripts/Makefile.lib:

# If $(foo-objs), $(foo-y), $(foo-m), or $(foo-) exists, foo.o is a
composite object
multi-obj-y := $(call multi-search, $(obj-y), .o, -objs -y)
multi-obj-m := $(call multi-search, $(obj-m), .o, -objs -y -m)
multi-obj-ym := $(multi-obj-y) $(multi-obj-m)

# Replace multi-part objects by their individual parts,
# including built-in.a from subdirectories
real-obj-y := $(call real-search, $(obj-y), .o, -objs -y)
real-obj-m := $(call real-search, $(obj-m), .o, -objs -y -m)

What doesn't work is having a built-in driver in a directory that is
guarded with a =m symbol, or including a =m object into a =y
module.

Arnd
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH RFC v1 03/11] iommu/virtio: Handle incoming page faults

2021-10-11 Thread Vivek Kumar Gautam

Hi Jean,


On 10/11/21 2:46 PM, Jean-Philippe Brucker wrote:

Hi Vivek,

On Mon, Oct 11, 2021 at 01:41:15PM +0530, Vivek Gautam wrote:

+ list_for_each_entry(ep, &viommu->endpoints, list) {
+ if (ep->eid == endpoint) {
+ vdev = ep->vdev;


I have a question here though -
Is endpoint-ID unique across all the endpoints available per 'viommu_dev' or
per 'viommu_domain'?
If it is per 'viommu_domain' then the above list is also incorrect.
As you pointed to in the patch [1] -
[PATCH RFC v1 02/11] iommu/virtio: Maintain a list of endpoints served
by viommu_dev
I am planning to add endpoint ID into a static global xarray in
viommu_probe_device() as below:

 vdev_for_each_id(i, eid, vdev) {
 ret = xa_insert(&viommu_ep_ids, eid, vdev, GFP_KERNEL);
 if (ret)
 goto err_free_dev;
 }

and replace the above list traversal as below:

 xa_lock_irqsave(&viommu_ep_ids, flags);
 xa_for_each(&viommu_ep_ids, eid, vdev) {
 if (eid == endpoint) {
 ret =
iommu_report_device_fault(vdev->dev, &fault_evt);
 if (ret)
 dev_err(vdev->dev, "Couldn't
handle page request\n");
 }
 }
 xa_unlock_irqrestore(&viommu_ep_ids, flags);

But using a global xarray would also be incorrect if the endpointsID are global
across 'viommu_domain'.

I need to find the correct 'viommu_endpoint' to call iommu_report_device_fault()
with the correct device.


The endpoint IDs are only unique across viommu_dev, so a global xarray
wouldn't work but one in viommu_dev would. In vdomain it doesn't work
either because we can't get to the domain from the fault handler without
first finding the endpoint


Thanks. That's easy then. Will have a xarray in viommu_dev and iterate 
over it from the fault handler.


Best regards
Vivek
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH RFC v1 03/11] iommu/virtio: Handle incoming page faults

2021-10-11 Thread Jean-Philippe Brucker
Hi Vivek,

On Mon, Oct 11, 2021 at 01:41:15PM +0530, Vivek Gautam wrote:
> > > + list_for_each_entry(ep, &viommu->endpoints, list) {
> > > + if (ep->eid == endpoint) {
> > > + vdev = ep->vdev;
> 
> I have a question here though -
> Is endpoint-ID unique across all the endpoints available per 'viommu_dev' or
> per 'viommu_domain'?
> If it is per 'viommu_domain' then the above list is also incorrect.
> As you pointed to in the patch [1] -
> [PATCH RFC v1 02/11] iommu/virtio: Maintain a list of endpoints served
> by viommu_dev
> I am planning to add endpoint ID into a static global xarray in
> viommu_probe_device() as below:
> 
> vdev_for_each_id(i, eid, vdev) {
> ret = xa_insert(&viommu_ep_ids, eid, vdev, GFP_KERNEL);
> if (ret)
> goto err_free_dev;
> }
> 
> and replace the above list traversal as below:
> 
> xa_lock_irqsave(&viommu_ep_ids, flags);
> xa_for_each(&viommu_ep_ids, eid, vdev) {
> if (eid == endpoint) {
> ret =
> iommu_report_device_fault(vdev->dev, &fault_evt);
> if (ret)
> dev_err(vdev->dev, "Couldn't
> handle page request\n");
> }
> }
> xa_unlock_irqrestore(&viommu_ep_ids, flags);
> 
> But using a global xarray would also be incorrect if the endpointsID are 
> global
> across 'viommu_domain'.
> 
> I need to find the correct 'viommu_endpoint' to call 
> iommu_report_device_fault()
> with the correct device.

The endpoint IDs are only unique across viommu_dev, so a global xarray
wouldn't work but one in viommu_dev would. In vdomain it doesn't work
either because we can't get to the domain from the fault handler without
first finding the endpoint

Thanks,
Jean

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu: fix ARM_SMMU vs QCOM_SCM compilation

2021-10-11 Thread Dmitry Baryshkov
On Mon, 11 Oct 2021 at 09:09, Arnd Bergmann  wrote:
>
> On Mon, Oct 11, 2021 at 6:11 AM Dmitry Baryshkov
>  wrote:
> > On Sun, 10 Oct 2021 at 20:42, Arnd Bergmann  wrote:
> >
> > The patch seems correct, but it becomes overcomplicated. What about:
> > - restoring QCOM_SCM stubs
>
> The stubs are what has led to the previous bugs in this area to often
> go unnoticed for too long, as illustrated by your suggestion
>
> > - making ARM_SMMU select QCOM_SCM if ARM_SMMU_QCOM
>
> I assume you meant "select QCOM_SCM if ARCH_QCOM",
> after we stop using ARM_SMMU_QCOM?
>
> > This would have almost the same result as with your patch, but without
> > extra ARM_SMMU_QCOM Kconfig symbol.
>
> The "almost" is the problem: consider the case of
>
> CONFIG_ARM=y
> CONFIG_COMPILE_TEST=y
> CONFIG_ARCH_QCOM=n
> CONFIG_ARM_SMMU=y
> CONFIG_DRM_MSM=m
> CONFIG_QCOM_SCM=m (selected by DRM_MSM)
>
> The stubs here lead to ARM_SMMU linking against the QCOM_SCM
> driver from built-in code, which fails because QCOM_SCM itself
> is a loadable module.

I see. The idealist in me wishes to change my suggestion to
'select QCOM_SCM if ARCH_QCOM || COMPILE_TEST'
but I have the subtle feeling that this also might fail somehow.

>
> We can move the "select QCOM_SCM" in the ARM_SMMU_QCOM
> symbol if we make that a tristate though, if you want to separate it
> a little more.

This would complicate things a bit, as we would no longer be able to
use 'arm-smmu-$(CONFIG_ARM_SMMU_QCOM) +=' construct.

-- 
With best wishes
Dmitry
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC 11/20] iommu/iommufd: Add IOMMU_IOASID_ALLOC/FREE

2021-10-11 Thread Jean-Philippe Brucker
On Mon, Oct 11, 2021 at 05:02:01PM +1100, David Gibson wrote:
> qemu wants to emulate a PAPR vIOMMU, so it says (via interfaces yet to
> be determined) that it needs an IOAS where things can be mapped in the
> range 0..2GiB (for the 32-bit window) and 2^59..2^59+1TiB (for the
> 64-bit window).
> 
> Ideally the host /dev/iommu will say "ok!", since both those ranges
> are within the 0..2^60 translated range of the host IOMMU, and don't
> touch the IO hole.  When the guest calls the IO mapping hypercalls,
> qemu translates those into DMA_MAP operations, and since they're all
> within the previously verified windows, they should work fine.

Seems like we don't need the negotiation part?  The host kernel
communicates available IOVA ranges to userspace including holes (patch
17), and userspace can check that the ranges it needs are within the IOVA
space boundaries. That part is necessary for DPDK as well since it needs
to know about holes in the IOVA space where DMA wouldn't work as expected
(MSI doorbells for example). And there already is a negotiation happening,
when the host kernel rejects MAP ioctl outside the advertised area.

Thanks,
Jean

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH RFC v1 03/11] iommu/virtio: Handle incoming page faults

2021-10-11 Thread Vivek Gautam
Hi Jean,


On Tue, Sep 21, 2021 at 9:33 PM Jean-Philippe Brucker
 wrote:
>
> On Fri, Apr 23, 2021 at 03:21:39PM +0530, Vivek Gautam wrote:
> > Redirect the incoming page faults to the registered fault handler
> > that can take the fault information such as, pasid, page request
> > group-id, address and pasid flags.
> >
> > Signed-off-by: Vivek Gautam 
> > ---
> >  drivers/iommu/virtio-iommu.c  | 80 ++-
> >  include/uapi/linux/virtio_iommu.h |  1 +
> >  2 files changed, 80 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> > index c970f386f031..fd237cad1ce5 100644
> > --- a/drivers/iommu/virtio-iommu.c
> > +++ b/drivers/iommu/virtio-iommu.c
> > @@ -37,6 +37,13 @@
> >  /* Some architectures need an Address Space ID for each page table */
> >  DEFINE_XARRAY_ALLOC1(viommu_asid_xa);
> >
> > +struct viommu_dev_pri_work {
> > + struct work_struct  work;
> > + struct viommu_dev   *dev;
> > + struct virtio_iommu_fault   *vfault;
> > + u32 endpoint;
> > +};
> > +
> >  struct viommu_dev {
> >   struct iommu_device iommu;
> >   struct device   *dev;
> > @@ -49,6 +56,8 @@ struct viommu_dev {
> >   struct list_headrequests;
> >   void*evts;
> >   struct list_headendpoints;
> > + struct workqueue_struct *pri_wq;
> > + struct viommu_dev_pri_work  *pri_work;
>
> IOPF already has a workqueue, so the driver doesn't need one.
> iommu_report_device_fault() should be fast enough to be called from the
> event handler.

Sure, will call iommu_report_device_fault() directly from
viommu_fault_handler().

>
> >
> >   /* Device configuration */
> >   struct iommu_domain_geometrygeometry;
> > @@ -666,6 +675,58 @@ static int viommu_probe_endpoint(struct viommu_dev 
> > *viommu, struct device *dev)
> >   return ret;
> >  }
> >
> > +static void viommu_handle_ppr(struct work_struct *work)
> > +{
> > + struct viommu_dev_pri_work *pwork =
> > + container_of(work, struct 
> > viommu_dev_pri_work, work);
> > + struct viommu_dev *viommu = pwork->dev;
> > + struct virtio_iommu_fault *vfault = pwork->vfault;
> > + struct viommu_endpoint *vdev;
> > + struct viommu_ep_entry *ep;
> > + struct iommu_fault_event fault_evt = {
> > + .fault.type = IOMMU_FAULT_PAGE_REQ,
> > + };
> > + struct iommu_fault_page_request *prq = &fault_evt.fault.prm;
> > +
> > + u32 flags   = le32_to_cpu(vfault->flags);
> > + u32 prq_flags   = le32_to_cpu(vfault->pr_evt_flags);
> > + u32 endpoint= pwork->endpoint;
> > +
> > + memset(prq, 0, sizeof(struct iommu_fault_page_request));
>
> The fault_evt struct is already initialized

Right, I will remove this line.

>
> > + prq->addr = le64_to_cpu(vfault->address);
> > +
> > + if (prq_flags & VIRTIO_IOMMU_FAULT_PRQ_F_LAST_PAGE)
> > + prq->flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> > + if (prq_flags & VIRTIO_IOMMU_FAULT_PRQ_F_PASID_VALID) {
> > + prq->flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> > + prq->pasid = le32_to_cpu(vfault->pasid);
> > + prq->grpid = le32_to_cpu(vfault->grpid);
> > + }
> > +
> > + if (flags & VIRTIO_IOMMU_FAULT_F_READ)
> > + prq->perm |= IOMMU_FAULT_PERM_READ;
> > + if (flags & VIRTIO_IOMMU_FAULT_F_WRITE)
> > + prq->perm |= IOMMU_FAULT_PERM_WRITE;
> > + if (flags & VIRTIO_IOMMU_FAULT_F_EXEC)
> > + prq->perm |= IOMMU_FAULT_PERM_EXEC;
> > + if (flags & VIRTIO_IOMMU_FAULT_F_PRIV)
> > + prq->perm |= IOMMU_FAULT_PERM_PRIV;
> > +
> > + list_for_each_entry(ep, &viommu->endpoints, list) {
> > + if (ep->eid == endpoint) {
> > + vdev = ep->vdev;

I have a question here though -
Is endpoint-ID unique across all the endpoints available per 'viommu_dev' or
per 'viommu_domain'?
If it is per 'viommu_domain' then the above list is also incorrect.
As you pointed to in the patch [1] -
[PATCH RFC v1 02/11] iommu/virtio: Maintain a list of endpoints served
by viommu_dev
I am planning to add endpoint ID into a static global xarray in
viommu_probe_device() as below:

vdev_for_each_id(i, eid, vdev) {
ret = xa_insert(&viommu_ep_ids, eid, vdev, GFP_KERNEL);
if (ret)
goto err_free_dev;
}

and replace the above list traversal as below:

xa_lock_irqsave(&viommu_ep_ids, flags);
xa_for_each(&viommu_ep_ids, eid, vdev) {
if (eid == endpoint) {
ret =
iommu_report_device_fault(vdev->dev, &fault_evt);
if (ret)
dev_err(vdev->dev, "Couldn't
handle pa

Re: [RFC 11/20] iommu/iommufd: Add IOMMU_IOASID_ALLOC/FREE

2021-10-11 Thread David Gibson
On Fri, Oct 01, 2021 at 09:22:25AM -0300, Jason Gunthorpe wrote:
> On Fri, Oct 01, 2021 at 04:13:58PM +1000, David Gibson wrote:
> > On Tue, Sep 21, 2021 at 02:44:38PM -0300, Jason Gunthorpe wrote:
> > > On Sun, Sep 19, 2021 at 02:38:39PM +0800, Liu Yi L wrote:
> > > > This patch adds IOASID allocation/free interface per iommufd. When
> > > > allocating an IOASID, userspace is expected to specify the type and
> > > > format information for the target I/O page table.
> > > > 
> > > > This RFC supports only one type (IOMMU_IOASID_TYPE_KERNEL_TYPE1V2),
> > > > implying a kernel-managed I/O page table with vfio type1v2 mapping
> > > > semantics. For this type the user should specify the addr_width of
> > > > the I/O address space and whether the I/O page table is created in
> > > > an iommu enfore_snoop format. enforce_snoop must be true at this point,
> > > > as the false setting requires additional contract with KVM on handling
> > > > WBINVD emulation, which can be added later.
> > > > 
> > > > Userspace is expected to call IOMMU_CHECK_EXTENSION (see next patch)
> > > > for what formats can be specified when allocating an IOASID.
> > > > 
> > > > Open:
> > > > - Devices on PPC platform currently use a different iommu driver in 
> > > > vfio.
> > > >   Per previous discussion they can also use vfio type1v2 as long as 
> > > > there
> > > >   is a way to claim a specific iova range from a system-wide address 
> > > > space.
> > > >   This requirement doesn't sound PPC specific, as addr_width for pci 
> > > > devices
> > > >   can be also represented by a range [0, 2^addr_width-1]. This RFC 
> > > > hasn't
> > > >   adopted this design yet. We hope to have formal alignment in v1 
> > > > discussion
> > > >   and then decide how to incorporate it in v2.
> > > 
> > > I think the request was to include a start/end IO address hint when
> > > creating the ios. When the kernel creates it then it can return the
> > > actual geometry including any holes via a query.
> > 
> > So part of the point of specifying start/end addresses is that
> > explicitly querying holes shouldn't be necessary: if the requested
> > range crosses a hole, it should fail.  If you didn't really need all
> > that range, you shouldn't have asked for it.
> > 
> > Which means these aren't really "hints" but optionally supplied
> > constraints.
> 
> We have to be very careful here, there are two very different use
> cases. When we are talking about the generic API I am mostly
> interested to see that applications like DPDK can use this API and be
> portable to any IOMMU HW the kernel supports. I view the fact that
> there is VFIO PPC specific code in DPDK as a failing of the kernel to
> provide a HW abstraction.

I would agree.  At the time we were making this, we thought there were
irreconcilable differences between what could be done with the x86 vs
ppc IOMMUs.  Turns out we just didn't think it through hard enough to
find a common model.

> This means we cannot define an input that has a magic HW specific
> value.

I'm not entirely sure what you mean by that.

> DPDK can never provide that portably. Thus all these kinds of
> inputs in the generic API need to be hints, if they exist at all.

I don't follow your reasoning.  First, note that in qemu these valus
are *target* hardware specific, not *host* hardware specific.  If
those requests aren't honoured, qemu cannot faithfully emulate the
target hardware and has to fail.  That's what I mean when I say this
is not a constraint, not a hint.

But when I say the constraint is optional, I mean that things which
don't have that requirement - like DPDK - shouldn't apply the
constraint.

> As 'address space size hint'/'address space start hint' is both
> generic, useful, and providable by DPDK I think it is OK.

Size is certainly providable, and probably useful.  For DPDK, I don't
think start is useful.

> PPC can use
> it to pick which of the two page table formats to use for this IOAS if
> it wants.

Clarification: it's not that each window has a specific page table
format.  The two windows are independent of each other, which means
you can separately select the page table format for each one (although
the 32-bit one generally won't be big enough that there's any point
selecting something other than a 1-level TCE table).  When I say
format here, I basically mean number of levels and size of each level
- the IOPTE (a.k.a. TCE) format is the same in each case.

> The second use case is when we have a userspace driver for a specific
> HW IOMMU. Eg a vIOMMU in qemu doing specific PPC/ARM/x86 acceleration.
> We can look here for things to make general, but I would expect a
> fairly high bar. Instead, I would rather see the userspace driver
> communicate with the kernel driver in its own private language, so
> that the entire functionality of the unique HW can be used.

I don't think we actually need to do this.  Or rather, we might want
to do this for maximum performance in some cases, but I think we can
h

Re: [RFC 11/20] iommu/iommufd: Add IOMMU_IOASID_ALLOC/FREE

2021-10-11 Thread da...@gibson.dropbear.id.au
On Sat, Oct 02, 2021 at 09:25:42AM -0300, Jason Gunthorpe wrote:
> On Sat, Oct 02, 2021 at 02:21:38PM +1000, da...@gibson.dropbear.id.au wrote:
> 
> > > > No. qemu needs to supply *both* the 32-bit and 64-bit range to its
> > > > guest, and therefore needs to request both from the host.
> > > 
> > > As I understood your remarks each IOAS can only be one of the formats
> > > as they have a different PTE layout. So here I ment that qmeu needs to
> > > be able to pick *for each IOAS* which of the two formats it is.
> > 
> > No.  Both windows are in the same IOAS.  A device could do DMA
> > simultaneously to both windows.  
> 
> Sure, but that doesn't force us to model it as one IOAS in the
> iommufd. A while back you were talking about using nesting and 3
> IOAS's, right?
> 
> 1, 2 or 3 IOAS's seems like a decision we can make.

Well, up to a point.  We can decide how such a thing should be
constructed.  However at some point there needs to exist an IOAS in
which both windows are mapped, whether it's directly or indirectly.
That's what the device will be attached to.

> PASID support will already require that a device can be multi-bound to
> many IOAS's, couldn't PPC do the same with the windows?

I don't see how that would make sense.  The device has no awareness of
multiple windows the way it does of PASIDs.  It just sends
transactions over the bus with the IOVAs it's told.  If those IOVAs
lie within one of the windows, the IOMMU picks them up and translates
them.  If they don't, it doesn't.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu