[PATCH] iommu/mediatek: Fix a build fail of m4u_type
The commit ("iommu/mediatek: Enlarge the validate PA range for 4GB mode") introduce the following build error: drivers/iommu/mtk_iommu.c: In function 'mtk_iommu_hw_init': >> drivers/iommu/mtk_iommu.c:536:30: error: 'const struct mtk_iommu_data' has no member named 'm4u_type'; did you mean 'm4u_dom'? if (data->enable_4GB && data->m4u_type != M4U_MT8173) { This patch fix it, use "m4u_plat" instead of "m4u_type". Reported-by: kernel test robotSigned-off-by: Yong Wu --- 1) In the v2 of mt2712 iommu support patch-set, We changed a variant name from "m4u_type" to "m4u_plat", But I'm sorry that I forgot to change it in the later patch. 2) This patch is based on [1]. [1]:https://lists.linuxfoundation.org/pipermail/iommu/2017-August/023847.html 3) I cann't find the commit id of the patch ("iommu/mediatek: Enlarge the validate PA range for 4GB mode") in the [2]. So I don't write the commit id for that patch in the commit message. [2]https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next --- drivers/iommu/mtk_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 4f233e1..bc00e40 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -533,7 +533,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), data->base + REG_MMU_IVRP_PADDR); - if (data->enable_4GB && data->m4u_type != M4U_MT8173) { + if (data->enable_4GB && data->m4u_plat != M4U_MT8173) { /* * If 4GB mode is enabled, the validate PA range is from * 0x1__ to 0x1__. here record bit[32:30]. -- 1.9.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 7/8] iommu/mediatek: Enlarge the validate PA range for 4GB mode
Hi Yong, [auto build test ERROR on iommu/next] [also build test ERROR on next-20170823] [cannot apply to v4.13-rc6] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Yong-Wu/MT2712-IOMMU-SUPPORT/20170824-074750 base: https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next config: arm64-allmodconfig (attached as .config) compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705 reproduce: wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=arm64 All errors (new ones prefixed by >>): drivers/iommu/mtk_iommu.c: In function 'mtk_iommu_hw_init': >> drivers/iommu/mtk_iommu.c:536:30: error: 'const struct mtk_iommu_data' has >> no member named 'm4u_type'; did you mean 'm4u_dom'? if (data->enable_4GB && data->m4u_type != M4U_MT8173) { ^~ vim +536 drivers/iommu/mtk_iommu.c 500 501 static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) 502 { 503 u32 regval; 504 int ret; 505 506 ret = clk_prepare_enable(data->bclk); 507 if (ret) { 508 dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret); 509 return ret; 510 } 511 512 regval = F_MMU_TF_PROTECT_SEL(2, data); 513 if (data->m4u_plat == M4U_MT8173) 514 regval |= F_MMU_PREFETCH_RT_REPLACE_MOD; 515 writel_relaxed(regval, data->base + REG_MMU_CTRL_REG); 516 517 regval = F_L2_MULIT_HIT_EN | 518 F_TABLE_WALK_FAULT_INT_EN | 519 F_PREETCH_FIFO_OVERFLOW_INT_EN | 520 F_MISS_FIFO_OVERFLOW_INT_EN | 521 F_PREFETCH_FIFO_ERR_INT_EN | 522 F_MISS_FIFO_ERR_INT_EN; 523 writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0); 524 525 regval = F_INT_TRANSLATION_FAULT | 526 F_INT_MAIN_MULTI_HIT_FAULT | 527 F_INT_INVALID_PA_FAULT | 528 F_INT_ENTRY_REPLACEMENT_FAULT | 529 F_INT_TLB_MISS_FAULT | 530 F_INT_MISS_TRANSACTION_FIFO_FAULT | 531 F_INT_PRETETCH_TRANSATION_FIFO_FAULT; 532 writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL); 533 534 writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), 535 data->base + REG_MMU_IVRP_PADDR); > 536 if (data->enable_4GB && data->m4u_type != M4U_MT8173) { 537 /* 538 * If 4GB mode is enabled, the validate PA range is from 539 * 0x1__ to 0x1__. here record bit[32:30]. 540 */ 541 regval = F_MMU_VLD_PA_RNG(7, 4); 542 writel_relaxed(regval, data->base + REG_MMU_VLD_PA_RNG); 543 } 544 writel_relaxed(0, data->base + REG_MMU_DCM_DIS); 545 546 /* It's MISC control register whose default value is ok except mt8173.*/ 547 if (data->m4u_plat == M4U_MT8173) 548 writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE); 549 550 if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, 551 dev_name(data->dev), (void *)data)) { 552 writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR); 553 clk_disable_unprepare(data->bclk); 554 dev_err(data->dev, "Failed @ IRQ-%d Request\n", data->irq); 555 return -ENODEV; 556 } 557 558 return 0; 559 } 560 --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/4] iommu/pamu: Add support for generic iommu-device
Joerg Roedelwrites: > Hello Michael, > > On Wed, Aug 23, 2017 at 10:17:39PM +1000, Michael Ellerman wrote: > >> [0.358192] Call Trace: >> [0.360624] [c000f7173680] [c000f7173710] 0xc000f7173710 >> (unreliable) >> [0.368088] [c000f7173710] [c02abf7c] >> .kernfs_find_and_get_ns+0x4c/0x7c >> [0.375729] [c000f71737a0] [c02b13c8] >> .sysfs_add_link_to_group+0x44/0x9c >> [0.383456] [c000f7173840] [c0591660] >> .iommu_device_link+0x70/0x144 >> [0.390744] [c000f71738e0] [c05942dc] >> .fsl_pamu_add_device+0x4c/0x80 >> [0.398121] [c000f7173960] [c058ce8c] >> .add_iommu_group+0x5c/0x9c >> [0.405153] [c000f71739e0] [c059d6b8] >> .bus_for_each_dev+0x98/0xfc >> [0.412269] [c000f7173a80] [c058f1a0] >> .bus_set_iommu+0xd8/0x11c >> [0.419218] [c000f7173b20] [c0d86998] >> .pamu_domain_init+0xb0/0xe0 >> [0.426331] [c000f7173ba0] [c0d86864] >> .fsl_pamu_init+0xec/0x170 >> [0.433276] [c000f7173c30] [c0001934] >> .do_one_initcall+0x68/0x1b8 >> [0.440395] [c000f7173d00] [c0d440e4] >> .kernel_init_freeable+0x24c/0x324 >> [0.448031] [c000f7173db0] [c0001b90] .kernel_init+0x20/0x140 >> [0.454801] [c000f7173e30] [c9bc] >> .ret_from_kernel_thread+0x58/0x9c >> [0.462438] Instruction dump: >> [0.465390] 7c0802a6 fb81ffe0 f8010010 fba1ffe8 fbc1fff0 fbe1fff8 >> f821ff71 7c7e1b78 >> [0.473114] 7cbd2b78 7c9c2378 6000 6000 311d >> ebfe0048 552906b4 >> [0.481017] ---[ end trace 5d11d3e6c29380bd ]--- > > Thanks for the report, this looks like an initialization ordering > problem. Yes I suspect so. > Can you please try the attached patch? Thanks, that works. It boots happily, much later in boot I see these messages: [2.085616] iommu: Adding device ffe301000.jr to group 9 [2.091101] iommu: Adding device ffe302000.jr to group 10 [2.096633] iommu: Adding device ffe303000.jr to group 11 [2.102161] iommu: Adding device ffe304000.jr to group 12 In /sys I see: root@p5020ds:~# ls -l /sys/class/iommu/iommu0 lrwxrwxrwx 1 root root 0 Aug 24 12:01 /sys/class/iommu/iommu0 -> ../../devices/virtual/iommu/iommu0 And: root@p5020ds:~# ls -l /sys/devices/virtual/iommu/iommu0/devices/ total 0 lrwxrwxrwx 1 root root 0 Aug 24 12:02 :00:00.0 -> ../../../../platform/ffe20.pcie/pci:00/:00:00.0 lrwxrwxrwx 1 root root 0 Aug 24 12:02 2000:00:00.0 -> ../../../../platform/ffe202000.pcie/pci2000:00/2000:00:00.0 lrwxrwxrwx 1 root root 0 Aug 24 12:02 2000:01:00.0 -> ../../../../platform/ffe202000.pcie/pci2000:00/2000:00:00.0/2000:01:00.0 lrwxrwxrwx 1 root root 0 Aug 24 12:02 2000:01:00.1 -> ../../../../platform/ffe202000.pcie/pci2000:00/2000:00:00.0/2000:01:00.1 lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe100300.dma -> ../../../../platform/ffe00.soc/ffe100300.dma lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe101300.dma -> ../../../../platform/ffe00.soc/ffe101300.dma lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe114000.sdhc -> ../../../../platform/ffe00.soc/ffe114000.sdhc lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe20.pcie -> ../../../../platform/ffe20.pcie lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe202000.pcie -> ../../../../platform/ffe202000.pcie lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe21.usb -> ../../../../platform/ffe00.soc/ffe21.usb lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe211000.usb -> ../../../../platform/ffe00.soc/ffe211000.usb lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe22.sata -> ../../../../platform/ffe00.soc/ffe22.sata lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe221000.sata -> ../../../../platform/ffe00.soc/ffe221000.sata lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe301000.jr -> ../../../../platform/ffe00.soc/ffe30.crypto/ffe301000.jr lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe302000.jr -> ../../../../platform/ffe00.soc/ffe30.crypto/ffe302000.jr lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe303000.jr -> ../../../../platform/ffe00.soc/ffe30.crypto/ffe303000.jr lrwxrwxrwx 1 root root 0 Aug 24 12:02 ffe304000.jr -> ../../../../platform/ffe00.soc/ffe30.crypto/ffe304000.jr Which seems like it's working? cheers ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC v2 2/4] iommu/arm-smmu-v3: Add tlbi_on_map option
On Wed, Aug 23, 2017 at 05:42:55PM +0100, Will Deacon wrote: > Hi Eric, > > On Wed, Aug 23, 2017 at 02:36:53PM +0200, Auger Eric wrote: > > On 23/08/2017 12:25, Will Deacon wrote: > > > On Tue, Aug 22, 2017 at 10:09:15PM +0300, Michael S. Tsirkin wrote: > > >> On Fri, Aug 18, 2017 at 05:49:42AM +0300, Michael S. Tsirkin wrote: > > >>> On Thu, Aug 17, 2017 at 05:34:25PM +0100, Will Deacon wrote: > > On Fri, Aug 11, 2017 at 03:45:28PM +0200, Eric Auger wrote: > > > When running a virtual SMMU on a guest we sometimes need to trap > > > all changes to the translation structures. This is especially useful > > > to integrate with VFIO. This patch adds a new option that forces > > > the IO_PGTABLE_QUIRK_TLBI_ON_MAP to be applied on LPAE page tables. > > > > > > TLBI commands then can be trapped. > > > > > > Signed-off-by: Eric Auger> > > > > > --- > > > v1 -> v2: > > > - rebase on v4.13-rc2 > > > --- > > > Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt | 4 > > > drivers/iommu/arm-smmu-v3.c | 5 + > > > 2 files changed, 9 insertions(+) > > > > > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > index c9abbf3..ebb85e9 100644 > > > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > @@ -52,6 +52,10 @@ the PCIe specification. > > > > > > devicetree/bindings/interrupt-controller/msi.txt > > >for a description of the msi-parent property. > > > > > > +- tlbi-on-map : invalidate caches whenever there is an update > > > of > > > + any remapping structure (updates to > > > not-present or > > > + present entries). > > > + > > > > My position on this hasn't changed, so NAK for this patch. If you want > > to > > emulate something outside of the SMMUv3 architecture, please do so, but > > don't pretend that it's an SMMUv3. > > > > Will > > >>> > > >>> What if the emulated device does not list arm,smmu-v3, listing > > >>> qemu,ssmu-v3 as compatible? Would that address the concern? > > >> > > >> Will, can you comment on this please? Are you open to reusing the code > > >> in drivers/iommu/arm-smmu-v3.c to support a paravirtual device that does > > >> not claim to be compatible with smmuv3 but does try to behave very close > > >> to > > >> it except it can cache non-present structures? Or would you rather > > >> the code to support this is forked to qemu-smmu-v3.c? > > > > > > I still don't understand why this is preferable to a PV IOMMU > > > implementation. Not only is this proposing to issue TLB maintenance on > > > map, but the maintenance command itself is entirely made up. Why not just > > > have a map command? Anyway, I'm reluctant to add this hack to the driver > > > until: > > > > > > 1. There is a compelling reason to pursue this approach instead of a > > > PV approach (including performance measurements). > > > > > > 2. There is a specification for the QEMU fork of the ARM SMMUv3 > > > architecture, including the semantics of the new command being > > > proposed > > > and what exactly the TLB maintenance requirements are on map (for > > > example, what if I change an STE or a CD -- are they cached too?). > > I am not sure I catch this last point. At the moment whenever the smmuv3 > > driver issues data structure invalidation commands (CMD_CFGI_*), those > > are trapped and I replay the mappings on host side. I have not changed > > anything on that side. > > But STEs and CDs have very similar rules to TLBs: you don't need to issue > invalidation if the data structure is transitioning from invalid to valid. > If you're caching those in QEMU, how do you keep them up-to-date? I can > also guarantee you that there will be additional data structures added > in future versions of the architecture, so you'll need to consider how > you want to operate when running on newer hardware. > > > I introduced a new map implementation defined command because the per > > page CMD_TLBI_NH_VA IOVA invalidation command was not efficient/usable > > with use cases such as DPDK on guest. I understood the spec provisions > > for such implementation defined commands. > > Whilst there is a space for IMP DEF commands, this doesn't generally mean > that they can be repurposed by software. What if the underlying hardware > has an IMP DEF command that you want to export? The code needs to change to only issue these commands for the QEMU SMMU, not the arm one. Then if necessary we will be able to add some kind of handshake with guest and transition to a different command for newer guests. > Besides, my
Re: [PATCH v6 3/3] iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801
On 23/08/2017 17:43, Will Deacon wrote: On Wed, Aug 23, 2017 at 03:29:46PM +0100, John Garry wrote: On 23/08/2017 14:24, Will Deacon wrote: On Wed, Aug 23, 2017 at 02:17:24PM +0100, John Garry wrote: Signed-off-by: Shameer Kolothum--- drivers/iommu/arm-smmu-v3.c | 27 ++- 1 file changed, 22 insertions(+), 5 deletions(-) Please can you also add a devicetree binding with corresponding documentation to enable this workaround on non-ACPI based systems too? It should be straightforward if you update the arm_smmu_options table. As I mentioned before, devicetree was a lower priority and we would definitely submit patch to support that. Even if we update the arm_smmu_options table with DT binding, the generic function to retrieve the MSI address regions only works on ACPI/IORT case now. Hi Will, Can you confirm your stance on supporting this workaround for DT as well as ACPI? For us, we now only "officially" support ACPI FW, and DT support at this point is patchy/limited. To me, adding DT support is just more errata workaround code to maintain with little useful gain. I basically don't like the idea of a driver that only works for one of ACPI or DT yet claims to support both. I'm less fussed about functionality differences (feature X is only available with firmware Y), but not working around a hardware erratum that we know about is just lazy. So I'd prefer that we handle this in both cases, or blacklist affected devices when booting with DT. Continuing as though there isn't an erratum is the worst thing we can do. OK, seems reasonable. We would consider blacklisting the device, where/how to do is the question. So the errata is in the GICv3 ITS/PCI host controller, and we just use the in-between SMMU (driver) to provide the workaround. So my initial impression is that the PCI host controller would have to be blacklisted IFF behind an IOMMU for DT firmware in pcie-hisi.c or pci quirks framework. How does it sound? If that avoids us running into the erratum, then fine by me. I'd obviously prefer we work-around it since we know how to, but that's up to you. I'm surpsised that you may want more errata workaround code to maintain. Anyway we'll check both approaches and show you how they look and go from there. Please also note that in our SoC we have other devices behind the same SMMU, like storage controller, which are not affected or related to the Errata. BTW, ignoring DT support, are you happy with this patchset? Yes, but I won't ack it without the above taken care of. Fair enough. Will Thanks, John . ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v6 3/3] iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801
On Wed, Aug 23, 2017 at 03:29:46PM +0100, John Garry wrote: > On 23/08/2017 14:24, Will Deacon wrote: > >On Wed, Aug 23, 2017 at 02:17:24PM +0100, John Garry wrote: > >Signed-off-by: Shameer Kolothum >> >--- > >drivers/iommu/arm-smmu-v3.c | 27 ++- > >1 file changed, 22 insertions(+), 5 deletions(-) > > Please can you also add a devicetree binding with corresponding > documentation to enable this workaround on non-ACPI based systems too? > It should be straightforward if you update the arm_smmu_options table. > >>> > >>>As I mentioned before, devicetree was a lower priority and we would > >>>definitely > >>>submit patch to support that. Even if we update the arm_smmu_options table > >>>with DT binding, the generic function to retrieve the MSI address regions > >>>only > >>>works on ACPI/IORT case now. > >>> > >> > >>Hi Will, > >> > >>Can you confirm your stance on supporting this workaround for DT as well as > >>ACPI? > >> > >>For us, we now only "officially" support ACPI FW, and DT support at this > >>point is patchy/limited. To me, adding DT support is just more errata > >>workaround code to maintain with little useful gain. > > > >I basically don't like the idea of a driver that only works for one of > >ACPI or DT yet claims to support both. I'm less fussed about functionality > >differences (feature X is only available with firmware Y), but not working > >around a hardware erratum that we know about is just lazy. > > > >So I'd prefer that we handle this in both cases, or blacklist affected > >devices when booting with DT. Continuing as though there isn't an erratum > >is the worst thing we can do. > > OK, seems reasonable. > > We would consider blacklisting the device, where/how to do is the question. > > So the errata is in the GICv3 ITS/PCI host controller, and we just use the > in-between SMMU (driver) to provide the workaround. So my initial impression > is that the PCI host controller would have to be blacklisted IFF behind an > IOMMU for DT firmware in pcie-hisi.c or pci quirks framework. How does it > sound? If that avoids us running into the erratum, then fine by me. I'd obviously prefer we work-around it since we know how to, but that's up to you. > Please also note that in our SoC we have other devices behind the same SMMU, > like storage controller, which are not affected or related to the Errata. > > BTW, ignoring DT support, are you happy with this patchset? Yes, but I won't ack it without the above taken care of. Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC v2 2/4] iommu/arm-smmu-v3: Add tlbi_on_map option
Hi Eric, On Wed, Aug 23, 2017 at 02:36:53PM +0200, Auger Eric wrote: > On 23/08/2017 12:25, Will Deacon wrote: > > On Tue, Aug 22, 2017 at 10:09:15PM +0300, Michael S. Tsirkin wrote: > >> On Fri, Aug 18, 2017 at 05:49:42AM +0300, Michael S. Tsirkin wrote: > >>> On Thu, Aug 17, 2017 at 05:34:25PM +0100, Will Deacon wrote: > On Fri, Aug 11, 2017 at 03:45:28PM +0200, Eric Auger wrote: > > When running a virtual SMMU on a guest we sometimes need to trap > > all changes to the translation structures. This is especially useful > > to integrate with VFIO. This patch adds a new option that forces > > the IO_PGTABLE_QUIRK_TLBI_ON_MAP to be applied on LPAE page tables. > > > > TLBI commands then can be trapped. > > > > Signed-off-by: Eric Auger> > > > --- > > v1 -> v2: > > - rebase on v4.13-rc2 > > --- > > Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt | 4 > > drivers/iommu/arm-smmu-v3.c | 5 + > > 2 files changed, 9 insertions(+) > > > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > index c9abbf3..ebb85e9 100644 > > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > @@ -52,6 +52,10 @@ the PCIe specification. > > > > devicetree/bindings/interrupt-controller/msi.txt > >for a description of the msi-parent property. > > > > +- tlbi-on-map : invalidate caches whenever there is an update of > > + any remapping structure (updates to not-present > > or > > + present entries). > > + > > My position on this hasn't changed, so NAK for this patch. If you want to > emulate something outside of the SMMUv3 architecture, please do so, but > don't pretend that it's an SMMUv3. > > Will > >>> > >>> What if the emulated device does not list arm,smmu-v3, listing > >>> qemu,ssmu-v3 as compatible? Would that address the concern? > >> > >> Will, can you comment on this please? Are you open to reusing the code > >> in drivers/iommu/arm-smmu-v3.c to support a paravirtual device that does > >> not claim to be compatible with smmuv3 but does try to behave very close to > >> it except it can cache non-present structures? Or would you rather > >> the code to support this is forked to qemu-smmu-v3.c? > > > > I still don't understand why this is preferable to a PV IOMMU > > implementation. Not only is this proposing to issue TLB maintenance on > > map, but the maintenance command itself is entirely made up. Why not just > > have a map command? Anyway, I'm reluctant to add this hack to the driver > > until: > > > > 1. There is a compelling reason to pursue this approach instead of a > > PV approach (including performance measurements). > > > > 2. There is a specification for the QEMU fork of the ARM SMMUv3 > > architecture, including the semantics of the new command being proposed > > and what exactly the TLB maintenance requirements are on map (for > > example, what if I change an STE or a CD -- are they cached too?). > I am not sure I catch this last point. At the moment whenever the smmuv3 > driver issues data structure invalidation commands (CMD_CFGI_*), those > are trapped and I replay the mappings on host side. I have not changed > anything on that side. But STEs and CDs have very similar rules to TLBs: you don't need to issue invalidation if the data structure is transitioning from invalid to valid. If you're caching those in QEMU, how do you keep them up-to-date? I can also guarantee you that there will be additional data structures added in future versions of the architecture, so you'll need to consider how you want to operate when running on newer hardware. > I introduced a new map implementation defined command because the per > page CMD_TLBI_NH_VA IOVA invalidation command was not efficient/usable > with use cases such as DPDK on guest. I understood the spec provisions > for such implementation defined commands. Whilst there is a space for IMP DEF commands, this doesn't generally mean that they can be repurposed by software. What if the underlying hardware has an IMP DEF command that you want to export? Besides, my main points here are that your command isn't well-specified and if you have to add a command, why not just add a "map" command (i.e. implement a PV interface instead)? > > 3. The ACPI IORT spec is updated to recognise this implementation > > > > 4. There is an implementation that can use the guest page tables directly, > > because that may well make all of this moot. > Most probably I will come back to you with questions on stage 1 + stage2 > enablement and "4.8 Virtualisation"
Re: [PATCH v6 3/3] iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801
On 23/08/2017 14:24, Will Deacon wrote: On Wed, Aug 23, 2017 at 02:17:24PM +0100, John Garry wrote: Signed-off-by: Shameer Kolothum--- drivers/iommu/arm-smmu-v3.c | 27 ++- 1 file changed, 22 insertions(+), 5 deletions(-) Please can you also add a devicetree binding with corresponding documentation to enable this workaround on non-ACPI based systems too? It should be straightforward if you update the arm_smmu_options table. As I mentioned before, devicetree was a lower priority and we would definitely submit patch to support that. Even if we update the arm_smmu_options table with DT binding, the generic function to retrieve the MSI address regions only works on ACPI/IORT case now. Hi Will, Can you confirm your stance on supporting this workaround for DT as well as ACPI? For us, we now only "officially" support ACPI FW, and DT support at this point is patchy/limited. To me, adding DT support is just more errata workaround code to maintain with little useful gain. I basically don't like the idea of a driver that only works for one of ACPI or DT yet claims to support both. I'm less fussed about functionality differences (feature X is only available with firmware Y), but not working around a hardware erratum that we know about is just lazy. So I'd prefer that we handle this in both cases, or blacklist affected devices when booting with DT. Continuing as though there isn't an erratum is the worst thing we can do. OK, seems reasonable. We would consider blacklisting the device, where/how to do is the question. So the errata is in the GICv3 ITS/PCI host controller, and we just use the in-between SMMU (driver) to provide the workaround. So my initial impression is that the PCI host controller would have to be blacklisted IFF behind an IOMMU for DT firmware in pcie-hisi.c or pci quirks framework. How does it sound? Please also note that in our SoC we have other devices behind the same SMMU, like storage controller, which are not affected or related to the Errata. BTW, ignoring DT support, are you happy with this patchset? Regards, John Will -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html . ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2] iommu/ipmmu-vmsa: Rereserving a free context before setting up a pagetable
From: Oleksandr TyshchenkoReserving a free context is both quicker and more likely to fail (due to limited hardware resources) than setting up a pagetable. What is more the pagetable init/cleanup code could require the context to be set up. Signed-off-by: Oleksandr Tyshchenko CC: Robin Murphy CC: Laurent Pinchart CC: Joerg Roedel --- This patch fixes a bug during rollback logic: In ipmmu_domain_init_context() we are trying to find an unused context and if operation fails we will call free_io_pgtable_ops(), but "domain->context_id" hasn't been initialized yet (likely it is 0 because of kzalloc). Having the following call stack: free_io_pgtable_ops() -> io_pgtable_tlb_flush_all() -> ipmmu_tlb_flush_all() -> ipmmu_tlb_invalidate() we will get a mistaken cache flush for a context pointed by uninitialized "domain->context_id". v1 here: https://lists.linuxfoundation.org/pipermail/iommu/2017-August/023857.html --- drivers/iommu/ipmmu-vmsa.c | 42 +- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 2a38aa1..382f387 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -341,6 +341,19 @@ static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu, return ret; } +static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, + unsigned int context_id) +{ + unsigned long flags; + + spin_lock_irqsave(>lock, flags); + + clear_bit(context_id, mmu->ctx); + mmu->domains[context_id] = NULL; + + spin_unlock_irqrestore(>lock, flags); +} + static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) { u64 ttbr; @@ -370,22 +383,22 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) */ domain->cfg.iommu_dev = domain->mmu->dev; - domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, >cfg, - domain); - if (!domain->iop) - return -EINVAL; - /* * Find an unused context. */ ret = ipmmu_domain_allocate_context(domain->mmu, domain); - if (ret == IPMMU_CTX_MAX) { - free_io_pgtable_ops(domain->iop); + if (ret == IPMMU_CTX_MAX) return -EBUSY; - } domain->context_id = ret; + domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, >cfg, + domain); + if (!domain->iop) { + ipmmu_domain_free_context(domain->mmu, domain->context_id); + return -EINVAL; + } + /* TTBR0 */ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; ipmmu_ctx_write(domain, IMTTLBR0, ttbr); @@ -426,19 +439,6 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) return 0; } -static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, - unsigned int context_id) -{ - unsigned long flags; - - spin_lock_irqsave(>lock, flags); - - clear_bit(context_id, mmu->ctx); - mmu->domains[context_id] = NULL; - - spin_unlock_irqrestore(>lock, flags); -} - static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) { /* -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/4] iommu/pamu: Add support for generic iommu-device
Hello Michael, On Wed, Aug 23, 2017 at 10:17:39PM +1000, Michael Ellerman wrote: > [0.358192] Call Trace: > [0.360624] [c000f7173680] [c000f7173710] 0xc000f7173710 > (unreliable) > [0.368088] [c000f7173710] [c02abf7c] > .kernfs_find_and_get_ns+0x4c/0x7c > [0.375729] [c000f71737a0] [c02b13c8] > .sysfs_add_link_to_group+0x44/0x9c > [0.383456] [c000f7173840] [c0591660] > .iommu_device_link+0x70/0x144 > [0.390744] [c000f71738e0] [c05942dc] > .fsl_pamu_add_device+0x4c/0x80 > [0.398121] [c000f7173960] [c058ce8c] > .add_iommu_group+0x5c/0x9c > [0.405153] [c000f71739e0] [c059d6b8] > .bus_for_each_dev+0x98/0xfc > [0.412269] [c000f7173a80] [c058f1a0] .bus_set_iommu+0xd8/0x11c > [0.419218] [c000f7173b20] [c0d86998] > .pamu_domain_init+0xb0/0xe0 > [0.426331] [c000f7173ba0] [c0d86864] .fsl_pamu_init+0xec/0x170 > [0.433276] [c000f7173c30] [c0001934] > .do_one_initcall+0x68/0x1b8 > [0.440395] [c000f7173d00] [c0d440e4] > .kernel_init_freeable+0x24c/0x324 > [0.448031] [c000f7173db0] [c0001b90] .kernel_init+0x20/0x140 > [0.454801] [c000f7173e30] [c9bc] > .ret_from_kernel_thread+0x58/0x9c > [0.462438] Instruction dump: > [0.465390] 7c0802a6 fb81ffe0 f8010010 fba1ffe8 fbc1fff0 fbe1fff8 f821ff71 > 7c7e1b78 > [0.473114] 7cbd2b78 7c9c2378 6000 6000 311d > ebfe0048 552906b4 > [0.481017] ---[ end trace 5d11d3e6c29380bd ]--- Thanks for the report, this looks like an initialization ordering problem. Can you please try the attached patch? diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 9238a85de53e..9ee8e9e161f5 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -44,8 +44,6 @@ static struct paace *spaact; static bool probed;/* Has PAMU been probed? */ -struct iommu_device pamu_iommu;/* IOMMU core code handle */ - /* * Table for matching compatible strings, for device tree * guts node, for QorIQ SOCs. @@ -1156,18 +1154,6 @@ static int fsl_pamu_probe(struct platform_device *pdev) if (ret) goto error_genpool; - ret = iommu_device_sysfs_add(_iommu, dev, NULL, "iommu0"); - if (ret) - goto error_genpool; - - iommu_device_set_ops(_iommu, _pamu_ops); - - ret = iommu_device_register(_iommu); - if (ret) { - dev_err(dev, "Can't register iommu device\n"); - goto error_sysfs; - } - pamubypenr = in_be32(_regs->pamubypenr); for (pamu_reg_off = 0, pamu_counter = 0x8000; pamu_reg_off < size; @@ -1195,9 +1181,6 @@ static int fsl_pamu_probe(struct platform_device *pdev) return 0; -error_sysfs: - iommu_device_sysfs_remove(_iommu); - error_genpool: gen_pool_destroy(spaace_pool); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index fa48222f3421..c3434f29c967 100644 --- a/drivers/iommu/fsl_pamu.h +++ b/drivers/iommu/fsl_pamu.h @@ -391,9 +391,6 @@ struct ome { #define EOE_WWSAOL 0x1e/* Write with stash allocate only and lock */ #define EOE_VALID 0x80 -extern const struct iommu_ops fsl_pamu_ops; -extern struct iommu_device pamu_iommu; /* IOMMU core code handle */ - /* Function prototypes */ int pamu_domain_init(void); int pamu_enable_liodn(int liodn); diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 914953b87bf1..e0fcd079cca9 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -33,6 +33,8 @@ static struct kmem_cache *fsl_pamu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; static DEFINE_SPINLOCK(device_domain_lock); +struct iommu_device pamu_iommu;/* IOMMU core code handle */ + static struct fsl_dma_domain *to_fsl_dma_domain(struct iommu_domain *dom) { return container_of(dom, struct fsl_dma_domain, iommu_domain); @@ -1050,7 +1052,7 @@ static u32 fsl_pamu_get_windows(struct iommu_domain *domain) return dma_domain->win_cnt; } -const struct iommu_ops fsl_pamu_ops = { +static const struct iommu_ops fsl_pamu_ops = { .capable= fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, .domain_free= fsl_pamu_domain_free, @@ -1076,6 +1078,19 @@ int __init pamu_domain_init(void) if (ret) return ret; + ret = iommu_device_sysfs_add(_iommu, NULL, NULL, "iommu0"); + if (ret) + return ret; + + iommu_device_set_ops(_iommu, _pamu_ops); + + ret = iommu_device_register(_iommu); + if (ret) { + iommu_device_sysfs_remove(_iommu); + pr_err("Can't register iommu device\n"); + return ret; + } + bus_set_iommu(_bus_type, _pamu_ops);
Re: [RFC v2 2/4] iommu/arm-smmu-v3: Add tlbi_on_map option
On Wed, Aug 23, 2017 at 11:25:17AM +0100, Will Deacon wrote: > On Tue, Aug 22, 2017 at 10:09:15PM +0300, Michael S. Tsirkin wrote: > > On Fri, Aug 18, 2017 at 05:49:42AM +0300, Michael S. Tsirkin wrote: > > > On Thu, Aug 17, 2017 at 05:34:25PM +0100, Will Deacon wrote: > > > > On Fri, Aug 11, 2017 at 03:45:28PM +0200, Eric Auger wrote: > > > > > When running a virtual SMMU on a guest we sometimes need to trap > > > > > all changes to the translation structures. This is especially useful > > > > > to integrate with VFIO. This patch adds a new option that forces > > > > > the IO_PGTABLE_QUIRK_TLBI_ON_MAP to be applied on LPAE page tables. > > > > > > > > > > TLBI commands then can be trapped. > > > > > > > > > > Signed-off-by: Eric Auger> > > > > > > > > > --- > > > > > v1 -> v2: > > > > > - rebase on v4.13-rc2 > > > > > --- > > > > > Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt | 4 > > > > > drivers/iommu/arm-smmu-v3.c | 5 + > > > > > 2 files changed, 9 insertions(+) > > > > > > > > > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > > > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > > > index c9abbf3..ebb85e9 100644 > > > > > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > > > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > > > @@ -52,6 +52,10 @@ the PCIe specification. > > > > > > > > > > devicetree/bindings/interrupt-controller/msi.txt > > > > >for a description of the msi-parent property. > > > > > > > > > > +- tlbi-on-map : invalidate caches whenever there is an update > > > > > of > > > > > + any remapping structure (updates to > > > > > not-present or > > > > > + present entries). > > > > > + > > > > > > > > My position on this hasn't changed, so NAK for this patch. If you want > > > > to > > > > emulate something outside of the SMMUv3 architecture, please do so, but > > > > don't pretend that it's an SMMUv3. > > > > > > > > Will > > > > > > What if the emulated device does not list arm,smmu-v3, listing > > > qemu,ssmu-v3 as compatible? Would that address the concern? > > > > Will, can you comment on this please? Are you open to reusing the code > > in drivers/iommu/arm-smmu-v3.c to support a paravirtual device that does > > not claim to be compatible with smmuv3 but does try to behave very close to > > it except it can cache non-present structures? Or would you rather > > the code to support this is forked to qemu-smmu-v3.c? > > I still don't understand why this is preferable to a PV IOMMU > implementation. It has advantages and disadvantages as everything. To list some advantages: - Because this reuses all of the code we need for emulating SMMU anyway. Just look at size of the patches and compare to virtio iommu patches. - I think this is a reasonable stepping stone for using nested support in host SMMU which is obviously faster as you don't need to send mappings to host. We can get guest and QEMU working, then work on support using guest page tables directly. - With virtio IOMMU you will never be able to switch to using guest page tables directly without upheaving to host/guest interfaces. > Not only is this proposing to issue TLB maintenance on > map, but the maintenance command itself is entirely made up. Why not just > have a map command? Anyway, I'm reluctant to add this hack to the driver > until: > > 1. There is a compelling reason to pursue this approach instead of a > PV approach (including performance measurements). In fact the question does not make a lot of sense. This interface is PV right there. But this PV is waaay less code since we need the emulation anyway. I don't even need to look at performance to see a compelling reason on the QEMU side. So it's a question of reduced maintainance host side. > 2. There is a specification for the QEMU fork of the ARM SMMUv3 > architecture, including the semantics of the new command being proposed > and what exactly the TLB maintenance requirements are on map (for > example, what if I change an STE or a CD -- are they cached too?). Makes sense. > 3. The ACPI IORT spec is updated to recognise this implementation I don't think we have to gate on this. IORT is ARM spec for ARM hardware. This should be a device specific quirk only triggering for the QEMU (non-ARM) implementation (which in this patchset it isn't, and this is something to fix IMO). > 4. There is an implementation that can use the guest page tables directly, > because that may well make all of this moot. That will depend on the specific host capability. So this is actually another part of the motivation here. Guest / host interface will be very similar with this and with using guest page tables directly. So most of the same code gets to run with and without, good for
Re: [PATCH v1] iommu/ipmmu-vmsa: Set context_id to non-existent value if allocation failed
Hi, Laurent On Wed, Aug 23, 2017 at 4:46 PM, Laurent Pinchartwrote: > Hi Oleksandr, > > On Wednesday, 23 August 2017 14:58:47 EEST Oleksandr Tyshchenko wrote: >> On Wed, Aug 23, 2017 at 1:05 PM, Robin Murphy wrote: >> > On 23/08/17 10:36, Oleksandr Tyshchenko wrote: >> >> On Wed, Aug 23, 2017 at 12:25 AM, Laurent Pinchart wrote: >> >>> On Monday, 21 August 2017 15:40:41 EEST Oleksandr Tyshchenko wrote: >> From: Oleksandr Tyshchenko >> >> In ipmmu_domain_init_context() we are trying to allocate context and >> if allocation fails we will call free_io_pgtable_ops(), >> but "domain->context_id" hasn't been initialized yet (likely it is 0 >> because of kzalloc). Having the following call stack: >> free_io_pgtable_ops() -> io_pgtable_tlb_flush_all() -> >> ipmmu_tlb_flush_all() -> ipmmu_tlb_invalidate() >> we will get a mistaken cache flush for a context pointed by >> uninitialized "domain->context_id". >> >> So, set context_id to non-existent value (IPMMU_CTX_MAX) before calling >> free_io_pgtable_ops() and check it for a valid value (< IPMMU_CTX_MAX) >> before calling ipmmu_tlb_invalidate(). >> >> Signed-off-by: Oleksandr Tyshchenko >> --- >> >> drivers/iommu/ipmmu-vmsa.c | 4 >> 1 file changed, 4 insertions(+) >> >> diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c >> index 2a38aa1..5b226c0 100644 >> --- a/drivers/iommu/ipmmu-vmsa.c >> +++ b/drivers/iommu/ipmmu-vmsa.c >> @@ -303,6 +303,9 @@ static void ipmmu_tlb_flush_all(void *cookie) >> { >> struct ipmmu_vmsa_domain *domain = cookie; >> >> + if (domain->context_id >= IPMMU_CTX_MAX) >> + return; >> + >> ipmmu_tlb_invalidate(domain); >> } >> >> @@ -380,6 +383,7 @@ static int ipmmu_domain_init_context(struct >> ipmmu_vmsa_domain *domain) >> */ >> ret = ipmmu_domain_allocate_context(domain->mmu, domain); >> if (ret == IPMMU_CTX_MAX) { >> + domain->context_id = IPMMU_CTX_MAX; >> >>> >> >>> Wouldn't it make more sense to allocate the pgtable ops after >> >>> initializing the context (moving the ipmmu_domain_allocate_context() >> >>> call to the very end of the function) ? That way we would be less >> >>> dependent on changes to pgtable ops init/cleanup code that could >> >>> require the context to be set up. >> >> >> >> Why not. But, not sure about the very end of the function. Since for >> >> writing some HW registers down the function (IMTTLBR0/IMTTUBR0, >> >> IMMAIR0) we need to have what pgtable ops sets up for us (ttbr, mair). >> >> What about to just swap alloc_io_pgtable_ops() and >> >> ipmmu_domain_allocate_context()? >> > >> > This looks a lot more reasonable - reserving a free context is both >> > quicker and more likely to fail (due to limited hardware resources) than >> > setting up a pagetable, so it makes a lot of sense to do that before >> > anything else. >> >> Agree. > > That looks good to me too. In general I prefer initializing everything needed > by the error path before calling anything that could trigger that error path, > instead of initializing variables to magic values that mean part of the > cleanup should be skipped. Make sense. > > Will you send a v2 ? Yes. > > -- > Regards, > > Laurent Pinchart > -- Regards, Oleksandr Tyshchenko ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [virtio-dev] [RFC] virtio-iommu version 0.4
Hi Jean-Philippe, On 04/08/2017 20:19, Jean-Philippe Brucker wrote: > This is the continuation of my proposal for virtio-iommu, the para- > virtualized IOMMU. Here is a summary of the changes since last time [1]: > > * The virtio-iommu document now resembles an actual specification. It is > split into a formal description of the virtio device, and implementation > notes. Please find sources and binaries at [2]. > > * Added a probe request to describe to the guest different properties that > do not fit in firmware or in the virtio config space. This is a > necessary stepping stone for extending the virtio-iommu. > > * There is a working Qemu prototype [3], thanks to Eric Auger and Bharat > Bhushan. > > You can find the Linux driver and kvmtool device at [4] and [5]. I > plan to rework driver and kvmtool device slightly before sending the > patches. > > To understand the virtio-iommu, I advise to first read introduction and > motivation, then skim through implementation notes and finally look at the > device specification. > > I wasn't sure how to organize the review. For those who prefer to comment > inline, I attached v0.4 of device-operations.tex and topology.tex+MSI.tex > to this thread. They are the biggest chunks of the document. But LaTeX > isn't very pleasant to read, so you can simply send a list of comments in > relation to section numbers and a few words of context, we'll manage. Please find some comments/questions below: 2.6.7:1 I do not understand the footnode #6 sentence: 'Without a specific definition of the ACK requirements for a given property type, it simply means that the driver read all fields of that property." 2.6.7:2 what if the driver has not provided a big enough buffer and the device cannot report all supported properties? 2.6.8.2: Couldn't we use the same terminology as iommu_resv_type in iommu.h? the negative form is not the easiest to understand for me. Why F_MSI? 2.6.8.2.1 Multiple overlapping RESV_MEM properties are merged together. Device requirement? if same types I assume? what if the device is a physical assigned device. does the device report the host doorbell or the guest doorbell, may be worth to clarify. 3.1.1 last paragraph you talk about device ID but this is a terminology only known by ARM I think. Actually it is introduced later in 3.1.2. 3.1.2 About ACPI integration. Isn't IORT ARM specific? Will other architectures use that table? In IORT we also have Named Component node which look pretty the same. Could you elaborate of the difference? Device Object name: isn't it the path to the LNR0005 in the namespace? Thanks Eric > > --- > Version numbers 0.1-0.4 are arbitrary. I'm hoping they allow to compare > more easily differences since the RFC (see [6]), but haven't been made > public so far. This is the first public posting since initial proposal > [1], and the following describes all changes. > > ## v0.1 ## > > Content is the same as the RFC, but formatted to LaTeX. 'make' generates > one PDF and one HTML document. > > ## v0.2 ## > > Add introductions, improve topology example and firmware description based > on feedback and a number of useful discussions. > > ## v0.3 ## > > Add normative sections (MUST, SHOULD, etc). Clarify some things, tighten > the device and driver behaviour. Unmap semantics are consolidated; they > are now closer to VFIO Type1 v2 semantics. > > ## v0.4 ## > > Introduce PROBE requests. They provide per-endpoint information to the > driver that couldn't be described otherwise. > > For the moment, they allow to handle MSIs on x86 virtual platforms (see > 3.2). To do that we communicate reserved IOVA regions, that will also be > useful for describing regions that cannot be mapped for a given endpoint, > for instance addresses that correspond to a PCI bridge window. > > Introducing such a large framework for this tiny feature may seem > overkill, but it is needed for future extensions of the virtio-iommu and I > believe it really is worth the effort. > > ## Future ## > > Other extensions are in preparation. I won't detail them here because v0.4 > already is a lot to digest, but in short, building on top of PROBE: > > * First, since the IOMMU is paravirtualized, the device can expose some > properties of the physical topology to the guest, and let it allocate > resources more efficiently. For example, when the virtio-iommu manages > both physical and emulated endpoints, with different underlying IOMMUs, > we now have a way to describe multiple page and block granularities, > instead of forcing the guest to use the most restricted one for all > endpoints. This will most likely be in v0.5. > > * Then on top of that, a major improvement will describe hardware > acceleration features available to the guest. There is what I call "Page > Table Handover" (or simply, from the host POV, "Nested"), the ability > for the guest to manipulate its own page tables instead of sending > MAP/UNMAP requests to
Re: [PATCH] iommu: qcom: annotate PM functions as __maybe_unused
On Wed, Aug 23, 2017 at 9:42 AM, Arnd Bergmannwrote: > The qcom_iommu_disable_clocks() function is only called from PM > code that is hidden in an #ifdef, causing a harmless warning without > CONFIG_PM: > > drivers/iommu/qcom_iommu.c:601:13: error: 'qcom_iommu_disable_clocks' defined > but not used [-Werror=unused-function] > static void qcom_iommu_disable_clocks(struct qcom_iommu_dev *qcom_iommu) > drivers/iommu/qcom_iommu.c:581:12: error: 'qcom_iommu_enable_clocks' defined > but not used [-Werror=unused-function] > static int qcom_iommu_enable_clocks(struct qcom_iommu_dev *qcom_iommu) > > Replacing that #ifdef with __maybe_unused annotations lets the compiler > drop the functions silently instead. > > Fixes: 0ae349a0f33f ("iommu/qcom: Add qcom_iommu") > Signed-off-by: Arnd Bergmann thanks Acked-by: Rob Clark > --- > drivers/iommu/qcom_iommu.c | 6 ++ > 1 file changed, 2 insertions(+), 4 deletions(-) > > diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c > index 48b62aa52787..c8a587d034b0 100644 > --- a/drivers/iommu/qcom_iommu.c > +++ b/drivers/iommu/qcom_iommu.c > @@ -860,8 +860,7 @@ static int qcom_iommu_device_remove(struct > platform_device *pdev) > return 0; > } > > -#ifdef CONFIG_PM > -static int qcom_iommu_resume(struct device *dev) > +static int __maybe_unused qcom_iommu_resume(struct device *dev) > { > struct platform_device *pdev = to_platform_device(dev); > struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); > @@ -869,7 +868,7 @@ static int qcom_iommu_resume(struct device *dev) > return qcom_iommu_enable_clocks(qcom_iommu); > } > > -static int qcom_iommu_suspend(struct device *dev) > +static int __maybe_unused qcom_iommu_suspend(struct device *dev) > { > struct platform_device *pdev = to_platform_device(dev); > struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); > @@ -878,7 +877,6 @@ static int qcom_iommu_suspend(struct device *dev) > > return 0; > } > -#endif > > static const struct dev_pm_ops qcom_iommu_pm_ops = { > SET_RUNTIME_PM_OPS(qcom_iommu_suspend, qcom_iommu_resume, NULL) > -- > 2.9.0 > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 1/2] iommu/amd: Rename a few flush functions
From: Joerg RoedelRename a few iommu cache-flush functions that start with iommu_ to start with amd_iommu now. This is to prevent name collisions with generic iommu code later on. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 354cbd6..5469457 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1165,7 +1165,7 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) return iommu_queue_command(iommu, ); } -static void iommu_flush_dte_all(struct amd_iommu *iommu) +static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) { u32 devid; @@ -1179,7 +1179,7 @@ static void iommu_flush_dte_all(struct amd_iommu *iommu) * This function uses heavy locking and may disable irqs for some time. But * this is no issue because it is only called during resume. */ -static void iommu_flush_tlb_all(struct amd_iommu *iommu) +static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) { u32 dom_id; @@ -1193,7 +1193,7 @@ static void iommu_flush_tlb_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } -static void iommu_flush_all(struct amd_iommu *iommu) +static void amd_iommu_flush_all(struct amd_iommu *iommu) { struct iommu_cmd cmd; @@ -1212,7 +1212,7 @@ static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) iommu_queue_command(iommu, ); } -static void iommu_flush_irt_all(struct amd_iommu *iommu) +static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) { u32 devid; @@ -1225,11 +1225,11 @@ static void iommu_flush_irt_all(struct amd_iommu *iommu) void iommu_flush_all_caches(struct amd_iommu *iommu) { if (iommu_feature(iommu, FEATURE_IA)) { - iommu_flush_all(iommu); + amd_iommu_flush_all(iommu); } else { - iommu_flush_dte_all(iommu); - iommu_flush_irt_all(iommu); - iommu_flush_tlb_all(iommu); + amd_iommu_flush_dte_all(iommu); + amd_iommu_flush_irt_all(iommu); + amd_iommu_flush_tlb_all(iommu); } } -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 2/2] iommu: Introduce Interface for IOMMU TLB Flushing
From: Joerg RoedelWith the current IOMMU-API the hardware TLBs have to be flushed in every iommu_ops->unmap() call-back. For unmapping large amounts of address space, like it happens when a KVM domain with assigned devices is destroyed, this causes thousands of unnecessary TLB flushes in the IOMMU hardware because the unmap call-back runs for every unmapped physical page. With the TLB Flush Interface and the new iommu_unmap_fast() function introduced here the need to clean the hardware TLBs is removed from the unmapping code-path. Users of iommu_unmap_fast() have to explicitly call the TLB-Flush functions to sync the page-table changes to the hardware. Three functions for TLB-Flushes are introduced: * iommu_flush_tlb_all() - Flushes all TLB entries associated with that domain. TLBs entries are flushed when this function returns. * iommu_tlb_range_add() - This will add a given range to the flush queue for this domain. * iommu_tlb_sync() - Flushes all queued ranges from the hardware TLBs. Returns when the flush is finished. The semantic of this interface is intentionally similar to the iommu_gather_ops from the io-pgtable code. Cc: Alex Williamson Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 32 --- include/linux/iommu.h | 72 ++- 2 files changed, 99 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3f6ea16..0f68342 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -527,6 +527,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, } + iommu_flush_tlb_all(domain); + out: iommu_put_resv_regions(dev, ); @@ -1556,13 +1558,16 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, } EXPORT_SYMBOL_GPL(iommu_map); -size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) +static size_t __iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size, + bool sync) { + const struct iommu_ops *ops = domain->ops; size_t unmapped_page, unmapped = 0; - unsigned int min_pagesz; unsigned long orig_iova = iova; + unsigned int min_pagesz; - if (unlikely(domain->ops->unmap == NULL || + if (unlikely(ops->unmap == NULL || domain->pgsize_bitmap == 0UL)) return -ENODEV; @@ -1592,10 +1597,13 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) while (unmapped < size) { size_t pgsize = iommu_pgsize(domain, iova, size - unmapped); - unmapped_page = domain->ops->unmap(domain, iova, pgsize); + unmapped_page = ops->unmap(domain, iova, pgsize); if (!unmapped_page) break; + if (sync && ops->iotlb_range_add) + ops->iotlb_range_add(domain, iova, pgsize); + pr_debug("unmapped: iova 0x%lx size 0x%zx\n", iova, unmapped_page); @@ -1603,11 +1611,27 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) unmapped += unmapped_page; } + if (sync && ops->iotlb_sync) + ops->iotlb_sync(domain); + trace_unmap(orig_iova, size, unmapped); return unmapped; } + +size_t iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + return __iommu_unmap(domain, iova, size, true); +} EXPORT_SYMBOL_GPL(iommu_unmap); +size_t iommu_unmap_fast(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + return __iommu_unmap(domain, iova, size, false); +} +EXPORT_SYMBOL_GPL(iommu_unmap_fast); + size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54ad..67fa954 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -167,6 +167,10 @@ struct iommu_resv_region { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @map_sg: map a scatter-gather list of physically contiguous memory chunks + * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain + * @tlb_range_add: Add a given iova range to
[PATCH 0/2 v2] Introduce IOMMU-API TLB Flushing Interface
Hi, here is the second version of the patch-set to introduce an explicit TLB-flushing interface to the IOMMU-API. This version changed the interface quite a lot compared to the first version. This version does not change the semantics of the iommu_map() and iommu_unmap() functions anymore. The reason is that the semantic-change doesn't make sense for iommu_map(), as this path does not require a TLB-flush in almost all cases. And only changing the semantics of the iommu_unmap() path would introduce an asymmetry to the IOMMU-API, which makes it harder to use. This series introduces the new function iommu_unmap_fast() instead, which is allowed to return with dirty TLBs. The three functions for TLB-flushing have not changed compared to the first version of this patch-set. Since the iommu_map()/unmap() functions don't change their semantics anymore, I've also dropped all patches which convert the existing users. Please review. Thanks, Joerg Joerg Roedel (2): iommu/amd: Rename a few flush functions iommu: Introduce Interface for IOMMU TLB Flushing drivers/iommu/amd_iommu.c | 16 +-- drivers/iommu/iommu.c | 32 ++--- include/linux/iommu.h | 72 ++- 3 files changed, 107 insertions(+), 13 deletions(-) -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v1] iommu/ipmmu-vmsa: Set context_id to non-existent value if allocation failed
Hi Oleksandr, On Wednesday, 23 August 2017 14:58:47 EEST Oleksandr Tyshchenko wrote: > On Wed, Aug 23, 2017 at 1:05 PM, Robin Murphy wrote: > > On 23/08/17 10:36, Oleksandr Tyshchenko wrote: > >> On Wed, Aug 23, 2017 at 12:25 AM, Laurent Pinchart wrote: > >>> On Monday, 21 August 2017 15:40:41 EEST Oleksandr Tyshchenko wrote: > From: Oleksandr Tyshchenko> > In ipmmu_domain_init_context() we are trying to allocate context and > if allocation fails we will call free_io_pgtable_ops(), > but "domain->context_id" hasn't been initialized yet (likely it is 0 > because of kzalloc). Having the following call stack: > free_io_pgtable_ops() -> io_pgtable_tlb_flush_all() -> > ipmmu_tlb_flush_all() -> ipmmu_tlb_invalidate() > we will get a mistaken cache flush for a context pointed by > uninitialized "domain->context_id". > > So, set context_id to non-existent value (IPMMU_CTX_MAX) before calling > free_io_pgtable_ops() and check it for a valid value (< IPMMU_CTX_MAX) > before calling ipmmu_tlb_invalidate(). > > Signed-off-by: Oleksandr Tyshchenko > --- > > drivers/iommu/ipmmu-vmsa.c | 4 > 1 file changed, 4 insertions(+) > > diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c > index 2a38aa1..5b226c0 100644 > --- a/drivers/iommu/ipmmu-vmsa.c > +++ b/drivers/iommu/ipmmu-vmsa.c > @@ -303,6 +303,9 @@ static void ipmmu_tlb_flush_all(void *cookie) > { > struct ipmmu_vmsa_domain *domain = cookie; > > + if (domain->context_id >= IPMMU_CTX_MAX) > + return; > + > ipmmu_tlb_invalidate(domain); > } > > @@ -380,6 +383,7 @@ static int ipmmu_domain_init_context(struct > ipmmu_vmsa_domain *domain) > */ > ret = ipmmu_domain_allocate_context(domain->mmu, domain); > if (ret == IPMMU_CTX_MAX) { > + domain->context_id = IPMMU_CTX_MAX; > >>> > >>> Wouldn't it make more sense to allocate the pgtable ops after > >>> initializing the context (moving the ipmmu_domain_allocate_context() > >>> call to the very end of the function) ? That way we would be less > >>> dependent on changes to pgtable ops init/cleanup code that could > >>> require the context to be set up. > >> > >> Why not. But, not sure about the very end of the function. Since for > >> writing some HW registers down the function (IMTTLBR0/IMTTUBR0, > >> IMMAIR0) we need to have what pgtable ops sets up for us (ttbr, mair). > >> What about to just swap alloc_io_pgtable_ops() and > >> ipmmu_domain_allocate_context()? > > > > This looks a lot more reasonable - reserving a free context is both > > quicker and more likely to fail (due to limited hardware resources) than > > setting up a pagetable, so it makes a lot of sense to do that before > > anything else. > > Agree. That looks good to me too. In general I prefer initializing everything needed by the error path before calling anything that could trigger that error path, instead of initializing variables to magic values that mean part of the cleanup should be skipped. Will you send a v2 ? -- Regards, Laurent Pinchart ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu: qcom: annotate PM functions as __maybe_unused
The qcom_iommu_disable_clocks() function is only called from PM code that is hidden in an #ifdef, causing a harmless warning without CONFIG_PM: drivers/iommu/qcom_iommu.c:601:13: error: 'qcom_iommu_disable_clocks' defined but not used [-Werror=unused-function] static void qcom_iommu_disable_clocks(struct qcom_iommu_dev *qcom_iommu) drivers/iommu/qcom_iommu.c:581:12: error: 'qcom_iommu_enable_clocks' defined but not used [-Werror=unused-function] static int qcom_iommu_enable_clocks(struct qcom_iommu_dev *qcom_iommu) Replacing that #ifdef with __maybe_unused annotations lets the compiler drop the functions silently instead. Fixes: 0ae349a0f33f ("iommu/qcom: Add qcom_iommu") Signed-off-by: Arnd Bergmann--- drivers/iommu/qcom_iommu.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 48b62aa52787..c8a587d034b0 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -860,8 +860,7 @@ static int qcom_iommu_device_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int qcom_iommu_resume(struct device *dev) +static int __maybe_unused qcom_iommu_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); @@ -869,7 +868,7 @@ static int qcom_iommu_resume(struct device *dev) return qcom_iommu_enable_clocks(qcom_iommu); } -static int qcom_iommu_suspend(struct device *dev) +static int __maybe_unused qcom_iommu_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); @@ -878,7 +877,6 @@ static int qcom_iommu_suspend(struct device *dev) return 0; } -#endif static const struct dev_pm_ops qcom_iommu_pm_ops = { SET_RUNTIME_PM_OPS(qcom_iommu_suspend, qcom_iommu_resume, NULL) -- 2.9.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v6 3/3] iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801
On Wed, Aug 23, 2017 at 02:17:24PM +0100, John Garry wrote: > >>>Signed-off-by: Shameer Kolothum > >>> >>>--- > >>> drivers/iommu/arm-smmu-v3.c | 27 ++- > >>> 1 file changed, 22 insertions(+), 5 deletions(-) > >> > >>Please can you also add a devicetree binding with corresponding > >>documentation to enable this workaround on non-ACPI based systems too? > >>It should be straightforward if you update the arm_smmu_options table. > > > >As I mentioned before, devicetree was a lower priority and we would > >definitely > >submit patch to support that. Even if we update the arm_smmu_options table > >with DT binding, the generic function to retrieve the MSI address regions > >only > >works on ACPI/IORT case now. > > > > Hi Will, > > Can you confirm your stance on supporting this workaround for DT as well as > ACPI? > > For us, we now only "officially" support ACPI FW, and DT support at this > point is patchy/limited. To me, adding DT support is just more errata > workaround code to maintain with little useful gain. I basically don't like the idea of a driver that only works for one of ACPI or DT yet claims to support both. I'm less fussed about functionality differences (feature X is only available with firmware Y), but not working around a hardware erratum that we know about is just lazy. So I'd prefer that we handle this in both cases, or blacklist affected devices when booting with DT. Continuing as though there isn't an erratum is the worst thing we can do. Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v6 3/3] iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801
Signed-off-by: Shameer Kolothum--- drivers/iommu/arm-smmu-v3.c | 27 ++- 1 file changed, 22 insertions(+), 5 deletions(-) Please can you also add a devicetree binding with corresponding documentation to enable this workaround on non-ACPI based systems too? It should be straightforward if you update the arm_smmu_options table. As I mentioned before, devicetree was a lower priority and we would definitely submit patch to support that. Even if we update the arm_smmu_options table with DT binding, the generic function to retrieve the MSI address regions only works on ACPI/IORT case now. Hi Will, Can you confirm your stance on supporting this workaround for DT as well as ACPI? For us, we now only "officially" support ACPI FW, and DT support at this point is patchy/limited. To me, adding DT support is just more errata workaround code to maintain with little useful gain. Thanks very much, John Moreover I am on holidays starting tomorrow, and really appreciate if this series can go through now. Please let me know. Thanks, Shameer . ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC v2 2/4] iommu/arm-smmu-v3: Add tlbi_on_map option
Hi Will, On 23/08/2017 12:25, Will Deacon wrote: > On Tue, Aug 22, 2017 at 10:09:15PM +0300, Michael S. Tsirkin wrote: >> On Fri, Aug 18, 2017 at 05:49:42AM +0300, Michael S. Tsirkin wrote: >>> On Thu, Aug 17, 2017 at 05:34:25PM +0100, Will Deacon wrote: On Fri, Aug 11, 2017 at 03:45:28PM +0200, Eric Auger wrote: > When running a virtual SMMU on a guest we sometimes need to trap > all changes to the translation structures. This is especially useful > to integrate with VFIO. This patch adds a new option that forces > the IO_PGTABLE_QUIRK_TLBI_ON_MAP to be applied on LPAE page tables. > > TLBI commands then can be trapped. > > Signed-off-by: Eric Auger> > --- > v1 -> v2: > - rebase on v4.13-rc2 > --- > Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt | 4 > drivers/iommu/arm-smmu-v3.c | 5 + > 2 files changed, 9 insertions(+) > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > index c9abbf3..ebb85e9 100644 > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > @@ -52,6 +52,10 @@ the PCIe specification. > devicetree/bindings/interrupt-controller/msi.txt >for a description of the msi-parent property. > > +- tlbi-on-map : invalidate caches whenever there is an update of > + any remapping structure (updates to not-present or > + present entries). > + My position on this hasn't changed, so NAK for this patch. If you want to emulate something outside of the SMMUv3 architecture, please do so, but don't pretend that it's an SMMUv3. Will >>> >>> What if the emulated device does not list arm,smmu-v3, listing >>> qemu,ssmu-v3 as compatible? Would that address the concern? >> >> Will, can you comment on this please? Are you open to reusing the code >> in drivers/iommu/arm-smmu-v3.c to support a paravirtual device that does >> not claim to be compatible with smmuv3 but does try to behave very close to >> it except it can cache non-present structures? Or would you rather >> the code to support this is forked to qemu-smmu-v3.c? > > I still don't understand why this is preferable to a PV IOMMU > implementation. Not only is this proposing to issue TLB maintenance on > map, but the maintenance command itself is entirely made up. Why not just > have a map command? Anyway, I'm reluctant to add this hack to the driver > until: > > 1. There is a compelling reason to pursue this approach instead of a > PV approach (including performance measurements). > > 2. There is a specification for the QEMU fork of the ARM SMMUv3 > architecture, including the semantics of the new command being proposed > and what exactly the TLB maintenance requirements are on map (for > example, what if I change an STE or a CD -- are they cached too?). I am not sure I catch this last point. At the moment whenever the smmuv3 driver issues data structure invalidation commands (CMD_CFGI_*), those are trapped and I replay the mappings on host side. I have not changed anything on that side. I introduced a new map implementation defined command because the per page CMD_TLBI_NH_VA IOVA invalidation command was not efficient/usable with use cases such as DPDK on guest. I understood the spec provisions for such implementation defined commands. > > 3. The ACPI IORT spec is updated to recognise this implementation > > 4. There is an implementation that can use the guest page tables directly, > because that may well make all of this moot. Most probably I will come back to you with questions on stage 1 + stage2 enablement and "4.8 Virtualisation" chapter of smmuv3 spec. Besides I also need to get access to some HW with smmuv3 ;-) Thanks Eric > > Forking the driver doesn't sound very sensible to me. > > Will > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/4] iommu/pamu: Add support for generic iommu-device
Joerg Roedelwrites: > From: Joerg Roedel > > This patch adds a global iommu-handle to the pamu driver and > initializes it at probe time. Also link devices added to the > iommu to this handle. > > Signed-off-by: Joerg Roedel > --- > drivers/iommu/fsl_pamu.c| 17 + > drivers/iommu/fsl_pamu.h| 3 +++ > drivers/iommu/fsl_pamu_domain.c | 5 - > drivers/iommu/fsl_pamu_domain.h | 2 ++ > 4 files changed, 26 insertions(+), 1 deletion(-) This seems to be causing my p5020ds to blow up with: [0.096728] Machine: fsl,P5020DS [0.096729] SoC family: QorIQ [0.096730] SoC ID: svr:0x82280020, Revision: 2.0 [0.098143] Found FSL PCI host bridge at 0x000ffe20. Firmware bus number: 0->0 [0.098145] PCI host bridge /pcie@ffe20 ranges: [0.098149] MEM 0x000c..0x000c1fff -> 0xe000 [0.098151] IO 0x000ff800..0x000ff800 -> 0x [0.098161] /pcie@ffe20: PCICSRBAR @ 0xdf00 [0.098162] setup_pci_atmu: end of DRAM 1 [0.098167] /pcie@ffe20: Setup 64-bit PCI DMA window [0.098168] /pcie@ffe20: DMA window size is 0xdf00 [0.098326] Found FSL PCI host bridge at 0x000ffe202000. Firmware bus number: 0->1 [0.098327] PCI host bridge /pcie@ffe202000 ranges: [0.098330] MEM 0x000c4000..0x000c5fff -> 0xe000 [0.098332] IO 0x000ff802..0x000ff802 -> 0x [0.098340] /pcie@ffe202000: PCICSRBAR @ 0xdf00 [0.098340] setup_pci_atmu: end of DRAM 1 [0.098345] /pcie@ffe202000: Setup 64-bit PCI DMA window [0.098346] /pcie@ffe202000: DMA window size is 0xdf00 [0.204289] iommu: Adding device ffe100300.dma to group 0 [0.209606] Unable to handle kernel paging request for data at address 0x0068 [0.217059] Faulting instruction address: 0xc02abe18 [0.222703] Oops: Kernel access of bad area, sig: 11 [#1] [0.228078] SMP NR_CPUS=24 [0.228080] CoreNet Generic [0.233634] Modules linked in: [0.236674] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.13.0-rc5-gcc-6.3.1-4-g68a17f0be6fe #41 [0.245613] task: c000f7168000 task.stack: c000f717 [0.251515] NIP: c02abe18 LR: c02abf7c CTR: c0025dc8 [0.258545] REGS: c000f7173400 TRAP: 0300 Not tainted (4.13.0-rc5-gcc-6.3.1-4-g68a17f0be6fe) [0.267919] MSR: 80029000 [0.267924] CR: 28adb242 XER: 2000 [0.276165] DEAR: 0068 ESR: SOFTE: 1 [0.276165] GPR00: c02abf7c c000f7173680 c0fbf600 [0.276165] GPR04: c0c6dc88 c000f72e0190 [0.276165] GPR08: c000f7168000 0038 [0.276165] GPR12: 22adb444 c0003fff5000 c0001b70 [0.276165] GPR16: [0.276165] GPR20: [0.276165] GPR24: c0ef4f58 c0d3b408 c0c6dc88 [0.276165] GPR28: c0c6dc88 c0e3b658 [0.346218] NIP [c02abe18] .kernfs_find_ns+0x30/0x148 [0.351943] LR [c02abf7c] .kernfs_find_and_get_ns+0x4c/0x7c [0.358192] Call Trace: [0.360624] [c000f7173680] [c000f7173710] 0xc000f7173710 (unreliable) [0.368088] [c000f7173710] [c02abf7c] .kernfs_find_and_get_ns+0x4c/0x7c [0.375729] [c000f71737a0] [c02b13c8] .sysfs_add_link_to_group+0x44/0x9c [0.383456] [c000f7173840] [c0591660] .iommu_device_link+0x70/0x144 [0.390744] [c000f71738e0] [c05942dc] .fsl_pamu_add_device+0x4c/0x80 [0.398121] [c000f7173960] [c058ce8c] .add_iommu_group+0x5c/0x9c [0.405153] [c000f71739e0] [c059d6b8] .bus_for_each_dev+0x98/0xfc [0.412269] [c000f7173a80] [c058f1a0] .bus_set_iommu+0xd8/0x11c [0.419218] [c000f7173b20] [c0d86998] .pamu_domain_init+0xb0/0xe0 [0.426331] [c000f7173ba0] [c0d86864] .fsl_pamu_init+0xec/0x170 [0.433276] [c000f7173c30] [c0001934] .do_one_initcall+0x68/0x1b8 [0.440395] [c000f7173d00] [c0d440e4] .kernel_init_freeable+0x24c/0x324 [0.448031] [c000f7173db0] [c0001b90] .kernel_init+0x20/0x140 [0.454801] [c000f7173e30] [c9bc] .ret_from_kernel_thread+0x58/0x9c [0.462438] Instruction dump: [0.465390] 7c0802a6 fb81ffe0 f8010010 fba1ffe8 fbc1fff0 fbe1fff8 f821ff71 7c7e1b78 [0.473114] 7cbd2b78 7c9c2378 6000 6000 311d ebfe0048 552906b4 [0.481017] ---[ end trace 5d11d3e6c29380bd ]---
Re: [PATCH 00/13] Introduce IOMMU-API TLB Flushing Interface
On Thu, Aug 17, 2017 at 05:22:20PM +0200, Joerg Roedel wrote: > What I absolutly don't want is that the whole explicit TLB flushing > of the IOMMU-API (as introduced in this patch-set) is considered some > optional part of the API, as it would be when I just introduce _async > versions of map/unmap/map_sg. Okay, forget that :) The discussions I had around this interface made me change it a little bit in the version 2 of the patch-set which I will post soon. I thought a bit more about the iommu_map() code-path. It really doesn't make any sense to remove the tlb-sync requirement from it, because in almost all cases the hardware doesn't require any flushes after a map operation anyway. And in the rare cases where it does - because the hardware is emulated and slow - the iommu-driver can handle that by doing a flush in its iommu_ops->map() call-back. So I removed the iommu_map_sync() and iommu_map_sg_sync() functions from this series. With those changes it also doesn't make sense anymore to have different tlb-sync semantics between iommu_map() and iommu_unmap(). So I ended up introducing a new iommu_unmap_fast() function which can unmap ranges and return with dirty io-tlbs. This makes the extension of couse look somewhat optional, which I tried to avoid, but I hope the '_fast' part of the name is enough motivation for iommu-api users to look into ways to use it in their code. Regards, Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v1] iommu/ipmmu-vmsa: Set context_id to non-existent value if allocation failed
Hi, Robin On Wed, Aug 23, 2017 at 1:05 PM, Robin Murphywrote: > On 23/08/17 10:36, Oleksandr Tyshchenko wrote: >> Hi, Laurent. >> >> On Wed, Aug 23, 2017 at 12:25 AM, Laurent Pinchart >> wrote: >>> Hi Oleksandr, >>> >>> Thank you for the patch. >>> >>> On Monday, 21 August 2017 15:40:41 EEST Oleksandr Tyshchenko wrote: From: Oleksandr Tyshchenko In ipmmu_domain_init_context() we are trying to allocate context and if allocation fails we will call free_io_pgtable_ops(), but "domain->context_id" hasn't been initialized yet (likely it is 0 because of kzalloc). Having the following call stack: free_io_pgtable_ops() -> io_pgtable_tlb_flush_all() -> ipmmu_tlb_flush_all() -> ipmmu_tlb_invalidate() we will get a mistaken cache flush for a context pointed by uninitialized "domain->context_id". So, set context_id to non-existent value (IPMMU_CTX_MAX) before calling free_io_pgtable_ops() and check it for a valid value (< IPMMU_CTX_MAX) before calling ipmmu_tlb_invalidate(). Signed-off-by: Oleksandr Tyshchenko --- drivers/iommu/ipmmu-vmsa.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 2a38aa1..5b226c0 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -303,6 +303,9 @@ static void ipmmu_tlb_flush_all(void *cookie) { struct ipmmu_vmsa_domain *domain = cookie; + if (domain->context_id >= IPMMU_CTX_MAX) + return; + ipmmu_tlb_invalidate(domain); } @@ -380,6 +383,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) */ ret = ipmmu_domain_allocate_context(domain->mmu, domain); if (ret == IPMMU_CTX_MAX) { + domain->context_id = IPMMU_CTX_MAX; >>> >>> Wouldn't it make more sense to allocate the pgtable ops after initializing >>> the >>> context (moving the ipmmu_domain_allocate_context() call to the very end of >>> the function) ? That way we would be less dependent on changes to pgtable >>> ops >>> init/cleanup code that could require the context to be set up. >> >> Why not. But, not sure about the very end of the function. Since for >> writing some HW registers down the function (IMTTLBR0/IMTTUBR0, >> IMMAIR0) >> we need to have what pgtable ops sets up for us (ttbr, mair). What >> about to just swap alloc_io_pgtable_ops() and >> ipmmu_domain_allocate_context()? > > This looks a lot more reasonable - reserving a free context is both > quicker and more likely to fail (due to limited hardware resources) than > setting up a pagetable, so it makes a lot of sense to do that before > anything else. Agree. > > Robin. > >> ... >> diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c >> index 2a38aa1..90af1c7 100644 >> --- a/drivers/iommu/ipmmu-vmsa.c >> +++ b/drivers/iommu/ipmmu-vmsa.c >> @@ -370,22 +370,22 @@ static int ipmmu_domain_init_context(struct >> ipmmu_vmsa_domain *domain) >> */ >> domain->cfg.iommu_dev = domain->mmu->dev; >> >> - domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, >cfg, >> - domain); >> - if (!domain->iop) >> - return -EINVAL; >> - >> /* >> * Find an unused context. >> */ >> ret = ipmmu_domain_allocate_context(domain->mmu, domain); >> - if (ret == IPMMU_CTX_MAX) { >> - free_io_pgtable_ops(domain->iop); >> + if (ret == IPMMU_CTX_MAX) >> return -EBUSY; >> - } >> >> domain->context_id = ret; >> >> + domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, >cfg, >> + domain); >> + if (!domain->iop) { >> + ipmmu_domain_free_context(domain->mmu, domain->context_id); >> + return -EINVAL; >> + } >> + >> /* TTBR0 */ >> ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; >> ipmmu_ctx_write(domain, IMTTLBR0, ttbr); >> ... >> >>> free_io_pgtable_ops(domain->iop); return -EBUSY; } >>> >>> >>> -- >>> Regards, >>> >>> Laurent Pinchart >>> >> >> >> > -- Regards, Oleksandr Tyshchenko ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC v2 2/4] iommu/arm-smmu-v3: Add tlbi_on_map option
On Tue, Aug 22, 2017 at 10:09:15PM +0300, Michael S. Tsirkin wrote: > On Fri, Aug 18, 2017 at 05:49:42AM +0300, Michael S. Tsirkin wrote: > > On Thu, Aug 17, 2017 at 05:34:25PM +0100, Will Deacon wrote: > > > On Fri, Aug 11, 2017 at 03:45:28PM +0200, Eric Auger wrote: > > > > When running a virtual SMMU on a guest we sometimes need to trap > > > > all changes to the translation structures. This is especially useful > > > > to integrate with VFIO. This patch adds a new option that forces > > > > the IO_PGTABLE_QUIRK_TLBI_ON_MAP to be applied on LPAE page tables. > > > > > > > > TLBI commands then can be trapped. > > > > > > > > Signed-off-by: Eric Auger> > > > > > > > --- > > > > v1 -> v2: > > > > - rebase on v4.13-rc2 > > > > --- > > > > Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt | 4 > > > > drivers/iommu/arm-smmu-v3.c | 5 + > > > > 2 files changed, 9 insertions(+) > > > > > > > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > > index c9abbf3..ebb85e9 100644 > > > > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > > > @@ -52,6 +52,10 @@ the PCIe specification. > > > > > > > > devicetree/bindings/interrupt-controller/msi.txt > > > >for a description of the msi-parent property. > > > > > > > > +- tlbi-on-map : invalidate caches whenever there is an update of > > > > + any remapping structure (updates to not-present > > > > or > > > > + present entries). > > > > + > > > > > > My position on this hasn't changed, so NAK for this patch. If you want to > > > emulate something outside of the SMMUv3 architecture, please do so, but > > > don't pretend that it's an SMMUv3. > > > > > > Will > > > > What if the emulated device does not list arm,smmu-v3, listing > > qemu,ssmu-v3 as compatible? Would that address the concern? > > Will, can you comment on this please? Are you open to reusing the code > in drivers/iommu/arm-smmu-v3.c to support a paravirtual device that does > not claim to be compatible with smmuv3 but does try to behave very close to > it except it can cache non-present structures? Or would you rather > the code to support this is forked to qemu-smmu-v3.c? I still don't understand why this is preferable to a PV IOMMU implementation. Not only is this proposing to issue TLB maintenance on map, but the maintenance command itself is entirely made up. Why not just have a map command? Anyway, I'm reluctant to add this hack to the driver until: 1. There is a compelling reason to pursue this approach instead of a PV approach (including performance measurements). 2. There is a specification for the QEMU fork of the ARM SMMUv3 architecture, including the semantics of the new command being proposed and what exactly the TLB maintenance requirements are on map (for example, what if I change an STE or a CD -- are they cached too?). 3. The ACPI IORT spec is updated to recognise this implementation 4. There is an implementation that can use the guest page tables directly, because that may well make all of this moot. Forking the driver doesn't sound very sensible to me. Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v1] iommu/ipmmu-vmsa: Set context_id to non-existent value if allocation failed
On 23/08/17 10:36, Oleksandr Tyshchenko wrote: > Hi, Laurent. > > On Wed, Aug 23, 2017 at 12:25 AM, Laurent Pinchart >wrote: >> Hi Oleksandr, >> >> Thank you for the patch. >> >> On Monday, 21 August 2017 15:40:41 EEST Oleksandr Tyshchenko wrote: >>> From: Oleksandr Tyshchenko >>> >>> In ipmmu_domain_init_context() we are trying to allocate context and >>> if allocation fails we will call free_io_pgtable_ops(), >>> but "domain->context_id" hasn't been initialized yet (likely it is 0 >>> because of kzalloc). Having the following call stack: >>> free_io_pgtable_ops() -> io_pgtable_tlb_flush_all() -> >>> ipmmu_tlb_flush_all() -> ipmmu_tlb_invalidate() >>> we will get a mistaken cache flush for a context pointed by >>> uninitialized "domain->context_id". >>> >>> So, set context_id to non-existent value (IPMMU_CTX_MAX) before calling >>> free_io_pgtable_ops() and check it for a valid value (< IPMMU_CTX_MAX) >>> before calling ipmmu_tlb_invalidate(). >>> >>> Signed-off-by: Oleksandr Tyshchenko >>> --- >>> drivers/iommu/ipmmu-vmsa.c | 4 >>> 1 file changed, 4 insertions(+) >>> >>> diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c >>> index 2a38aa1..5b226c0 100644 >>> --- a/drivers/iommu/ipmmu-vmsa.c >>> +++ b/drivers/iommu/ipmmu-vmsa.c >>> @@ -303,6 +303,9 @@ static void ipmmu_tlb_flush_all(void *cookie) >>> { >>> struct ipmmu_vmsa_domain *domain = cookie; >>> >>> + if (domain->context_id >= IPMMU_CTX_MAX) >>> + return; >>> + >>> ipmmu_tlb_invalidate(domain); >>> } >>> >>> @@ -380,6 +383,7 @@ static int ipmmu_domain_init_context(struct >>> ipmmu_vmsa_domain *domain) */ >>> ret = ipmmu_domain_allocate_context(domain->mmu, domain); >>> if (ret == IPMMU_CTX_MAX) { >>> + domain->context_id = IPMMU_CTX_MAX; >> >> Wouldn't it make more sense to allocate the pgtable ops after initializing >> the >> context (moving the ipmmu_domain_allocate_context() call to the very end of >> the function) ? That way we would be less dependent on changes to pgtable ops >> init/cleanup code that could require the context to be set up. > > Why not. But, not sure about the very end of the function. Since for > writing some HW registers down the function (IMTTLBR0/IMTTUBR0, > IMMAIR0) > we need to have what pgtable ops sets up for us (ttbr, mair). What > about to just swap alloc_io_pgtable_ops() and > ipmmu_domain_allocate_context()? This looks a lot more reasonable - reserving a free context is both quicker and more likely to fail (due to limited hardware resources) than setting up a pagetable, so it makes a lot of sense to do that before anything else. Robin. > ... > diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c > index 2a38aa1..90af1c7 100644 > --- a/drivers/iommu/ipmmu-vmsa.c > +++ b/drivers/iommu/ipmmu-vmsa.c > @@ -370,22 +370,22 @@ static int ipmmu_domain_init_context(struct > ipmmu_vmsa_domain *domain) > */ > domain->cfg.iommu_dev = domain->mmu->dev; > > - domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, >cfg, > - domain); > - if (!domain->iop) > - return -EINVAL; > - > /* > * Find an unused context. > */ > ret = ipmmu_domain_allocate_context(domain->mmu, domain); > - if (ret == IPMMU_CTX_MAX) { > - free_io_pgtable_ops(domain->iop); > + if (ret == IPMMU_CTX_MAX) > return -EBUSY; > - } > > domain->context_id = ret; > > + domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, >cfg, > + domain); > + if (!domain->iop) { > + ipmmu_domain_free_context(domain->mmu, domain->context_id); > + return -EINVAL; > + } > + > /* TTBR0 */ > ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; > ipmmu_ctx_write(domain, IMTTLBR0, ttbr); > ... > >> >>> free_io_pgtable_ops(domain->iop); >>> return -EBUSY; >>> } >> >> >> -- >> Regards, >> >> Laurent Pinchart >> > > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC] virtio-iommu version 0.4
On 04/08/17 19:19, Jean-Philippe Brucker wrote: > Other extensions are in preparation. I won't detail them here because v0.4 > already is a lot to digest, but in short, building on top of PROBE: > > * First, since the IOMMU is paravirtualized, the device can expose some > properties of the physical topology to the guest, and let it allocate > resources more efficiently. For example, when the virtio-iommu manages > both physical and emulated endpoints, with different underlying IOMMUs, > we now have a way to describe multiple page and block granularities, > instead of forcing the guest to use the most restricted one for all > endpoints. This will most likely be in v0.5. In order to extend requests with PASIDs and (later) nested mode, I intend to rename "address_space" field to "domain", since it is a lot more precise about what the field is referring to and the current name would make these extensions confusing. Please find the rationale at [1]. "ioasid_bits" will be "domain_bits" and "VIRTIO_IOMMU_F_IOASID_BITS" will be "VIRTIO_IOMMU_F_DOMAIN_BITS". For those that had time to read this version, do you have other comments and suggestions about v0.4? Otherwise it is the only update I have for v0.5 (along with fine-grained address range and page size properties from the quoted text) and I will send it soon. In particular, please tell me now if you see the need for other destructive changes like this one. They will be impossible to introduce once a driver or device is upstream. Thanks, Jean [1] https://www.spinics.net/lists/kvm/msg154573.html ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v1] iommu/ipmmu-vmsa: Set context_id to non-existent value if allocation failed
Hi, Laurent. On Wed, Aug 23, 2017 at 12:25 AM, Laurent Pinchartwrote: > Hi Oleksandr, > > Thank you for the patch. > > On Monday, 21 August 2017 15:40:41 EEST Oleksandr Tyshchenko wrote: >> From: Oleksandr Tyshchenko >> >> In ipmmu_domain_init_context() we are trying to allocate context and >> if allocation fails we will call free_io_pgtable_ops(), >> but "domain->context_id" hasn't been initialized yet (likely it is 0 >> because of kzalloc). Having the following call stack: >> free_io_pgtable_ops() -> io_pgtable_tlb_flush_all() -> >> ipmmu_tlb_flush_all() -> ipmmu_tlb_invalidate() >> we will get a mistaken cache flush for a context pointed by >> uninitialized "domain->context_id". >> >> So, set context_id to non-existent value (IPMMU_CTX_MAX) before calling >> free_io_pgtable_ops() and check it for a valid value (< IPMMU_CTX_MAX) >> before calling ipmmu_tlb_invalidate(). >> >> Signed-off-by: Oleksandr Tyshchenko >> --- >> drivers/iommu/ipmmu-vmsa.c | 4 >> 1 file changed, 4 insertions(+) >> >> diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c >> index 2a38aa1..5b226c0 100644 >> --- a/drivers/iommu/ipmmu-vmsa.c >> +++ b/drivers/iommu/ipmmu-vmsa.c >> @@ -303,6 +303,9 @@ static void ipmmu_tlb_flush_all(void *cookie) >> { >> struct ipmmu_vmsa_domain *domain = cookie; >> >> + if (domain->context_id >= IPMMU_CTX_MAX) >> + return; >> + >> ipmmu_tlb_invalidate(domain); >> } >> >> @@ -380,6 +383,7 @@ static int ipmmu_domain_init_context(struct >> ipmmu_vmsa_domain *domain) */ >> ret = ipmmu_domain_allocate_context(domain->mmu, domain); >> if (ret == IPMMU_CTX_MAX) { >> + domain->context_id = IPMMU_CTX_MAX; > > Wouldn't it make more sense to allocate the pgtable ops after initializing the > context (moving the ipmmu_domain_allocate_context() call to the very end of > the function) ? That way we would be less dependent on changes to pgtable ops > init/cleanup code that could require the context to be set up. Why not. But, not sure about the very end of the function. Since for writing some HW registers down the function (IMTTLBR0/IMTTUBR0, IMMAIR0) we need to have what pgtable ops sets up for us (ttbr, mair). What about to just swap alloc_io_pgtable_ops() and ipmmu_domain_allocate_context()? ... diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 2a38aa1..90af1c7 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -370,22 +370,22 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) */ domain->cfg.iommu_dev = domain->mmu->dev; - domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, >cfg, - domain); - if (!domain->iop) - return -EINVAL; - /* * Find an unused context. */ ret = ipmmu_domain_allocate_context(domain->mmu, domain); - if (ret == IPMMU_CTX_MAX) { - free_io_pgtable_ops(domain->iop); + if (ret == IPMMU_CTX_MAX) return -EBUSY; - } domain->context_id = ret; + domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, >cfg, + domain); + if (!domain->iop) { + ipmmu_domain_free_context(domain->mmu, domain->context_id); + return -EINVAL; + } + /* TTBR0 */ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; ipmmu_ctx_write(domain, IMTTLBR0, ttbr); ... > >> free_io_pgtable_ops(domain->iop); >> return -EBUSY; >> } > > > -- > Regards, > > Laurent Pinchart > -- Regards, Oleksandr Tyshchenko ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu