Re: [PATCH v6 03/10] iommu/mediatek: Use a u32 flags to describe different HW features
On Fri, 2020-07-03 at 12:41 +0800, Chao Hao wrote: > Given the fact that we are adding more and more plat_data bool values, > it would make sense to use a u32 flags register and add the appropriate > macro definitions to set and check for a flag present. > No functional change. > > Cc: Yong Wu > Suggested-by: Matthias Brugger > Signed-off-by: Chao Hao > Reviewed-by: Matthias Brugger > --- > drivers/iommu/mtk_iommu.c | 28 +--- > drivers/iommu/mtk_iommu.h | 7 +-- > 2 files changed, 18 insertions(+), 17 deletions(-) > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c > index 88d3df5b91c2..40ca564d97af 100644 > --- a/drivers/iommu/mtk_iommu.c > +++ b/drivers/iommu/mtk_iommu.c > @@ -100,6 +100,15 @@ > #define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf) > #define MTK_M4U_TO_PORT(id) ((id) & 0x1f) > > +#define HAS_4GB_MODE BIT(0) > +/* HW will use the EMI clock if there isn't the "bclk". */ > +#define HAS_BCLK BIT(1) > +#define HAS_VLD_PA_RNG BIT(2) > +#define RESET_AXIBIT(3) > + > +#define MTK_IOMMU_HAS_FLAG(pdata, _x) \ > + pdata)->flags) & (_x)) == (_x)) > + > struct mtk_iommu_domain { > struct io_pgtable_cfg cfg; > struct io_pgtable_ops *iop; > @@ -563,7 +572,8 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data > *data) >upper_32_bits(data->protect_base); > writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR); > > - if (data->enable_4GB && data->plat_data->has_vld_pa_rng) { > + if (data->enable_4GB && > + MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_VLD_PA_RNG)) { > /* >* If 4GB mode is enabled, the validate PA range is from >* 0x1__ to 0x1__. here record bit[32:30]. > @@ -573,7 +583,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data > *data) > } > writel_relaxed(0, data->base + REG_MMU_DCM_DIS); > > - if (data->plat_data->reset_axi) { > + if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) { > /* The register is called STANDARD_AXI_MODE in this case */ > writel_relaxed(0, data->base + REG_MMU_MISC_CTRL); > } > @@ -618,7 +628,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) > > /* Whether the current dram is over 4GB */ > data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT)); > - if (!data->plat_data->has_4gb_mode) > + if (!MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) > data->enable_4GB = false; > > res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > @@ -631,7 +641,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) > if (data->irq < 0) > return data->irq; > > - if (data->plat_data->has_bclk) { > + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_BCLK)) { > data->bclk = devm_clk_get(dev, "bclk"); > if (IS_ERR(data->bclk)) > return PTR_ERR(data->bclk); > @@ -763,23 +773,19 @@ static const struct dev_pm_ops mtk_iommu_pm_ops = { > > static const struct mtk_iommu_plat_data mt2712_data = { > .m4u_plat = M4U_MT2712, > - .has_4gb_mode = true, > - .has_bclk = true, > - .has_vld_pa_rng = true, > + .flags= HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG, > .larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, > }; > > static const struct mtk_iommu_plat_data mt8173_data = { > .m4u_plat = M4U_MT8173, > - .has_4gb_mode = true, > - .has_bclk = true, > - .reset_axi= true, > + .flags= HAS_4GB_MODE | HAS_BCLK | RESET_AXI, > .larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */ > }; > > static const struct mtk_iommu_plat_data mt8183_data = { > .m4u_plat = M4U_MT8183, > - .reset_axi= true, > + .flags= RESET_AXI, > .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1}, > }; > > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h > index 7212e6fcf982..5225a9170aaa 100644 > --- a/drivers/iommu/mtk_iommu.h > +++ b/drivers/iommu/mtk_iommu.h > @@ -39,12 +39,7 @@ enum mtk_iommu_plat { > > struct mtk_iommu_plat_data { > enum mtk_iommu_plat m4u_plat; > - boolhas_4gb_mode; > - > - /* HW will use the EMI clock if there isn't the "bclk". */ > - boolhas_bclk; > - boolhas_vld_pa_rng; > - boolreset_axi; > + u32 flags; How about using bit field instead? eg u32 has_bclk:1; In this way, we don't need to change code. Joe.C ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 00/34] iommu: Move iommu_group setup to IOMMU core code
On Tue, Jun 30, 2020 at 08:40:28PM -0400, Qian Cai wrote: > On Wed, Apr 29, 2020 at 03:36:38PM +0200, Joerg Roedel wrote: > > Hi, > > > > here is the third version of this patch-set. Older versions can be found > > here: > > > > v1: https://lore.kernel.org/lkml/20200407183742.4344-1-j...@8bytes.org/ > > (Has some more introductory text) > > > > v2: https://lore.kernel.org/lkml/20200414131542.25608-1-j...@8bytes.org/ > > > > Changes v2 -> v3: > > > > * Rebased v5.7-rc3 > > > > * Added a missing iommu_group_put() as reported by Lu Baolu. > > > > * Added a patch to consolidate more initialization work in > > __iommu_probe_device(), fixing a bug where no 'struct > > device_iommu' was allocated in the hotplug path. > > > > There is also a git-branch available with these patches applied: > > > > > > https://git.kernel.org/pub/scm/linux/kernel/git/joro/linux.git/log/?h=iommu-probe-device-v3 > > > > Please review. If there are no objections I plan to put these patches > > into the IOMMU tree early next week. > > Looks like this patchset introduced an use-after-free on arm-smmu-v3. > > Reproduced using mlx5, > > # echo 1 > /sys/class/net/enp11s0f1np1/device/sriov_numvfs > # echo 0 > /sys/class/net/enp11s0f1np1/device/sriov_numvfs > > The .config, > https://github.com/cailca/linux-mm/blob/master/arm64.config > > Looking at the free stack, > > iommu_release_device->iommu_group_remove_device > > was introduced in 07/34 ("iommu: Add probe_device() and release_device() > call-backs"). FYI, I have just sent a patch to fix this, https://lore.kernel.org/linux-iommu/20200704001003.2303-1-...@lca.pw/ > > [ 9426.724641][ T3356] pci :0b:01.2: Removing from iommu group 3 > [ 9426.731347][ T3356] > == > [ 9426.739263][ T3356] BUG: KASAN: use-after-free in > __lock_acquire+0x3458/0x4440 > __lock_acquire at kernel/locking/lockdep.c:4250 > [ 9426.746477][ T3356] Read of size 8 at addr 0089df1a6f68 by task > bash/3356 > [ 9426.753601][ T3356] > [ 9426.755782][ T3356] CPU: 5 PID: 3356 Comm: bash Not tainted > 5.8.0-rc3-next-20200630 #2 > [ 9426.763687][ T3356] Hardware name: HPE Apollo 70 > /C01_APACHE_MB , BIOS L50_5.13_1.11 06/18/2019 > [ 9426.774111][ T3356] Call trace: > [ 9426.777245][ T3356] dump_backtrace+0x0/0x398 > [ 9426.781593][ T3356] show_stack+0x14/0x20 > [ 9426.785596][ T3356] dump_stack+0x140/0x1b8 > [ 9426.789772][ T3356] print_address_description.isra.12+0x54/0x4a8 > [ 9426.795855][ T3356] kasan_report+0x134/0x1b8 > [ 9426.800203][ T3356] __asan_report_load8_noabort+0x2c/0x50 > [ 9426.805679][ T3356] __lock_acquire+0x3458/0x4440 > [ 9426.810373][ T3356] lock_acquire+0x204/0xf10 > [ 9426.814722][ T3356] _raw_spin_lock_irqsave+0xf8/0x180 > [ 9426.819853][ T3356] arm_smmu_detach_dev+0xd8/0x4a0 > arm_smmu_detach_dev at drivers/iommu/arm-smmu-v3.c:2776 > [ 9426.824721][ T3356] arm_smmu_release_device+0xb4/0x1c8 > arm_smmu_disable_pasid at drivers/iommu/arm-smmu-v3.c:2754 > (inlined by) arm_smmu_release_device at drivers/iommu/arm-smmu-v3.c:3000 > [ 9426.829937][ T3356] iommu_release_device+0xc0/0x178 > iommu_release_device at drivers/iommu/iommu.c:302 > [ 9426.834892][ T3356] iommu_bus_notifier+0x118/0x160 > [ 9426.839762][ T3356] notifier_call_chain+0xa4/0x128 > [ 9426.844630][ T3356] __blocking_notifier_call_chain+0x70/0xa8 > [ 9426.850367][ T3356] blocking_notifier_call_chain+0x14/0x20 > [ 9426.855929][ T3356] device_del+0x618/0xa00 > [ 9426.860105][ T3356] pci_remove_bus_device+0x108/0x2d8 > [ 9426.865233][ T3356] pci_stop_and_remove_bus_device+0x1c/0x28 > [ 9426.870972][ T3356] pci_iov_remove_virtfn+0x228/0x368 > [ 9426.876100][ T3356] sriov_disable+0x8c/0x348 > [ 9426.880447][ T3356] pci_disable_sriov+0x5c/0x70 > [ 9426.885117][ T3356] mlx5_core_sriov_configure+0xd8/0x260 [mlx5_core] > [ 9426.891549][ T3356] sriov_numvfs_store+0x240/0x318 > [ 9426.896417][ T3356] dev_attr_store+0x38/0x68 > [ 9426.900766][ T3356] sysfs_kf_write+0xdc/0x128 > [ 9426.905200][ T3356] kernfs_fop_write+0x23c/0x448 > [ 9426.909897][ T3356] __vfs_write+0x54/0xe8 > [ 9426.913984][ T3356] vfs_write+0x124/0x3f0 > [ 9426.918070][ T3356] ksys_write+0xe8/0x1b8 > [ 9426.922157][ T3356] __arm64_sys_write+0x68/0x98 > [ 9426.926766][ T3356] do_el0_svc+0x124/0x220 > [ 9426.930941][ T3356] el0_sync_handler+0x260/0x408 > [ 9426.935634][ T3356] el0_sync+0x140/0x180 > [ 9426.939633][ T3356] > [ 9426.941810][ T3356] Allocated by task 3356: > [ 9426.945985][ T3356] save_stack+0x24/0x50 > [ 9426.949986][ T3356] __kasan_kmalloc.isra.13+0xc4/0xe0 > [ 9426.955114][ T3356] kasan_kmalloc+0xc/0x18 > [ 9426.959288][ T3356] kmem_cache_alloc_trace+0x1ec/0x318 > [ 9426.964503][ T3356] arm_smmu_domain_alloc+0x54/0x148 > [ 9426.969545][ T3356] iommu_group_alloc_default_domain+0xc0/0x440 > [ 9426.975541][ T3356] iommu_probe_device+0x1c0/0x308 > [ 9426.980409][ T3356]
[PATCH] iommu: fix use-after-free in iommu_release_device
In pci_disable_sriov(), i.e., # echo 0 > /sys/class/net/enp11s0f1np1/device/sriov_numvfs iommu_release_device iommu_group_remove_device arm_smmu_domain_free kfree(smmu_domain) Later, iommu_release_device arm_smmu_release_device arm_smmu_detach_dev spin_lock_irqsave(_domain->devices_lock, would trigger an use-after-free. Fixed it by call arm_smmu_release_device() first before iommu_group_remove_device(). BUG: KASAN: use-after-free in __lock_acquire+0x3458/0x4440 __lock_acquire at kernel/locking/lockdep.c:4250 Read of size 8 at addr 0089df1a6f68 by task bash/3356 CPU: 5 PID: 3356 Comm: bash Not tainted 5.8.0-rc3-next-20200630 #2 Hardware name: HPE Apollo 70 /C01_APACHE_MB , BIOS L50_5.13_1.11 06/18/2019 Call trace: dump_backtrace+0x0/0x398 show_stack+0x14/0x20 dump_stack+0x140/0x1b8 print_address_description.isra.12+0x54/0x4a8 kasan_report+0x134/0x1b8 __asan_report_load8_noabort+0x2c/0x50 __lock_acquire+0x3458/0x4440 lock_acquire+0x204/0xf10 _raw_spin_lock_irqsave+0xf8/0x180 arm_smmu_detach_dev+0xd8/0x4a0 arm_smmu_detach_dev at drivers/iommu/arm-smmu-v3.c:2776 arm_smmu_release_device+0xb4/0x1c8 arm_smmu_disable_pasid at drivers/iommu/arm-smmu-v3.c:2754 (inlined by) arm_smmu_release_device at drivers/iommu/arm-smmu-v3.c:3000 iommu_release_device+0xc0/0x178 iommu_release_device at drivers/iommu/iommu.c:302 iommu_bus_notifier+0x118/0x160 notifier_call_chain+0xa4/0x128 __blocking_notifier_call_chain+0x70/0xa8 blocking_notifier_call_chain+0x14/0x20 device_del+0x618/0xa00 pci_remove_bus_device+0x108/0x2d8 pci_stop_and_remove_bus_device+0x1c/0x28 pci_iov_remove_virtfn+0x228/0x368 sriov_disable+0x8c/0x348 pci_disable_sriov+0x5c/0x70 mlx5_core_sriov_configure+0xd8/0x260 [mlx5_core] sriov_numvfs_store+0x240/0x318 dev_attr_store+0x38/0x68 sysfs_kf_write+0xdc/0x128 kernfs_fop_write+0x23c/0x448 __vfs_write+0x54/0xe8 vfs_write+0x124/0x3f0 ksys_write+0xe8/0x1b8 __arm64_sys_write+0x68/0x98 do_el0_svc+0x124/0x220 el0_sync_handler+0x260/0x408 el0_sync+0x140/0x180 Allocated by task 3356: save_stack+0x24/0x50 __kasan_kmalloc.isra.13+0xc4/0xe0 kasan_kmalloc+0xc/0x18 kmem_cache_alloc_trace+0x1ec/0x318 arm_smmu_domain_alloc+0x54/0x148 iommu_group_alloc_default_domain+0xc0/0x440 iommu_probe_device+0x1c0/0x308 iort_iommu_configure+0x434/0x518 acpi_dma_configure+0xf0/0x128 pci_dma_configure+0x114/0x160 really_probe+0x124/0x6d8 driver_probe_device+0xc4/0x180 __device_attach_driver+0x184/0x1e8 bus_for_each_drv+0x114/0x1a0 __device_attach+0x19c/0x2a8 device_attach+0x10/0x18 pci_bus_add_device+0x70/0xf8 pci_iov_add_virtfn+0x7b4/0xb40 sriov_enable+0x5c8/0xc30 pci_enable_sriov+0x64/0x80 mlx5_core_sriov_configure+0x58/0x260 [mlx5_core] sriov_numvfs_store+0x1c0/0x318 dev_attr_store+0x38/0x68 sysfs_kf_write+0xdc/0x128 kernfs_fop_write+0x23c/0x448 __vfs_write+0x54/0xe8 vfs_write+0x124/0x3f0 ksys_write+0xe8/0x1b8 __arm64_sys_write+0x68/0x98 do_el0_svc+0x124/0x220 el0_sync_handler+0x260/0x408 el0_sync+0x140/0x180 Freed by task 3356: save_stack+0x24/0x50 __kasan_slab_free+0x124/0x198 kasan_slab_free+0x10/0x18 slab_free_freelist_hook+0x110/0x298 kfree+0x128/0x668 arm_smmu_domain_free+0xf4/0x1a0 iommu_group_release+0xec/0x160 kobject_put+0xf4/0x238 kobject_del+0x110/0x190 kobject_put+0x1e4/0x238 iommu_group_remove_device+0x394/0x938 iommu_release_device+0x9c/0x178 iommu_release_device at drivers/iommu/iommu.c:300 iommu_bus_notifier+0x118/0x160 notifier_call_chain+0xa4/0x128 __blocking_notifier_call_chain+0x70/0xa8 blocking_notifier_call_chain+0x14/0x20 device_del+0x618/0xa00 pci_remove_bus_device+0x108/0x2d8 pci_stop_and_remove_bus_device+0x1c/0x28 pci_iov_remove_virtfn+0x228/0x368 sriov_disable+0x8c/0x348 pci_disable_sriov+0x5c/0x70 mlx5_core_sriov_configure+0xd8/0x260 [mlx5_core] sriov_numvfs_store+0x240/0x318 dev_attr_store+0x38/0x68 sysfs_kf_write+0xdc/0x128 kernfs_fop_write+0x23c/0x448 __vfs_write+0x54/0xe8 vfs_write+0x124/0x3f0 ksys_write+0xe8/0x1b8 __arm64_sys_write+0x68/0x98 do_el0_svc+0x124/0x220 el0_sync_handler+0x260/0x408 el0_sync+0x140/0x180 The buggy address belongs to the object at 0089df1a6e00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 360 bytes inside of 512-byte region [0089df1a6e00, 0089df1a7000) The buggy address belongs to the page: page:ffe02257c680 refcount:1 mapcount:0 mapping: index:0x0089df1a1400 flags: 0x780200(slab) raw: 00780200 ffe02246b8c8 ffe02257ff88 00320680 raw: 0089df1a1400 002a000e 0001 0089df1a5001 page dumped because: kasan: bad access detected page->mem_cgroup:0089df1a5001 Memory state around the buggy address: 0089df1a6e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)
On 2020-07-03 21:34, Rob Clark wrote: On Fri, Jul 3, 2020 at 7:53 AM Sai Prakash Ranjan wrote: Hi Will, On 2020-07-03 19:07, Will Deacon wrote: > On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote: >> diff --git a/drivers/gpu/drm/msm/msm_iommu.c >> b/drivers/gpu/drm/msm/msm_iommu.c >> index f455c597f76d..bd1d58229cc2 100644 >> --- a/drivers/gpu/drm/msm/msm_iommu.c >> +++ b/drivers/gpu/drm/msm/msm_iommu.c >> @@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu, >> uint64_t iova, >> iova |= GENMASK_ULL(63, 49); >> >> >> +if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE) >> +prot |= IOMMU_SYS_CACHE_ONLY; > > Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then > it > looks like it should actually be a property on the domain because we > never > need to configure it on a per-mapping basis within a domain, and > therefore > it shouldn't be exposed by the IOMMU API as a prot flag. > > Do you agree? > GPU being the only user is for now, but there are other clients which can use this. Plus how do we set the memory attributes if we do not expose this as prot flag? It does appear that the downstream kgsl driver sets this for basically all mappings.. well there is some conditional stuff around DOMAIN_ATTR_USE_LLC_NWA but it seems based on the property of the domain. (Jordan may know more about what that is about.) But looks like there are a lot of different paths into iommu_map in kgsl so I might have missed something. Assuming there isn't some case where we specifically don't want to use the system cache for some mapping, I think it could be a domain attribute that sets an io_pgtable_cfg::quirks flag Ok then we are good to remove unused sys cache prot flag which Will has posted. Thanks, Sai -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: Remove unused IOMMU_SYS_CACHE_ONLY flag
On 2020-07-03 21:55, Will Deacon wrote: The IOMMU_SYS_CACHE_ONLY flag was never exposed via the DMA API and has no in-tree users. Remove it. Cc: Robin Murphy Cc: "Isaac J. Manjarres" Cc: Joerg Roedel Cc: Christoph Hellwig Cc: Sai Prakash Ranjan Cc: Rob Clark Signed-off-by: Will Deacon --- As discussed in [1], sounds like this should be a domain attribute anyway when it's needed by the GPU. [1] https://lore.kernel.org/r/caf6aegscrovtsi2r7_aukmh9luoc_gumr0w0kujc2cegpfj...@mail.gmail.com Reviewed-by: Sai Prakash Ranjan QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu: Remove unused IOMMU_SYS_CACHE_ONLY flag
The IOMMU_SYS_CACHE_ONLY flag was never exposed via the DMA API and has no in-tree users. Remove it. Cc: Robin Murphy Cc: "Isaac J. Manjarres" Cc: Joerg Roedel Cc: Christoph Hellwig Cc: Sai Prakash Ranjan Cc: Rob Clark Signed-off-by: Will Deacon --- As discussed in [1], sounds like this should be a domain attribute anyway when it's needed by the GPU. [1] https://lore.kernel.org/r/caf6aegscrovtsi2r7_aukmh9luoc_gumr0w0kujc2cegpfj...@mail.gmail.com drivers/iommu/io-pgtable-arm.c | 3 --- include/linux/iommu.h | 6 -- 2 files changed, 9 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 04fbd4bf0ff9..8f175c02f8e3 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -438,9 +438,6 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, else if (prot & IOMMU_CACHE) pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE << ARM_LPAE_PTE_ATTRINDX_SHIFT); - else if (prot & IOMMU_SYS_CACHE_ONLY) - pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE - << ARM_LPAE_PTE_ATTRINDX_SHIFT); } if (prot & IOMMU_CACHE) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5f0b7859d2eb..bee1a8fa1fb1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -31,12 +31,6 @@ * if the IOMMU page table format is equivalent. */ #define IOMMU_PRIV (1 << 5) -/* - * Non-coherent masters can use this page protection flag to set cacheable - * memory attributes for only a transparent outer level of cache, also known as - * the last-level or system cache. - */ -#define IOMMU_SYS_CACHE_ONLY (1 << 6) struct iommu_ops; struct iommu_group; -- 2.27.0.212.ge8ba1cc988-goog ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/arm-smmu-v3: expose numa_node attribute to users in sysfs
On Sat, May 30, 2020 at 09:15:05PM +1200, Barry Song wrote: > As tests show the latency of dma_unmap can increase dramatically while > calling them cross NUMA nodes, especially cross CPU packages, eg. > 300ns vs 800ns while waiting for the completion of CMD_SYNC in an > empty command queue. The large latency causing by remote node will > in turn make contention of the command queue more serious, and enlarge > the latency of DMA users within local NUMA nodes. > > Users might intend to enforce NUMA locality with the consideration of > the position of SMMU. The patch provides minor benefit by presenting > this information to users directly, as they might want to know it without > checking hardware spec at all. I don't think that's a very good reason to expose things to userspace. I know sysfs shouldn't be treated as ABI, but the grim reality is that once somebody relies on this stuff then we can't change it, so I'd rather avoid exposing it unless it's absolutely necessary. Thanks, Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/arm-smmu-v3: allocate the memory of queues in local numa node
On Mon, Jun 01, 2020 at 11:31:41PM +1200, Barry Song wrote: > dmam_alloc_coherent() will usually allocate memory from the default CMA. For > a common arm64 defconfig without reserved memory in device tree, there is only > one CMA close to address 0. > dma_alloc_contiguous() will allocate memory without any idea of NUMA and smmu > has no customized per-numa cma_area. > struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) > { > size_t count = size >> PAGE_SHIFT; > struct page *page = NULL; > struct cma *cma = NULL; > > if (dev && dev->cma_area) > cma = dev->cma_area; > else if (count > 1) > cma = dma_contiguous_default_area; > > ... > return page; > } > > if there are N numa nodes, N-1 nodes will put command/evt queues etc in a > remote node the default CMA belongs to,probably node 0. Tests show, after > sending CMD_SYNC in an empty command queue, > on Node2, without this patch, it takes 550ns to wait for the completion > of CMD_SYNC; with this patch, it takes 250ns to wait for the completion > of CMD_SYNC. > > Signed-off-by: Barry Song > --- > drivers/iommu/arm-smmu-v3.c | 63 - > 1 file changed, 48 insertions(+), 15 deletions(-) I would prefer that the coherent DMA allocator learned about NUMA, rather than we bodge drivers to use the streaming API where it doesn't really make sense. I see that you've posted other patches to do that (thanks!), so I'll disregard this series. Cheers, Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)
On Fri, Jul 3, 2020 at 7:53 AM Sai Prakash Ranjan wrote: > > Hi Will, > > On 2020-07-03 19:07, Will Deacon wrote: > > On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote: > >> diff --git a/drivers/gpu/drm/msm/msm_iommu.c > >> b/drivers/gpu/drm/msm/msm_iommu.c > >> index f455c597f76d..bd1d58229cc2 100644 > >> --- a/drivers/gpu/drm/msm/msm_iommu.c > >> +++ b/drivers/gpu/drm/msm/msm_iommu.c > >> @@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu, > >> uint64_t iova, > >> iova |= GENMASK_ULL(63, 49); > >> > >> > >> +if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE) > >> +prot |= IOMMU_SYS_CACHE_ONLY; > > > > Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then > > it > > looks like it should actually be a property on the domain because we > > never > > need to configure it on a per-mapping basis within a domain, and > > therefore > > it shouldn't be exposed by the IOMMU API as a prot flag. > > > > Do you agree? > > > > GPU being the only user is for now, but there are other clients which > can use this. > Plus how do we set the memory attributes if we do not expose this as > prot flag? It does appear that the downstream kgsl driver sets this for basically all mappings.. well there is some conditional stuff around DOMAIN_ATTR_USE_LLC_NWA but it seems based on the property of the domain. (Jordan may know more about what that is about.) But looks like there are a lot of different paths into iommu_map in kgsl so I might have missed something. Assuming there isn't some case where we specifically don't want to use the system cache for some mapping, I think it could be a domain attribute that sets an io_pgtable_cfg::quirks flag BR, -R ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 1/2] iommu: Tidy up Kconfig for SoC IOMMUs
Wacky COMPILE_TEST dependencies based on who used to define dev_archdata.iommu can go. Dependencies on ARM or ARM64 already implied by the ARCH_* platform selection can go. The entire IOMMU_SUPPORT menu already depends on MMU, so those can go. IOMMU_DMA is for the architecture's DMA API implementation to choose, and its interface to IOMMU drivers is properly stubbed out if disabled, so dependencies on or selections of that can go (AMD_IOMMU is the current exception since the x86 drivers have to provide their own entire dma_map_ops implementation). Since commit ed6ccf10f24b ("dma-mapping: properly stub out the DMA API for !CONFIG_HAS_DMA"), drivers which simply use the dma-mapping API should not need to depend on HAS_DMA, so those can go. And a long-dead option for code removed from the MSM driver 4 years ago can also go. Signed-off-by: Robin Murphy --- Based on the current iommu/next branch. drivers/iommu/Kconfig | 16 +++- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index b0f308cb7f7c..4dcd85bbff99 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -128,10 +128,6 @@ config MSM_IOMMU If unsure, say N here. -config IOMMU_PGTABLES_L2 - def_bool y - depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n - # AMD IOMMU support config AMD_IOMMU bool "AMD IOMMU support" @@ -274,7 +270,6 @@ config IRQ_REMAP # OMAP IOMMU support config OMAP_IOMMU bool "OMAP IOMMU Support" - depends on ARM && MMU || (COMPILE_TEST && (ARM || ARM64 || IA64 || SPARC)) depends on ARCH_OMAP2PLUS || COMPILE_TEST select IOMMU_API help @@ -292,7 +287,6 @@ config OMAP_IOMMU_DEBUG config ROCKCHIP_IOMMU bool "Rockchip IOMMU Support" - depends on ARM || ARM64 || (COMPILE_TEST && (ARM64 || IA64 || SPARC)) depends on ARCH_ROCKCHIP || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU @@ -305,11 +299,9 @@ config ROCKCHIP_IOMMU config SUN50I_IOMMU bool "Allwinner H6 IOMMU Support" - depends on HAS_DMA depends on ARCH_SUNXI || COMPILE_TEST select ARM_DMA_USE_IOMMU select IOMMU_API - select IOMMU_DMA help Support for the IOMMU introduced in the Allwinner H6 SoCs. @@ -336,7 +328,7 @@ config TEGRA_IOMMU_SMMU config EXYNOS_IOMMU bool "Exynos IOMMU Support" - depends on ARCH_EXYNOS && MMU || (COMPILE_TEST && (ARM || ARM64 || IA64 || SPARC)) + depends on ARCH_EXYNOS || COMPILE_TEST depends on !CPU_BIG_ENDIAN # revisit driver if we can enable big-endian ptes select IOMMU_API select ARM_DMA_USE_IOMMU @@ -359,7 +351,7 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" - depends on ARM || IOMMU_DMA + depends on ARM || ARM64 depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64) select IOMMU_API select IOMMU_IO_PGTABLE_LPAE @@ -381,7 +373,7 @@ config SPAPR_TCE_IOMMU # ARM IOMMU support config ARM_SMMU tristate "ARM Ltd. System MMU (SMMU) Support" - depends on (ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)) && MMU + depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64) select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU if ARM @@ -467,11 +459,9 @@ config S390_AP_IOMMU config MTK_IOMMU bool "MTK IOMMU Support" - depends on HAS_DMA depends on ARCH_MEDIATEK || COMPILE_TEST select ARM_DMA_USE_IOMMU select IOMMU_API - select IOMMU_DMA select IOMMU_IO_PGTABLE_ARMV7S select MEMORY select MTK_SMI base-commit: e5640f21b63d2a5d3e4e0c4111b2b38e99fe5164 -- 2.23.0.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 2/2] iommu/renesas: Expand COMPILE_TEST coverage
This driver shouldn't need anything architecture-specific (that isn't under CONFIG_ARM protection already), and has already been accessible from certain x86 configurations by virtue of the previously-cleaned-up "ARM || IOMMU_DMA" dependency. Allow COMPILE_TEST for all architectures. Signed-off-by: Robin Murphy --- Let's see if 0-day agrees... :D drivers/iommu/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 4dcd85bbff99..af6c081d164f 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -351,7 +351,6 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" - depends on ARM || ARM64 depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64) select IOMMU_API select IOMMU_IO_PGTABLE_LPAE -- 2.23.0.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)
On Fri, Jul 03, 2020 at 08:23:07PM +0530, Sai Prakash Ranjan wrote: > On 2020-07-03 19:07, Will Deacon wrote: > > On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote: > > > diff --git a/drivers/gpu/drm/msm/msm_iommu.c > > > b/drivers/gpu/drm/msm/msm_iommu.c > > > index f455c597f76d..bd1d58229cc2 100644 > > > --- a/drivers/gpu/drm/msm/msm_iommu.c > > > +++ b/drivers/gpu/drm/msm/msm_iommu.c > > > @@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu, > > > uint64_t iova, > > > iova |= GENMASK_ULL(63, 49); > > > > > > > > > + if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE) > > > + prot |= IOMMU_SYS_CACHE_ONLY; > > > > Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then > > it > > looks like it should actually be a property on the domain because we > > never > > need to configure it on a per-mapping basis within a domain, and > > therefore > > it shouldn't be exposed by the IOMMU API as a prot flag. > > > > Do you agree? > > > > GPU being the only user is for now, but there are other clients which can > use this. > Plus how do we set the memory attributes if we do not expose this as prot > flag? I just don't understand the need for it to be per-map operation. Put another way, if we extended the domain attribute to apply to cacheable mappings on the domain and not just the table walk, what would break? Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)
Hi Will, On 2020-07-03 19:07, Will Deacon wrote: On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote: diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index f455c597f76d..bd1d58229cc2 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, iova |= GENMASK_ULL(63, 49); + if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE) + prot |= IOMMU_SYS_CACHE_ONLY; Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then it looks like it should actually be a property on the domain because we never need to configure it on a per-mapping basis within a domain, and therefore it shouldn't be exposed by the IOMMU API as a prot flag. Do you agree? GPU being the only user is for now, but there are other clients which can use this. Plus how do we set the memory attributes if we do not expose this as prot flag? Thanks, Sai -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 1/2] iommu/iova: Retry from last rb tree node if iova search fails
From: Vijayanand Jitta When ever a new iova alloc request comes iova is always searched from the cached node and the nodes which are previous to cached node. So, even if there is free iova space available in the nodes which are next to the cached node iova allocation can still fail because of this approach. Consider the following sequence of iova alloc and frees on 1GB of iova space 1) alloc - 500MB 2) alloc - 12MB 3) alloc - 499MB 4) free - 12MB which was allocated in step 2 5) alloc - 13MB After the above sequence we will have 12MB of free iova space and cached node will be pointing to the iova pfn of last alloc of 13MB which will be the lowest iova pfn of that iova space. Now if we get an alloc request of 2MB we just search from cached node and then look for lower iova pfn's for free iova and as they aren't any, iova alloc fails though there is 12MB of free iova space. To avoid such iova search failures do a retry from the last rb tree node when iova search fails, this will search the entire tree and get an iova if its available. Signed-off-by: Vijayanand Jitta --- drivers/iommu/iova.c | 23 +-- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 49fc01f..4e77116 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -184,8 +184,9 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, struct rb_node *curr, *prev; struct iova *curr_iova; unsigned long flags; - unsigned long new_pfn; + unsigned long new_pfn, low_pfn_new; unsigned long align_mask = ~0UL; + unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn; if (size_aligned) align_mask <<= fls_long(size - 1); @@ -198,15 +199,25 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, curr = __get_cached_rbnode(iovad, limit_pfn); curr_iova = rb_entry(curr, struct iova, node); + low_pfn_new = curr_iova->pfn_hi + 1; + +retry: do { - limit_pfn = min(limit_pfn, curr_iova->pfn_lo); - new_pfn = (limit_pfn - size) & align_mask; + high_pfn = min(high_pfn, curr_iova->pfn_lo); + new_pfn = (high_pfn - size) & align_mask; prev = curr; curr = rb_prev(curr); curr_iova = rb_entry(curr, struct iova, node); - } while (curr && new_pfn <= curr_iova->pfn_hi); - - if (limit_pfn < size || new_pfn < iovad->start_pfn) { + } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn); + + if (high_pfn < size || new_pfn < low_pfn) { + if (low_pfn == iovad->start_pfn && low_pfn_new < limit_pfn) { + high_pfn = limit_pfn; + low_pfn = low_pfn_new; + curr = >anchor.node; + curr_iova = rb_entry(curr, struct iova, node); + goto retry; + } iovad->max32_alloc_size = size; goto iova32_full; } -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation 1.9.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 2/2] iommu/iova: Free global iova rcache on iova alloc failure
From: Vijayanand Jitta When ever an iova alloc request fails we free the iova ranges present in the percpu iova rcaches and then retry but the global iova rcache is not freed as a result we could still see iova alloc failure even after retry as global rcache is holding the iova's which can cause fragmentation. So, free the global iova rcache as well and then go for the retry. Change-Id: Ib8236dc88ba5516b73d4fbf6bf8e68bbf09bbad2 Signed-off-by: Vijayanand Jitta --- drivers/iommu/iova.c | 23 +++ include/linux/iova.h | 6 ++ 2 files changed, 29 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 4e77116..5836c87 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -442,6 +442,7 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) flush_rcache = false; for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); + free_global_cached_iovas(iovad); goto retry; } @@ -1055,5 +1056,27 @@ void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) } } +/* + * free all the IOVA ranges of global cache + */ +void free_global_cached_iovas(struct iova_domain *iovad) +{ + struct iova_rcache *rcache; + unsigned long flags; + int i, j; + + for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + rcache = >rcaches[i]; + spin_lock_irqsave(>lock, flags); + for (j = 0; j < rcache->depot_size; ++j) { + iova_magazine_free_pfns(rcache->depot[j], iovad); + iova_magazine_free(rcache->depot[j]); + rcache->depot[j] = NULL; + } + rcache->depot_size = 0; + spin_unlock_irqrestore(>lock, flags); + } +} + MODULE_AUTHOR("Anil S Keshavamurthy "); MODULE_LICENSE("GPL"); diff --git a/include/linux/iova.h b/include/linux/iova.h index a0637ab..a905726 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -163,6 +163,7 @@ int init_iova_flush_queue(struct iova_domain *iovad, struct iova *split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); +void free_global_cached_iovas(struct iova_domain *iovad); #else static inline int iova_cache_get(void) { @@ -270,6 +271,11 @@ static inline void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) { } + +static inline void free_global_cached_iovas(struct iova_domain *iovad) +{ +} + #endif #endif -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation 1.9.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)
On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote: > diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c > index f455c597f76d..bd1d58229cc2 100644 > --- a/drivers/gpu/drm/msm/msm_iommu.c > +++ b/drivers/gpu/drm/msm/msm_iommu.c > @@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t > iova, > iova |= GENMASK_ULL(63, 49); > > > + if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE) > + prot |= IOMMU_SYS_CACHE_ONLY; Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then it looks like it should actually be a property on the domain because we never need to configure it on a per-mapping basis within a domain, and therefore it shouldn't be exposed by the IOMMU API as a prot flag. Do you agree? Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v3 03/14] vfio/type1: Report iommu nesting info to userspace
Hi Alex, > From: Liu, Yi L > Sent: Friday, July 3, 2020 2:06 PM [...] > > > +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3 > > > + > > > +struct vfio_iommu_type1_info_cap_nesting { > > > + struct vfio_info_cap_header header; > > > + __u32 flags; > > > > I think there's an alignment issue here for a uapi. The header field > > is 8-bytes total and info[] should start at an 8-byte alignment to > > allow data[] within info to have 8-byte alignment. This could lead to > > the structure having a compiler dependent size and offsets. We should > > add a 4-byte reserved field here to resolve. > > got it. or how about defining the flags as __u64? > > > > > > + __u8info[]; > > > +}; > > > > This should have a lot more description around it, a user could not > > infer that info[] is including a struct iommu_nesting_info from the > > information provided here. > > Thanks, > > sure. BTW. do you think it is necessary to add a flag to indicate the info[] > is a > struct iommu_nesting_info? or as a start, it's not necessary to do it. seems like I misunderstood your comment. Does below description suits your comment? /* * Reporting nesting info to user space. * * @info: the nesting info provided by IOMMU driver. Today * it is expected to be a struct iommu_nesting_info * data. */ struct vfio_iommu_type1_info_cap_nesting { struct vfio_info_cap_header header; __u32 flags; __u32 padding; __u8info[]; }; Thanks, Yi Liu > Regards, > Yi Liu > > > Alex > > > > > + > > > #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) > > > > > > /** ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC][PATCH 5/5] firmware: QCOM_SCM: Allow qcom_scm driver to be loadable as a permenent module
On 03-07-20, 13:23, Will Deacon wrote: > On Tue, Jun 16, 2020 at 01:52:32PM -0700, John Stultz wrote: > > On Tue, Jun 16, 2020 at 12:55 AM Marc Zyngier wrote: > > > On 2020-06-16 07:13, John Stultz wrote: > > > > diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig > > > > index b510f67dfa49..714893535dd2 100644 > > > > --- a/drivers/iommu/Kconfig > > > > +++ b/drivers/iommu/Kconfig > > > > @@ -381,6 +381,7 @@ config SPAPR_TCE_IOMMU > > > > config ARM_SMMU > > > > tristate "ARM Ltd. System MMU (SMMU) Support" > > > > depends on (ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)) > > > > && > > > > MMU > > > > + depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y > > > > > > This looks a bit ugly. Could you explain why we need this at the SMMU > > > level? I'd have expected the dependency to flow the other way around... > > > > Yea, so the arm-smmu-qcom.c file calls directly into the qcom-scm code > > via qcom_scm_qsmmu500_wait_safe_toggle() > > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/iommu/arm-smmu-qcom.c?h=v5.8-rc1#n44 > > > > So if ARM_SMMU=y and QCOM_SCM=m we get: > > drivers/iommu/arm-smmu-qcom.o: In function `qcom_smmu500_reset': > > arm-smmu-qcom.c:(.text+0xb4): undefined reference to > > `qcom_scm_qsmmu500_wait_safe_toggle' > > > > Do you have a suggestion for an alternative approach? > > Can you use symbol_get() or something like that? How are module dependencies > handled by other drivers? So drivers deal with this by making rules in Kconfig which will prohibit this case. QCOM_SCM depends on ARM_SMMU with the caveat that if ARM_SMMU is a module, QCOM_SCM cant be inbuilt. This can be done by adding below line in Kconfig for QCOM_SCM: depends on ARM_SMMU || !ARM_SMMU This is quite prevalent is drivers to ensure dependency like this Thanks -- ~Vinod ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC][PATCH 5/5] firmware: QCOM_SCM: Allow qcom_scm driver to be loadable as a permenent module
On Tue, Jun 16, 2020 at 01:52:32PM -0700, John Stultz wrote: > On Tue, Jun 16, 2020 at 12:55 AM Marc Zyngier wrote: > > On 2020-06-16 07:13, John Stultz wrote: > > > diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig > > > index b510f67dfa49..714893535dd2 100644 > > > --- a/drivers/iommu/Kconfig > > > +++ b/drivers/iommu/Kconfig > > > @@ -381,6 +381,7 @@ config SPAPR_TCE_IOMMU > > > config ARM_SMMU > > > tristate "ARM Ltd. System MMU (SMMU) Support" > > > depends on (ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)) && > > > MMU > > > + depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y > > > > This looks a bit ugly. Could you explain why we need this at the SMMU > > level? I'd have expected the dependency to flow the other way around... > > Yea, so the arm-smmu-qcom.c file calls directly into the qcom-scm code > via qcom_scm_qsmmu500_wait_safe_toggle() > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/iommu/arm-smmu-qcom.c?h=v5.8-rc1#n44 > > So if ARM_SMMU=y and QCOM_SCM=m we get: > drivers/iommu/arm-smmu-qcom.o: In function `qcom_smmu500_reset': > arm-smmu-qcom.c:(.text+0xb4): undefined reference to > `qcom_scm_qsmmu500_wait_safe_toggle' > > Do you have a suggestion for an alternative approach? Can you use symbol_get() or something like that? How are module dependencies handled by other drivers? Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 2/4] iommu/arm-smmu: Workaround for Marvell Armada-AP806 SoC erratum #582743
On 03.07.2020 11:03, Robin Murphy wrote: On 2020-07-02 21:16, Tomasz Nowicki wrote: From: Hanna Hawa Due to erratum #582743, the Marvell Armada-AP806 can't access 64bit to ARM SMMUv2 registers. Provide implementation relevant hooks: - split the writeq/readq to two accesses of writel/readl. - mask the MMU_IDR2.PTFSv8 fields to not use AArch64 format (but only AARCH32_L) since with AArch64 format 32 bits access is not supported. Note that separate writes/reads to 2 is not problem regards to atomicity, because the driver use the readq/writeq while initialize the SMMU, report for SMMU fault, and use spinlock in one case (iova_to_phys). The comment about the spinlock seems to be out of date, and TBH that whole sentence is a bit unclear - how about something like: "Note that most 64-bit registers like TTBRn can be accessed as two 32-bit halves without issue, and AArch32 format ensures that the register writes which must be atomic (for TLBI etc.) need only be 32-bit." Signed-off-by: Hanna Hawa Signed-off-by: Gregory CLEMENT Signed-off-by: Tomasz Nowicki --- Documentation/arm64/silicon-errata.rst | 3 ++ drivers/iommu/arm-smmu-impl.c | 52 ++ 2 files changed, 55 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 936cf2a59ca4..157214d3abe1 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -125,6 +125,9 @@ stable kernels. | Cavium | ThunderX2 Core | #219 | CAVIUM_TX2_ERRATUM_219 | ++-+-+-+ ++-+-+-+ +| Marvell | ARM-MMU-500 | #582743 | N/A | +++-+-+-+ +++-+-+-+ | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | ++-+-+-+ ++-+-+-+ diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index c75b9d957b70..c1fc5e1b8193 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -147,6 +147,53 @@ static const struct arm_smmu_impl arm_mmu500_impl = { .reset = arm_mmu500_reset, }; +static u64 mrvl_mmu500_readq(struct arm_smmu_device *smmu, int page, int off) +{ + u64 val; + + /* + * Marvell Armada-AP806 erratum #582743. + * Split all the readq to double readl + */ + val = (u64)readl_relaxed(arm_smmu_page(smmu, page) + off + 4) << 32; + val |= readl_relaxed(arm_smmu_page(smmu, page) + off); Even though io-64-nonatomic-hi-lo.h doesn't override readq() etc. for 64-bit builds, you can still use hi_lo_readq_relaxed() directly. + + return val; +} + +static void mrvl_mmu500_writeq(struct arm_smmu_device *smmu, int page, int off, + u64 val) +{ + /* + * Marvell Armada-AP806 erratum #582743. + * Split all the writeq to double writel + */ + writel_relaxed(upper_32_bits(val), arm_smmu_page(smmu, page) + off + 4); + writel_relaxed(lower_32_bits(val), arm_smmu_page(smmu, page) + off); Similarly, hi_lo_writeq_relaxed(). +} + +static u32 mrvl_mmu500_cfg_id2_fixup(u32 id) +{ + + /* + * Armada-AP806 erratum #582743. + * Hide the SMMU_IDR2.PTFSv8 fields to sidestep the AArch64 + * formats altogether and allow using 32 bits access on the + * interconnect. + */ + id &= ~(ARM_SMMU_ID2_PTFS_4K | ARM_SMMU_ID2_PTFS_16K | + ARM_SMMU_ID2_PTFS_64K); + + return id; +} + +static const struct arm_smmu_impl mrvl_mmu500_impl = { + .read_reg64 = mrvl_mmu500_readq, + .write_reg64 = mrvl_mmu500_writeq, + .cfg_id2_fixup = mrvl_mmu500_cfg_id2_fixup, + .reset = arm_mmu500_reset, +}; + struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) { @@ -160,6 +207,11 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) */ switch (smmu->model) { case ARM_MMU500: + if (of_device_is_compatible(smmu->dev->of_node, Nit: there's a local "np" variable now. + "marvell,ap806-smmu-500")) { + smmu->impl = _mmu500_impl; + return smmu; + } Please put this with the other integration checks below the switch statement. Yes, it means we'll end up assigning smmu->impl twice for this particular case, but that's the intended pattern. Thanks, all above comments do make sense and will be fixed in next spin. Thanks, Tomasz ___ iommu mailing list iommu@lists.linux-foundation.org
Re: [PATCH v3 02/14] iommu: Report domain nesting info
On Tue, Jun 30, 2020 at 02:00:49AM +, Tian, Kevin wrote: > > From: Liu, Yi L > > Sent: Monday, June 29, 2020 8:23 PM > > > > Hi Stefan, > > > > > From: Stefan Hajnoczi > > > Sent: Monday, June 29, 2020 5:25 PM > > > > > > On Wed, Jun 24, 2020 at 01:55:15AM -0700, Liu Yi L wrote: > > > > +/* > > > > + * struct iommu_nesting_info - Information for nesting-capable IOMMU. > > > > + * user space should check it before using > > > > + * nesting capability. > > > > + * > > > > + * @size: size of the whole structure > > > > + * @format:PASID table entry format, the same definition with > > > > + * @format of struct iommu_gpasid_bind_data. > > > > + * @features: supported nesting features. > > > > + * @flags: currently reserved for future extension. > > > > + * @data: vendor specific cap info. > > > > + * > > > > + * > > > > +---++ > > > > + * | feature | Notes > > > > | > > > > + * > > > > > +===+=== > > > > > =+ > > > > + * | SYSWIDE_PASID | Kernel manages PASID in system wide, PASIDs > > used | > > > > + * | | in the system should be allocated by host kernel > > > > | > > > > + * > > > > +---++ > > > > + * | BIND_PGTBL| bind page tables to host PASID, the PASID could > > > > | > > > > + * | | either be a host PASID passed in bind request or > > > > | > > > > + * | | default PASIDs (e.g. default PASID of > > > > aux-domain) | > > > > + * > > > > +---++ > > > > + * | CACHE_INVLD | mandatory feature for nesting capable IOMMU > > | > > > > + * > > > > +---++ > > > > > > This feature description is vague about what CACHE_INVLD does and how > > to > > > use it. If I understand correctly, the presence of this feature means > > > that VFIO_IOMMU_NESTING_OP_CACHE_INVLD must be used? > > > > > > The same kind of clarification could be done for SYSWIDE_PASID and > > > BIND_PGTBL too. > > > > For SYSWIDE_PASID and BIND_PGTBL, yes, presence of the feature bit > > means must use. So the two are requirements to user space if it wants > > to setup nesting. While for CACHE_INVLD, it's kind of availability > > here. How about removing CACHE_INVLD as presence of BIND_PGTBL should > > indicates support of CACHE_INVLD? > > > > So far this assumption is correct but it may not be true when thinking > forward. > For example, a vendor might find a way to allow the owner of 1st-level page > table to directly invalidate cache w/o going through host IOMMU driver. From > this angle I feel explicitly reporting this capability is more robust. > > Regarding to the description, what about below? > > -- > SYSWIDE_PASID: PASIDs are managed in system-wide, instead of per device. > When a device is assigned to userspace or VM, proper uAPI (provided by > userspace driver framework, e.g. VFIO) must be used to allocate/free PASIDs > for the assigned device. > > BIND_PGTBL: The owner of the first-level/stage-1 page table must explicitly > bind the page table to associated PASID (either the one specified in bind > request or the default PASID of the iommu domain), through VFIO_IOMMU > _NESTING_OP > > CACHE_INVLD: The owner of the first-level/stage-1 page table must > explicitly invalidate the IOMMU cache through VFIO_IOMMU_NESTING_OP, > according to vendor-specific requirement when changing the page table. > -- Mentioning the API to allocate/free PASIDs and VFIO_IOMMU_NESTING_OP has made this clearer. This lets someone reading the documentation know where to look for further information on using these features. Thank you! Stefan signature.asc Description: PGP signature ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 4/4] arm64: dts: marvell: add SMMU support
On 03.07.2020 11:16, Robin Murphy wrote: On 2020-07-02 21:16, Tomasz Nowicki wrote: From: Marcin Wojtas Add IOMMU node for Marvell AP806 based SoCs together with platform and PCI device Stream ID mapping. Signed-off-by: Marcin Wojtas Signed-off-by: Tomasz Nowicki --- arch/arm64/boot/dts/marvell/armada-8040.dtsi | 36 +++ arch/arm64/boot/dts/marvell/armada-ap80x.dtsi | 17 + 2 files changed, 53 insertions(+) diff --git a/arch/arm64/boot/dts/marvell/armada-8040.dtsi b/arch/arm64/boot/dts/marvell/armada-8040.dtsi index 7699b19224c2..25c1df709f72 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-8040.dtsi @@ -23,3 +23,39 @@ _rtc { status = "disabled"; }; + +_usb3_0 { + iommus = < 0x440>; +}; + +_usb3_1 { + iommus = < 0x441>; +}; + +_sata0 { + iommus = < 0x444>; +}; + +_sdhci0 { + iommus = < 0x445>; +}; + +_sata0 { + iommus = < 0x454>; +}; + +_usb3_0 { + iommus = < 0x450>; +}; + +_usb3_1 { + iommus = < 0x451>; +}; + +_pcie0 { + iommu-map = + <0x0 0x480 0x20>, + <0x100 0x4a0 0x20>, + <0x200 0x4c0 0x20>; + iommu-map-mask = <0x031f>; Nice! I do like a good compressed mapping :D +}; diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi index 7f9b9a647717..ded8b8082d79 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi @@ -56,6 +56,23 @@ compatible = "simple-bus"; ranges = <0x0 0x0 0xf000 0x100>; + smmu: iommu@500 { + compatible = "marvell,ap806-smmu-500", "arm,mmu-500"; + reg = <0x10 0x10>; + dma-coherent; + #iommu-cells = <1>; + #global-interrupts = <1>; + interrupts = , + , + , + , + , + , + , + , + ; I'd recommend you have the node disabled by default here, then explicitly enable it in armada-8040.dtsi where you add the Stream IDs. Otherwise it will also end up enabled for 8020, 70x0, etc. where disable_bypass will then catastrophically break everything. Good point! I will fix this. Thanks, Tomasz ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 3/4] dt-bindings: arm-smmu: add compatible string for Marvell Armada-AP806 SMMU-500
On 03.07.2020 11:05, Robin Murphy wrote: On 2020-07-02 21:16, Tomasz Nowicki wrote: Add specific compatible string for Marvell usage due to errata of accessing 64bits registers of ARM SMMU, in AP806. AP806 SoC uses the generic ARM-MMU500, and there's no specific implementation of Marvell, this compatible is used for errata only. Signed-off-by: Hanna Hawa Signed-off-by: Gregory CLEMENT Signed-off-by: Tomasz Nowicki --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 5 + 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index d7ceb4c34423..7beca9c00b12 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -38,6 +38,11 @@ properties: - qcom,sc7180-smmu-500 - qcom,sdm845-smmu-500 - const: arm,mmu-500 + - description: Marvell SoCs implementing "arm,mmu-500" + items: + - enum: + - marvell,ap806-smmu-500 Isn't a single-valued enum just a constant? :P That's how copy-paste engineering ends up :) Thanks, Tomasz ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 1/4] iommu/arm-smmu: Add SMMU ID2 register fixup hook
On 03.07.2020 10:24, Robin Murphy wrote: On 2020-07-02 21:16, Tomasz Nowicki wrote: We already have 'cfg_probe' hook which meant to override and apply workarounds while probing ID registers. However, 'cfg_probe' is called at the very end and therefore for some cases fixing up things becomes complex or requires exporting of SMMU driver structures. Hence, seems it is better and cleaner to do ID fixup right away. In preparation for adding Marvell errata add an extra ID2 fixup hook. Hmm, the intent of ->cfg_probe was very much to give impl a chance to adjust the detected features before we start consuming them, with this exact case in mind. Since the Cavium quirk isn't actually doing that - it just needs to run *anywhere* in the whole probe process - I'm under no illusion that I put the hook in exactly the right place first time around ;) The diff below should be more on the mark... Robin. ->8- diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 243bc4cb2705..884ffca5b1eb 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1891,6 +1891,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } + if (smmu->impl && smmu->impl->cfg_probe) + return smmu->impl->cfg_probe(smmu); + /* Now we've corralled the various formats, what'll it do? */ if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M; @@ -1909,7 +1912,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", smmu->pgsize_bitmap); - if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n", smmu->va_size, smmu->ipa_size); @@ -1918,9 +1920,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n", smmu->ipa_size, smmu->pa_size); - if (smmu->impl && smmu->impl->cfg_probe) - return smmu->impl->cfg_probe(smmu); - return 0; } I was under impression that ->cfg_probe was meant for Cavium alike errata (position independent). Then I will move ->cfg_probe as you suggested. I prefer not to add yet another impl hook too :) Thanks, Tomasz ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 4/4] arm64: dts: marvell: add SMMU support
On 2020-07-02 21:16, Tomasz Nowicki wrote: From: Marcin Wojtas Add IOMMU node for Marvell AP806 based SoCs together with platform and PCI device Stream ID mapping. Signed-off-by: Marcin Wojtas Signed-off-by: Tomasz Nowicki --- arch/arm64/boot/dts/marvell/armada-8040.dtsi | 36 +++ arch/arm64/boot/dts/marvell/armada-ap80x.dtsi | 17 + 2 files changed, 53 insertions(+) diff --git a/arch/arm64/boot/dts/marvell/armada-8040.dtsi b/arch/arm64/boot/dts/marvell/armada-8040.dtsi index 7699b19224c2..25c1df709f72 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-8040.dtsi @@ -23,3 +23,39 @@ _rtc { status = "disabled"; }; + +_usb3_0 { + iommus = < 0x440>; +}; + +_usb3_1 { + iommus = < 0x441>; +}; + +_sata0 { + iommus = < 0x444>; +}; + +_sdhci0 { + iommus = < 0x445>; +}; + +_sata0 { + iommus = < 0x454>; +}; + +_usb3_0 { + iommus = < 0x450>; +}; + +_usb3_1 { + iommus = < 0x451>; +}; + +_pcie0 { + iommu-map = + <0x00x480 0x20>, + <0x100 0x4a0 0x20>, + <0x200 0x4c0 0x20>; + iommu-map-mask = <0x031f>; Nice! I do like a good compressed mapping :D +}; diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi index 7f9b9a647717..ded8b8082d79 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi @@ -56,6 +56,23 @@ compatible = "simple-bus"; ranges = <0x0 0x0 0xf000 0x100>; + smmu: iommu@500 { + compatible = "marvell,ap806-smmu-500", "arm,mmu-500"; + reg = <0x10 0x10>; + dma-coherent; + #iommu-cells = <1>; + #global-interrupts = <1>; + interrupts = , +, +, +, +, +, +, +, +; I'd recommend you have the node disabled by default here, then explicitly enable it in armada-8040.dtsi where you add the Stream IDs. Otherwise it will also end up enabled for 8020, 70x0, etc. where disable_bypass will then catastrophically break everything. Robin. + }; + gic: interrupt-controller@21 { compatible = "arm,gic-400"; #interrupt-cells = <3>; ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 3/4] dt-bindings: arm-smmu: add compatible string for Marvell Armada-AP806 SMMU-500
On 2020-07-02 21:16, Tomasz Nowicki wrote: Add specific compatible string for Marvell usage due to errata of accessing 64bits registers of ARM SMMU, in AP806. AP806 SoC uses the generic ARM-MMU500, and there's no specific implementation of Marvell, this compatible is used for errata only. Signed-off-by: Hanna Hawa Signed-off-by: Gregory CLEMENT Signed-off-by: Tomasz Nowicki --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 5 + 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index d7ceb4c34423..7beca9c00b12 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -38,6 +38,11 @@ properties: - qcom,sc7180-smmu-500 - qcom,sdm845-smmu-500 - const: arm,mmu-500 + - description: Marvell SoCs implementing "arm,mmu-500" +items: + - enum: + - marvell,ap806-smmu-500 Isn't a single-valued enum just a constant? :P Robin. + - const: arm,mmu-500 - items: - const: arm,mmu-500 - const: arm,smmu-v2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 2/4] iommu/arm-smmu: Workaround for Marvell Armada-AP806 SoC erratum #582743
On 2020-07-02 21:16, Tomasz Nowicki wrote: From: Hanna Hawa Due to erratum #582743, the Marvell Armada-AP806 can't access 64bit to ARM SMMUv2 registers. Provide implementation relevant hooks: - split the writeq/readq to two accesses of writel/readl. - mask the MMU_IDR2.PTFSv8 fields to not use AArch64 format (but only AARCH32_L) since with AArch64 format 32 bits access is not supported. Note that separate writes/reads to 2 is not problem regards to atomicity, because the driver use the readq/writeq while initialize the SMMU, report for SMMU fault, and use spinlock in one case (iova_to_phys). The comment about the spinlock seems to be out of date, and TBH that whole sentence is a bit unclear - how about something like: "Note that most 64-bit registers like TTBRn can be accessed as two 32-bit halves without issue, and AArch32 format ensures that the register writes which must be atomic (for TLBI etc.) need only be 32-bit." Signed-off-by: Hanna Hawa Signed-off-by: Gregory CLEMENT Signed-off-by: Tomasz Nowicki --- Documentation/arm64/silicon-errata.rst | 3 ++ drivers/iommu/arm-smmu-impl.c | 52 ++ 2 files changed, 55 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 936cf2a59ca4..157214d3abe1 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -125,6 +125,9 @@ stable kernels. | Cavium | ThunderX2 Core | #219| CAVIUM_TX2_ERRATUM_219 | ++-+-+-+ ++-+-+-+ +| Marvell| ARM-MMU-500 | #582743 | N/A | +++-+-+-+ +++-+-+-+ | Freescale/NXP | LS2080A/LS1043A | A-008585| FSL_ERRATUM_A008585 | ++-+-+-+ ++-+-+-+ diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index c75b9d957b70..c1fc5e1b8193 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -147,6 +147,53 @@ static const struct arm_smmu_impl arm_mmu500_impl = { .reset = arm_mmu500_reset, }; +static u64 mrvl_mmu500_readq(struct arm_smmu_device *smmu, int page, int off) +{ + u64 val; + + /* +* Marvell Armada-AP806 erratum #582743. +* Split all the readq to double readl +*/ + val = (u64)readl_relaxed(arm_smmu_page(smmu, page) + off + 4) << 32; + val |= readl_relaxed(arm_smmu_page(smmu, page) + off); Even though io-64-nonatomic-hi-lo.h doesn't override readq() etc. for 64-bit builds, you can still use hi_lo_readq_relaxed() directly. + + return val; +} + +static void mrvl_mmu500_writeq(struct arm_smmu_device *smmu, int page, int off, + u64 val) +{ + /* +* Marvell Armada-AP806 erratum #582743. +* Split all the writeq to double writel +*/ + writel_relaxed(upper_32_bits(val), arm_smmu_page(smmu, page) + off + 4); + writel_relaxed(lower_32_bits(val), arm_smmu_page(smmu, page) + off); Similarly, hi_lo_writeq_relaxed(). +} + +static u32 mrvl_mmu500_cfg_id2_fixup(u32 id) +{ + + /* +* Armada-AP806 erratum #582743. +* Hide the SMMU_IDR2.PTFSv8 fields to sidestep the AArch64 +* formats altogether and allow using 32 bits access on the +* interconnect. +*/ + id &= ~(ARM_SMMU_ID2_PTFS_4K | ARM_SMMU_ID2_PTFS_16K | + ARM_SMMU_ID2_PTFS_64K); + + return id; +} + +static const struct arm_smmu_impl mrvl_mmu500_impl = { + .read_reg64 = mrvl_mmu500_readq, + .write_reg64 = mrvl_mmu500_writeq, + .cfg_id2_fixup = mrvl_mmu500_cfg_id2_fixup, + .reset = arm_mmu500_reset, +}; + struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) { @@ -160,6 +207,11 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) */ switch (smmu->model) { case ARM_MMU500: + if (of_device_is_compatible(smmu->dev->of_node, Nit: there's a local "np" variable now. + "marvell,ap806-smmu-500")) { + smmu->impl = _mmu500_impl; + return smmu; + } Please put this with the other integration checks below the switch statement. Yes, it means we'll end up assigning smmu->impl twice for this particular case, but that's the intended pattern. Robin. smmu->impl = _mmu500_impl; break; case
Re: [PATCH v3 1/4] iommu/arm-smmu: Add SMMU ID2 register fixup hook
On 2020-07-02 21:16, Tomasz Nowicki wrote: We already have 'cfg_probe' hook which meant to override and apply workarounds while probing ID registers. However, 'cfg_probe' is called at the very end and therefore for some cases fixing up things becomes complex or requires exporting of SMMU driver structures. Hence, seems it is better and cleaner to do ID fixup right away. In preparation for adding Marvell errata add an extra ID2 fixup hook. Hmm, the intent of ->cfg_probe was very much to give impl a chance to adjust the detected features before we start consuming them, with this exact case in mind. Since the Cavium quirk isn't actually doing that - it just needs to run *anywhere* in the whole probe process - I'm under no illusion that I put the hook in exactly the right place first time around ;) The diff below should be more on the mark... Robin. ->8- diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 243bc4cb2705..884ffca5b1eb 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1891,6 +1891,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } + if (smmu->impl && smmu->impl->cfg_probe) + return smmu->impl->cfg_probe(smmu); + /* Now we've corralled the various formats, what'll it do? */ if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M; @@ -1909,7 +1912,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", smmu->pgsize_bitmap); - if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n", smmu->va_size, smmu->ipa_size); @@ -1918,9 +1920,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n", smmu->ipa_size, smmu->pa_size); - if (smmu->impl && smmu->impl->cfg_probe) - return smmu->impl->cfg_probe(smmu); - return 0; } ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 01/12] iommu: Change type of pasid to u32
Am 2020-07-02 um 3:10 p.m. schrieb Fenghua Yu: > Hi, Felix, Thomas, Joerg and maintainers, > > On Tue, Jun 30, 2020 at 10:12:38PM -0400, Felix Kuehling wrote: >> Am 2020-06-30 um 7:44 p.m. schrieb Fenghua Yu: >> You didn't change the return types of amdgpu_pasid_alloc and >> kfd_pasid_alloc. amdgpu_pasid_alloc returns int, because it can return >> negative error codes. But kfd_pasid_alloc could be updated, because it >> returns 0 for errors. > I fixed return type as "u32" for kfd_pasid_alloc(). Thank you. The patch is Acked-by: Felix Kuehling > > The fix is minor and only limited in patch 1. So instead of sending the > whole series, I only send the updated patch 1 here. If you want me to > send the whole series with the fix, I can do that too. > > Thanks. > > -Fenghua > > From 4ff6c14bb0761dd97d803350d31f87edc4336345 Mon Sep 17 00:00:00 2001 > From: Fenghua Yu > Date: Mon, 4 May 2020 18:00:55 + > Subject: [PATCH v5.1 01/12] iommu: Change type of pasid to u32 > > PASID is defined as a few different types in iommu including "int", > "u32", and "unsigned int". To be consistent and to match with uapi > definitions, define PASID and its variations (e.g. max PASID) as "u32". > "u32" is also shorter and a little more explicit than "unsigned int". > > No PASID type change in uapi although it defines PASID as __u64 in > some places. > > Suggested-by: Thomas Gleixner > Signed-off-by: Fenghua Yu > Reviewed-by: Tony Luck > Reviewed-by: Lu Baolu > --- > v5.1: > - Change return type to u32 for kfd_pasid_alloc() (Felix) > > v5: > - Reviewed by Lu Baolu > > v4: > - Change PASID type from "unsigned int" to "u32" (Christoph) > > v2: > - Create this new patch to define PASID as "unsigned int" consistently in > iommu (Thomas) > > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 4 +-- > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 2 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 2 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 2 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 +-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 4 +-- > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 8 ++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 8 ++--- > .../gpu/drm/amd/amdkfd/cik_event_interrupt.c | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 2 +- > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 7 ++--- > drivers/gpu/drm/amd/amdkfd/kfd_events.c | 8 ++--- > drivers/gpu/drm/amd/amdkfd/kfd_events.h | 4 +-- > drivers/gpu/drm/amd/amdkfd/kfd_iommu.c| 6 ++-- > drivers/gpu/drm/amd/amdkfd/kfd_pasid.c| 4 +-- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 20 ++-- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- > .../gpu/drm/amd/include/kgd_kfd_interface.h | 2 +- > drivers/iommu/amd/amd_iommu.h | 10 +++--- > drivers/iommu/amd/iommu.c | 31 ++- > drivers/iommu/amd/iommu_v2.c | 20 ++-- > drivers/iommu/intel/dmar.c| 7 +++-- > drivers/iommu/intel/intel-pasid.h | 24 +++--- > drivers/iommu/intel/iommu.c | 4 +-- > drivers/iommu/intel/pasid.c | 31 +-- > drivers/iommu/intel/svm.c | 12 +++ > drivers/iommu/iommu.c | 2 +- > drivers/misc/uacce/uacce.c| 2 +- > include/linux/amd-iommu.h | 8 ++--- > include/linux/intel-iommu.h | 12 +++ > include/linux/intel-svm.h | 2 +- > include/linux/iommu.h | 10 +++--- > include/linux/uacce.h | 2 +- > 38 files changed, 141 insertions(+), 141 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index ffe149aafc39..dfef5a7e0f5a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -207,11 +207,11 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct > kgd_dev *dst, struct kgd_dev *s > }) > > /* GPUVM API */ > -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int > pasid, > +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid, > void **vm, void **process_info, > struct dma_fence **ef); > int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, > - struct file *filp, unsigned int pasid, > + struct file *filp, u32
RE: [PATCH v3 09/14] vfio/type1: Support binding guest page tables to PASID
Hi Alex, > From: Alex Williamson > Sent: Friday, July 3, 2020 5:20 AM > > On Wed, 24 Jun 2020 01:55:22 -0700 > Liu Yi L wrote: > > > Nesting translation allows two-levels/stages page tables, with 1st > > level for guest translations (e.g. GVA->GPA), 2nd level for host > > translations (e.g. GPA->HPA). This patch adds interface for binding > > guest page tables to a PASID. This PASID must have been allocated to > > user space before the binding request. > > > > Cc: Kevin Tian > > CC: Jacob Pan > > Cc: Alex Williamson > > Cc: Eric Auger > > Cc: Jean-Philippe Brucker > > Cc: Joerg Roedel > > Cc: Lu Baolu > > Signed-off-by: Jean-Philippe Brucker > > Signed-off-by: Liu Yi L > > Signed-off-by: Jacob Pan > > --- > > v2 -> v3: > > *) use __iommu_sva_unbind_gpasid() for unbind call issued by VFIO > > https://lore.kernel.org/linux-iommu/1592931837-58223-6-git-send-email- > > jacob.jun@linux.intel.com/ > > > > v1 -> v2: > > *) rename subject from "vfio/type1: Bind guest page tables to host" > > *) remove VFIO_IOMMU_BIND, introduce VFIO_IOMMU_NESTING_OP to > support bind/ > >unbind guet page table > > *) replaced vfio_iommu_for_each_dev() with a group level loop since this > >series enforces one group per container w/ nesting type as start. > > *) rename vfio_bind/unbind_gpasid_fn() to > > vfio_dev_bind/unbind_gpasid_fn() > > *) vfio_dev_unbind_gpasid() always successful > > *) use vfio_mm->pasid_lock to avoid race between PASID free and page table > >bind/unbind > > --- > > drivers/vfio/vfio_iommu_type1.c | 169 > > > drivers/vfio/vfio_pasid.c | 30 +++ > > include/linux/vfio.h| 20 + > > include/uapi/linux/vfio.h | 30 +++ > > 4 files changed, 249 insertions(+) > > > > diff --git a/drivers/vfio/vfio_iommu_type1.c > > b/drivers/vfio/vfio_iommu_type1.c index d0891c5..5926533 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -148,6 +148,33 @@ struct vfio_regions { > > #define DIRTY_BITMAP_PAGES_MAX ((u64)INT_MAX) > > #define DIRTY_BITMAP_SIZE_MAX > DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX) > > > > +struct domain_capsule { > > + struct vfio_group *group; > > + struct iommu_domain *domain; > > + void *data; > > +}; > > + > > +/* iommu->lock must be held */ > > +static struct vfio_group *vfio_find_nesting_group(struct vfio_iommu > > +*iommu) { > > + struct vfio_domain *d; > > + struct vfio_group *g, *group = NULL; > > + > > + if (!iommu->nesting_info) > > + return NULL; > > + > > + /* only support singleton container with nesting type */ > > + list_for_each_entry(d, >domain_list, next) { > > + list_for_each_entry(g, >group_list, next) { > > + if (!group) { > > + group = g; > > + break; > > + } > > > We break out of the inner loop only to pointlessly continue in the outer loop > when we could simply return g and remove the second group pointer altogether > (use "group" instead of "g" if so). how about below? :-) /* only support singleton container with nesting type */ list_for_each_entry(d, >domain_list, next) { list_for_each_entry(group, >group_list, next) { break; } } > > > + } > > + } > > + return group; > > +} > > + > > static int put_pfn(unsigned long pfn, int prot); > > > > static struct vfio_group *vfio_iommu_find_iommu_group(struct > > vfio_iommu *iommu, @@ -2351,6 +2378,48 @@ static int > vfio_iommu_resv_refresh(struct vfio_iommu *iommu, > > return ret; > > } > > > > +static int vfio_dev_bind_gpasid_fn(struct device *dev, void *data) { > > + struct domain_capsule *dc = (struct domain_capsule *)data; > > + unsigned long arg = *(unsigned long *) dc->data; > > + > > + return iommu_sva_bind_gpasid(dc->domain, dev, (void __user *) arg); > > +} > > + > > +static int vfio_dev_unbind_gpasid_fn(struct device *dev, void *data) > > +{ > > + struct domain_capsule *dc = (struct domain_capsule *)data; > > + unsigned long arg = *(unsigned long *) dc->data; > > + > > + iommu_sva_unbind_gpasid(dc->domain, dev, (void __user *) arg); > > + return 0; > > +} > > + > > +static int __vfio_dev_unbind_gpasid_fn(struct device *dev, void > > +*data) { > > + struct domain_capsule *dc = (struct domain_capsule *)data; > > + struct iommu_gpasid_bind_data *unbind_data = > > + (struct iommu_gpasid_bind_data *) dc->data; > > + > > + __iommu_sva_unbind_gpasid(dc->domain, dev, unbind_data); > > + return 0; > > +} > > + > > +static void vfio_group_unbind_gpasid_fn(ioasid_t pasid, void *data) { > > + struct domain_capsule *dc = (struct domain_capsule *) data; > > + struct iommu_gpasid_bind_data unbind_data; > > + > > + unbind_data.argsz = offsetof(struct iommu_gpasid_bind_data, vendor); > > +
RE: [PATCH v3 06/14] vfio/type1: Add VFIO_IOMMU_PASID_REQUEST (alloc/free)
Hi Alex, > From: Alex Williamson > Sent: Friday, July 3, 2020 5:19 AM > > On Wed, 24 Jun 2020 01:55:19 -0700 > Liu Yi L wrote: > > > This patch allows user space to request PASID allocation/free, e.g. > > when serving the request from the guest. > > > > PASIDs that are not freed by userspace are automatically freed when > > the IOASID set is destroyed when process exits. > > > > Cc: Kevin Tian > > CC: Jacob Pan > > Cc: Alex Williamson > > Cc: Eric Auger > > Cc: Jean-Philippe Brucker > > Cc: Joerg Roedel > > Cc: Lu Baolu > > Signed-off-by: Liu Yi L > > Signed-off-by: Yi Sun > > Signed-off-by: Jacob Pan > > --- > > v1 -> v2: > > *) move the vfio_mm related code to be a seprate module > > *) use a single structure for alloc/free, could support a range of > > PASIDs > > *) fetch vfio_mm at group_attach time instead of at iommu driver open > > time > > --- > > drivers/vfio/Kconfig| 1 + > > drivers/vfio/vfio_iommu_type1.c | 96 > - > > drivers/vfio/vfio_pasid.c | 10 + > > include/linux/vfio.h| 6 +++ > > include/uapi/linux/vfio.h | 36 > > 5 files changed, 147 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index > > 3d8a108..95d90c6 100644 > > --- a/drivers/vfio/Kconfig > > +++ b/drivers/vfio/Kconfig > > @@ -2,6 +2,7 @@ > > config VFIO_IOMMU_TYPE1 > > tristate > > depends on VFIO > > + select VFIO_PASID if (X86) > > default n > > > > config VFIO_IOMMU_SPAPR_TCE > > diff --git a/drivers/vfio/vfio_iommu_type1.c > > b/drivers/vfio/vfio_iommu_type1.c index 8c143d5..d0891c5 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -73,6 +73,7 @@ struct vfio_iommu { > > boolv2; > > boolnesting; > > struct iommu_nesting_info *nesting_info; > > + struct vfio_mm *vmm; > > Structure alignment again. sure. may get agreement in the prior email. > > > booldirty_page_tracking; > > boolpinned_page_dirty_scope; > > }; > > @@ -1933,6 +1934,17 @@ static void vfio_iommu_iova_insert_copy(struct > > vfio_iommu *iommu, > > > > list_splice_tail(iova_copy, iova); > > } > > + > > +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu) > > +{ > > + if (iommu->vmm) { > > + vfio_mm_put(iommu->vmm); > > + iommu->vmm = NULL; > > + } > > + > > + kfree(iommu->nesting_info); > > iommu->nesting_info = NULL; got it. > > +} > > + > > static int vfio_iommu_type1_attach_group(void *iommu_data, > > struct iommu_group *iommu_group) > { @@ -2067,6 +2079,25 @@ > > static int vfio_iommu_type1_attach_group(void *iommu_data, > > goto out_detach; > > } > > iommu->nesting_info = info; > > + > > + if (info->features & IOMMU_NESTING_FEAT_SYSWIDE_PASID) { > > + struct vfio_mm *vmm; > > + int sid; > > + > > + vmm = vfio_mm_get_from_task(current); > > + if (IS_ERR(vmm)) { > > + ret = PTR_ERR(vmm); > > + goto out_detach; > > + } > > + iommu->vmm = vmm; > > + > > + sid = vfio_mm_ioasid_sid(vmm); > > + ret = iommu_domain_set_attr(domain->domain, > > + DOMAIN_ATTR_IOASID_SID, > > + ); > > This looks pretty dicey in the case of !CONFIG_VFIO_PASID, can we get here in > that case? If so it looks like we're doing bad things with setting the > domain- > >ioasid_sid. I guess not. So far, vfio_iommu_type1 will select CONFIG_VFIO_PASID for X86. do you think it is enough? > > > + if (ret) > > + goto out_detach; > > + } > > } > > > > /* Get aperture info */ > > @@ -2178,7 +2209,8 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > > return 0; > > > > out_detach: > > - kfree(iommu->nesting_info); > > + if (iommu->nesting_info) > > + vfio_iommu_release_nesting_info(iommu); > > Make vfio_iommu_release_nesting_info() check iommu->nesting_info, then call > it unconditionally? got it. :-) > > vfio_iommu_detach_group(domain, group); > > out_domain: > > iommu_domain_free(domain->domain); > > @@ -2380,7 +2412,8 @@ static void vfio_iommu_type1_detach_group(void > *iommu_data, > > else > > > vfio_iommu_unmap_unpin_reaccount(iommu); > > > > - kfree(iommu->nesting_info); > > + if (iommu->nesting_info) > > + > vfio_iommu_release_nesting_info(iommu); > > } > > iommu_domain_free(domain->domain); > >
RE: [PATCH v3 04/14] vfio: Add PASID allocation/free support
Hi Alex, > From: Alex Williamson > Sent: Friday, July 3, 2020 5:17 AM > > On Wed, 24 Jun 2020 01:55:17 -0700 > Liu Yi L wrote: > > > Shared Virtual Addressing (a.k.a Shared Virtual Memory) allows sharing > > multiple process virtual address spaces with the device for simplified > > programming model. PASID is used to tag an virtual address space in > > DMA requests and to identify the related translation structure in > > IOMMU. When a PASID-capable device is assigned to a VM, we want the > > same capability of using PASID to tag guest process virtual address > > spaces to achieve virtual SVA (vSVA). > > > > PASID management for guest is vendor specific. Some vendors (e.g. > > Intel > > VT-d) requires system-wide managed PASIDs cross all devices, > > regardless of whether a device is used by host or assigned to guest. > > Other vendors (e.g. ARM SMMU) may allow PASIDs managed per-device thus > > could be fully delegated to the guest for assigned devices. > > > > For system-wide managed PASIDs, this patch introduces a vfio module to > > handle explicit PASID alloc/free requests from guest. Allocated PASIDs > > are associated to a process (or, mm_struct) in IOASID core. A vfio_mm > > object is introduced to track mm_struct. Multiple VFIO containers > > within a process share the same vfio_mm object. > > > > A quota mechanism is provided to prevent malicious user from > > exhausting available PASIDs. Currently the quota is a global parameter > > applied to all VFIO devices. In the future per-device quota might be > > supported > too. > > > > Cc: Kevin Tian > > CC: Jacob Pan > > Cc: Eric Auger > > Cc: Jean-Philippe Brucker > > Cc: Joerg Roedel > > Cc: Lu Baolu > > Suggested-by: Alex Williamson > > Signed-off-by: Liu Yi L > > --- > > v1 -> v2: > > *) added in v2, split from the pasid alloc/free support of v1 > > --- > > drivers/vfio/Kconfig | 5 ++ > > drivers/vfio/Makefile | 1 + > > drivers/vfio/vfio_pasid.c | 151 > ++ > > include/linux/vfio.h | 28 + > > 4 files changed, 185 insertions(+) > > create mode 100644 drivers/vfio/vfio_pasid.c > > > > diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index > > fd17db9..3d8a108 100644 > > --- a/drivers/vfio/Kconfig > > +++ b/drivers/vfio/Kconfig > > @@ -19,6 +19,11 @@ config VFIO_VIRQFD > > depends on VFIO && EVENTFD > > default n > > > > +config VFIO_PASID > > + tristate > > + depends on IOASID && VFIO > > + default n > > + > > menuconfig VFIO > > tristate "VFIO Non-Privileged userspace driver framework" > > depends on IOMMU_API > > diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index > > de67c47..bb836a3 100644 > > --- a/drivers/vfio/Makefile > > +++ b/drivers/vfio/Makefile > > @@ -3,6 +3,7 @@ vfio_virqfd-y := virqfd.o > > > > obj-$(CONFIG_VFIO) += vfio.o > > obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o > > +obj-$(CONFIG_VFIO_PASID) += vfio_pasid.o > > obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o > > obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o > > obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o diff --git > > a/drivers/vfio/vfio_pasid.c b/drivers/vfio/vfio_pasid.c new file mode > > 100644 index 000..dd5b6d1 > > --- /dev/null > > +++ b/drivers/vfio/vfio_pasid.c > > @@ -0,0 +1,151 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* > > + * Copyright (C) 2020 Intel Corporation. > > + * Author: Liu Yi L > > + * > > + */ > > + > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > + > > +#define DRIVER_VERSION "0.1" > > +#define DRIVER_AUTHOR "Liu Yi L " > > +#define DRIVER_DESC "PASID management for VFIO bus drivers" > > + > > +#define VFIO_DEFAULT_PASID_QUOTA 1000 > > +static int pasid_quota = VFIO_DEFAULT_PASID_QUOTA; > > +module_param_named(pasid_quota, pasid_quota, uint, 0444); > > +MODULE_PARM_DESC(pasid_quota, > > +" Set the quota for max number of PASIDs that an application is > > +allowed to request (default 1000)"); > > + > > +struct vfio_mm_token { > > + unsigned long long val; > > +}; > > + > > +struct vfio_mm { > > + struct kref kref; > > + struct vfio_mm_tokentoken; > > + int ioasid_sid; > > + int pasid_quota; > > + struct list_headnext; > > +}; > > + > > +static struct vfio_pasid { > > + struct mutexvfio_mm_lock; > > + struct list_headvfio_mm_list; > > +} vfio_pasid; > > + > > +/* called with vfio.vfio_mm_lock held */ static void > > +vfio_mm_release(struct kref *kref) { > > + struct vfio_mm *vmm = container_of(kref, struct vfio_mm, kref); > > + > > + list_del(>next); > > + mutex_unlock(_pasid.vfio_mm_lock); > > + ioasid_free_set(vmm->ioasid_sid, true); > > + kfree(vmm); > > +} > > + > > +void vfio_mm_put(struct vfio_mm *vmm) { > > + kref_put_mutex(>kref, vfio_mm_release, > > +_pasid.vfio_mm_lock); } > > + > >
RE: [PATCH v3 03/14] vfio/type1: Report iommu nesting info to userspace
Hi Alex, > From: Alex Williamson < alex.william...@redhat.com > > Sent: Friday, July 3, 2020 2:39 AM > > On Wed, 24 Jun 2020 01:55:16 -0700 > Liu Yi L wrote: > > > This patch exports iommu nesting capability info to user space through > > VFIO. User space is expected to check this info for supported uAPIs (e.g. > > PASID alloc/free, bind page table, and cache invalidation) and the > > vendor specific format information for first level/stage page table > > that will be bound to. > > > > The nesting info is available only after the nesting iommu type is set > > for a container. Current implementation imposes one limitation - one > > nesting container should include at most one group. The philosophy of > > vfio container is having all groups/devices within the container share > > the same IOMMU context. When vSVA is enabled, one IOMMU context could > > include one 2nd-level address space and multiple 1st-level address spaces. > > While the 2nd-leve address space is reasonably sharable by multiple > > groups , blindly sharing 1st-level address spaces across all groups > > within the container might instead break the guest expectation. In the > > future sub/ super container concept might be introduced to allow > > partial address space sharing within an IOMMU context. But for now > > let's go with this restriction by requiring singleton container for > > using nesting iommu features. Below link has the related discussion > > about this > > decision. > > > > https://lkml.org/lkml/2020/5/15/1028 > > > > Cc: Kevin Tian > > CC: Jacob Pan > > Cc: Alex Williamson > > Cc: Eric Auger > > Cc: Jean-Philippe Brucker > > Cc: Joerg Roedel > > Cc: Lu Baolu > > Signed-off-by: Liu Yi L > > --- > > drivers/vfio/vfio_iommu_type1.c | 73 > + > > include/uapi/linux/vfio.h | 9 + > > 2 files changed, 82 insertions(+) > > > > diff --git a/drivers/vfio/vfio_iommu_type1.c > > b/drivers/vfio/vfio_iommu_type1.c index 7accb59..8c143d5 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -72,6 +72,7 @@ struct vfio_iommu { > > uint64_tpgsize_bitmap; > > boolv2; > > boolnesting; > > + struct iommu_nesting_info *nesting_info; > > booldirty_page_tracking; > > boolpinned_page_dirty_scope; > > }; > > Mind the structure packing and alignment, placing a pointer in the middle > of a > section of bools is going to create wasteful holes in the data structure. how about below? Add the @nesting_info and @vmm in the end of this struct. I've two questions, the first one is how the place the comment of the @external_domain; second question is do you want me to move the @nesting field to be near-by with the @nesting_info and @vmm. :) please let me know your preference. struct vfio_iommu { struct list_headdomain_list; struct list_headiova_list; struct vfio_domain *external_domain; /* domain for external user */ struct mutexlock; struct rb_root dma_list; struct blocking_notifier_head notifier; unsigned intdma_avail; uint64_tpgsize_bitmap; boolv2; boolnesting; booldirty_page_tracking; boolpinned_page_dirty_scope; struct iommu_nesting_info *nesting_info; struct vfio_mm *vmm; }; > > @@ -130,6 +131,9 @@ struct vfio_regions { > > #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\ > > (!list_empty(>domain_list)) > > > > +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu- > >external_domain) || \ > > +(!list_empty(>domain_list))) > > + > > #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / > BITS_PER_BYTE) > > > > /* > > @@ -1959,6 +1963,12 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > > } > > } > > > > + /* Nesting type container can include only one group */ > > + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) { > > + mutex_unlock(>lock); > > + return -EINVAL; > > + } > > + > > group = kzalloc(sizeof(*group), GFP_KERNEL); > > domain = kzalloc(sizeof(*domain), GFP_KERNEL); > > if (!group || !domain) { > > @@ -2029,6 +2039,36 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > > if (ret) > > goto out_domain; > > > > + /* Nesting cap info is available only after attaching */ > > + if (iommu->nesting) { > > + struct iommu_nesting_info tmp; > > + struct iommu_nesting_info *info; > > + > > + /* First get the size of vendor specific