Re: [PATCH v6 03/10] iommu/mediatek: Use a u32 flags to describe different HW features

2020-07-03 Thread Yingjoe Chen
On Fri, 2020-07-03 at 12:41 +0800, Chao Hao wrote:
> Given the fact that we are adding more and more plat_data bool values,
> it would make sense to use a u32 flags register and add the appropriate
> macro definitions to set and check for a flag present.
> No functional change.
> 
> Cc: Yong Wu 
> Suggested-by: Matthias Brugger 
> Signed-off-by: Chao Hao 
> Reviewed-by: Matthias Brugger 
> ---
>  drivers/iommu/mtk_iommu.c | 28 +---
>  drivers/iommu/mtk_iommu.h |  7 +--
>  2 files changed, 18 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> index 88d3df5b91c2..40ca564d97af 100644
> --- a/drivers/iommu/mtk_iommu.c
> +++ b/drivers/iommu/mtk_iommu.c
> @@ -100,6 +100,15 @@
>  #define MTK_M4U_TO_LARB(id)  (((id) >> 5) & 0xf)
>  #define MTK_M4U_TO_PORT(id)  ((id) & 0x1f)
>  
> +#define HAS_4GB_MODE BIT(0)
> +/* HW will use the EMI clock if there isn't the "bclk". */
> +#define HAS_BCLK BIT(1)
> +#define HAS_VLD_PA_RNG   BIT(2)
> +#define RESET_AXIBIT(3)
> +
> +#define MTK_IOMMU_HAS_FLAG(pdata, _x) \
> + pdata)->flags) & (_x)) == (_x))
> +
>  struct mtk_iommu_domain {
>   struct io_pgtable_cfg   cfg;
>   struct io_pgtable_ops   *iop;
> @@ -563,7 +572,8 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data 
> *data)
>upper_32_bits(data->protect_base);
>   writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
>  
> - if (data->enable_4GB && data->plat_data->has_vld_pa_rng) {
> + if (data->enable_4GB &&
> + MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_VLD_PA_RNG)) {
>   /*
>* If 4GB mode is enabled, the validate PA range is from
>* 0x1__ to 0x1__. here record bit[32:30].
> @@ -573,7 +583,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data 
> *data)
>   }
>   writel_relaxed(0, data->base + REG_MMU_DCM_DIS);
>  
> - if (data->plat_data->reset_axi) {
> + if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) {
>   /* The register is called STANDARD_AXI_MODE in this case */
>   writel_relaxed(0, data->base + REG_MMU_MISC_CTRL);
>   }
> @@ -618,7 +628,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
>  
>   /* Whether the current dram is over 4GB */
>   data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT));
> - if (!data->plat_data->has_4gb_mode)
> + if (!MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE))
>   data->enable_4GB = false;
>  
>   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> @@ -631,7 +641,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
>   if (data->irq < 0)
>   return data->irq;
>  
> - if (data->plat_data->has_bclk) {
> + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_BCLK)) {
>   data->bclk = devm_clk_get(dev, "bclk");
>   if (IS_ERR(data->bclk))
>   return PTR_ERR(data->bclk);
> @@ -763,23 +773,19 @@ static const struct dev_pm_ops mtk_iommu_pm_ops = {
>  
>  static const struct mtk_iommu_plat_data mt2712_data = {
>   .m4u_plat = M4U_MT2712,
> - .has_4gb_mode = true,
> - .has_bclk = true,
> - .has_vld_pa_rng   = true,
> + .flags= HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG,
>   .larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
>  };
>  
>  static const struct mtk_iommu_plat_data mt8173_data = {
>   .m4u_plat = M4U_MT8173,
> - .has_4gb_mode = true,
> - .has_bclk = true,
> - .reset_axi= true,
> + .flags= HAS_4GB_MODE | HAS_BCLK | RESET_AXI,
>   .larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */
>  };
>  
>  static const struct mtk_iommu_plat_data mt8183_data = {
>   .m4u_plat = M4U_MT8183,
> - .reset_axi= true,
> + .flags= RESET_AXI,
>   .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1},
>  };
>  
> diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> index 7212e6fcf982..5225a9170aaa 100644
> --- a/drivers/iommu/mtk_iommu.h
> +++ b/drivers/iommu/mtk_iommu.h
> @@ -39,12 +39,7 @@ enum mtk_iommu_plat {
>  
>  struct mtk_iommu_plat_data {
>   enum mtk_iommu_plat m4u_plat;
> - boolhas_4gb_mode;
> -
> - /* HW will use the EMI clock if there isn't the "bclk". */
> - boolhas_bclk;
> - boolhas_vld_pa_rng;
> - boolreset_axi;
> + u32 flags;


How about using bit field instead? eg

  u32 has_bclk:1;

In this way, we don't need to change code.

Joe.C


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 00/34] iommu: Move iommu_group setup to IOMMU core code

2020-07-03 Thread Qian Cai
On Tue, Jun 30, 2020 at 08:40:28PM -0400, Qian Cai wrote:
> On Wed, Apr 29, 2020 at 03:36:38PM +0200, Joerg Roedel wrote:
> > Hi,
> > 
> > here is the third version of this patch-set. Older versions can be found
> > here:
> > 
> > v1: https://lore.kernel.org/lkml/20200407183742.4344-1-j...@8bytes.org/
> > (Has some more introductory text)
> > 
> > v2: https://lore.kernel.org/lkml/20200414131542.25608-1-j...@8bytes.org/
> > 
> > Changes v2 -> v3:
> > 
> > * Rebased v5.7-rc3
> > 
> > * Added a missing iommu_group_put() as reported by Lu Baolu.
> > 
> > * Added a patch to consolidate more initialization work in
> >   __iommu_probe_device(), fixing a bug where no 'struct
> >   device_iommu' was allocated in the hotplug path.
> > 
> > There is also a git-branch available with these patches applied:
> > 
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/joro/linux.git/log/?h=iommu-probe-device-v3
> > 
> > Please review. If there are no objections I plan to put these patches
> > into the IOMMU tree early next week.
> 
> Looks like this patchset introduced an use-after-free on arm-smmu-v3.
> 
> Reproduced using mlx5,
> 
> # echo 1 > /sys/class/net/enp11s0f1np1/device/sriov_numvfs
> # echo 0 > /sys/class/net/enp11s0f1np1/device/sriov_numvfs 
> 
> The .config,
> https://github.com/cailca/linux-mm/blob/master/arm64.config
> 
> Looking at the free stack,
> 
> iommu_release_device->iommu_group_remove_device
> 
> was introduced in 07/34 ("iommu: Add probe_device() and release_device()
> call-backs").

FYI, I have just sent a patch to fix this,

https://lore.kernel.org/linux-iommu/20200704001003.2303-1-...@lca.pw/

> 
> [ 9426.724641][ T3356] pci :0b:01.2: Removing from iommu group 3
> [ 9426.731347][ T3356] 
> ==
> [ 9426.739263][ T3356] BUG: KASAN: use-after-free in 
> __lock_acquire+0x3458/0x4440
> __lock_acquire at kernel/locking/lockdep.c:4250
> [ 9426.746477][ T3356] Read of size 8 at addr 0089df1a6f68 by task 
> bash/3356
> [ 9426.753601][ T3356]
> [ 9426.755782][ T3356] CPU: 5 PID: 3356 Comm: bash Not tainted 
> 5.8.0-rc3-next-20200630 #2
> [ 9426.763687][ T3356] Hardware name: HPE Apollo 70 
> /C01_APACHE_MB , BIOS L50_5.13_1.11 06/18/2019
> [ 9426.774111][ T3356] Call trace:
> [ 9426.777245][ T3356]  dump_backtrace+0x0/0x398
> [ 9426.781593][ T3356]  show_stack+0x14/0x20
> [ 9426.785596][ T3356]  dump_stack+0x140/0x1b8
> [ 9426.789772][ T3356]  print_address_description.isra.12+0x54/0x4a8
> [ 9426.795855][ T3356]  kasan_report+0x134/0x1b8
> [ 9426.800203][ T3356]  __asan_report_load8_noabort+0x2c/0x50
> [ 9426.805679][ T3356]  __lock_acquire+0x3458/0x4440
> [ 9426.810373][ T3356]  lock_acquire+0x204/0xf10
> [ 9426.814722][ T3356]  _raw_spin_lock_irqsave+0xf8/0x180
> [ 9426.819853][ T3356]  arm_smmu_detach_dev+0xd8/0x4a0
> arm_smmu_detach_dev at drivers/iommu/arm-smmu-v3.c:2776
> [ 9426.824721][ T3356]  arm_smmu_release_device+0xb4/0x1c8
> arm_smmu_disable_pasid at drivers/iommu/arm-smmu-v3.c:2754
> (inlined by) arm_smmu_release_device at drivers/iommu/arm-smmu-v3.c:3000
> [ 9426.829937][ T3356]  iommu_release_device+0xc0/0x178
> iommu_release_device at drivers/iommu/iommu.c:302
> [ 9426.834892][ T3356]  iommu_bus_notifier+0x118/0x160
> [ 9426.839762][ T3356]  notifier_call_chain+0xa4/0x128
> [ 9426.844630][ T3356]  __blocking_notifier_call_chain+0x70/0xa8
> [ 9426.850367][ T3356]  blocking_notifier_call_chain+0x14/0x20
> [ 9426.855929][ T3356]  device_del+0x618/0xa00
> [ 9426.860105][ T3356]  pci_remove_bus_device+0x108/0x2d8
> [ 9426.865233][ T3356]  pci_stop_and_remove_bus_device+0x1c/0x28
> [ 9426.870972][ T3356]  pci_iov_remove_virtfn+0x228/0x368
> [ 9426.876100][ T3356]  sriov_disable+0x8c/0x348
> [ 9426.880447][ T3356]  pci_disable_sriov+0x5c/0x70
> [ 9426.885117][ T3356]  mlx5_core_sriov_configure+0xd8/0x260 [mlx5_core]
> [ 9426.891549][ T3356]  sriov_numvfs_store+0x240/0x318
> [ 9426.896417][ T3356]  dev_attr_store+0x38/0x68
> [ 9426.900766][ T3356]  sysfs_kf_write+0xdc/0x128
> [ 9426.905200][ T3356]  kernfs_fop_write+0x23c/0x448
> [ 9426.909897][ T3356]  __vfs_write+0x54/0xe8
> [ 9426.913984][ T3356]  vfs_write+0x124/0x3f0
> [ 9426.918070][ T3356]  ksys_write+0xe8/0x1b8
> [ 9426.922157][ T3356]  __arm64_sys_write+0x68/0x98
> [ 9426.926766][ T3356]  do_el0_svc+0x124/0x220
> [ 9426.930941][ T3356]  el0_sync_handler+0x260/0x408
> [ 9426.935634][ T3356]  el0_sync+0x140/0x180
> [ 9426.939633][ T3356]
> [ 9426.941810][ T3356] Allocated by task 3356:
> [ 9426.945985][ T3356]  save_stack+0x24/0x50
> [ 9426.949986][ T3356]  __kasan_kmalloc.isra.13+0xc4/0xe0
> [ 9426.955114][ T3356]  kasan_kmalloc+0xc/0x18
> [ 9426.959288][ T3356]  kmem_cache_alloc_trace+0x1ec/0x318
> [ 9426.964503][ T3356]  arm_smmu_domain_alloc+0x54/0x148
> [ 9426.969545][ T3356]  iommu_group_alloc_default_domain+0xc0/0x440
> [ 9426.975541][ T3356]  iommu_probe_device+0x1c0/0x308
> [ 9426.980409][ T3356]  

[PATCH] iommu: fix use-after-free in iommu_release_device

2020-07-03 Thread Qian Cai
In pci_disable_sriov(), i.e.,

 # echo 0 > /sys/class/net/enp11s0f1np1/device/sriov_numvfs

iommu_release_device
  iommu_group_remove_device
arm_smmu_domain_free
  kfree(smmu_domain)

Later,

iommu_release_device
  arm_smmu_release_device
arm_smmu_detach_dev
  spin_lock_irqsave(_domain->devices_lock,

would trigger an use-after-free. Fixed it by call
arm_smmu_release_device() first before iommu_group_remove_device().

 BUG: KASAN: use-after-free in __lock_acquire+0x3458/0x4440
  __lock_acquire at kernel/locking/lockdep.c:4250
 Read of size 8 at addr 0089df1a6f68 by task bash/3356

 CPU: 5 PID: 3356 Comm: bash Not tainted 5.8.0-rc3-next-20200630 #2
 Hardware name: HPE Apollo 70 /C01_APACHE_MB , BIOS 
L50_5.13_1.11 06/18/2019
 Call trace:
  dump_backtrace+0x0/0x398
  show_stack+0x14/0x20
  dump_stack+0x140/0x1b8
  print_address_description.isra.12+0x54/0x4a8
  kasan_report+0x134/0x1b8
  __asan_report_load8_noabort+0x2c/0x50
  __lock_acquire+0x3458/0x4440
  lock_acquire+0x204/0xf10
  _raw_spin_lock_irqsave+0xf8/0x180
  arm_smmu_detach_dev+0xd8/0x4a0
  arm_smmu_detach_dev at drivers/iommu/arm-smmu-v3.c:2776
  arm_smmu_release_device+0xb4/0x1c8
  arm_smmu_disable_pasid at drivers/iommu/arm-smmu-v3.c:2754
  (inlined by) arm_smmu_release_device at drivers/iommu/arm-smmu-v3.c:3000
  iommu_release_device+0xc0/0x178
  iommu_release_device at drivers/iommu/iommu.c:302
  iommu_bus_notifier+0x118/0x160
  notifier_call_chain+0xa4/0x128
  __blocking_notifier_call_chain+0x70/0xa8
  blocking_notifier_call_chain+0x14/0x20
  device_del+0x618/0xa00
  pci_remove_bus_device+0x108/0x2d8
  pci_stop_and_remove_bus_device+0x1c/0x28
  pci_iov_remove_virtfn+0x228/0x368
  sriov_disable+0x8c/0x348
  pci_disable_sriov+0x5c/0x70
  mlx5_core_sriov_configure+0xd8/0x260 [mlx5_core]
  sriov_numvfs_store+0x240/0x318
  dev_attr_store+0x38/0x68
  sysfs_kf_write+0xdc/0x128
  kernfs_fop_write+0x23c/0x448
  __vfs_write+0x54/0xe8
  vfs_write+0x124/0x3f0
  ksys_write+0xe8/0x1b8
  __arm64_sys_write+0x68/0x98
  do_el0_svc+0x124/0x220
  el0_sync_handler+0x260/0x408
  el0_sync+0x140/0x180

 Allocated by task 3356:
  save_stack+0x24/0x50
  __kasan_kmalloc.isra.13+0xc4/0xe0
  kasan_kmalloc+0xc/0x18
  kmem_cache_alloc_trace+0x1ec/0x318
  arm_smmu_domain_alloc+0x54/0x148
  iommu_group_alloc_default_domain+0xc0/0x440
  iommu_probe_device+0x1c0/0x308
  iort_iommu_configure+0x434/0x518
  acpi_dma_configure+0xf0/0x128
  pci_dma_configure+0x114/0x160
  really_probe+0x124/0x6d8
  driver_probe_device+0xc4/0x180
  __device_attach_driver+0x184/0x1e8
  bus_for_each_drv+0x114/0x1a0
  __device_attach+0x19c/0x2a8
  device_attach+0x10/0x18
  pci_bus_add_device+0x70/0xf8
  pci_iov_add_virtfn+0x7b4/0xb40
  sriov_enable+0x5c8/0xc30
  pci_enable_sriov+0x64/0x80
  mlx5_core_sriov_configure+0x58/0x260 [mlx5_core]
  sriov_numvfs_store+0x1c0/0x318
  dev_attr_store+0x38/0x68
  sysfs_kf_write+0xdc/0x128
  kernfs_fop_write+0x23c/0x448
  __vfs_write+0x54/0xe8
  vfs_write+0x124/0x3f0
  ksys_write+0xe8/0x1b8
  __arm64_sys_write+0x68/0x98
  do_el0_svc+0x124/0x220
  el0_sync_handler+0x260/0x408
  el0_sync+0x140/0x180

 Freed by task 3356:
  save_stack+0x24/0x50
  __kasan_slab_free+0x124/0x198
  kasan_slab_free+0x10/0x18
  slab_free_freelist_hook+0x110/0x298
  kfree+0x128/0x668
  arm_smmu_domain_free+0xf4/0x1a0
  iommu_group_release+0xec/0x160
  kobject_put+0xf4/0x238
  kobject_del+0x110/0x190
  kobject_put+0x1e4/0x238
  iommu_group_remove_device+0x394/0x938
  iommu_release_device+0x9c/0x178
  iommu_release_device at drivers/iommu/iommu.c:300
  iommu_bus_notifier+0x118/0x160
  notifier_call_chain+0xa4/0x128
  __blocking_notifier_call_chain+0x70/0xa8
  blocking_notifier_call_chain+0x14/0x20
  device_del+0x618/0xa00
  pci_remove_bus_device+0x108/0x2d8
  pci_stop_and_remove_bus_device+0x1c/0x28
  pci_iov_remove_virtfn+0x228/0x368
  sriov_disable+0x8c/0x348
  pci_disable_sriov+0x5c/0x70
  mlx5_core_sriov_configure+0xd8/0x260 [mlx5_core]
  sriov_numvfs_store+0x240/0x318
  dev_attr_store+0x38/0x68
  sysfs_kf_write+0xdc/0x128
  kernfs_fop_write+0x23c/0x448
  __vfs_write+0x54/0xe8
  vfs_write+0x124/0x3f0
  ksys_write+0xe8/0x1b8
  __arm64_sys_write+0x68/0x98
  do_el0_svc+0x124/0x220
  el0_sync_handler+0x260/0x408
  el0_sync+0x140/0x180

 The buggy address belongs to the object at 0089df1a6e00
  which belongs to the cache kmalloc-512 of size 512
 The buggy address is located 360 bytes inside of
  512-byte region [0089df1a6e00, 0089df1a7000)
 The buggy address belongs to the page:
 page:ffe02257c680 refcount:1 mapcount:0 mapping: 
index:0x0089df1a1400
 flags: 0x780200(slab)
 raw: 00780200 ffe02246b8c8 ffe02257ff88 00320680
 raw: 0089df1a1400 002a000e 0001 0089df1a5001
 page dumped because: kasan: bad access detected
 page->mem_cgroup:0089df1a5001

 Memory state around the buggy address:
  0089df1a6e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  

Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)

2020-07-03 Thread Sai Prakash Ranjan

On 2020-07-03 21:34, Rob Clark wrote:

On Fri, Jul 3, 2020 at 7:53 AM Sai Prakash Ranjan
 wrote:


Hi Will,

On 2020-07-03 19:07, Will Deacon wrote:
> On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote:
>> diff --git a/drivers/gpu/drm/msm/msm_iommu.c
>> b/drivers/gpu/drm/msm/msm_iommu.c
>> index f455c597f76d..bd1d58229cc2 100644
>> --- a/drivers/gpu/drm/msm/msm_iommu.c
>> +++ b/drivers/gpu/drm/msm/msm_iommu.c
>> @@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu,
>> uint64_t iova,
>>  iova |= GENMASK_ULL(63, 49);
>>
>>
>> +if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE)
>> +prot |= IOMMU_SYS_CACHE_ONLY;
>
> Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then
> it
> looks like it should actually be a property on the domain because we
> never
> need to configure it on a per-mapping basis within a domain, and
> therefore
> it shouldn't be exposed by the IOMMU API as a prot flag.
>
> Do you agree?
>

GPU being the only user is for now, but there are other clients which
can use this.
Plus how do we set the memory attributes if we do not expose this as
prot flag?


It does appear that the downstream kgsl driver sets this for basically
all mappings.. well there is some conditional stuff around
DOMAIN_ATTR_USE_LLC_NWA but it seems based on the property of the
domain.  (Jordan may know more about what that is about.)  But looks
like there are a lot of different paths into iommu_map in kgsl so I
might have missed something.

Assuming there isn't some case where we specifically don't want to use
the system cache for some mapping, I think it could be a domain
attribute that sets an io_pgtable_cfg::quirks flag



Ok then we are good to remove unused sys cache prot flag which Will has 
posted.


Thanks,
Sai

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a 
member

of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu: Remove unused IOMMU_SYS_CACHE_ONLY flag

2020-07-03 Thread Sai Prakash Ranjan

On 2020-07-03 21:55, Will Deacon wrote:

The IOMMU_SYS_CACHE_ONLY flag was never exposed via the DMA API and
has no in-tree users. Remove it.

Cc: Robin Murphy 
Cc: "Isaac J. Manjarres" 
Cc: Joerg Roedel 
Cc: Christoph Hellwig 
Cc: Sai Prakash Ranjan 
Cc: Rob Clark 
Signed-off-by: Will Deacon 
---

As discussed in [1], sounds like this should be a domain attribute 
anyway

when it's needed by the GPU.

[1]
https://lore.kernel.org/r/caf6aegscrovtsi2r7_aukmh9luoc_gumr0w0kujc2cegpfj...@mail.gmail.com



Reviewed-by: Sai Prakash Ranjan QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a 
member

of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu: Remove unused IOMMU_SYS_CACHE_ONLY flag

2020-07-03 Thread Will Deacon
The IOMMU_SYS_CACHE_ONLY flag was never exposed via the DMA API and
has no in-tree users. Remove it.

Cc: Robin Murphy 
Cc: "Isaac J. Manjarres" 
Cc: Joerg Roedel 
Cc: Christoph Hellwig 
Cc: Sai Prakash Ranjan 
Cc: Rob Clark 
Signed-off-by: Will Deacon 
---

As discussed in [1], sounds like this should be a domain attribute anyway
when it's needed by the GPU.

[1] 
https://lore.kernel.org/r/caf6aegscrovtsi2r7_aukmh9luoc_gumr0w0kujc2cegpfj...@mail.gmail.com

 drivers/iommu/io-pgtable-arm.c | 3 ---
 include/linux/iommu.h  | 6 --
 2 files changed, 9 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 04fbd4bf0ff9..8f175c02f8e3 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -438,9 +438,6 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
arm_lpae_io_pgtable *data,
else if (prot & IOMMU_CACHE)
pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
-   else if (prot & IOMMU_SYS_CACHE_ONLY)
-   pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE
-   << ARM_LPAE_PTE_ATTRINDX_SHIFT);
}
 
if (prot & IOMMU_CACHE)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5f0b7859d2eb..bee1a8fa1fb1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -31,12 +31,6 @@
  * if the IOMMU page table format is equivalent.
  */
 #define IOMMU_PRIV (1 << 5)
-/*
- * Non-coherent masters can use this page protection flag to set cacheable
- * memory attributes for only a transparent outer level of cache, also known as
- * the last-level or system cache.
- */
-#define IOMMU_SYS_CACHE_ONLY   (1 << 6)
 
 struct iommu_ops;
 struct iommu_group;
-- 
2.27.0.212.ge8ba1cc988-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu-v3: expose numa_node attribute to users in sysfs

2020-07-03 Thread Will Deacon
On Sat, May 30, 2020 at 09:15:05PM +1200, Barry Song wrote:
> As tests show the latency of dma_unmap can increase dramatically while
> calling them cross NUMA nodes, especially cross CPU packages, eg.
> 300ns vs 800ns while waiting for the completion of CMD_SYNC in an
> empty command queue. The large latency causing by remote node will
> in turn make contention of the command queue more serious, and enlarge
> the latency of DMA users within local NUMA nodes.
> 
> Users might intend to enforce NUMA locality with the consideration of
> the position of SMMU. The patch provides minor benefit by presenting
> this information to users directly, as they might want to know it without
> checking hardware spec at all.

I don't think that's a very good reason to expose things to userspace.
I know sysfs shouldn't be treated as ABI, but the grim reality is that
once somebody relies on this stuff then we can't change it, so I'd
rather avoid exposing it unless it's absolutely necessary.

Thanks,

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu-v3: allocate the memory of queues in local numa node

2020-07-03 Thread Will Deacon
On Mon, Jun 01, 2020 at 11:31:41PM +1200, Barry Song wrote:
> dmam_alloc_coherent() will usually allocate memory from the default CMA. For
> a common arm64 defconfig without reserved memory in device tree, there is only
> one CMA close to address 0.
> dma_alloc_contiguous() will allocate memory without any idea of  NUMA and smmu
> has no customized per-numa cma_area.
> struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
> {
> size_t count = size >> PAGE_SHIFT;
> struct page *page = NULL;
> struct cma *cma = NULL;
> 
> if (dev && dev->cma_area)
> cma = dev->cma_area;
> else if (count > 1)
> cma = dma_contiguous_default_area;
> 
>   ...
> return page;
> }
> 
> if there are N numa nodes, N-1 nodes will put command/evt queues etc in a
> remote node the default CMA belongs to,probably node 0. Tests show, after
> sending CMD_SYNC in an empty command queue,
> on Node2, without this patch, it takes 550ns to wait for the completion
> of CMD_SYNC; with this patch, it takes 250ns to wait for the completion
> of CMD_SYNC.
> 
> Signed-off-by: Barry Song 
> ---
>  drivers/iommu/arm-smmu-v3.c | 63 -
>  1 file changed, 48 insertions(+), 15 deletions(-)

I would prefer that the coherent DMA allocator learned about NUMA, rather
than we bodge drivers to use the streaming API where it doesn't really
make sense.

I see that you've posted other patches to do that (thanks!), so I'll
disregard this series.

Cheers,

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)

2020-07-03 Thread Rob Clark
On Fri, Jul 3, 2020 at 7:53 AM Sai Prakash Ranjan
 wrote:
>
> Hi Will,
>
> On 2020-07-03 19:07, Will Deacon wrote:
> > On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote:
> >> diff --git a/drivers/gpu/drm/msm/msm_iommu.c
> >> b/drivers/gpu/drm/msm/msm_iommu.c
> >> index f455c597f76d..bd1d58229cc2 100644
> >> --- a/drivers/gpu/drm/msm/msm_iommu.c
> >> +++ b/drivers/gpu/drm/msm/msm_iommu.c
> >> @@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu,
> >> uint64_t iova,
> >>  iova |= GENMASK_ULL(63, 49);
> >>
> >>
> >> +if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE)
> >> +prot |= IOMMU_SYS_CACHE_ONLY;
> >
> > Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then
> > it
> > looks like it should actually be a property on the domain because we
> > never
> > need to configure it on a per-mapping basis within a domain, and
> > therefore
> > it shouldn't be exposed by the IOMMU API as a prot flag.
> >
> > Do you agree?
> >
>
> GPU being the only user is for now, but there are other clients which
> can use this.
> Plus how do we set the memory attributes if we do not expose this as
> prot flag?

It does appear that the downstream kgsl driver sets this for basically
all mappings.. well there is some conditional stuff around
DOMAIN_ATTR_USE_LLC_NWA but it seems based on the property of the
domain.  (Jordan may know more about what that is about.)  But looks
like there are a lot of different paths into iommu_map in kgsl so I
might have missed something.

Assuming there isn't some case where we specifically don't want to use
the system cache for some mapping, I think it could be a domain
attribute that sets an io_pgtable_cfg::quirks flag

BR,
-R
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] iommu: Tidy up Kconfig for SoC IOMMUs

2020-07-03 Thread Robin Murphy
Wacky COMPILE_TEST dependencies based on who used to define
dev_archdata.iommu can go.

Dependencies on ARM or ARM64 already implied by the ARCH_* platform
selection can go.

The entire IOMMU_SUPPORT menu already depends on MMU, so those can go.

IOMMU_DMA is for the architecture's DMA API implementation to choose,
and its interface to IOMMU drivers is properly stubbed out if disabled,
so dependencies on or selections of that can go (AMD_IOMMU is the
current exception since the x86 drivers have to provide their own entire
dma_map_ops implementation).

Since commit ed6ccf10f24b ("dma-mapping: properly stub out the DMA API
for !CONFIG_HAS_DMA"), drivers which simply use the dma-mapping API
should not need to depend on HAS_DMA, so those can go.

And a long-dead option for code removed from the MSM driver 4 years ago
can also go.

Signed-off-by: Robin Murphy 
---

Based on the current iommu/next branch.

 drivers/iommu/Kconfig | 16 +++-
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index b0f308cb7f7c..4dcd85bbff99 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -128,10 +128,6 @@ config MSM_IOMMU
 
  If unsure, say N here.
 
-config IOMMU_PGTABLES_L2
-   def_bool y
-   depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n
-
 # AMD IOMMU support
 config AMD_IOMMU
bool "AMD IOMMU support"
@@ -274,7 +270,6 @@ config IRQ_REMAP
 # OMAP IOMMU support
 config OMAP_IOMMU
bool "OMAP IOMMU Support"
-   depends on ARM && MMU || (COMPILE_TEST && (ARM || ARM64 || IA64 || 
SPARC))
depends on ARCH_OMAP2PLUS || COMPILE_TEST
select IOMMU_API
help
@@ -292,7 +287,6 @@ config OMAP_IOMMU_DEBUG
 
 config ROCKCHIP_IOMMU
bool "Rockchip IOMMU Support"
-   depends on ARM || ARM64 || (COMPILE_TEST && (ARM64 || IA64 || SPARC))
depends on ARCH_ROCKCHIP || COMPILE_TEST
select IOMMU_API
select ARM_DMA_USE_IOMMU
@@ -305,11 +299,9 @@ config ROCKCHIP_IOMMU
 
 config SUN50I_IOMMU
bool "Allwinner H6 IOMMU Support"
-   depends on HAS_DMA
depends on ARCH_SUNXI || COMPILE_TEST
select ARM_DMA_USE_IOMMU
select IOMMU_API
-   select IOMMU_DMA
help
  Support for the IOMMU introduced in the Allwinner H6 SoCs.
 
@@ -336,7 +328,7 @@ config TEGRA_IOMMU_SMMU
 
 config EXYNOS_IOMMU
bool "Exynos IOMMU Support"
-   depends on ARCH_EXYNOS && MMU || (COMPILE_TEST && (ARM || ARM64 || IA64 
|| SPARC))
+   depends on ARCH_EXYNOS || COMPILE_TEST
depends on !CPU_BIG_ENDIAN # revisit driver if we can enable big-endian 
ptes
select IOMMU_API
select ARM_DMA_USE_IOMMU
@@ -359,7 +351,7 @@ config EXYNOS_IOMMU_DEBUG
 
 config IPMMU_VMSA
bool "Renesas VMSA-compatible IPMMU"
-   depends on ARM || IOMMU_DMA
+   depends on ARM || ARM64
depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64)
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
@@ -381,7 +373,7 @@ config SPAPR_TCE_IOMMU
 # ARM IOMMU support
 config ARM_SMMU
tristate "ARM Ltd. System MMU (SMMU) Support"
-   depends on (ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)) && MMU
+   depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU if ARM
@@ -467,11 +459,9 @@ config S390_AP_IOMMU
 
 config MTK_IOMMU
bool "MTK IOMMU Support"
-   depends on HAS_DMA
depends on ARCH_MEDIATEK || COMPILE_TEST
select ARM_DMA_USE_IOMMU
select IOMMU_API
-   select IOMMU_DMA
select IOMMU_IO_PGTABLE_ARMV7S
select MEMORY
select MTK_SMI

base-commit: e5640f21b63d2a5d3e4e0c4111b2b38e99fe5164
-- 
2.23.0.dirty

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/2] iommu/renesas: Expand COMPILE_TEST coverage

2020-07-03 Thread Robin Murphy
This driver shouldn't need anything architecture-specific (that isn't
under CONFIG_ARM protection already), and has already been accessible
from certain x86 configurations by virtue of the previously-cleaned-up
"ARM || IOMMU_DMA" dependency. Allow COMPILE_TEST for all architectures.

Signed-off-by: Robin Murphy 
---

Let's see if 0-day agrees... :D

 drivers/iommu/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 4dcd85bbff99..af6c081d164f 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -351,7 +351,6 @@ config EXYNOS_IOMMU_DEBUG
 
 config IPMMU_VMSA
bool "Renesas VMSA-compatible IPMMU"
-   depends on ARM || ARM64
depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64)
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
-- 
2.23.0.dirty

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)

2020-07-03 Thread Will Deacon
On Fri, Jul 03, 2020 at 08:23:07PM +0530, Sai Prakash Ranjan wrote:
> On 2020-07-03 19:07, Will Deacon wrote:
> > On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote:
> > > diff --git a/drivers/gpu/drm/msm/msm_iommu.c
> > > b/drivers/gpu/drm/msm/msm_iommu.c
> > > index f455c597f76d..bd1d58229cc2 100644
> > > --- a/drivers/gpu/drm/msm/msm_iommu.c
> > > +++ b/drivers/gpu/drm/msm/msm_iommu.c
> > > @@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu,
> > > uint64_t iova,
> > >   iova |= GENMASK_ULL(63, 49);
> > > 
> > > 
> > > + if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE)
> > > + prot |= IOMMU_SYS_CACHE_ONLY;
> > 
> > Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then
> > it
> > looks like it should actually be a property on the domain because we
> > never
> > need to configure it on a per-mapping basis within a domain, and
> > therefore
> > it shouldn't be exposed by the IOMMU API as a prot flag.
> > 
> > Do you agree?
> > 
> 
> GPU being the only user is for now, but there are other clients which can
> use this.
> Plus how do we set the memory attributes if we do not expose this as prot
> flag?

I just don't understand the need for it to be per-map operation. Put another
way, if we extended the domain attribute to apply to cacheable mappings
on the domain and not just the table walk, what would break?

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)

2020-07-03 Thread Sai Prakash Ranjan

Hi Will,

On 2020-07-03 19:07, Will Deacon wrote:

On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote:
diff --git a/drivers/gpu/drm/msm/msm_iommu.c 
b/drivers/gpu/drm/msm/msm_iommu.c

index f455c597f76d..bd1d58229cc2 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu, 
uint64_t iova,

iova |= GENMASK_ULL(63, 49);


+   if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE)
+   prot |= IOMMU_SYS_CACHE_ONLY;


Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then 
it
looks like it should actually be a property on the domain because we 
never
need to configure it on a per-mapping basis within a domain, and 
therefore

it shouldn't be exposed by the IOMMU API as a prot flag.

Do you agree?



GPU being the only user is for now, but there are other clients which 
can use this.
Plus how do we set the memory attributes if we do not expose this as 
prot flag?


Thanks,
Sai

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a 
member

of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] iommu/iova: Retry from last rb tree node if iova search fails

2020-07-03 Thread vjitta
From: Vijayanand Jitta 

When ever a new iova alloc request comes iova is always searched
from the cached node and the nodes which are previous to cached
node. So, even if there is free iova space available in the nodes
which are next to the cached node iova allocation can still fail
because of this approach.

Consider the following sequence of iova alloc and frees on
1GB of iova space

1) alloc - 500MB
2) alloc - 12MB
3) alloc - 499MB
4) free -  12MB which was allocated in step 2
5) alloc - 13MB

After the above sequence we will have 12MB of free iova space and
cached node will be pointing to the iova pfn of last alloc of 13MB
which will be the lowest iova pfn of that iova space. Now if we get an
alloc request of 2MB we just search from cached node and then look
for lower iova pfn's for free iova and as they aren't any, iova alloc
fails though there is 12MB of free iova space.

To avoid such iova search failures do a retry from the last rb tree node
when iova search fails, this will search the entire tree and get an iova
if its available.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 49fc01f..4e77116 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -184,8 +184,9 @@ static int __alloc_and_insert_iova_range(struct iova_domain 
*iovad,
struct rb_node *curr, *prev;
struct iova *curr_iova;
unsigned long flags;
-   unsigned long new_pfn;
+   unsigned long new_pfn, low_pfn_new;
unsigned long align_mask = ~0UL;
+   unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
 
if (size_aligned)
align_mask <<= fls_long(size - 1);
@@ -198,15 +199,25 @@ static int __alloc_and_insert_iova_range(struct 
iova_domain *iovad,
 
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
+   low_pfn_new = curr_iova->pfn_hi + 1;
+
+retry:
do {
-   limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
-   new_pfn = (limit_pfn - size) & align_mask;
+   high_pfn = min(high_pfn, curr_iova->pfn_lo);
+   new_pfn = (high_pfn - size) & align_mask;
prev = curr;
curr = rb_prev(curr);
curr_iova = rb_entry(curr, struct iova, node);
-   } while (curr && new_pfn <= curr_iova->pfn_hi);
-
-   if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+   } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
+
+   if (high_pfn < size || new_pfn < low_pfn) {
+   if (low_pfn == iovad->start_pfn && low_pfn_new < limit_pfn) {
+   high_pfn = limit_pfn;
+   low_pfn = low_pfn_new;
+   curr = >anchor.node;
+   curr_iova = rb_entry(curr, struct iova, node);
+   goto retry;
+   }
iovad->max32_alloc_size = size;
goto iova32_full;
}
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
1.9.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/2] iommu/iova: Free global iova rcache on iova alloc failure

2020-07-03 Thread vjitta
From: Vijayanand Jitta 

When ever an iova alloc request fails we free the iova
ranges present in the percpu iova rcaches and then retry
but the global iova rcache is not freed as a result we could
still see iova alloc failure even after retry as global
rcache is holding the iova's which can cause fragmentation.
So, free the global iova rcache as well and then go for the
retry.

Change-Id: Ib8236dc88ba5516b73d4fbf6bf8e68bbf09bbad2
Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 23 +++
 include/linux/iova.h |  6 ++
 2 files changed, 29 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 4e77116..5836c87 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -442,6 +442,7 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned 
long pfn)
flush_rcache = false;
for_each_online_cpu(cpu)
free_cpu_cached_iovas(cpu, iovad);
+   free_global_cached_iovas(iovad);
goto retry;
}
 
@@ -1055,5 +1056,27 @@ void free_cpu_cached_iovas(unsigned int cpu, struct 
iova_domain *iovad)
}
 }
 
+/*
+ * free all the IOVA ranges of global cache
+ */
+void free_global_cached_iovas(struct iova_domain *iovad)
+{
+   struct iova_rcache *rcache;
+   unsigned long flags;
+   int i, j;
+
+   for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+   rcache = >rcaches[i];
+   spin_lock_irqsave(>lock, flags);
+   for (j = 0; j < rcache->depot_size; ++j) {
+   iova_magazine_free_pfns(rcache->depot[j], iovad);
+   iova_magazine_free(rcache->depot[j]);
+   rcache->depot[j] = NULL;
+   }
+   rcache->depot_size = 0;
+   spin_unlock_irqrestore(>lock, flags);
+   }
+}
+
 MODULE_AUTHOR("Anil S Keshavamurthy ");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/iova.h b/include/linux/iova.h
index a0637ab..a905726 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -163,6 +163,7 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 struct iova *split_and_remove_iova(struct iova_domain *iovad,
struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi);
 void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
+void free_global_cached_iovas(struct iova_domain *iovad);
 #else
 static inline int iova_cache_get(void)
 {
@@ -270,6 +271,11 @@ static inline void free_cpu_cached_iovas(unsigned int cpu,
 struct iova_domain *iovad)
 {
 }
+
+static inline void free_global_cached_iovas(struct iova_domain *iovad)
+{
+}
+
 #endif
 
 #endif
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
1.9.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCHv3 7/7] drm/msm/a6xx: Add support for using system cache(LLC)

2020-07-03 Thread Will Deacon
On Mon, Jun 29, 2020 at 09:22:50PM +0530, Sai Prakash Ranjan wrote:
> diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
> index f455c597f76d..bd1d58229cc2 100644
> --- a/drivers/gpu/drm/msm/msm_iommu.c
> +++ b/drivers/gpu/drm/msm/msm_iommu.c
> @@ -218,6 +218,9 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t 
> iova,
>   iova |= GENMASK_ULL(63, 49);
>  
>  
> + if (mmu->features & MMU_FEATURE_USE_SYSTEM_CACHE)
> + prot |= IOMMU_SYS_CACHE_ONLY;

Given that I think this is the only user of IOMMU_SYS_CACHE_ONLY, then it
looks like it should actually be a property on the domain because we never
need to configure it on a per-mapping basis within a domain, and therefore
it shouldn't be exposed by the IOMMU API as a prot flag.

Do you agree?

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH v3 03/14] vfio/type1: Report iommu nesting info to userspace

2020-07-03 Thread Liu, Yi L
Hi Alex,

> From: Liu, Yi L 
> Sent: Friday, July 3, 2020 2:06 PM
[...]
> > > +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING  3
> > > +
> > > +struct vfio_iommu_type1_info_cap_nesting {
> > > + struct  vfio_info_cap_header header;
> > > + __u32   flags;
> >
> > I think there's an alignment issue here for a uapi.  The header field
> > is 8-bytes total and info[] should start at an 8-byte alignment to
> > allow data[] within info to have 8-byte alignment.  This could lead to
> > the structure having a compiler dependent size and offsets.  We should
> > add a 4-byte reserved field here to resolve.
> 
> got it. or how about defining the flags as __u64?
> 
> >
> > > + __u8info[];
> > > +};
> >
> > This should have a lot more description around it, a user could not
> > infer that info[] is including a struct iommu_nesting_info from the
> > information provided here.
> > Thanks,
> 
> sure. BTW. do you think it is necessary to add a flag to indicate the info[] 
> is a
> struct iommu_nesting_info? or as a start, it's not necessary to do it.

seems like I misunderstood your comment. Does below description suits
your comment?

/*
 * Reporting nesting info to user space.
 *
 * @info:   the nesting info provided by IOMMU driver. Today
 *  it is expected to be a struct iommu_nesting_info
 *  data.
 */
struct vfio_iommu_type1_info_cap_nesting {
struct  vfio_info_cap_header header;
__u32   flags;
__u32   padding;
__u8info[];
};

Thanks,
Yi Liu

> Regards,
> Yi Liu
> 
> > Alex
> >
> > > +
> > >  #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> > >
> > >  /**

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC][PATCH 5/5] firmware: QCOM_SCM: Allow qcom_scm driver to be loadable as a permenent module

2020-07-03 Thread Vinod Koul
On 03-07-20, 13:23, Will Deacon wrote:
> On Tue, Jun 16, 2020 at 01:52:32PM -0700, John Stultz wrote:
> > On Tue, Jun 16, 2020 at 12:55 AM Marc Zyngier  wrote:
> > > On 2020-06-16 07:13, John Stultz wrote:
> > > > diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> > > > index b510f67dfa49..714893535dd2 100644
> > > > --- a/drivers/iommu/Kconfig
> > > > +++ b/drivers/iommu/Kconfig
> > > > @@ -381,6 +381,7 @@ config SPAPR_TCE_IOMMU
> > > >  config ARM_SMMU
> > > >   tristate "ARM Ltd. System MMU (SMMU) Support"
> > > >   depends on (ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)) 
> > > > &&
> > > > MMU
> > > > + depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y
> > >
> > > This looks a bit ugly. Could you explain why we need this at the SMMU
> > > level? I'd have expected the dependency to flow the other way around...
> > 
> > Yea, so the arm-smmu-qcom.c file calls directly into the qcom-scm code
> > via qcom_scm_qsmmu500_wait_safe_toggle()
> >   
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/iommu/arm-smmu-qcom.c?h=v5.8-rc1#n44
> > 
> > So if ARM_SMMU=y and QCOM_SCM=m we get:
> > drivers/iommu/arm-smmu-qcom.o: In function `qcom_smmu500_reset':
> > arm-smmu-qcom.c:(.text+0xb4): undefined reference to
> > `qcom_scm_qsmmu500_wait_safe_toggle'
> > 
> > Do you have a suggestion for an alternative approach?
> 
> Can you use symbol_get() or something like that? How are module dependencies
> handled by other drivers?

So drivers deal with this by making rules in Kconfig which will prohibit
this case. QCOM_SCM depends on ARM_SMMU with the caveat that if ARM_SMMU
is a module, QCOM_SCM cant be inbuilt.

This can be done by adding below line in Kconfig for QCOM_SCM:

depends on ARM_SMMU || !ARM_SMMU

This is quite prevalent is drivers to ensure dependency like this

Thanks
-- 
~Vinod
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC][PATCH 5/5] firmware: QCOM_SCM: Allow qcom_scm driver to be loadable as a permenent module

2020-07-03 Thread Will Deacon
On Tue, Jun 16, 2020 at 01:52:32PM -0700, John Stultz wrote:
> On Tue, Jun 16, 2020 at 12:55 AM Marc Zyngier  wrote:
> > On 2020-06-16 07:13, John Stultz wrote:
> > > diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> > > index b510f67dfa49..714893535dd2 100644
> > > --- a/drivers/iommu/Kconfig
> > > +++ b/drivers/iommu/Kconfig
> > > @@ -381,6 +381,7 @@ config SPAPR_TCE_IOMMU
> > >  config ARM_SMMU
> > >   tristate "ARM Ltd. System MMU (SMMU) Support"
> > >   depends on (ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)) &&
> > > MMU
> > > + depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y
> >
> > This looks a bit ugly. Could you explain why we need this at the SMMU
> > level? I'd have expected the dependency to flow the other way around...
> 
> Yea, so the arm-smmu-qcom.c file calls directly into the qcom-scm code
> via qcom_scm_qsmmu500_wait_safe_toggle()
>   
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/iommu/arm-smmu-qcom.c?h=v5.8-rc1#n44
> 
> So if ARM_SMMU=y and QCOM_SCM=m we get:
> drivers/iommu/arm-smmu-qcom.o: In function `qcom_smmu500_reset':
> arm-smmu-qcom.c:(.text+0xb4): undefined reference to
> `qcom_scm_qsmmu500_wait_safe_toggle'
> 
> Do you have a suggestion for an alternative approach?

Can you use symbol_get() or something like that? How are module dependencies
handled by other drivers?

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 2/4] iommu/arm-smmu: Workaround for Marvell Armada-AP806 SoC erratum #582743

2020-07-03 Thread Tomasz Nowicki



On 03.07.2020 11:03, Robin Murphy wrote:

On 2020-07-02 21:16, Tomasz Nowicki wrote:

From: Hanna Hawa 

Due to erratum #582743, the Marvell Armada-AP806 can't access 64bit to
ARM SMMUv2 registers.

Provide implementation relevant hooks:
- split the writeq/readq to two accesses of writel/readl.
- mask the MMU_IDR2.PTFSv8 fields to not use AArch64 format (but
only AARCH32_L) since with AArch64 format 32 bits access is not 
supported.


Note that separate writes/reads to 2 is not problem regards to
atomicity, because the driver use the readq/writeq while initialize
the SMMU, report for SMMU fault, and use spinlock in one
case (iova_to_phys).


The comment about the spinlock seems to be out of date, and TBH that 
whole sentence is a bit unclear - how about something like:


"Note that most 64-bit registers like TTBRn can be accessed as two 
32-bit halves without issue, and AArch32 format ensures that the 
register writes which must be atomic (for TLBI etc.) need only be 32-bit."



Signed-off-by: Hanna Hawa 
Signed-off-by: Gregory CLEMENT 
Signed-off-by: Tomasz Nowicki 
---
  Documentation/arm64/silicon-errata.rst |  3 ++
  drivers/iommu/arm-smmu-impl.c  | 52 ++
  2 files changed, 55 insertions(+)

diff --git a/Documentation/arm64/silicon-errata.rst 
b/Documentation/arm64/silicon-errata.rst

index 936cf2a59ca4..157214d3abe1 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -125,6 +125,9 @@ stable kernels.
  | Cavium | ThunderX2 Core  | #219    | 
CAVIUM_TX2_ERRATUM_219  |
  
++-+-+-+ 

  
++-+-+-+ 

+| Marvell    | ARM-MMU-500 | #582743 | 
N/A |
+++-+-+-+ 

+++-+-+-+ 

  | Freescale/NXP  | LS2080A/LS1043A | A-008585    | 
FSL_ERRATUM_A008585 |
  
++-+-+-+ 

  
++-+-+-+ 

diff --git a/drivers/iommu/arm-smmu-impl.c 
b/drivers/iommu/arm-smmu-impl.c

index c75b9d957b70..c1fc5e1b8193 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -147,6 +147,53 @@ static const struct arm_smmu_impl arm_mmu500_impl 
= {

  .reset = arm_mmu500_reset,
  };
+static u64 mrvl_mmu500_readq(struct arm_smmu_device *smmu, int page, 
int off)

+{
+    u64 val;
+
+    /*
+ * Marvell Armada-AP806 erratum #582743.
+ * Split all the readq to double readl
+ */
+    val = (u64)readl_relaxed(arm_smmu_page(smmu, page) + off + 4) << 32;
+    val |= readl_relaxed(arm_smmu_page(smmu, page) + off);


Even though io-64-nonatomic-hi-lo.h doesn't override readq() etc. for 
64-bit builds, you can still use hi_lo_readq_relaxed() directly.



+
+    return val;
+}
+
+static void mrvl_mmu500_writeq(struct arm_smmu_device *smmu, int 
page, int off,

+   u64 val)
+{
+    /*
+ * Marvell Armada-AP806 erratum #582743.
+ * Split all the writeq to double writel
+ */
+    writel_relaxed(upper_32_bits(val), arm_smmu_page(smmu, page) + 
off + 4);

+    writel_relaxed(lower_32_bits(val), arm_smmu_page(smmu, page) + off);


Similarly, hi_lo_writeq_relaxed().


+}
+
+static u32 mrvl_mmu500_cfg_id2_fixup(u32 id)
+{
+
+    /*
+ * Armada-AP806 erratum #582743.
+ * Hide the SMMU_IDR2.PTFSv8 fields to sidestep the AArch64
+ * formats altogether and allow using 32 bits access on the
+ * interconnect.
+ */
+    id &= ~(ARM_SMMU_ID2_PTFS_4K | ARM_SMMU_ID2_PTFS_16K |
+    ARM_SMMU_ID2_PTFS_64K);
+
+    return id;
+}
+
+static const struct arm_smmu_impl mrvl_mmu500_impl = {
+    .read_reg64 = mrvl_mmu500_readq,
+    .write_reg64 = mrvl_mmu500_writeq,
+    .cfg_id2_fixup = mrvl_mmu500_cfg_id2_fixup,
+    .reset = arm_mmu500_reset,
+};
+
  struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device 
*smmu)

  {
@@ -160,6 +207,11 @@ struct arm_smmu_device *arm_smmu_impl_init(struct 
arm_smmu_device *smmu)

   */
  switch (smmu->model) {
  case ARM_MMU500:
+    if (of_device_is_compatible(smmu->dev->of_node,


Nit: there's a local "np" variable now.


+    "marvell,ap806-smmu-500")) {
+    smmu->impl = _mmu500_impl;
+    return smmu;
+    }


Please put this with the other integration checks below the switch 
statement. Yes, it means we'll end up assigning smmu->impl twice for 
this particular case, but that's the intended pattern.




Thanks, all above comments do make sense and will be fixed in next spin.

Thanks,
Tomasz
___
iommu mailing list
iommu@lists.linux-foundation.org

Re: [PATCH v3 02/14] iommu: Report domain nesting info

2020-07-03 Thread Stefan Hajnoczi
On Tue, Jun 30, 2020 at 02:00:49AM +, Tian, Kevin wrote:
> > From: Liu, Yi L 
> > Sent: Monday, June 29, 2020 8:23 PM
> > 
> > Hi Stefan,
> > 
> > > From: Stefan Hajnoczi 
> > > Sent: Monday, June 29, 2020 5:25 PM
> > >
> > > On Wed, Jun 24, 2020 at 01:55:15AM -0700, Liu Yi L wrote:
> > > > +/*
> > > > + * struct iommu_nesting_info - Information for nesting-capable IOMMU.
> > > > + * user space should check it before using
> > > > + * nesting capability.
> > > > + *
> > > > + * @size:  size of the whole structure
> > > > + * @format:PASID table entry format, the same definition with
> > > > + * @format of struct iommu_gpasid_bind_data.
> > > > + * @features:  supported nesting features.
> > > > + * @flags: currently reserved for future extension.
> > > > + * @data:  vendor specific cap info.
> > > > + *
> > > > + * 
> > > > +---++
> > > > + * | feature   |  Notes
> > > >  |
> > > > + *
> > >
> > +===+===
> > 
> > > =+
> > > > + * | SYSWIDE_PASID |  Kernel manages PASID in system wide, PASIDs
> > used  |
> > > > + * |   |  in the system should be allocated by host kernel 
> > > >  |
> > > > + * 
> > > > +---++
> > > > + * | BIND_PGTBL|  bind page tables to host PASID, the PASID could  
> > > >  |
> > > > + * |   |  either be a host PASID passed in bind request or 
> > > >  |
> > > > + * |   |  default PASIDs (e.g. default PASID of 
> > > > aux-domain) |
> > > > + * 
> > > > +---++
> > > > + * | CACHE_INVLD   |  mandatory feature for nesting capable IOMMU
> > |
> > > > + * 
> > > > +---++
> > >
> > > This feature description is vague about what CACHE_INVLD does and how
> > to
> > > use it. If I understand correctly, the presence of this feature means
> > > that VFIO_IOMMU_NESTING_OP_CACHE_INVLD must be used?
> > >
> > > The same kind of clarification could be done for SYSWIDE_PASID and
> > > BIND_PGTBL too.
> > 
> > For SYSWIDE_PASID and BIND_PGTBL, yes, presence of the feature bit
> > means must use. So the two are requirements to user space if it wants
> > to setup nesting. While for CACHE_INVLD, it's kind of availability
> > here. How about removing CACHE_INVLD as presence of BIND_PGTBL should
> > indicates support of CACHE_INVLD?
> > 
> 
> So far this assumption is correct but it may not be true when thinking 
> forward.
> For example, a vendor might find a way to allow the owner of 1st-level page
> table to directly invalidate cache w/o going through host IOMMU driver. From
> this angle I feel explicitly reporting this capability is more robust.
> 
> Regarding to the description, what about below?
> 
> --
> SYSWIDE_PASID: PASIDs are managed in system-wide, instead of per device.
> When a device is assigned to userspace or VM, proper uAPI (provided by 
> userspace driver framework, e.g. VFIO) must be used to allocate/free PASIDs
> for the assigned device.
> 
> BIND_PGTBL: The owner of the first-level/stage-1 page table must explicitly 
> bind the page table to associated PASID (either the one specified in bind 
> request or the default PASID of the iommu domain), through VFIO_IOMMU
> _NESTING_OP
> 
> CACHE_INVLD: The owner of the first-level/stage-1 page table must
> explicitly invalidate the IOMMU cache through VFIO_IOMMU_NESTING_OP,
> according to vendor-specific requirement when changing the page table.
> --

Mentioning the API to allocate/free PASIDs and VFIO_IOMMU_NESTING_OP has
made this clearer. This lets someone reading the documentation know
where to look for further information on using these features.

Thank you!

Stefan


signature.asc
Description: PGP signature
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v3 4/4] arm64: dts: marvell: add SMMU support

2020-07-03 Thread Tomasz Nowicki

On 03.07.2020 11:16, Robin Murphy wrote:

On 2020-07-02 21:16, Tomasz Nowicki wrote:

From: Marcin Wojtas 

Add IOMMU node for Marvell AP806 based SoCs together with platform
and PCI device Stream ID mapping.

Signed-off-by: Marcin Wojtas 
Signed-off-by: Tomasz Nowicki 
---
  arch/arm64/boot/dts/marvell/armada-8040.dtsi  | 36 +++
  arch/arm64/boot/dts/marvell/armada-ap80x.dtsi | 17 +
  2 files changed, 53 insertions(+)

diff --git a/arch/arm64/boot/dts/marvell/armada-8040.dtsi 
b/arch/arm64/boot/dts/marvell/armada-8040.dtsi

index 7699b19224c2..25c1df709f72 100644
--- a/arch/arm64/boot/dts/marvell/armada-8040.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-8040.dtsi
@@ -23,3 +23,39 @@
  _rtc {
  status = "disabled";
  };
+
+_usb3_0 {
+    iommus = < 0x440>;
+};
+
+_usb3_1 {
+    iommus = < 0x441>;
+};
+
+_sata0 {
+    iommus = < 0x444>;
+};
+
+_sdhci0 {
+    iommus = < 0x445>;
+};
+
+_sata0 {
+    iommus = < 0x454>;
+};
+
+_usb3_0 {
+    iommus = < 0x450>;
+};
+
+_usb3_1 {
+    iommus = < 0x451>;
+};
+
+_pcie0 {
+    iommu-map =
+    <0x0    0x480 0x20>,
+    <0x100  0x4a0 0x20>,
+    <0x200  0x4c0 0x20>;
+    iommu-map-mask = <0x031f>;


Nice! I do like a good compressed mapping :D


+};
diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi 
b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi

index 7f9b9a647717..ded8b8082d79 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
@@ -56,6 +56,23 @@
  compatible = "simple-bus";
  ranges = <0x0 0x0 0xf000 0x100>;
+    smmu: iommu@500 {
+    compatible = "marvell,ap806-smmu-500", "arm,mmu-500";
+    reg = <0x10 0x10>;
+    dma-coherent;
+    #iommu-cells = <1>;
+    #global-interrupts = <1>;
+    interrupts = ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ;


I'd recommend you have the node disabled by default here, then 
explicitly enable it in armada-8040.dtsi where you add the Stream IDs. 
Otherwise it will also end up enabled for 8020, 70x0, etc. where 
disable_bypass will then catastrophically break everything.




Good point! I will fix this.

Thanks,
Tomasz
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v3 3/4] dt-bindings: arm-smmu: add compatible string for Marvell Armada-AP806 SMMU-500

2020-07-03 Thread Tomasz Nowicki

On 03.07.2020 11:05, Robin Murphy wrote:

On 2020-07-02 21:16, Tomasz Nowicki wrote:

Add specific compatible string for Marvell usage due to errata of
accessing 64bits registers of ARM SMMU, in AP806.

AP806 SoC uses the generic ARM-MMU500, and there's no specific
implementation of Marvell, this compatible is used for errata only.

Signed-off-by: Hanna Hawa 
Signed-off-by: Gregory CLEMENT 
Signed-off-by: Tomasz Nowicki 
---
  Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 5 +
  1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml

index d7ceb4c34423..7beca9c00b12 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -38,6 +38,11 @@ properties:
    - qcom,sc7180-smmu-500
    - qcom,sdm845-smmu-500
    - const: arm,mmu-500
+  - description: Marvell SoCs implementing "arm,mmu-500"
+    items:
+  - enum:
+  - marvell,ap806-smmu-500


Isn't a single-valued enum just a constant? :P


That's how copy-paste engineering ends up :)

Thanks,
Tomasz
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v3 1/4] iommu/arm-smmu: Add SMMU ID2 register fixup hook

2020-07-03 Thread Tomasz Nowicki

On 03.07.2020 10:24, Robin Murphy wrote:

On 2020-07-02 21:16, Tomasz Nowicki wrote:

We already have 'cfg_probe' hook which meant to override and apply
workarounds while probing ID registers. However, 'cfg_probe' is called
at the very end and therefore for some cases fixing up things becomes 
complex
or requires exporting of SMMU driver structures. Hence, seems it is 
better and

cleaner to do ID fixup right away. In preparation for adding Marvell
errata add an extra ID2 fixup hook.


Hmm, the intent of ->cfg_probe was very much to give impl a chance to 
adjust the detected features before we start consuming them, with this 
exact case in mind. Since the Cavium quirk isn't actually doing that - 
it just needs to run *anywhere* in the whole probe process - I'm under 
no illusion that I put the hook in exactly the right place first time 
around ;)


The diff below should be more on the mark...

Robin.

->8-
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 243bc4cb2705..884ffca5b1eb 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1891,6 +1891,9 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)

  smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
  }

+    if (smmu->impl && smmu->impl->cfg_probe)
+    return smmu->impl->cfg_probe(smmu);
+
  /* Now we've corralled the various formats, what'll it do? */
  if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
  smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
@@ -1909,7 +1912,6 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)

  dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
     smmu->pgsize_bitmap);

-
  if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
  dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
     smmu->va_size, smmu->ipa_size);
@@ -1918,9 +1920,6 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)

  dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
     smmu->ipa_size, smmu->pa_size);

-    if (smmu->impl && smmu->impl->cfg_probe)
-    return smmu->impl->cfg_probe(smmu);
-
  return 0;
  }



I was under impression that ->cfg_probe was meant for Cavium alike 
errata (position independent). Then I will move ->cfg_probe as you 
suggested. I prefer not to add yet another impl hook too :)


Thanks,
Tomasz
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v3 4/4] arm64: dts: marvell: add SMMU support

2020-07-03 Thread Robin Murphy

On 2020-07-02 21:16, Tomasz Nowicki wrote:

From: Marcin Wojtas 

Add IOMMU node for Marvell AP806 based SoCs together with platform
and PCI device Stream ID mapping.

Signed-off-by: Marcin Wojtas 
Signed-off-by: Tomasz Nowicki 
---
  arch/arm64/boot/dts/marvell/armada-8040.dtsi  | 36 +++
  arch/arm64/boot/dts/marvell/armada-ap80x.dtsi | 17 +
  2 files changed, 53 insertions(+)

diff --git a/arch/arm64/boot/dts/marvell/armada-8040.dtsi 
b/arch/arm64/boot/dts/marvell/armada-8040.dtsi
index 7699b19224c2..25c1df709f72 100644
--- a/arch/arm64/boot/dts/marvell/armada-8040.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-8040.dtsi
@@ -23,3 +23,39 @@
  _rtc {
status = "disabled";
  };
+
+_usb3_0 {
+   iommus = < 0x440>;
+};
+
+_usb3_1 {
+   iommus = < 0x441>;
+};
+
+_sata0 {
+   iommus = < 0x444>;
+};
+
+_sdhci0 {
+   iommus = < 0x445>;
+};
+
+_sata0 {
+   iommus = < 0x454>;
+};
+
+_usb3_0 {
+   iommus = < 0x450>;
+};
+
+_usb3_1 {
+   iommus = < 0x451>;
+};
+
+_pcie0 {
+   iommu-map =
+   <0x00x480 0x20>,
+   <0x100  0x4a0 0x20>,
+   <0x200  0x4c0 0x20>;
+   iommu-map-mask = <0x031f>;


Nice! I do like a good compressed mapping :D


+};
diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi 
b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
index 7f9b9a647717..ded8b8082d79 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
@@ -56,6 +56,23 @@
compatible = "simple-bus";
ranges = <0x0 0x0 0xf000 0x100>;
  
+			smmu: iommu@500 {

+   compatible = "marvell,ap806-smmu-500", 
"arm,mmu-500";
+   reg = <0x10 0x10>;
+   dma-coherent;
+   #iommu-cells = <1>;
+   #global-interrupts = <1>;
+   interrupts = ,
+,
+,
+,
+,
+,
+,
+,
+;


I'd recommend you have the node disabled by default here, then 
explicitly enable it in armada-8040.dtsi where you add the Stream IDs. 
Otherwise it will also end up enabled for 8020, 70x0, etc. where 
disable_bypass will then catastrophically break everything.


Robin.


+   };
+
gic: interrupt-controller@21 {
compatible = "arm,gic-400";
#interrupt-cells = <3>;


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 3/4] dt-bindings: arm-smmu: add compatible string for Marvell Armada-AP806 SMMU-500

2020-07-03 Thread Robin Murphy

On 2020-07-02 21:16, Tomasz Nowicki wrote:

Add specific compatible string for Marvell usage due to errata of
accessing 64bits registers of ARM SMMU, in AP806.

AP806 SoC uses the generic ARM-MMU500, and there's no specific
implementation of Marvell, this compatible is used for errata only.

Signed-off-by: Hanna Hawa 
Signed-off-by: Gregory CLEMENT 
Signed-off-by: Tomasz Nowicki 
---
  Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 5 +
  1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index d7ceb4c34423..7beca9c00b12 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -38,6 +38,11 @@ properties:
- qcom,sc7180-smmu-500
- qcom,sdm845-smmu-500
- const: arm,mmu-500
+  - description: Marvell SoCs implementing "arm,mmu-500"
+items:
+  - enum:
+  - marvell,ap806-smmu-500


Isn't a single-valued enum just a constant? :P

Robin.


+  - const: arm,mmu-500
- items:
- const: arm,mmu-500
- const: arm,smmu-v2


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 2/4] iommu/arm-smmu: Workaround for Marvell Armada-AP806 SoC erratum #582743

2020-07-03 Thread Robin Murphy

On 2020-07-02 21:16, Tomasz Nowicki wrote:

From: Hanna Hawa 

Due to erratum #582743, the Marvell Armada-AP806 can't access 64bit to
ARM SMMUv2 registers.

Provide implementation relevant hooks:
- split the writeq/readq to two accesses of writel/readl.
- mask the MMU_IDR2.PTFSv8 fields to not use AArch64 format (but
only AARCH32_L) since with AArch64 format 32 bits access is not supported.

Note that separate writes/reads to 2 is not problem regards to
atomicity, because the driver use the readq/writeq while initialize
the SMMU, report for SMMU fault, and use spinlock in one
case (iova_to_phys).


The comment about the spinlock seems to be out of date, and TBH that 
whole sentence is a bit unclear - how about something like:


"Note that most 64-bit registers like TTBRn can be accessed as two 
32-bit halves without issue, and AArch32 format ensures that the 
register writes which must be atomic (for TLBI etc.) need only be 32-bit."



Signed-off-by: Hanna Hawa 
Signed-off-by: Gregory CLEMENT 
Signed-off-by: Tomasz Nowicki 
---
  Documentation/arm64/silicon-errata.rst |  3 ++
  drivers/iommu/arm-smmu-impl.c  | 52 ++
  2 files changed, 55 insertions(+)

diff --git a/Documentation/arm64/silicon-errata.rst 
b/Documentation/arm64/silicon-errata.rst
index 936cf2a59ca4..157214d3abe1 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -125,6 +125,9 @@ stable kernels.
  | Cavium | ThunderX2 Core  | #219| CAVIUM_TX2_ERRATUM_219 
 |
  
++-+-+-+
  
++-+-+-+
+| Marvell| ARM-MMU-500 | #582743 | N/A 
|
+++-+-+-+
+++-+-+-+
  | Freescale/NXP  | LS2080A/LS1043A | A-008585| FSL_ERRATUM_A008585
 |
  
++-+-+-+
  
++-+-+-+
diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index c75b9d957b70..c1fc5e1b8193 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -147,6 +147,53 @@ static const struct arm_smmu_impl arm_mmu500_impl = {
.reset = arm_mmu500_reset,
  };
  
+static u64 mrvl_mmu500_readq(struct arm_smmu_device *smmu, int page, int off)

+{
+   u64 val;
+
+   /*
+* Marvell Armada-AP806 erratum #582743.
+* Split all the readq to double readl
+*/
+   val = (u64)readl_relaxed(arm_smmu_page(smmu, page) + off + 4) << 32;
+   val |= readl_relaxed(arm_smmu_page(smmu, page) + off);


Even though io-64-nonatomic-hi-lo.h doesn't override readq() etc. for 
64-bit builds, you can still use hi_lo_readq_relaxed() directly.



+
+   return val;
+}
+
+static void mrvl_mmu500_writeq(struct arm_smmu_device *smmu, int page, int off,
+  u64 val)
+{
+   /*
+* Marvell Armada-AP806 erratum #582743.
+* Split all the writeq to double writel
+*/
+   writel_relaxed(upper_32_bits(val), arm_smmu_page(smmu, page) + off + 4);
+   writel_relaxed(lower_32_bits(val), arm_smmu_page(smmu, page) + off);


Similarly, hi_lo_writeq_relaxed().


+}
+
+static u32 mrvl_mmu500_cfg_id2_fixup(u32 id)
+{
+
+   /*
+* Armada-AP806 erratum #582743.
+* Hide the SMMU_IDR2.PTFSv8 fields to sidestep the AArch64
+* formats altogether and allow using 32 bits access on the
+* interconnect.
+*/
+   id &= ~(ARM_SMMU_ID2_PTFS_4K | ARM_SMMU_ID2_PTFS_16K |
+   ARM_SMMU_ID2_PTFS_64K);
+
+   return id;
+}
+
+static const struct arm_smmu_impl mrvl_mmu500_impl = {
+   .read_reg64 = mrvl_mmu500_readq,
+   .write_reg64 = mrvl_mmu500_writeq,
+   .cfg_id2_fixup = mrvl_mmu500_cfg_id2_fixup,
+   .reset = arm_mmu500_reset,
+};
+
  
  struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)

  {
@@ -160,6 +207,11 @@ struct arm_smmu_device *arm_smmu_impl_init(struct 
arm_smmu_device *smmu)
 */
switch (smmu->model) {
case ARM_MMU500:
+   if (of_device_is_compatible(smmu->dev->of_node,


Nit: there's a local "np" variable now.


+   "marvell,ap806-smmu-500")) {
+   smmu->impl = _mmu500_impl;
+   return smmu;
+   }


Please put this with the other integration checks below the switch 
statement. Yes, it means we'll end up assigning smmu->impl twice for 
this particular case, but that's the intended pattern.


Robin.


smmu->impl = _mmu500_impl;
break;
case 

Re: [PATCH v3 1/4] iommu/arm-smmu: Add SMMU ID2 register fixup hook

2020-07-03 Thread Robin Murphy

On 2020-07-02 21:16, Tomasz Nowicki wrote:

We already have 'cfg_probe' hook which meant to override and apply
workarounds while probing ID registers. However, 'cfg_probe' is called
at the very end and therefore for some cases fixing up things becomes complex
or requires exporting of SMMU driver structures. Hence, seems it is better and
cleaner to do ID fixup right away. In preparation for adding Marvell
errata add an extra ID2 fixup hook.


Hmm, the intent of ->cfg_probe was very much to give impl a chance to 
adjust the detected features before we start consuming them, with this 
exact case in mind. Since the Cavium quirk isn't actually doing that - 
it just needs to run *anywhere* in the whole probe process - I'm under 
no illusion that I put the hook in exactly the right place first time 
around ;)


The diff below should be more on the mark...

Robin.

->8-
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 243bc4cb2705..884ffca5b1eb 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1891,6 +1891,9 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)

smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
}

+   if (smmu->impl && smmu->impl->cfg_probe)
+   return smmu->impl->cfg_probe(smmu);
+
/* Now we've corralled the various formats, what'll it do? */
if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
@@ -1909,7 +1912,6 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)

dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
   smmu->pgsize_bitmap);

-
if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
   smmu->va_size, smmu->ipa_size);
@@ -1918,9 +1920,6 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)

dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
   smmu->ipa_size, smmu->pa_size);

-   if (smmu->impl && smmu->impl->cfg_probe)
-   return smmu->impl->cfg_probe(smmu);
-
return 0;
 }

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 01/12] iommu: Change type of pasid to u32

2020-07-03 Thread Felix Kuehling
Am 2020-07-02 um 3:10 p.m. schrieb Fenghua Yu:
> Hi, Felix, Thomas, Joerg and maintainers,
>
> On Tue, Jun 30, 2020 at 10:12:38PM -0400, Felix Kuehling wrote:
>> Am 2020-06-30 um 7:44 p.m. schrieb Fenghua Yu:
>> You didn't change the return types of amdgpu_pasid_alloc and
>> kfd_pasid_alloc. amdgpu_pasid_alloc returns int, because it can return
>> negative error codes. But kfd_pasid_alloc could be updated, because it
>> returns 0 for errors.
> I fixed return type as "u32" for kfd_pasid_alloc().

Thank you. The patch is

Acked-by: Felix Kuehling 



>
> The fix is minor and only limited in patch 1. So instead of sending the
> whole series, I only send the updated patch 1 here. If you want me to
> send the whole series with the fix, I can do that too.
>
> Thanks.
>
> -Fenghua
>
> From 4ff6c14bb0761dd97d803350d31f87edc4336345 Mon Sep 17 00:00:00 2001
> From: Fenghua Yu 
> Date: Mon, 4 May 2020 18:00:55 +
> Subject: [PATCH v5.1 01/12] iommu: Change type of pasid to u32
>
> PASID is defined as a few different types in iommu including "int",
> "u32", and "unsigned int". To be consistent and to match with uapi
> definitions, define PASID and its variations (e.g. max PASID) as "u32".
> "u32" is also shorter and a little more explicit than "unsigned int".
>
> No PASID type change in uapi although it defines PASID as __u64 in
> some places.
>
> Suggested-by: Thomas Gleixner 
> Signed-off-by: Fenghua Yu 
> Reviewed-by: Tony Luck 
> Reviewed-by: Lu Baolu 
> ---
> v5.1:
> - Change return type to u32 for kfd_pasid_alloc() (Felix)
>
> v5:
> - Reviewed by Lu Baolu
>
> v4:
> - Change PASID type from "unsigned int" to "u32" (Christoph)
>
> v2:
> - Create this new patch to define PASID as "unsigned int" consistently in
>   iommu (Thomas)
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  4 +--
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  4 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c   |  6 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h   |  4 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  8 ++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  8 ++---
>  .../gpu/drm/amd/amdkfd/cik_event_interrupt.c  |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h   |  2 +-
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  7 ++---
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  8 ++---
>  drivers/gpu/drm/amd/amdkfd/kfd_events.h   |  4 +--
>  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  6 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_pasid.c|  4 +--
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 20 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  2 +-
>  .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +-
>  drivers/iommu/amd/amd_iommu.h | 10 +++---
>  drivers/iommu/amd/iommu.c | 31 ++-
>  drivers/iommu/amd/iommu_v2.c  | 20 ++--
>  drivers/iommu/intel/dmar.c|  7 +++--
>  drivers/iommu/intel/intel-pasid.h | 24 +++---
>  drivers/iommu/intel/iommu.c   |  4 +--
>  drivers/iommu/intel/pasid.c   | 31 +--
>  drivers/iommu/intel/svm.c | 12 +++
>  drivers/iommu/iommu.c |  2 +-
>  drivers/misc/uacce/uacce.c|  2 +-
>  include/linux/amd-iommu.h |  8 ++---
>  include/linux/intel-iommu.h   | 12 +++
>  include/linux/intel-svm.h |  2 +-
>  include/linux/iommu.h | 10 +++---
>  include/linux/uacce.h |  2 +-
>  38 files changed, 141 insertions(+), 141 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index ffe149aafc39..dfef5a7e0f5a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -207,11 +207,11 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct 
> kgd_dev *dst, struct kgd_dev *s
>   })
>  
>  /* GPUVM API */
> -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int 
> pasid,
> +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid,
>   void **vm, void **process_info,
>   struct dma_fence **ef);
>  int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
> - struct file *filp, unsigned int pasid,
> + struct file *filp, u32 

RE: [PATCH v3 09/14] vfio/type1: Support binding guest page tables to PASID

2020-07-03 Thread Liu, Yi L
Hi Alex,

> From: Alex Williamson 
> Sent: Friday, July 3, 2020 5:20 AM
> 
> On Wed, 24 Jun 2020 01:55:22 -0700
> Liu Yi L  wrote:
> 
> > Nesting translation allows two-levels/stages page tables, with 1st
> > level for guest translations (e.g. GVA->GPA), 2nd level for host
> > translations (e.g. GPA->HPA). This patch adds interface for binding
> > guest page tables to a PASID. This PASID must have been allocated to
> > user space before the binding request.
> >
> > Cc: Kevin Tian 
> > CC: Jacob Pan 
> > Cc: Alex Williamson 
> > Cc: Eric Auger 
> > Cc: Jean-Philippe Brucker 
> > Cc: Joerg Roedel 
> > Cc: Lu Baolu 
> > Signed-off-by: Jean-Philippe Brucker 
> > Signed-off-by: Liu Yi L 
> > Signed-off-by: Jacob Pan 
> > ---
> > v2 -> v3:
> > *) use __iommu_sva_unbind_gpasid() for unbind call issued by VFIO
> > https://lore.kernel.org/linux-iommu/1592931837-58223-6-git-send-email-
> > jacob.jun@linux.intel.com/
> >
> > v1 -> v2:
> > *) rename subject from "vfio/type1: Bind guest page tables to host"
> > *) remove VFIO_IOMMU_BIND, introduce VFIO_IOMMU_NESTING_OP to
> support bind/
> >unbind guet page table
> > *) replaced vfio_iommu_for_each_dev() with a group level loop since this
> >series enforces one group per container w/ nesting type as start.
> > *) rename vfio_bind/unbind_gpasid_fn() to
> > vfio_dev_bind/unbind_gpasid_fn()
> > *) vfio_dev_unbind_gpasid() always successful
> > *) use vfio_mm->pasid_lock to avoid race between PASID free and page table
> >bind/unbind
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 169
> 
> >  drivers/vfio/vfio_pasid.c   |  30 +++
> >  include/linux/vfio.h|  20 +
> >  include/uapi/linux/vfio.h   |  30 +++
> >  4 files changed, 249 insertions(+)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c
> > b/drivers/vfio/vfio_iommu_type1.c index d0891c5..5926533 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -148,6 +148,33 @@ struct vfio_regions {
> >  #define DIRTY_BITMAP_PAGES_MAX  ((u64)INT_MAX)
> >  #define DIRTY_BITMAP_SIZE_MAX
> DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
> >
> > +struct domain_capsule {
> > +   struct vfio_group *group;
> > +   struct iommu_domain *domain;
> > +   void *data;
> > +};
> > +
> > +/* iommu->lock must be held */
> > +static struct vfio_group *vfio_find_nesting_group(struct vfio_iommu
> > +*iommu) {
> > +   struct vfio_domain *d;
> > +   struct vfio_group *g, *group = NULL;
> > +
> > +   if (!iommu->nesting_info)
> > +   return NULL;
> > +
> > +   /* only support singleton container with nesting type */
> > +   list_for_each_entry(d, >domain_list, next) {
> > +   list_for_each_entry(g, >group_list, next) {
> > +   if (!group) {
> > +   group = g;
> > +   break;
> > +   }
> 
> 
> We break out of the inner loop only to pointlessly continue in the outer loop
> when we could simply return g and remove the second group pointer altogether
> (use "group" instead of "g" if so).

how about below? :-)

/* only support singleton container with nesting type */
list_for_each_entry(d, >domain_list, next) {
list_for_each_entry(group, >group_list, next) {
break;
}
}

> 
> > +   }
> > +   }
> > +   return group;
> > +}
> > +
> >  static int put_pfn(unsigned long pfn, int prot);
> >
> >  static struct vfio_group *vfio_iommu_find_iommu_group(struct
> > vfio_iommu *iommu, @@ -2351,6 +2378,48 @@ static int
> vfio_iommu_resv_refresh(struct vfio_iommu *iommu,
> > return ret;
> >  }
> >
> > +static int vfio_dev_bind_gpasid_fn(struct device *dev, void *data) {
> > +   struct domain_capsule *dc = (struct domain_capsule *)data;
> > +   unsigned long arg = *(unsigned long *) dc->data;
> > +
> > +   return iommu_sva_bind_gpasid(dc->domain, dev, (void __user *) arg);
> > +}
> > +
> > +static int vfio_dev_unbind_gpasid_fn(struct device *dev, void *data)
> > +{
> > +   struct domain_capsule *dc = (struct domain_capsule *)data;
> > +   unsigned long arg = *(unsigned long *) dc->data;
> > +
> > +   iommu_sva_unbind_gpasid(dc->domain, dev, (void __user *) arg);
> > +   return 0;
> > +}
> > +
> > +static int __vfio_dev_unbind_gpasid_fn(struct device *dev, void
> > +*data) {
> > +   struct domain_capsule *dc = (struct domain_capsule *)data;
> > +   struct iommu_gpasid_bind_data *unbind_data =
> > +   (struct iommu_gpasid_bind_data *) dc->data;
> > +
> > +   __iommu_sva_unbind_gpasid(dc->domain, dev, unbind_data);
> > +   return 0;
> > +}
> > +
> > +static void vfio_group_unbind_gpasid_fn(ioasid_t pasid, void *data) {
> > +   struct domain_capsule *dc = (struct domain_capsule *) data;
> > +   struct iommu_gpasid_bind_data unbind_data;
> > +
> > +   unbind_data.argsz = offsetof(struct iommu_gpasid_bind_data, vendor);
> > +   

RE: [PATCH v3 06/14] vfio/type1: Add VFIO_IOMMU_PASID_REQUEST (alloc/free)

2020-07-03 Thread Liu, Yi L
Hi Alex,

> From: Alex Williamson 
> Sent: Friday, July 3, 2020 5:19 AM
> 
> On Wed, 24 Jun 2020 01:55:19 -0700
> Liu Yi L  wrote:
> 
> > This patch allows user space to request PASID allocation/free, e.g.
> > when serving the request from the guest.
> >
> > PASIDs that are not freed by userspace are automatically freed when
> > the IOASID set is destroyed when process exits.
> >
> > Cc: Kevin Tian 
> > CC: Jacob Pan 
> > Cc: Alex Williamson 
> > Cc: Eric Auger 
> > Cc: Jean-Philippe Brucker 
> > Cc: Joerg Roedel 
> > Cc: Lu Baolu 
> > Signed-off-by: Liu Yi L 
> > Signed-off-by: Yi Sun 
> > Signed-off-by: Jacob Pan 
> > ---
> > v1 -> v2:
> > *) move the vfio_mm related code to be a seprate module
> > *) use a single structure for alloc/free, could support a range of
> > PASIDs
> > *) fetch vfio_mm at group_attach time instead of at iommu driver open
> > time
> > ---
> >  drivers/vfio/Kconfig|  1 +
> >  drivers/vfio/vfio_iommu_type1.c | 96
> -
> >  drivers/vfio/vfio_pasid.c   | 10 +
> >  include/linux/vfio.h|  6 +++
> >  include/uapi/linux/vfio.h   | 36 
> >  5 files changed, 147 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index
> > 3d8a108..95d90c6 100644
> > --- a/drivers/vfio/Kconfig
> > +++ b/drivers/vfio/Kconfig
> > @@ -2,6 +2,7 @@
> >  config VFIO_IOMMU_TYPE1
> > tristate
> > depends on VFIO
> > +   select VFIO_PASID if (X86)
> > default n
> >
> >  config VFIO_IOMMU_SPAPR_TCE
> > diff --git a/drivers/vfio/vfio_iommu_type1.c
> > b/drivers/vfio/vfio_iommu_type1.c index 8c143d5..d0891c5 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -73,6 +73,7 @@ struct vfio_iommu {
> > boolv2;
> > boolnesting;
> > struct iommu_nesting_info *nesting_info;
> > +   struct vfio_mm  *vmm;
> 
> Structure alignment again.

sure. may get agreement in the prior email.

> 
> > booldirty_page_tracking;
> > boolpinned_page_dirty_scope;
> >  };
> > @@ -1933,6 +1934,17 @@ static void vfio_iommu_iova_insert_copy(struct
> > vfio_iommu *iommu,
> >
> > list_splice_tail(iova_copy, iova);
> >  }
> > +
> > +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> > +{
> > +   if (iommu->vmm) {
> > +   vfio_mm_put(iommu->vmm);
> > +   iommu->vmm = NULL;
> > +   }
> > +
> > +   kfree(iommu->nesting_info);
> 
> iommu->nesting_info = NULL;

got it.

> > +}
> > +
> >  static int vfio_iommu_type1_attach_group(void *iommu_data,
> >  struct iommu_group *iommu_group)
> { @@ -2067,6 +2079,25 @@
> > static int vfio_iommu_type1_attach_group(void *iommu_data,
> > goto out_detach;
> > }
> > iommu->nesting_info = info;
> > +
> > +   if (info->features & IOMMU_NESTING_FEAT_SYSWIDE_PASID) {
> > +   struct vfio_mm *vmm;
> > +   int sid;
> > +
> > +   vmm = vfio_mm_get_from_task(current);
> > +   if (IS_ERR(vmm)) {
> > +   ret = PTR_ERR(vmm);
> > +   goto out_detach;
> > +   }
> > +   iommu->vmm = vmm;
> > +
> > +   sid = vfio_mm_ioasid_sid(vmm);
> > +   ret = iommu_domain_set_attr(domain->domain,
> > +   DOMAIN_ATTR_IOASID_SID,
> > +   );
> 
> This looks pretty dicey in the case of !CONFIG_VFIO_PASID, can we get here in
> that case?  If so it looks like we're doing bad things with setting the 
> domain-
> >ioasid_sid.

I guess not. So far, vfio_iommu_type1 will select CONFIG_VFIO_PASID for X86.
do you think it is enough?

> 
> > +   if (ret)
> > +   goto out_detach;
> > +   }
> > }
> >
> > /* Get aperture info */
> > @@ -2178,7 +2209,8 @@ static int vfio_iommu_type1_attach_group(void
> *iommu_data,
> > return 0;
> >
> >  out_detach:
> > -   kfree(iommu->nesting_info);
> > +   if (iommu->nesting_info)
> > +   vfio_iommu_release_nesting_info(iommu);
> 
> Make vfio_iommu_release_nesting_info() check iommu->nesting_info, then call
> it unconditionally?

got it. :-)

> > vfio_iommu_detach_group(domain, group);
> >  out_domain:
> > iommu_domain_free(domain->domain);
> > @@ -2380,7 +2412,8 @@ static void vfio_iommu_type1_detach_group(void
> *iommu_data,
> > else
> >
>   vfio_iommu_unmap_unpin_reaccount(iommu);
> >
> > -   kfree(iommu->nesting_info);
> > +   if (iommu->nesting_info)
> > +
>   vfio_iommu_release_nesting_info(iommu);
> > }
> > iommu_domain_free(domain->domain);
> >  

RE: [PATCH v3 04/14] vfio: Add PASID allocation/free support

2020-07-03 Thread Liu, Yi L
Hi Alex,

> From: Alex Williamson 
> Sent: Friday, July 3, 2020 5:17 AM
> 
> On Wed, 24 Jun 2020 01:55:17 -0700
> Liu Yi L  wrote:
> 
> > Shared Virtual Addressing (a.k.a Shared Virtual Memory) allows sharing
> > multiple process virtual address spaces with the device for simplified
> > programming model. PASID is used to tag an virtual address space in
> > DMA requests and to identify the related translation structure in
> > IOMMU. When a PASID-capable device is assigned to a VM, we want the
> > same capability of using PASID to tag guest process virtual address
> > spaces to achieve virtual SVA (vSVA).
> >
> > PASID management for guest is vendor specific. Some vendors (e.g.
> > Intel
> > VT-d) requires system-wide managed PASIDs cross all devices,
> > regardless of whether a device is used by host or assigned to guest.
> > Other vendors (e.g. ARM SMMU) may allow PASIDs managed per-device thus
> > could be fully delegated to the guest for assigned devices.
> >
> > For system-wide managed PASIDs, this patch introduces a vfio module to
> > handle explicit PASID alloc/free requests from guest. Allocated PASIDs
> > are associated to a process (or, mm_struct) in IOASID core. A vfio_mm
> > object is introduced to track mm_struct. Multiple VFIO containers
> > within a process share the same vfio_mm object.
> >
> > A quota mechanism is provided to prevent malicious user from
> > exhausting available PASIDs. Currently the quota is a global parameter
> > applied to all VFIO devices. In the future per-device quota might be 
> > supported
> too.
> >
> > Cc: Kevin Tian 
> > CC: Jacob Pan 
> > Cc: Eric Auger 
> > Cc: Jean-Philippe Brucker 
> > Cc: Joerg Roedel 
> > Cc: Lu Baolu 
> > Suggested-by: Alex Williamson 
> > Signed-off-by: Liu Yi L 
> > ---
> > v1 -> v2:
> > *) added in v2, split from the pasid alloc/free support of v1
> > ---
> >  drivers/vfio/Kconfig  |   5 ++
> >  drivers/vfio/Makefile |   1 +
> >  drivers/vfio/vfio_pasid.c | 151
> ++
> >  include/linux/vfio.h  |  28 +
> >  4 files changed, 185 insertions(+)
> >  create mode 100644 drivers/vfio/vfio_pasid.c
> >
> > diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index
> > fd17db9..3d8a108 100644
> > --- a/drivers/vfio/Kconfig
> > +++ b/drivers/vfio/Kconfig
> > @@ -19,6 +19,11 @@ config VFIO_VIRQFD
> > depends on VFIO && EVENTFD
> > default n
> >
> > +config VFIO_PASID
> > +   tristate
> > +   depends on IOASID && VFIO
> > +   default n
> > +
> >  menuconfig VFIO
> > tristate "VFIO Non-Privileged userspace driver framework"
> > depends on IOMMU_API
> > diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index
> > de67c47..bb836a3 100644
> > --- a/drivers/vfio/Makefile
> > +++ b/drivers/vfio/Makefile
> > @@ -3,6 +3,7 @@ vfio_virqfd-y := virqfd.o
> >
> >  obj-$(CONFIG_VFIO) += vfio.o
> >  obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
> > +obj-$(CONFIG_VFIO_PASID) += vfio_pasid.o
> >  obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
> >  obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
> >  obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o diff --git
> > a/drivers/vfio/vfio_pasid.c b/drivers/vfio/vfio_pasid.c new file mode
> > 100644 index 000..dd5b6d1
> > --- /dev/null
> > +++ b/drivers/vfio/vfio_pasid.c
> > @@ -0,0 +1,151 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2020 Intel Corporation.
> > + * Author: Liu Yi L 
> > + *
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#define DRIVER_VERSION  "0.1"
> > +#define DRIVER_AUTHOR   "Liu Yi L "
> > +#define DRIVER_DESC "PASID management for VFIO bus drivers"
> > +
> > +#define VFIO_DEFAULT_PASID_QUOTA   1000
> > +static int pasid_quota = VFIO_DEFAULT_PASID_QUOTA;
> > +module_param_named(pasid_quota, pasid_quota, uint, 0444);
> > +MODULE_PARM_DESC(pasid_quota,
> > +" Set the quota for max number of PASIDs that an application is
> > +allowed to request (default 1000)");
> > +
> > +struct vfio_mm_token {
> > +   unsigned long long val;
> > +};
> > +
> > +struct vfio_mm {
> > +   struct kref kref;
> > +   struct vfio_mm_tokentoken;
> > +   int ioasid_sid;
> > +   int pasid_quota;
> > +   struct list_headnext;
> > +};
> > +
> > +static struct vfio_pasid {
> > +   struct mutexvfio_mm_lock;
> > +   struct list_headvfio_mm_list;
> > +} vfio_pasid;
> > +
> > +/* called with vfio.vfio_mm_lock held */ static void
> > +vfio_mm_release(struct kref *kref) {
> > +   struct vfio_mm *vmm = container_of(kref, struct vfio_mm, kref);
> > +
> > +   list_del(>next);
> > +   mutex_unlock(_pasid.vfio_mm_lock);
> > +   ioasid_free_set(vmm->ioasid_sid, true);
> > +   kfree(vmm);
> > +}
> > +
> > +void vfio_mm_put(struct vfio_mm *vmm) {
> > +   kref_put_mutex(>kref, vfio_mm_release,
> > +_pasid.vfio_mm_lock); }
> > +
> > 

RE: [PATCH v3 03/14] vfio/type1: Report iommu nesting info to userspace

2020-07-03 Thread Liu, Yi L
Hi Alex,

> From: Alex Williamson < alex.william...@redhat.com >
> Sent: Friday, July 3, 2020 2:39 AM
> 
> On Wed, 24 Jun 2020 01:55:16 -0700
> Liu Yi L  wrote:
> 
> > This patch exports iommu nesting capability info to user space through
> > VFIO. User space is expected to check this info for supported uAPIs (e.g.
> > PASID alloc/free, bind page table, and cache invalidation) and the
> > vendor specific format information for first level/stage page table
> > that will be bound to.
> >
> > The nesting info is available only after the nesting iommu type is set
> > for a container. Current implementation imposes one limitation - one
> > nesting container should include at most one group. The philosophy of
> > vfio container is having all groups/devices within the container share
> > the same IOMMU context. When vSVA is enabled, one IOMMU context could
> > include one 2nd-level address space and multiple 1st-level address spaces.
> > While the 2nd-leve address space is reasonably sharable by multiple
> > groups , blindly sharing 1st-level address spaces across all groups
> > within the container might instead break the guest expectation. In the
> > future sub/ super container concept might be introduced to allow
> > partial address space sharing within an IOMMU context. But for now
> > let's go with this restriction by requiring singleton container for
> > using nesting iommu features. Below link has the related discussion
> > about this
> > decision.
> >
> > https://lkml.org/lkml/2020/5/15/1028
> >
> > Cc: Kevin Tian 
> > CC: Jacob Pan 
> > Cc: Alex Williamson 
> > Cc: Eric Auger 
> > Cc: Jean-Philippe Brucker 
> > Cc: Joerg Roedel 
> > Cc: Lu Baolu 
> > Signed-off-by: Liu Yi L 
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 73
> +
> >  include/uapi/linux/vfio.h   |  9 +
> >  2 files changed, 82 insertions(+)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c
> > b/drivers/vfio/vfio_iommu_type1.c index 7accb59..8c143d5 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -72,6 +72,7 @@ struct vfio_iommu {
> > uint64_tpgsize_bitmap;
> > boolv2;
> > boolnesting;
> > +   struct iommu_nesting_info *nesting_info;
> > booldirty_page_tracking;
> > boolpinned_page_dirty_scope;
> >  };
> 
> Mind the structure packing and alignment, placing a pointer in the middle
> of a
> section of bools is going to create wasteful holes in the data structure.

how about below? Add the @nesting_info and @vmm in the end of this struct.
I've two questions, the first one is how the place the comment of the
@external_domain; second question is do you want me to move the @nesting
field to be near-by with the @nesting_info and @vmm. :) please let me know
your preference.

struct vfio_iommu {
struct list_headdomain_list;
struct list_headiova_list;
struct vfio_domain  *external_domain; /* domain for 
external user */
struct mutexlock;
struct rb_root  dma_list;
struct blocking_notifier_head   notifier;
unsigned intdma_avail;
uint64_tpgsize_bitmap;
boolv2;
boolnesting;
booldirty_page_tracking;
boolpinned_page_dirty_scope;
struct iommu_nesting_info   *nesting_info;
struct vfio_mm  *vmm;
};

> > @@ -130,6 +131,9 @@ struct vfio_regions {
> >  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\
> > (!list_empty(>domain_list))
> >
> > +#define IS_DOMAIN_IN_CONTAINER(iommu)  ((iommu-
> >external_domain) || \
> > +(!list_empty(>domain_list)))
> > +
> >  #define DIRTY_BITMAP_BYTES(n)  (ALIGN(n, BITS_PER_TYPE(u64)) /
> BITS_PER_BYTE)
> >
> >  /*
> > @@ -1959,6 +1963,12 @@ static int vfio_iommu_type1_attach_group(void
> *iommu_data,
> > }
> > }
> >
> > +   /* Nesting type container can include only one group */
> > +   if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
> > +   mutex_unlock(>lock);
> > +   return -EINVAL;
> > +   }
> > +
> > group = kzalloc(sizeof(*group), GFP_KERNEL);
> > domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> > if (!group || !domain) {
> > @@ -2029,6 +2039,36 @@ static int vfio_iommu_type1_attach_group(void
> *iommu_data,
> > if (ret)
> > goto out_domain;
> >
> > +   /* Nesting cap info is available only after attaching */
> > +   if (iommu->nesting) {
> > +   struct iommu_nesting_info tmp;
> > +   struct iommu_nesting_info *info;
> > +
> > +   /* First get the size of vendor specific