Re: [PATCH 3/4] iommu: remove the put_resv_regions method

2022-07-10 Thread Baolu Lu

On 2022/7/8 17:33, Christoph Hellwig wrote:

On Fri, Jul 08, 2022 at 05:00:59PM +0800, Baolu Lu wrote:

Do we really need to export this symbol? It is not used beyond the iommu
core code.


virtio-iommu calls it and can be modular.


Yes. Thanks for the explanation.

Reviewed-by: Lu Baolu 

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/4] iommu: remove the unused dev_has_feat method

2022-07-10 Thread Baolu Lu

On 2022/7/8 16:06, Christoph Hellwig wrote:

This method is never actually called.

Signed-off-by: Christoph Hellwig 


Reviewed-by: Lu Baolu 

Best regards,
baolu


---
  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 -
  include/linux/iommu.h   | 4 +---
  2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index d9c1623ec1a9a..1b6c17dd81ee4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2853,7 +2853,6 @@ static struct iommu_ops arm_smmu_ops = {
.of_xlate   = arm_smmu_of_xlate,
.get_resv_regions   = arm_smmu_get_resv_regions,
.put_resv_regions   = generic_iommu_put_resv_regions,
-   .dev_has_feat   = arm_smmu_dev_has_feature,
.dev_feat_enabled   = arm_smmu_dev_feature_enabled,
.dev_enable_feat= arm_smmu_dev_enable_feature,
.dev_disable_feat   = arm_smmu_dev_disable_feature,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e6abd998dbe73..a3acdb46b9391 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -164,8 +164,7 @@ struct iommu_iort_rmr_data {
   * supported, this feature must be enabled before and
   * disabled after %IOMMU_DEV_FEAT_SVA.
   *
- * Device drivers query whether a feature is supported using
- * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature().
+ * Device drivers enable a feature using iommu_dev_enable_feature().
   */
  enum iommu_dev_features {
IOMMU_DEV_FEAT_SVA,
@@ -248,7 +247,6 @@ struct iommu_ops {
bool (*is_attach_deferred)(struct device *dev);
  
  	/* Per device IOMMU features */

-   bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f);
bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f);
int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/4] iommu: remove iommu_dev_feature_enabled

2022-07-10 Thread Baolu Lu

On 2022/7/8 16:06, Christoph Hellwig wrote:

Remove the unused iommu_dev_feature_enabled function.

Signed-off-by: Christoph Hellwig 


A nit comment below, anyway

Reviewed-by: Lu Baolu 


---
  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  1 -
  drivers/iommu/iommu.c   | 13 -
  include/linux/iommu.h   |  9 -
  3 files changed, 23 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 1b6c17dd81ee4..4d30a8d2bc236 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2853,7 +2853,6 @@ static struct iommu_ops arm_smmu_ops = {
.of_xlate   = arm_smmu_of_xlate,
.get_resv_regions   = arm_smmu_get_resv_regions,
.put_resv_regions   = generic_iommu_put_resv_regions,
-   .dev_feat_enabled   = arm_smmu_dev_feature_enabled,
.dev_enable_feat= arm_smmu_dev_enable_feature,
.dev_disable_feat   = arm_smmu_dev_disable_feature,
.sva_bind   = arm_smmu_sva_bind,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0aa141646bdf4..1bb016a6a2aa1 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2760,19 +2760,6 @@ int iommu_dev_disable_feature(struct device *dev, enum 
iommu_dev_features feat)
  }
  EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
  
-bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)

-{
-   if (dev->iommu && dev->iommu->iommu_dev) {
-   const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
-
-   if (ops->dev_feat_enabled)
-   return ops->dev_feat_enabled(dev, feat);
-   }
-
-   return false;
-}
-EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
-
  /**
   * iommu_sva_bind_device() - Bind a process address space to a device
   * @dev: the device
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a3acdb46b9391..0bc2eb14b0262 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -215,7 +215,6 @@ struct iommu_iotlb_gather {
   *  driver init to device driver init (default no)
   * @dev_has/enable/disable_feat: per device entries to check/enable/disable

^^^

dev_has_feat() has been removed by the previous patch.


   *   iommu specific features.
- * @dev_feat_enabled: check enabled feature
   * @sva_bind: Bind process address space to device
   * @sva_unbind: Unbind process address space from device
   * @sva_get_pasid: Get PASID associated to a SVA handle
@@ -247,7 +246,6 @@ struct iommu_ops {
bool (*is_attach_deferred)(struct device *dev);
  
  	/* Per device IOMMU features */

-   bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f);
int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
  
@@ -670,7 +668,6 @@ void iommu_release_device(struct device *dev);
  
  int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);

  int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
-bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f);
  
  struct iommu_sva *iommu_sva_bind_device(struct device *dev,

struct mm_struct *mm,
@@ -997,12 +994,6 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct 
fwnode_handle *fwnode)
return NULL;
  }
  
-static inline bool

-iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
-{
-   return false;
-}
-
  static inline int
  iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
  {


Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/4] iommu: remove the put_resv_regions method

2022-07-10 Thread Baolu Lu

On 2022/7/8 16:06, Christoph Hellwig wrote:

-void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list)
+void iommu_put_resv_regions(struct device *dev, struct list_head *list)
  {
struct iommu_resv_region *entry, *next;
  
@@ -2610,7 +2597,7 @@ void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list)

kfree(entry);
}
  }
-EXPORT_SYMBOL(generic_iommu_put_resv_regions);
+EXPORT_SYMBOL(iommu_put_resv_regions);


Do we really need to export this symbol? It is not used beyond the iommu
core code.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 01/11] iommu/vt-d: debugfs: Remove device_domain_lock usage

2022-07-07 Thread Baolu Lu

On 2022/7/7 16:30, Ethan Zhao wrote:

-static int show_device_domain_translation(struct device *dev, void *data)
+static int __show_device_domain_translation(struct device *dev, void *data)
  {
-   struct device_domain_info *info = dev_iommu_priv_get(dev);
-   struct dmar_domain *domain = info->domain;
+   struct dmar_domain *domain;
struct seq_file *m = data;
u64 path[6] = { 0 };
  
+	domain = to_dmar_domain(iommu_get_domain_for_dev(dev));

if (!domain)
return 0;
  
@@ -359,20 +359,39 @@ static int show_device_domain_translation(struct device *dev, void *data)

pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
seq_putc(m, '\n');
  
-	return 0;

+   /* Don't iterate */
+   return 1;
  }


Using this return value trick to change the caller's behaviour doesn't seem
to save anything, but it really cost me a few extra seconds to work out the
*incantation* -- 'Don't iterate' :) .


This is defined by iommu_group_for_each_dev(). Return value 0 means
continuing to next one, while non-zero means stopping iteration.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 3/6] iommu/vt-d: Refactor iommu information of each domain

2022-07-06 Thread Baolu Lu

On 2022/7/7 09:01, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Saturday, July 2, 2022 9:56 AM

-out_unlock:
+   set_bit(num, iommu->domain_ids);
+   info->refcnt = 1;
+   info->did= num;
+   info->iommu  = iommu;
+   domain->nid  = iommu->node;


One nit. this line should be removed as it's incorrect to blindly update
domain->nid and we should just leave to domain_update_iommu_cap()
to decide the right node. Otherwise this causes a policy conflict as
here it is the last attached device deciding the node which is different
from domain_update_iommu_cap() which picks the node of the first
attached device.


Agreed and updated. Thank you!



Otherwise,

Reviewed-by: Kevin Tian 


Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 03/14] iommu: Move bus setup to IOMMU device registration

2022-07-04 Thread Baolu Lu

Hi Robin,

On 2022/4/30 02:06, Robin Murphy wrote:

On 29/04/2022 9:50 am, Robin Murphy wrote:

On 2022-04-29 07:57, Baolu Lu wrote:

Hi Robin,

On 2022/4/28 21:18, Robin Murphy wrote:

Move the bus setup to iommu_device_register(). This should allow
bus_iommu_probe() to be correctly replayed for multiple IOMMU 
instances,

and leaves bus_set_iommu() as a glorified no-op to be cleaned up next.


I re-fetched the latest patches on

https://gitlab.arm.com/linux-arm/linux-rm/-/commits/iommu/bus

and rolled back the head to "iommu: Cleanup bus_set_iommu".

The test machine still hangs during boot.

I went through the code. It seems that the .probe_device for Intel IOMMU
driver can't handle the probe replay well. It always assumes that the
device has never been probed.


Hmm, but probe_iommu_group() is supposed to prevent the 
__iommu_probe_device() call even happening if the device *has* already 
been probed before :/


I've still got an old Intel box spare in the office so I'll rig that 
up and see if I can see what might be going on here...


OK, on a Xeon with two DMAR units, this seems to boot OK with or without 
patch #1, so it doesn't seem to be a general problem with replaying in 
iommu_device_register(), or with platform devices. Not sure where to go 
from here... :/


The kernel boot panic message:

[6.639432] BUG: kernel NULL pointer dereference, address: 
0028

[6.743829] #PF: supervisor read access in kernel mode
[6.743829] #PF: error_code(0x) - not-present page
[6.743829] PGD 0
[6.743829] Oops:  [#1] PREEMPT SMP NOPTI
[6.743829] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G  I 
 5.19.0-rc3+ #182

[6.743829] RIP: 0010:__iommu_probe_device+0x115/0x200
[6.743829] Code: 89 ff e8 3e e1 ff ff 48 85 c0 0f 85 47 ff ff ff 41 
bd f4 ff ff ff eb 83 48 8b 83 d8 02 00 00 48 89 df 48 8b 40 38 48 8b 40 
10 <48> 8b 40 28 ff d0 0f 1f 00 48 89 c1 48 85 c0 0f 84 b7 00 00 00 48

[6.743829] RSP: :ff30605c00063d40 EFLAGS: 00010246
[6.743829] RAX:  RBX: ff128b9c5fdc90d0 RCX: 

[6.743829] RDX: 8001 RSI: 0246 RDI: 
ff128b9c5fdc90d0
[6.743829] RBP: b60c34e0 R08: b68664d0 R09: 
ff128b9501d4ce40
[6.743829] R10: b6267096 R11: ff128b950014c267 R12: 
ff30605c00063de0
[6.743829] R13: 001b9d28 R14: ff128b95001b9d28 R15: 
ff128b9c5fdc93a8
[6.743829] FS:  () GS:ff128b9c5fc0() 
knlGS:

[6.743829] CS:  0010 DS:  ES:  CR0: 80050033
[6.743829] CR2: 0028 CR3: 000149210001 CR4: 
00771ef0
[6.743829] DR0:  DR1:  DR2: 

[6.743829] DR3:  DR6: 07f0 DR7: 
0400

[6.743829] PKRU: 5554
[6.743829] Call Trace:
[6.743829]  
[6.743829]  ? _raw_spin_lock_irqsave+0x17/0x40
[6.743829]  ? __iommu_probe_device+0x200/0x200
[6.743829]  probe_iommu_group+0x2d/0x40
[6.743829]  bus_for_each_dev+0x74/0xc0
[6.743829]  bus_iommu_probe+0x48/0x2d0
[6.743829]  iommu_device_register+0xde/0x120
[6.743829]  intel_iommu_init+0x35f/0x5f2
[6.743829]  ? iommu_setup+0x27d/0x27d
[6.743829]  ? rdinit_setup+0x2c/0x2c
[6.743829]  pci_iommu_init+0xe/0x32
[6.743829]  do_one_initcall+0x41/0x200
[6.743829]  kernel_init_freeable+0x1de/0x228
[6.743829]  ? rest_init+0xc0/0xc0
[6.743829]  kernel_init+0x16/0x120
[6.743829]  ret_from_fork+0x1f/0x30
[6.743829]  
[6.743829] Modules linked in:
[6.743829] CR2: 0028
[6.743829] ---[ end trace  ]---
[6.743829] RIP: 0010:__iommu_probe_device+0x115/0x200
[6.743829] Code: 89 ff e8 3e e1 ff ff 48 85 c0 0f 85 47 ff ff ff 41 
bd f4 ff ff ff eb 83 48 8b 83 d8 02 00 00 48 89 df 48 8b 40 38 48 8b 40 
10 <48> 8b 40 28 ff d0 0f 1f 00 48 89 c1 48 85 c0 0f 84 b7 00 00 00 48

[6.743829] RSP: :ff30605c00063d40 EFLAGS: 00010246
[6.743829] RAX:  RBX: ff128b9c5fdc90d0 RCX: 

[6.743829] RDX: 8001 RSI: 0246 RDI: 
ff128b9c5fdc90d0
[6.743829] RBP: b60c34e0 R08: b68664d0 R09: 
ff128b9501d4ce40
[6.743829] R10: b6267096 R11: ff128b950014c267 R12: 
ff30605c00063de0
[6.743829] R13: 001b9d28 R14: ff128b95001b9d28 R15: 
ff128b9c5fdc93a8
[6.743829] FS:  () GS:ff128b9c5fc0() 
knlGS:

[6.743829] CS:  0010 DS:  ES:  CR0: 80050033
[6.743829] CR2: 0028 CR3: 000149210001 CR4: 
00771ef0
[6.743829] DR0:  DR1:  DR2: 

[6.743829] DR3:  DR6: 07f0 DR7: 
0400

[6.743829] PKRU: 5554
[6.743829] Kernel panic - not syncing: Fatal exception
[6.743829] ---[ end Kern

Re: [PATCH v3 10/11] iommu/vt-d: Use device_domain_lock accurately

2022-07-02 Thread Baolu Lu

On 2022/7/1 16:15, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Wednesday, June 29, 2022 3:47 PM

+   spin_lock_irqsave(_domain_lock, flags);
list_for_each_entry(info, >devices, link) {
-   if (!info->dev)
-   continue;
-


suppose you can replace all spin_lock_irqsave() with spin_lock()
in patch5 instead of leaving some replacement to next patch.



Makes sense to me. Will update this.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 02/11] iommu/vt-d: Remove clearing translation data in disable_dmar_iommu()

2022-07-02 Thread Baolu Lu

On 2022/7/1 15:58, Tian, Kevin wrote:

From: Lu Baolu  Sent: Wednesday, June 29,
2022 3:47 PM

The disable_dmar_iommu() is called when IOMMU initialization fails
or the IOMMU is hot-removed from the system. In both cases, there
is no need to clear the IOMMU translation data structures for
devices.

On the initialization path, the device probing only happens after
the IOMMU is initialized successfully, hence there're no
translation data structures.

On the hot-remove path, there is no real use case where the IOMMU
is hot-removed, but the devices that it manages are still alive in
the system. The translation data structures were torn down during
device release, hence there's no need to repeat it in IOMMU
hot-remove path either. This removes the unnecessary code and only
leaves a check.

Signed-off-by: Lu Baolu 


You probably overlooked my last comment on kexec:

https://lore.kernel.org/lkml/bl1pr11mb52711a71ad9f11b7ae42694c8c...@bl1pr11mb5271.namprd11.prod.outlook.com/

 I think my question is still not answered.


Sorry! I did overlook that comment. I can see your points now, though it
seems to be irrelevant to the problems that this series tries to solve.

The failure path of copying table still needs some improvement. At least
the pages allocated for root/context tables should be freed in the
failure path. Even worse, the software occupies a bit of the page table
entry, which was feasible for the old ECS but no longer works for the new
scalable mode.

All these problems deserve a separate series. We could address your
concerns there. Does this work for you?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 10/11] iommu/vt-d: Use device_domain_lock accurately

2022-07-01 Thread Baolu Lu

On 2022/7/1 16:15, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Wednesday, June 29, 2022 3:47 PM

+   spin_lock_irqsave(_domain_lock, flags);
list_for_each_entry(info, >devices, link) {
-   if (!info->dev)
-   continue;
-


suppose you can replace all spin_lock_irqsave() with spin_lock()
in patch5 instead of leaving some replacement to next patch.



Makes sense. I will update the series.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 00/11] iommu/vt-d: Optimize the use of locks

2022-07-01 Thread Baolu Lu

On 2022/7/1 15:53, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Wednesday, June 29, 2022 3:47 PM

v3:
  - Split reduction of lock ranges from changing irqsave.
https://lore.kernel.org/linux-
iommu/BN9PR11MB52760A3D7C6BF1AF9C9D34658CAA9@BN9PR11MB5276.
namprd11.prod.outlook.com/
  - Fully initialize the dev_info before adding it to the list.
https://lore.kernel.org/linux-
iommu/BN9PR11MB52764D7CD86448C5E4EB46668CAA9@BN9PR11MB5276.
namprd11.prod.outlook.com/
  - Various code and comments refinement.



This doesn't say why original patch2 was removed:

"iommu/vt-d: Remove for_each_device_domain()"

It took me a while to realize that it's already covered by another of your
patches fixing RID2PASID. 


My fault! I forgot to mention it in the change log. Sorry about it.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v2 5/7] iommu/vt-d: Fix suspicious RCU usage in probe_acpi_namespace_devices()

2022-07-01 Thread Baolu Lu

On 2022/6/29 21:03, Robin Murphy wrote:

On 2019-06-12 01:28, Lu Baolu wrote:

The drhd and device scope list should be iterated with the
iommu global lock held. Otherwise, a suspicious RCU usage
message will be displayed.

[    3.695886] =
[    3.695917] WARNING: suspicious RCU usage
[    3.695950] 5.2.0-rc2+ #2467 Not tainted
[    3.695981] -
[    3.696014] drivers/iommu/intel-iommu.c:4569 suspicious 
rcu_dereference_check() usage!

[    3.696069]
    other info that might help us debug this:

[    3.696126]
    rcu_scheduler_active = 2, debug_locks = 1
[    3.696173] no locks held by swapper/0/1.
[    3.696204]
    stack backtrace:
[    3.696241] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.2.0-rc2+ #2467
[    3.696370] Call Trace:
[    3.696404]  dump_stack+0x85/0xcb
[    3.696441]  intel_iommu_init+0x128c/0x13ce
[    3.696478]  ? kmem_cache_free+0x16b/0x2c0
[    3.696516]  ? __fput+0x14b/0x270
[    3.696550]  ? __call_rcu+0xb7/0x300
[    3.696583]  ? get_max_files+0x10/0x10
[    3.696631]  ? set_debug_rodata+0x11/0x11
[    3.696668]  ? e820__memblock_setup+0x60/0x60
[    3.696704]  ? pci_iommu_init+0x16/0x3f
[    3.696737]  ? set_debug_rodata+0x11/0x11
[    3.696770]  pci_iommu_init+0x16/0x3f
[    3.696805]  do_one_initcall+0x5d/0x2e4
[    3.696844]  ? set_debug_rodata+0x11/0x11
[    3.696880]  ? rcu_read_lock_sched_held+0x6b/0x80
[    3.696924]  kernel_init_freeable+0x1f0/0x27c
[    3.696961]  ? rest_init+0x260/0x260
[    3.696997]  kernel_init+0xa/0x110
[    3.697028]  ret_from_fork+0x3a/0x50

Fixes: fa212a97f3a36 ("iommu/vt-d: Probe DMA-capable ACPI name space 
devices")

Signed-off-by: Lu Baolu 
---
  drivers/iommu/intel-iommu.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 19c4c387a3f6..84e650c6a46d 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4793,8 +4793,10 @@ int __init intel_iommu_init(void)
  cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
    intel_iommu_cpu_dead);
+    down_read(_global_lock);
  if (probe_acpi_namespace_devices())
  pr_warn("ACPI name space devices didn't probe correctly\n");
+    up_read(_global_lock);


Doing a bit of archaeology here, is this actually broken? If any ANDD 
entries exist, we'd end up doing:


   down_read(_global_lock)
   probe_acpi_namespace_devices()
   -> iommu_probe_device()
  -> iommu_create_device_direct_mappings()
     -> iommu_get_resv_regions()
    -> intel_iommu_get_resv_regions()
   -> down_read(_global_lock)

I'm wondering whether this might explain why my bus_set_iommu series 
prevented Baolu's machine from booting, since "iommu: Move bus setup to 
IOMMU device registration" creates the same condition where we end up in 
get_resv_regions (via bus_iommu_probe() this time) from the same task 
that already holds dmar_global_lock. Of course that leaves me wondering 
how it *did* manage to boot OK on my Xeon box, but maybe there's a 
config difference or dumb luck at play?


This is really problematic. Where does the latest bus_set_iommu series
locate? I'd like to take a closer look at what happened here. Perhaps
two weeks later? I'm busy with preparing Intel IOMMU patches for v5.20
these days.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v4 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-07-01 Thread Baolu Lu

On 2022/7/1 04:36, Nicolin Chen wrote:

Cases like VFIO wish to attach a device to an existing domain that was
not allocated specifically from the device. This raises a condition
where the IOMMU driver can fail the domain attach because the domain and
device are incompatible with each other.

This is a soft failure that can be resolved by using a different domain.

Provide a dedicated errno from the IOMMU driver during attach that the
reason attach failed is because of domain incompatibility. EMEDIUMTYPE
is chosen because it is never used within the iommu subsystem today and
evokes a sense that the 'medium' aka the domain is incompatible.

VFIO can use this to know attach is a soft failure and it should continue
searching. Otherwise the attach will be a hard failure and VFIO will
return the code to userspace.

Update all drivers to return EMEDIUMTYPE in their failure paths that are
related to domain incompatibility. Also remove adjacent error prints for
these soft failures, to prevent kernel log spam, since -EMEDIUMTYPE is
clear enough to indicate an incompatibility error.

Add kdocs describing this behavior.

Suggested-by: Jason Gunthorpe
Reviewed-by: Kevin Tian
Signed-off-by: Nicolin Chen


Reviewed-by: Lu Baolu 

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v1 4/6] iommu/vt-d: Add VTD_FLAG_IOMMU_PROBED flag

2022-06-30 Thread Baolu Lu

On 6/30/22 4:29 PM, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Saturday, June 25, 2022 8:52 PM

In the IOMMU hot-add path, there's a need to check whether an IOMMU
has been probed. Instead of checking the IOMMU pointer in the global
list, it's better to allocate a flag bit in iommu->flags for this
purpose.


Sorry I didn't get the point of original check. This is the hotplug path
hence the caller of this function should already figure out it's a new
iommu before reaching this point?



Neither did I. It was added by the commit below without any comment about
this check.

commit ffebeb46dd34736c90ffbca1ccb0bef8f4827c44
Author: Jiang Liu 
Date:   Sun Nov 9 22:48:02 2014 +0800

iommu/vt-d: Enhance intel-iommu driver to support DMAR unit hotplug

Implement required callback functions for intel-iommu driver
to support DMAR unit hotplug.

Signed-off-by: Jiang Liu 
Reviewed-by: Yijing Wang 
Signed-off-by: Joerg Roedel 

I went through the whole hot-add process and found this check seemed to
be duplicate.

Hot-add process starts from dmar_device_hotplug(), it uses a rwlock to
synchronize the hot-add paths.

2386 down_write(_global_lock);
2387 if (insert)
2388 ret = dmar_hotplug_insert(tmp);
2389 else
2390 ret = dmar_hotplug_remove(tmp);
2391 up_write(_global_lock);

dmar_device_hotplug()
->dmar_hotplug_insert()
-->dmar_parse_one_drhd()   /* the added intel_iommu is allocated here*/
-->dmar_hp_add_drhd()   /* the intel_iommu is about to bring up */
--->intel_iommu_add()

The duplicate check here:

if (g_iommus[iommu->seq_id])
return 0;

All the iommu units are allocated and then initialized in the same
synchronized path. There is no need to check a duplicate initialization.

I would like to remove this check if no objection.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 04/11] iommu: Add sva iommu_domain support

2022-06-28 Thread Baolu Lu

On 2022/6/29 09:54, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Tuesday, June 28, 2022 7:34 PM

On 2022/6/28 16:50, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 28, 2022 1:41 PM

struct iommu_domain {
unsigned type;
const struct iommu_domain_ops *ops;
unsigned long pgsize_bitmap;/* Bitmap of page sizes in use */
-   iommu_fault_handler_t handler;
-   void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+   union {
+   struct {/* IOMMU_DOMAIN_DMA */
+   iommu_fault_handler_t handler;
+   void *handler_token;
+   };

why is it DMA domain specific? What about unmanaged
domain? Unrecoverable fault can happen on any type
including SVA. Hence I think above should be domain type
agnostic.

The report_iommu_fault() should be replaced by the new
iommu_report_device_fault(). Jean has already started this work.

https://lore.kernel.org/linux-iommu/Yo4Nw9QyllT1RZbd@myrica/

Currently this is only for DMA domains, hence Robin suggested to make it
exclude with the SVA domain things.

https://lore.kernel.org/linux-iommu/f3170016-4d7f-e78e-db48-
68305f683...@arm.com/

Then it's worthy a comment that those two fields are for
some legacy fault reporting stuff and DMA type only.


The iommu_fault and SVA fields are exclusive. The former is used for
unrecoverable DMA remapping faults, while the latter is only interested
in the recoverable page faults.

I will update the commit message with above explanation. Does this work
for you?



Not exactly. Your earlier explanation is about old vs. new API thus
leaving the existing fault handler with current only user is fine.

but this is not related to unrecoverable vs. recoverable. As I said
unrecoverable could happen on all domain types. Tying it to
DMA-only doesn't make sense and I think in the end the new
iommu_report_device_fault() will need support both. Is it not the
case?


You are right.

The report_iommu_fault() should be replaced by the new
iommu_report_device_fault(). Leave the existing fault handler with the
existing users and the newly added SVA members should exclude it.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 10/11] iommu: Per-domain I/O page fault handling

2022-06-28 Thread Baolu Lu

On 2022/6/28 22:20, Jean-Philippe Brucker wrote:

On Tue, Jun 28, 2022 at 07:53:39PM +0800, Baolu Lu wrote:

Once iopf_handle_single() is removed, the name of
iopf_handle_group() looks a little weird

and confusing — does this group mean the iommu group (domain)?
While I took some minutes to


No. This is not the iommu group. It's page request group defined by the
PCI SIG spec. Multiple page requests could be put in a group with a
same group id. All page requests in a group could be responded to device
in one shot.


Thanks for your explanation; I understand the concept of the PCIe PRG now. I meant:

do we still need to mention the "group" here in the name

iopf_handle_group()? Which one is better — iopf_handle_prg() or

iopf_handler()? Perhaps neither of them? :)


Oh! Sorry for the misunderstanding.

I have no strong feeling to change this naming. :-) All the names
express what the helper does. Jean is the author of this framework. If
he has the same idea as you, I don't mind renaming it in this patch.


I'm not attached to the name, and I see how it could be confusing. Given
that io-pgfault is not only for PCIe, 'prg' is not the best here either.
iopf_handle_faults(), or just iopf_handler(), seem more suitable.


Okay, so I will rename it to iopf_handle_faults() in this patch.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v1 0/6] iommu/vt-d: Reset DMAR_UNITS_SUPPORTED

2022-06-28 Thread Baolu Lu

On 2022/6/25 20:51, Lu Baolu wrote:

Hi folks,

This is a follow-up series of changes proposed by this patch:

https://lore.kernel.org/linux-iommu/20220615183650.32075-1-steve.w...@hpe.com/

It removes several static arrays of size DMAR_UNITS_SUPPORTED and sets
the DMAR_UNITS_SUPPORTED to 1024.

Please help review and suggest.


This series is also available on github:

https://github.com/LuBaolu/intel-iommu/commits/vtd-next-for-v5.20

Best regards,
baolu



Best regards,
baolu

Lu Baolu (6):
   iommu/vt-d: Remove unused domain_get_iommu()
   iommu/vt-d: Use IDA interface to manage iommu sequence id
   iommu/vt-d: Refactor iommu information of each domain
   iommu/vt-d: Add VTD_FLAG_IOMMU_PROBED flag
   iommu/vt-d: Remove global g_iommus array
   iommu/vt-d: Make DMAR_UNITS_SUPPORTED default 1024

  include/linux/dmar.h|   6 +-
  drivers/iommu/intel/iommu.h |  29 --
  drivers/iommu/intel/dmar.c  |  33 ++-
  drivers/iommu/intel/iommu.c | 188 ++--
  drivers/iommu/intel/pasid.c |   2 +-
  drivers/iommu/intel/svm.c   |   2 +-
  6 files changed, 103 insertions(+), 157 deletions(-)



___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 10/11] iommu: Per-domain I/O page fault handling

2022-06-28 Thread Baolu Lu

On 2022/6/28 18:02, Tian, Kevin wrote:

From: Jean-Philippe Brucker 
Sent: Tuesday, June 28, 2022 5:44 PM

On Tue, Jun 28, 2022 at 08:39:36AM +, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Tuesday, June 21, 2022 10:44 PM

Tweak the I/O page fault handling framework to route the page faults to
the domain and call the page fault handler retrieved from the domain.
This makes the I/O page fault handling framework possible to serve more
usage scenarios as long as they have an IOMMU domain and install a

page

fault handler in it. Some unused functions are also removed to avoid
dead code.

The iommu_get_domain_for_dev_pasid() which retrieves attached

domain

for a {device, PASID} pair is used. It will be used by the page fault
handling framework which knows {device, PASID} reported from the

iommu

driver. We have a guarantee that the SVA domain doesn't go away during
IOPF handling, because unbind() waits for pending faults with
iopf_queue_flush_dev() before freeing the domain. Hence, there's no

need

to synchronize life cycle of the iommu domains between the unbind() and
the interrupt threads.


I found iopf_queue_flush_dev() is only called in intel-iommu driver. Did
I overlook anything?


The SMMU driver will need it as well when we upstream PRI support.
Currently it only supports stall, and that requires the device driver to
flush all DMA including stalled transactions *before* calling unbind(), so
ne need for iopf_queue_flush_dev() in this case.



then it makes sense. Probably Baolu can add this information in the
commit msg so others with similar question can quickly get the
point here.


Sure. Updated.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 10/11] iommu: Per-domain I/O page fault handling

2022-06-28 Thread Baolu Lu

On 2022/6/28 17:10, Ethan Zhao wrote:

Hi, Baolu

在 2022/6/28 14:28, Baolu Lu 写道:

Hi Ethan,

On 2022/6/27 21:03, Ethan Zhao wrote:

Hi,

在 2022/6/21 22:43, Lu Baolu 写道:

Tweak the I/O page fault handling framework to route the page faults to
the domain and call the page fault handler retrieved from the domain.
This makes the I/O page fault handling framework possible to serve more
usage scenarios as long as they have an IOMMU domain and install a page
fault handler in it. Some unused functions are also removed to avoid
dead code.

The iommu_get_domain_for_dev_pasid() which retrieves attached domain
for a {device, PASID} pair is used. It will be used by the page fault
handling framework which knows {device, PASID} reported from the iommu
driver. We have a guarantee that the SVA domain doesn't go away during
IOPF handling, because unbind() waits for pending faults with
iopf_queue_flush_dev() before freeing the domain. Hence, there's no 
need

to synchronize life cycle of the iommu domains between the unbind() and
the interrupt threads.

Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 
---
  drivers/iommu/io-pgfault.c | 64 
+-

  1 file changed, 7 insertions(+), 57 deletions(-)

diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index aee9e033012f..4f24ec703479 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -69,69 +69,18 @@ static int iopf_complete_group(struct device 
*dev, struct iopf_fault *iopf,

  return iommu_page_response(dev, );
  }
-static enum iommu_page_response_code
-iopf_handle_single(struct iopf_fault *iopf)
-{
-    vm_fault_t ret;
-    struct mm_struct *mm;
-    struct vm_area_struct *vma;
-    unsigned int access_flags = 0;
-    unsigned int fault_flags = FAULT_FLAG_REMOTE;
-    struct iommu_fault_page_request *prm = >fault.prm;
-    enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
-
-    if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
-    return status;
-
-    mm = iommu_sva_find(prm->pasid);
-    if (IS_ERR_OR_NULL(mm))
-    return status;
-
-    mmap_read_lock(mm);
-
-    vma = find_extend_vma(mm, prm->addr);
-    if (!vma)
-    /* Unmapped area */
-    goto out_put_mm;
-
-    if (prm->perm & IOMMU_FAULT_PERM_READ)
-    access_flags |= VM_READ;
-
-    if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
-    access_flags |= VM_WRITE;
-    fault_flags |= FAULT_FLAG_WRITE;
-    }
-
-    if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
-    access_flags |= VM_EXEC;
-    fault_flags |= FAULT_FLAG_INSTRUCTION;
-    }
-
-    if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
-    fault_flags |= FAULT_FLAG_USER;
-
-    if (access_flags & ~vma->vm_flags)
-    /* Access fault */
-    goto out_put_mm;
-
-    ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
-    status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
-    IOMMU_PAGE_RESP_SUCCESS;
-
-out_put_mm:
-    mmap_read_unlock(mm);
-    mmput(mm);
-
-    return status;
-}
-


Once the iopf_handle_single() is removed, the name of 
iopf_handle_group() looks a little weird


and confused, does this group mean the iommu group (domain) ? while I 
take some minutes to


No. This is not the iommu group. It's page request group defined by the
PCI SIG spec. Multiple page requests could be put in a group with a
same group id. All page requests in a group could be responded to device
in one shot.


Thanks your explaination, understand the concept of PCIe PRG.  I meant

do we still have the necessity to mention the "group" here in the name

iopf_handle_group(),  which one is better ? iopf_handle_prg() or

iopf_handler(),  perhaps none of them ? :)


Oh! Sorry for the misunderstanding.

I have no strong feeling to change this naming. :-) All the names
express what the helper does. Jean is the author of this framework. If
he has the same idea as you, I don't mind renaming it in this patch.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v9 04/11] iommu: Add sva iommu_domain support

2022-06-28 Thread Baolu Lu

On 2022/6/28 16:50, Tian, Kevin wrote:

+
+   mutex_lock(>mutex);
+   curr = xa_cmpxchg(>pasid_array, pasid, NULL, domain,
GFP_KERNEL);
+   if (curr)
+   goto out_unlock;

Need check xa_is_err(old).

Either

(1) old entry is a valid pointer, or

return -EBUSY in this case


(2) xa_is_err(curr)

return xa_err(cur)


are failure cases. Hence, "curr == NULL" is the only check we need. Did
I miss anything?


But now you always return -EBUSY for all kinds of xa errors.


Fair enough. Updated like below.

curr = xa_cmpxchg(>pasid_array, pasid, NULL, domain, 
GFP_KERNEL);

if (curr) {
ret = xa_err(curr) ? : -EBUSY;
goto out_unlock;
}

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 04/11] iommu: Add sva iommu_domain support

2022-06-28 Thread Baolu Lu

On 2022/6/28 16:50, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 28, 2022 1:41 PM

   struct iommu_domain {
unsigned type;
const struct iommu_domain_ops *ops;
unsigned long pgsize_bitmap;/* Bitmap of page sizes in use */
-   iommu_fault_handler_t handler;
-   void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+   union {
+   struct {/* IOMMU_DOMAIN_DMA */
+   iommu_fault_handler_t handler;
+   void *handler_token;
+   };

why is it DMA domain specific? What about unmanaged
domain? Unrecoverable fault can happen on any type
including SVA. Hence I think above should be domain type
agnostic.

The report_iommu_fault() should be replaced by the new
iommu_report_device_fault(). Jean has already started this work.

https://lore.kernel.org/linux-iommu/Yo4Nw9QyllT1RZbd@myrica/

Currently this is only for DMA domains, hence Robin suggested to make it
exclude with the SVA domain things.

https://lore.kernel.org/linux-iommu/f3170016-4d7f-e78e-db48-
68305f683...@arm.com/

Then it's worthy a comment that those two fields are for
some legacy fault reporting stuff and DMA type only.


The iommu_fault and SVA fields are exclusive. The former is used for
unrecoverable DMA remapping faults, while the latter is only interested
in the recoverable page faults.

I will update the commit message with above explanation. Does this work
for you?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 10/11] iommu: Per-domain I/O page fault handling

2022-06-28 Thread Baolu Lu

On 2022/6/28 16:39, Tian, Kevin wrote:

  static void iopf_handle_group(struct work_struct *work)
  {
struct iopf_group *group;
+   struct iommu_domain *domain;
struct iopf_fault *iopf, *next;
enum iommu_page_response_code status =
IOMMU_PAGE_RESP_SUCCESS;

group = container_of(work, struct iopf_group, work);
+   domain = iommu_get_domain_for_dev_pasid(group->dev,
+   group->last_fault.fault.prm.pasid);
+   if (!domain || !domain->iopf_handler)
+   status = IOMMU_PAGE_RESP_INVALID;

Miss a comment on why no refcnt is required on domain as explained
in the commit msg.


I had some comments around iommu_queue_iopf() in the previous patch. The
iommu_queue_iopf() is the generic page fault handler exposed by iommu
core, hence that's the right place to document this.

Post it below as well:

diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 1df8c1dcae77..aee9e033012f 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -181,6 +181,13 @@ static void iopf_handle_group(struct work_struct *work)
  * request completes, outstanding faults will have been dealt with by 
the time

  * the PASID is freed.
  *
+ * Any valid page fault will be eventually routed to an iommu domain 
and the

+ * page fault handler installed there will get called. The users of this
+ * handling framework should guarantee that the iommu domain could only be
+ * freed after the device has stopped generating page faults (or the iommu
+ * hardware has been set to block the page faults) and the pending page 
faults

+ * have been flushed.
+ *
  * Return: 0 on success and <0 on error.
  */
 int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 09/11] iommu: Prepare IOMMU domain for IOPF

2022-06-28 Thread Baolu Lu

On 2022/6/28 16:29, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Tuesday, June 21, 2022 10:44 PM
+/*
+ * I/O page fault handler for SVA
+ */
+enum iommu_page_response_code
+iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+{
+   vm_fault_t ret;
+   struct mm_struct *mm;
+   struct vm_area_struct *vma;
+   unsigned int access_flags = 0;
+   struct iommu_domain *domain = data;
+   unsigned int fault_flags = FAULT_FLAG_REMOTE;
+   struct iommu_fault_page_request *prm = >prm;
+   enum iommu_page_response_code status =
IOMMU_PAGE_RESP_INVALID;
+
+   if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
+   return status;
+
+   mm = domain->mm;


What about directly passing domain->mm in as the fault data?

The entire logic here is only about mm instead of domain.


Yes. Updated.

Best regards,
baolu

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 10/11] iommu: Per-domain I/O page fault handling

2022-06-28 Thread Baolu Lu

Hi Ethan,

On 2022/6/27 21:03, Ethan Zhao wrote:

Hi,

在 2022/6/21 22:43, Lu Baolu 写道:

Tweak the I/O page fault handling framework to route the page faults to
the domain and call the page fault handler retrieved from the domain.
This makes the I/O page fault handling framework possible to serve more
usage scenarios as long as they have an IOMMU domain and install a page
fault handler in it. Some unused functions are also removed to avoid
dead code.

The iommu_get_domain_for_dev_pasid() which retrieves attached domain
for a {device, PASID} pair is used. It will be used by the page fault
handling framework which knows {device, PASID} reported from the iommu
driver. We have a guarantee that the SVA domain doesn't go away during
IOPF handling, because unbind() waits for pending faults with
iopf_queue_flush_dev() before freeing the domain. Hence, there's no need
to synchronize life cycle of the iommu domains between the unbind() and
the interrupt threads.

Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 
---
  drivers/iommu/io-pgfault.c | 64 +-
  1 file changed, 7 insertions(+), 57 deletions(-)

diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index aee9e033012f..4f24ec703479 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -69,69 +69,18 @@ static int iopf_complete_group(struct device *dev, 
struct iopf_fault *iopf,

  return iommu_page_response(dev, );
  }
-static enum iommu_page_response_code
-iopf_handle_single(struct iopf_fault *iopf)
-{
-    vm_fault_t ret;
-    struct mm_struct *mm;
-    struct vm_area_struct *vma;
-    unsigned int access_flags = 0;
-    unsigned int fault_flags = FAULT_FLAG_REMOTE;
-    struct iommu_fault_page_request *prm = >fault.prm;
-    enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
-
-    if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
-    return status;
-
-    mm = iommu_sva_find(prm->pasid);
-    if (IS_ERR_OR_NULL(mm))
-    return status;
-
-    mmap_read_lock(mm);
-
-    vma = find_extend_vma(mm, prm->addr);
-    if (!vma)
-    /* Unmapped area */
-    goto out_put_mm;
-
-    if (prm->perm & IOMMU_FAULT_PERM_READ)
-    access_flags |= VM_READ;
-
-    if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
-    access_flags |= VM_WRITE;
-    fault_flags |= FAULT_FLAG_WRITE;
-    }
-
-    if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
-    access_flags |= VM_EXEC;
-    fault_flags |= FAULT_FLAG_INSTRUCTION;
-    }
-
-    if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
-    fault_flags |= FAULT_FLAG_USER;
-
-    if (access_flags & ~vma->vm_flags)
-    /* Access fault */
-    goto out_put_mm;
-
-    ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
-    status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
-    IOMMU_PAGE_RESP_SUCCESS;
-
-out_put_mm:
-    mmap_read_unlock(mm);
-    mmput(mm);
-
-    return status;
-}
-


Once the iopf_handle_single() is removed, the name of 
iopf_handle_group() looks a little weird


and confused, does this group mean the iommu group (domain) ? while I 
take some minutes to


No. This is not the iommu group. It's page request group defined by the
PCI SIG spec. Multiple page requests could be put in a group with a
same group id. All page requests in a group could be responded to device
in one shot.

Best regards,
baolu



look into the code, oh, means a batch / list / queue  of iopfs , and 
iopf_handle_group() becomes a


generic iopf_handler() .

Doe it make sense to revise the names of iopf_handle_group(), 
iopf_complete_group,  iopf_group in


this patch set ?


Thanks,

Ethan


  static void iopf_handle_group(struct work_struct *work)
  {
  struct iopf_group *group;
+    struct iommu_domain *domain;
  struct iopf_fault *iopf, *next;
  enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
  group = container_of(work, struct iopf_group, work);
+    domain = iommu_get_domain_for_dev_pasid(group->dev,
+    group->last_fault.fault.prm.pasid);
+    if (!domain || !domain->iopf_handler)
+    status = IOMMU_PAGE_RESP_INVALID;
  list_for_each_entry_safe(iopf, next, >faults, list) {
  /*
@@ -139,7 +88,8 @@ static void iopf_handle_group(struct work_struct 
*work)

   * faults in the group if there is an error.
   */
  if (status == IOMMU_PAGE_RESP_SUCCESS)
-    status = iopf_handle_single(iopf);
+    status = domain->iopf_handler(>fault,
+  domain->fault_data);
  if (!(iopf->fault.prm.flags &
    IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))




___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v9 06/11] arm-smmu-v3/sva: Add SVA domain support

2022-06-28 Thread Baolu Lu

On 2022/6/27 19:50, Zhangfei Gao wrote:



On 2022/6/21 下午10:43, Lu Baolu wrote:

Add support for SVA domain allocation and provide an SVA-specific
iommu_domain_ops.

Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 


Tested-by: Zhangfei Gao 
Have tested the series on aarch64.


Tony has been helping to validate this series on Intel's platform.

Tony, can I add your Test-by as well in this series?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v9 07/11] iommu/sva: Refactoring iommu_sva_bind/unbind_device()

2022-06-27 Thread Baolu Lu

On 2022/6/27 18:14, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Tuesday, June 21, 2022 10:44 PM
+struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct
mm_struct *mm)
+{
+   struct iommu_domain *domain;
+   ioasid_t max_pasids;
+   int ret = -EINVAL;
+
+   /* Allocate mm->pasid if necessary. */


this comment is for iommu_sva_alloc_pasid()


Updated.




+   max_pasids = dev->iommu->max_pasids;
+   if (!max_pasids)
+   return ERR_PTR(-EOPNOTSUPP);
+
+   ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1);
+   if (ret)
+   return ERR_PTR(ret);
+



...

+void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+   struct device *dev = handle->dev;
+   struct iommu_domain *domain =
+   container_of(handle, struct iommu_domain, bond);
+   ioasid_t pasid = iommu_sva_get_pasid(handle);
+
+   mutex_lock(_sva_lock);
+   if (refcount_dec_and_test(>bond.users)) {
+   iommu_detach_device_pasid(domain, dev, pasid);
+   iommu_domain_free(domain);
+   }
+   mutex_unlock(_sva_lock);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
+
+u32 iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+   struct iommu_domain *domain =
+   container_of(handle, struct iommu_domain, bond);
+
+   return domain->mm->pasid;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);


Looks this is only used by unbind_device. Just open code it.


It's part of current IOMMU/SVA interfaces for the device drivers, and
has been used in various drivers.

$ git grep iommu_sva_get_pasid
drivers/dma/idxd/cdev.c:pasid = iommu_sva_get_pasid(sva);
drivers/iommu/iommu-sva-lib.c:  ioasid_t pasid = 
iommu_sva_get_pasid(handle);
drivers/iommu/iommu-sva-lib.c:u32 iommu_sva_get_pasid(struct iommu_sva 
*handle)

drivers/iommu/iommu-sva-lib.c:EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
drivers/misc/uacce/uacce.c: pasid = iommu_sva_get_pasid(handle);
include/linux/iommu.h:u32 iommu_sva_get_pasid(struct iommu_sva *handle);
include/linux/iommu.h:static inline u32 iommu_sva_get_pasid(struct 
iommu_sva *handle)


Or, I missed anything?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 06/11] arm-smmu-v3/sva: Add SVA domain support

2022-06-27 Thread Baolu Lu

On 2022/6/27 19:50, Zhangfei Gao wrote:


On 2022/6/21 下午10:43, Lu Baolu wrote:

Add support for SVA domain allocation and provide an SVA-specific
iommu_domain_ops.

Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 


Tested-by: Zhangfei Gao 
Have tested the series on aarch64.


Thank you very much! Very appreciated for your help!

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v9 04/11] iommu: Add sva iommu_domain support

2022-06-27 Thread Baolu Lu

Hi Kevin,

On 2022/6/27 16:29, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Tuesday, June 21, 2022 10:44 PM

The sva iommu_domain represents a hardware pagetable that the IOMMU
hardware could use for SVA translation. This adds some infrastructure
to support SVA domain in the iommu common layer. It includes:

- Extend the iommu_domain to support a new IOMMU_DOMAIN_SVA
domain
   type. The IOMMU drivers that support SVA should provide the sva
   domain specific iommu_domain_ops.
- Add a helper to allocate an SVA domain. The iommu_domain_free()
   is still used to free an SVA domain.
- Add helpers to attach an SVA domain to a device and the reverse
   operation.

Some buses, like PCI, route packets without considering the PASID value.
Thus a DMA target address with PASID might be treated as P2P if the
address falls into the MMIO BAR of other devices in the group. To make
things simple, the attach/detach interfaces only apply to devices
belonging to the singleton groups, and the singleton is immutable in
fabric i.e. not affected by hotplug.

The iommu_attach/detach_device_pasid() can be used for other purposes,
such as kernel DMA with pasid, mediation device, etc.


I'd split this into two patches. One for adding iommu_attach/
detach_device_pasid() and set/block_dev_pasid ops, and the
other for adding SVA.


Yes. Make sense.




  struct iommu_domain {
unsigned type;
const struct iommu_domain_ops *ops;
unsigned long pgsize_bitmap;/* Bitmap of page sizes in use */
-   iommu_fault_handler_t handler;
-   void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+   union {
+   struct {/* IOMMU_DOMAIN_DMA */
+   iommu_fault_handler_t handler;
+   void *handler_token;
+   };


why is it DMA domain specific? What about unmanaged
domain? Unrecoverable fault can happen on any type
including SVA. Hence I think above should be domain type
agnostic.


The report_iommu_fault() should be replaced by the new
iommu_report_device_fault(). Jean has already started this work.

https://lore.kernel.org/linux-iommu/Yo4Nw9QyllT1RZbd@myrica/

Currently this is only for DMA domains, hence Robin suggested to make it
exclude with the SVA domain things.

https://lore.kernel.org/linux-iommu/f3170016-4d7f-e78e-db48-68305f683...@arm.com/




+   struct {/* IOMMU_DOMAIN_SVA */
+   struct mm_struct *mm;
+   };
+   };
  };






+
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+   struct mm_struct *mm)
+{
+   const struct iommu_ops *ops = dev_iommu_ops(dev);
+   struct iommu_domain *domain;
+
+   domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
+   if (!domain)
+   return NULL;
+
+   domain->type = IOMMU_DOMAIN_SVA;


It's a bit weird that the type has been specified when calling
ops->domain_alloc while it still leaves to the caller to set the
type. But this is not caused by this series. could be cleaned
up separately.


Yes. Robin has patches to refactor the domain allocation interface,
let's wait and see what it looks like finally.




+
+   mutex_lock(>mutex);
+   curr = xa_cmpxchg(>pasid_array, pasid, NULL, domain,
GFP_KERNEL);
+   if (curr)
+   goto out_unlock;


Need check xa_is_err(old).


Either

(1) old entry is a valid pointer, or
(2) xa_is_err(curr)

are failure cases. Hence, "curr == NULL" is the only check we need. Did
I miss anything?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 00/11] iommu: SVA and IOPF refactoring

2022-06-25 Thread Baolu Lu

Hi folks,

On 2022/6/21 22:43, Lu Baolu wrote:

Hi folks,

The former part of this series refactors the IOMMU SVA code by assigning
an SVA type of iommu_domain to a shared virtual address and replacing
sva_bind/unbind iommu ops with set/block_dev_pasid domain ops.

The latter part changes the existing I/O page fault handling framework
from only serving SVA to a generic one. Any driver or component could
handle the I/O page faults for its domain in its own way by installing
an I/O page fault handler.

This series has been functionally tested on an x86 machine and compile
tested for all architectures.

This series is also available on github:
[2]https://github.com/LuBaolu/intel-iommu/commits/iommu-sva-refactoring-v9

Please review and suggest.


Just a gentle ping on this series.

Do you have further inputs? I am trying to see if we can merge this
series for v5.20. The drivers also depend on it to enable their kernel
DMA with PASID.

Sorry to disturb you.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/1] iommu/vt-d: Fix RID2PASID setup/teardown failure

2022-06-24 Thread Baolu Lu

On 2022/6/24 14:02, Ethan Zhao wrote:

在 2022/6/23 14:57, Lu Baolu 写道:

The IOMMU driver shares the pasid table for PCI alias devices. When the
RID2PASID entry of the shared pasid table has been filled by the first
device, the subsequent device will encounter the "DMAR: Setup RID2PASID
failed" failure as the pasid entry has already been marked as present.
As the result, the IOMMU probing process will be aborted.

On the contrary, when any alias device is hot-removed from the system,
for example, by writing to /sys/bus/pci/devices/.../remove, the shared
RID2PASID will be cleared without any notifications to other devices.
As the result, any DMAs from those rest devices are blocked.

Sharing pasid table among PCI alias devices could save two memory pages
for devices underneath the PCIe-to-PCI bridges. Anyway, considering that
those devices are rare on modern platforms that support VT-d in scalable
mode and the saved memory is negligible, it's reasonable to remove this
part of immature code to make the driver feasible and stable.

In my understanding, this cleaning will make the pasid table become a
per-dev data structure whatever the dev is pci-alias or not, and the
pasid_pte_is_present(pte) will only check against every pci-alias' own
private pasid table; the setup stage wouldn't break, so does the
detach/release path, and there is little value to code other reference
counter-like complex implementation, looks good to me!


Thanks! Can I add a Reviewd-by from you?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] iommu/vt-d: Fix PCI bus rescan device hot add

2022-06-24 Thread Baolu Lu

Hi Joerg,

On 2022/6/24 13:45, Joerg Roedel wrote:

Hi Baolu,

On Wed, May 25, 2022 at 09:40:26AM +0800, Baolu Lu wrote:

How do you like it? If you agree, I can queue it in my next pull request
for fixes.


Would it help to tie DMAR and IOMMU components together, so that
selecting DMAR for IRQ remapping also selects IOMMU? The IOMMU can be in
PT mode and I think it would simplify a lot of things.


It makes sense as far as I am aware. By putting IOMMUs in pass-through
mode, there will be no run-time costs and things could be simplified a
lot.

Besides the refactoring efforts, we still need this quick fix so that
the fix could be propagated to various stable and vendors' downstream trees.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 2/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency

2022-06-23 Thread Baolu Lu

On 2022/6/24 04:00, Nicolin Chen wrote:

From: Jason Gunthorpe 

The KVM mechanism for controlling wbinvd is based on OR of the coherency
property of all devices attached to a guest, no matter whether those
devices are attached to a single domain or multiple domains.

On the other hand, the benefit to using separate domains was that those
devices attached to domains supporting enforced cache coherency always
mapped with the attributes necessary to provide that feature, therefore
if a non-enforced domain was dropped, the associated group removal would
re-trigger an evaluation by KVM.

In practice however, the only known cases of such mixed domains included
an Intel IGD device behind an IOMMU lacking snoop control, where such
devices do not support hotplug, therefore this scenario lacks testing and
is not considered sufficiently relevant to support.

After all, KVM won't take advantage of trying to push a device that could
do enforced cache coherency to a dedicated domain vs re-using an existing
domain, which is non-coherent.

Simplify this code and eliminate the test. This removes the only logic
that needed to have a dummy domain attached prior to searching for a
matching domain and simplifies the next patches.

It's unclear whether we want to further optimize the Intel driver to
update the domain coherency after a device is detached from it, at
least not before KVM can be verified to handle such dynamics in related
emulation paths (wbinvd, vcpu load, write_cr0, ept, etc.). In reality
we don't see an usage requiring such optimization as the only device
which imposes such non-coherency is Intel GPU which even doesn't
support hotplug/hot remove.

Signed-off-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Signed-off-by: Nicolin Chen 
---
  drivers/vfio/vfio_iommu_type1.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e357..f4e3b423a453 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2285,9 +2285,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 * testing if they're on the same bus_type.
 */
list_for_each_entry(d, >domain_list, next) {
-   if (d->domain->ops == domain->domain->ops &&
-   d->enforce_cache_coherency ==
-   domain->enforce_cache_coherency) {
+   if (d->domain->ops == domain->domain->ops) {
iommu_detach_group(domain->domain, group->iommu_group);
if (!iommu_attach_group(d->domain,
group->iommu_group)) {


Reviewed-by: Lu Baolu 

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: iommu_sva_bind_device question

2022-06-23 Thread Baolu Lu

On 2022/6/24 09:14, Jerry Snitselaar wrote:

On Fri, Jun 24, 2022 at 08:55:08AM +0800, Baolu Lu wrote:

On 2022/6/24 01:02, Jerry Snitselaar wrote:

Hi Baolu & Dave,

I noticed last night that on a Sapphire Rapids system if you boot without
intel_iommu=on, the idxd driver will crash during probe in 
iommu_sva_bind_device().
Should there be a sanity check before calling dev_iommu_ops(), or is the 
expectation
that the caller would verify it is safe to call? This seemed to be uncovered by
the combination of 3f6634d997db ("iommu: Use right way to retrieve iommu_ops"), 
and
42a1b73852c4 ("dmaengine: idxd: Separate user and kernel pasid enabling").

[   21.423729] BUG: kernel NULL pointer dereference, address: 0038
[   21.445108] #PF: supervisor read access in kernel mode
[   21.450912] #PF: error_code(0x) - not-present page
[   21.456706] PGD 0
[   21.459047] Oops:  [#1] PREEMPT SMP NOPTI
[   21.464004] CPU: 0 PID: 1420 Comm: kworker/0:3 Not tainted 
5.19.0-0.rc3.27.eln120.x86_64 #1
[   21.464011] Hardware name: Intel Corporation EAGLESTREAM/EAGLESTREAM, BIOS 
EGSDCRB1.SYS.0067.D12.2110190954 10/19/2021
[   21.464015] Workqueue: events work_for_cpu_fn
[   21.464030] RIP: 0010:iommu_sva_bind_device+0x1d/0xe0
[   21.464046] Code: c3 cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 41 
56 49 89 d6 41 55 41 54 55 53 48 83 ec 08 48 8b 87 d8 02 00 00 <48> 8b 40 38 48 
8b 50 10 48 83 7a 70 00 48 89 14 24 0f 84 91 00 00
[   21.464050] RSP: 0018:ff7245d9096b7db8 EFLAGS: 00010296
[   21.464054] RAX:  RBX: ff1eadeec8a51000 RCX: 
[   21.464058] RDX: ff7245d9096b7e24 RSI:  RDI: ff1eadeec8a510d0
[   21.464060] RBP: ff1eadeec8a51000 R08: b1a12300 R09: ff1eadffbfce25b4
[   21.464062] R10:  R11: 0038 R12: c09f8000
[   21.464065] R13: ff1eadeec8a510d0 R14: ff7245d9096b7e24 R15: ff1eaddf54429000
[   21.464067] FS:  () GS:ff1eadee7f60() 
knlGS:
[   21.464070] CS:  0010 DS:  ES:  CR0: 80050033
[   21.464072] CR2: 0038 CR3: 0008c0e10006 CR4: 00771ef0
[   21.464074] DR0:  DR1:  DR2: 
[   21.464076] DR3:  DR6: fffe07f0 DR7: 0400
[   21.464078] PKRU: 5554
[   21.464079] Call Trace:
[   21.464083]  
[   21.464092]  idxd_pci_probe+0x259/0x1070 [idxd]
[   21.464121]  local_pci_probe+0x3e/0x80
[   21.464132]  work_for_cpu_fn+0x13/0x20
[   21.464136]  process_one_work+0x1c4/0x380
[   21.464143]  worker_thread+0x1ab/0x380
[   21.464147]  ? _raw_spin_lock_irqsave+0x23/0x50
[   21.464158]  ? process_one_work+0x380/0x380
[   21.464161]  kthread+0xe6/0x110
[   21.464168]  ? kthread_complete_and_exit+0x20/0x20
[   21.464172]  ret_from_fork+0x1f/0x30

I figure either there needs to be a check in iommu_sva_bind_device, or
idxd needs to check in idxd_enable_system_pasid that that
idxd->pdev->dev.iommu is not null before it tries calling iommu_sva_bind_device.


As documented around the iommu_sva_bind_device() interface:

  * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first,
to
  * initialize the required SVA features.

idxd->pdev->dev.iommu should be checked in there.

Dave, any thoughts?

Best regards,
baolu


Duh, sorry I missed that in the comments.

It calls iommu_dev_enable_feature(), but then goes into code that
calls iommu_sva_bind_device whether or not iommu_dev_enable_feature()
fails.

You also will get the following warning if you don't have scalable
mode enabled (either not enabled by default, or if enabled by default
and passed intel_iommu=on,sm_off):


If scalable mode is disabled, iommu_dev_enable_feature(IOMMU_SVA) will
return failure, hence driver should not call iommu_sva_bind_device().
I guess below will disappear if above is fixed in the idxd driver.

Best regards,
baolu



[   24.645784] idxd :6a:01.0: enabling device (0144 -> 0146)
[   24.645871] idxd :6a:01.0: Unable to turn on user SVA feature.
[   24.645932] [ cut here ]
[   24.645935] WARNING: CPU: 0 PID: 422 at drivers/iommu/intel/pasid.c:253 
intel_pasid_get_entry.isra.0+0xcd/0xe0
[   24.675872] Modules linked in: intel_uncore(+) drm_ttm_helper 
isst_if_mbox_pci(+) idxd(+) snd i2c_i801(+) isst_if_mmio ttm isst_if_common mei 
fjes(+) soundcore intel_vsec i2c_ismt i2c_smbus idxd_bus ipmi_ssif acpi_ipmi 
ipmi_si acpi_pad acpi_power_meter pfr_telemetry pfr_update vfat fat fuse xfs 
crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel igc wmi 
pinctrl_emmitsburg ipmi_devintf ipmi_msghandler
[   24.716612] CPU: 0 PID: 422 Comm: kworker/0:2 Not tainted 
5.19.0-0.rc3.27.eln120.x86_64 #1
[   24.716621] Hardware name: Intel Corporation EAGLESTREAM/EAGLESTREAM, BIOS 
EGSDCRB1.SYS.0067.D12.2110190954 10/19/2021
[   24.716625] Workqueue: events work_for_cpu_fn
[   24.716645] RIP

Re: [PATCH v3 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-23 Thread Baolu Lu

On 2022/6/24 04:00, Nicolin Chen wrote:

diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index e1cb51b9866c..5386d889429d 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -304,7 +304,7 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain 
*domain, struct device
/* Only allow the domain created internally. */
mtk_mapping = data->mapping;
if (mtk_mapping->domain != domain)
-   return 0;
+   return -EMEDIUMTYPE;
  
  	if (!data->m4u_dom) {

data->m4u_dom = dom;


This change looks odd. It turns the return value from success to
failure. Is it a bug? If so, it should go through a separated fix patch.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: iommu_sva_bind_device question

2022-06-23 Thread Baolu Lu

On 2022/6/24 01:02, Jerry Snitselaar wrote:

Hi Baolu & Dave,

I noticed last night that on a Sapphire Rapids system if you boot without
intel_iommu=on, the idxd driver will crash during probe in 
iommu_sva_bind_device().
Should there be a sanity check before calling dev_iommu_ops(), or is the 
expectation
that the caller would verify it is safe to call? This seemed to be uncovered by
the combination of 3f6634d997db ("iommu: Use right way to retrieve iommu_ops"), 
and
42a1b73852c4 ("dmaengine: idxd: Separate user and kernel pasid enabling").

[   21.423729] BUG: kernel NULL pointer dereference, address: 0038
[   21.445108] #PF: supervisor read access in kernel mode
[   21.450912] #PF: error_code(0x) - not-present page
[   21.456706] PGD 0
[   21.459047] Oops:  [#1] PREEMPT SMP NOPTI
[   21.464004] CPU: 0 PID: 1420 Comm: kworker/0:3 Not tainted 
5.19.0-0.rc3.27.eln120.x86_64 #1
[   21.464011] Hardware name: Intel Corporation EAGLESTREAM/EAGLESTREAM, BIOS 
EGSDCRB1.SYS.0067.D12.2110190954 10/19/2021
[   21.464015] Workqueue: events work_for_cpu_fn
[   21.464030] RIP: 0010:iommu_sva_bind_device+0x1d/0xe0
[   21.464046] Code: c3 cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 41 
56 49 89 d6 41 55 41 54 55 53 48 83 ec 08 48 8b 87 d8 02 00 00 <48> 8b 40 38 48 
8b 50 10 48 83 7a 70 00 48 89 14 24 0f 84 91 00 00
[   21.464050] RSP: 0018:ff7245d9096b7db8 EFLAGS: 00010296
[   21.464054] RAX:  RBX: ff1eadeec8a51000 RCX: 
[   21.464058] RDX: ff7245d9096b7e24 RSI:  RDI: ff1eadeec8a510d0
[   21.464060] RBP: ff1eadeec8a51000 R08: b1a12300 R09: ff1eadffbfce25b4
[   21.464062] R10:  R11: 0038 R12: c09f8000
[   21.464065] R13: ff1eadeec8a510d0 R14: ff7245d9096b7e24 R15: ff1eaddf54429000
[   21.464067] FS:  () GS:ff1eadee7f60() 
knlGS:
[   21.464070] CS:  0010 DS:  ES:  CR0: 80050033
[   21.464072] CR2: 0038 CR3: 0008c0e10006 CR4: 00771ef0
[   21.464074] DR0:  DR1:  DR2: 
[   21.464076] DR3:  DR6: fffe07f0 DR7: 0400
[   21.464078] PKRU: 5554
[   21.464079] Call Trace:
[   21.464083]  
[   21.464092]  idxd_pci_probe+0x259/0x1070 [idxd]
[   21.464121]  local_pci_probe+0x3e/0x80
[   21.464132]  work_for_cpu_fn+0x13/0x20
[   21.464136]  process_one_work+0x1c4/0x380
[   21.464143]  worker_thread+0x1ab/0x380
[   21.464147]  ? _raw_spin_lock_irqsave+0x23/0x50
[   21.464158]  ? process_one_work+0x380/0x380
[   21.464161]  kthread+0xe6/0x110
[   21.464168]  ? kthread_complete_and_exit+0x20/0x20
[   21.464172]  ret_from_fork+0x1f/0x30

I figure either there needs to be a check in iommu_sva_bind_device, or
idxd needs to check in idxd_enable_system_pasid that that
idxd->pdev->dev.iommu is not null before it tries calling iommu_sva_bind_device.


As documented around the iommu_sva_bind_device() interface:

 * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called 
first, to

 * initialize the required SVA features.

idxd->pdev->dev.iommu should be checked in there.

Dave, any thoughts?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3] iommu/vt-d: Make DMAR_UNITS_SUPPORTED a config setting

2022-06-22 Thread Baolu Lu

On 2022/6/23 10:51, Jerry Snitselaar wrote:

The real problem here is that the iommu sequence ID overflows if
DMAR_UNITS_SUPPORTED is not big enough. This is purely a software
implementation issue, I am not sure whether user opt-in when building a
kernel package could help a lot here.


Is this something that could be figured out when parsing the dmar
table? It looks like currently iommu_refcnt[], iommu_did[], and
dmar_seq_ids[] depend on it.


That's definitely a better solution.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3] iommu/vt-d: Make DMAR_UNITS_SUPPORTED a config setting

2022-06-22 Thread Baolu Lu

On 2022/6/22 23:05, Jerry Snitselaar wrote:

On Wed, Jun 22, 2022 at 7:52 AM Baolu Lu  wrote:

On 2022/6/16 02:36, Steve Wahl wrote:

To support up to 64 sockets with 10 DMAR units each (640), make the
value of DMAR_UNITS_SUPPORTED adjustable by a config variable,
CONFIG_DMAR_UNITS_SUPPORTED, and make its default 1024 when MAXSMP is
set.

If the available hardware exceeds DMAR_UNITS_SUPPORTED (previously set
to MAX_IO_APICS, or 128), it causes these messages: "DMAR: Failed to
allocate seq_id", "DMAR: Parse DMAR table failure.", and "x2apic: IRQ
remapping doesn't support X2APIC mode x2apic disabled"; and the system
fails to boot properly.

Signed-off-by: Steve Wahl
Reviewed-by: Kevin Tian
---

Note that we could not find a reason for connecting
DMAR_UNITS_SUPPORTED to MAX_IO_APICS as was done previously.  Perhaps
it seemed like the two would continue to match on earlier processors.
There doesn't appear to be kernel code that assumes that the value of
one is related to the other.

v2: Make this value a config option, rather than a fixed constant.  The default
values should match previous configuration except in the MAXSMP case.  Keeping 
the
value at a power of two was requested by Kevin Tian.

v3: Make the config option dependent upon DMAR_TABLE, as it is not used without 
this.

   drivers/iommu/intel/Kconfig | 7 +++
   include/linux/dmar.h| 6 +-
   2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 39a06d245f12..07aaebcb581d 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -9,6 +9,13 @@ config DMAR_PERF
   config DMAR_DEBUG
   bool

+config DMAR_UNITS_SUPPORTED
+ int "Number of DMA Remapping Units supported"
+ depends on DMAR_TABLE
+ default 1024 if MAXSMP
+ default 128  if X86_64
+ default 64

With this patch applied, the IOMMU configuration looks like:

[*]   AMD IOMMU support
 AMD IOMMU Version 2 driver
[*] Enable AMD IOMMU internals in DebugFS
(1024) Number of DMA Remapping Units supported   <<<< NEW
[*]   Support for Intel IOMMU using DMA Remapping Devices
[*] Export Intel IOMMU internals in Debugfs
[*] Support for Shared Virtual Memory with Intel IOMMU
[*] Enable Intel DMA Remapping Devices by default
[*] Enable Intel IOMMU scalable mode by default
[*]   Support for Interrupt Remapping
[*]   OMAP IOMMU Support
[*] Export OMAP IOMMU internals in DebugFS
[*]   Rockchip IOMMU Support

The NEW item looks confusing. It looks to be a generic configurable
value though it's actually Intel DMAR specific. Any thoughts?

Best regards,
baolu


Would moving it under INTEL_IOMMU at least have it show up below
"Support for Intel IOMMU using DMA Remapping Devices"? I'm not sure it
can be better than that, because IRQ_REMAP selects DMAR_TABLE, so we
can't stick it in the if INTEL_IOMMU section.


It's more reasonable to move it under INTEL_IOMMU, but the trouble is
that this also stands even if INTEL_IOMMU is not configured.

The real problem here is that the iommu sequence ID overflows if
DMAR_UNITS_SUPPORTED is not big enough. This is purely a software
implementation issue, I am not sure whether user opt-in when building a
kernel package could help a lot here.

If we can't find a better way, can we just step back?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 2/2] vfio: Use device_iommu_capable()

2022-06-22 Thread Baolu Lu

On 2022/6/22 20:04, Robin Murphy wrote:

Use the new interface to check the capabilities for our device
specifically.

Signed-off-by: Robin Murphy 
---
  drivers/vfio/vfio.c | 2 +-
  drivers/vfio/vfio_iommu_type1.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 73bab04880d0..765d68192c88 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -621,7 +621,7 @@ int vfio_register_group_dev(struct vfio_device *device)
 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
 * restore cache coherency.
 */
-   if (!iommu_capable(device->dev->bus, IOMMU_CAP_CACHE_COHERENCY))
+   if (!device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY))
return -EINVAL;
  
  	return __vfio_register_dev(device,

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index e38b8bfde677..2107e95eb743 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2247,7 +2247,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
list_add(>next, >group_list);
  
  	msi_remap = irq_domain_check_msi_remap() ||

-   iommu_capable(iommu_api_dev->dev->bus, 
IOMMU_CAP_INTR_REMAP);
+   device_iommu_capable(iommu_api_dev->dev, 
IOMMU_CAP_INTR_REMAP);
  
  	if (!allow_unsafe_interrupts && !msi_remap) {

pr_warn("%s: No interrupt remapping support.  Use the module param 
\"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",


Reviewed-by: Lu Baolu 

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/2] vfio/type1: Simplify bus_type determination

2022-06-22 Thread Baolu Lu

On 2022/6/22 20:04, Robin Murphy wrote:

Since IOMMU groups are mandatory for drivers to support, it stands to
reason that any device which has been successfully be added to a group
must be on a bus supported by that IOMMU driver, and therefore a domain
viable for any device in the group must be viable for all devices in
the group. This already has to be the case for the IOMMU API's internal
default domain, for instance. Thus even if the group contains devices on
different buses, that can only mean that the IOMMU driver actually
supports such an odd topology, and so without loss of generality we can
expect the bus type of any device in a group to be suitable for IOMMU
API calls.


Ideally we could remove bus->iommu_ops and all IOMMU APIs go through the
dev_iommu_ops().



Replace vfio_bus_type() with a simple call to resolve an appropriate
member device from which to then derive a bus type. This is also a step
towards removing the vague bus-based interfaces from the IOMMU API, when
we can subsequently switch to using this device directly.

Furthermore, scrutiny reveals a lack of protection for the bus being
removed while vfio_iommu_type1_attach_group() is using it; the reference
that VFIO holds on the iommu_group ensures that data remains valid, but
does not prevent the group's membership changing underfoot. Holding the
vfio_device for as long as we need here also neatly solves this.

Signed-off-by: Robin Murphy 


Reviewed-by: Lu Baolu 

Best regards,
baolu


---

After sleeping on it, I decided to type up the helper function approach
to see how it looked in practice, and in doing so realised that with one
more tweak it could also subsume the locking out of the common paths as
well, so end up being a self-contained way for type1 to take care of its
own concern, which I rather like.

  drivers/vfio/vfio.c | 18 +-
  drivers/vfio/vfio.h |  3 +++
  drivers/vfio/vfio_iommu_type1.c | 30 +++---
  3 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 61e71c1154be..73bab04880d0 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -448,7 +448,7 @@ static void vfio_group_get(struct vfio_group *group)
   * Device objects - create, release, get, put, search
   */
  /* Device reference always implies a group reference */
-static void vfio_device_put(struct vfio_device *device)
+void vfio_device_put(struct vfio_device *device)
  {
if (refcount_dec_and_test(>refcount))
complete(>comp);
@@ -475,6 +475,22 @@ static struct vfio_device *vfio_group_get_device(struct 
vfio_group *group,
return NULL;
  }
  
+struct vfio_device *vfio_device_get_from_iommu(struct iommu_group *iommu_group)

+{
+   struct vfio_group *group = vfio_group_get_from_iommu(iommu_group);
+   struct vfio_device *device;
+
+   mutex_lock(>device_lock);
+   list_for_each_entry(device, >device_list, group_next) {
+   if (vfio_device_try_get(device)) {
+   mutex_unlock(>device_lock);
+   return device;
+   }
+   }
+   mutex_unlock(>device_lock);
+   return NULL;
+}
+
  /*
   * VFIO driver API
   */
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index a67130221151..e8f21e64541b 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -70,3 +70,6 @@ struct vfio_iommu_driver_ops {
  
  int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);

  void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
+
+struct vfio_device *vfio_device_get_from_iommu(struct iommu_group 
*iommu_group);
+void vfio_device_put(struct vfio_device *device);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e357..e38b8bfde677 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1679,18 +1679,6 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
return ret;
  }
  
-static int vfio_bus_type(struct device *dev, void *data)

-{
-   struct bus_type **bus = data;
-
-   if (*bus && *bus != dev->bus)
-   return -EINVAL;
-
-   *bus = dev->bus;
-
-   return 0;
-}
-
  static int vfio_iommu_replay(struct vfio_iommu *iommu,
 struct vfio_domain *domain)
  {
@@ -2159,7 +2147,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group;
struct vfio_domain *domain, *d;
-   struct bus_type *bus = NULL;
+   struct vfio_device *iommu_api_dev;
bool resv_msi, msi_remap;
phys_addr_t resv_msi_base = 0;
struct iommu_domain_geometry *geo;
@@ -2192,18 +2180,19 @@ static int vfio_iommu_type1_attach_group(void 
*iommu_data,
goto out_unlock;
}
  
-	/* Determine bus_type in order to allocate a domain */

-   ret = 

Re: [PATCH v3] iommu/vt-d: Make DMAR_UNITS_SUPPORTED a config setting

2022-06-22 Thread Baolu Lu

On 2022/6/16 02:36, Steve Wahl wrote:

To support up to 64 sockets with 10 DMAR units each (640), make the
value of DMAR_UNITS_SUPPORTED adjustable by a config variable,
CONFIG_DMAR_UNITS_SUPPORTED, and make its default 1024 when MAXSMP is
set.

If the available hardware exceeds DMAR_UNITS_SUPPORTED (previously set
to MAX_IO_APICS, or 128), it causes these messages: "DMAR: Failed to
allocate seq_id", "DMAR: Parse DMAR table failure.", and "x2apic: IRQ
remapping doesn't support X2APIC mode x2apic disabled"; and the system
fails to boot properly.

Signed-off-by: Steve Wahl
Reviewed-by: Kevin Tian
---

Note that we could not find a reason for connecting
DMAR_UNITS_SUPPORTED to MAX_IO_APICS as was done previously.  Perhaps
it seemed like the two would continue to match on earlier processors.
There doesn't appear to be kernel code that assumes that the value of
one is related to the other.

v2: Make this value a config option, rather than a fixed constant.  The default
values should match previous configuration except in the MAXSMP case.  Keeping 
the
value at a power of two was requested by Kevin Tian.

v3: Make the config option dependent upon DMAR_TABLE, as it is not used without 
this.

  drivers/iommu/intel/Kconfig | 7 +++
  include/linux/dmar.h| 6 +-
  2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 39a06d245f12..07aaebcb581d 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -9,6 +9,13 @@ config DMAR_PERF
  config DMAR_DEBUG
bool
  
+config DMAR_UNITS_SUPPORTED

+   int "Number of DMA Remapping Units supported"
+   depends on DMAR_TABLE
+   default 1024 if MAXSMP
+   default 128  if X86_64
+   default 64


With this patch applied, the IOMMU configuration looks like:

[*]   AMD IOMMU support
 AMD IOMMU Version 2 driver
[*] Enable AMD IOMMU internals in DebugFS
(1024) Number of DMA Remapping Units supported    NEW
[*]   Support for Intel IOMMU using DMA Remapping Devices
[*] Export Intel IOMMU internals in Debugfs
[*] Support for Shared Virtual Memory with Intel IOMMU
[*] Enable Intel DMA Remapping Devices by default
[*] Enable Intel IOMMU scalable mode by default
[*]   Support for Interrupt Remapping
[*]   OMAP IOMMU Support
[*] Export OMAP IOMMU internals in DebugFS
[*]   Rockchip IOMMU Support

The NEW item looks confusing. It looks to be a generic configurable
value though it's actually Intel DMAR specific. Any thoughts?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/1] iommu/vt-d: Fix RID2PASID setup failure

2022-06-22 Thread Baolu Lu

On 2022/6/22 17:09, Ethan Zhao wrote:


在 2022/6/22 12:41, Lu Baolu 写道:

The IOMMU driver shares the pasid table for PCI alias devices. When the
RID2PASID entry of the shared pasid table has been filled by the first
device, the subsequent devices will encounter the "DMAR: Setup RID2PASID
failed" failure as the pasid entry has already been marked as present. As
the result, the IOMMU probing process will be aborted.

This fixes it by skipping RID2PASID setting if the pasid entry has been
populated. This works because the IOMMU core ensures that only the same
IOMMU domain can be attached to all PCI alias devices at the same time.
Therefore the subsequent devices just try to setup the RID2PASID entry
with the same domain, which is negligible. This also adds domain validity
checks for more confidence anyway.

Fixes: ef848b7e5a6a0 ("iommu/vt-d: Setup pasid entry for RID2PASID 
support")

Reported-by: Chenyi Qiang 
Cc: sta...@vger.kernel.org
Signed-off-by: Lu Baolu 
---
  drivers/iommu/intel/pasid.c | 22 --
  1 file changed, 16 insertions(+), 6 deletions(-)

Change log:
v2:
  - Add domain validity check in RID2PASID entry setup.

diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index cb4c1d0cf25c..4f3525f3346f 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -575,6 +575,19 @@ static inline int pasid_enable_wpe(struct 
pasid_entry *pte)

  return 0;
  };
+/*
+ * Return true if @pasid is RID2PASID and the domain @did has already
+ * been setup to the @pte. Otherwise, return false. PCI alias devices
+ * probably share the single RID2PASID pasid entry in the shared pasid
+ * table. It's reasonable that those devices try to set a share domain
+ * in their probe paths.
+ */


I am thinking about the counter-part, the intel_pasid_tear_down_entry(),

Multi devices share the same PASID entry, then one was detached from the 
domain,


so the entry doesn't exist anymore, while another devices don't know 
about the change,


and they are using the mapping, is it possible case ?shared thing, no 
refer-counter,


am I missing something ?


No. You are right. When any alias device is hot-removed from the system,
the shared RID2PASID will be cleared without any notification to other
devices. Hence any DMAs from those devices are blocked.

We still have a lot to do for sharing pasid table among alias devices.
Before we arrive there, let's remove it for now.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 3/3] iommu: Clean up release_device checks

2022-06-22 Thread Baolu Lu

On 2022/6/22 15:17, Robin Murphy wrote:

On 2022-06-22 02:36, Baolu Lu wrote:

On 2022/6/21 23:14, Robin Murphy wrote:

Since .release_device is now called through per-device ops, any call
which gets as far as a driver definitely*is*  for that driver, for a
device which has successfully passed .probe_device, so all the checks to
that effect are now redundant and can be removed. In the same vein we
can also skip freeing fwspecs which are now managed by core code.


Does this depend on any other series? I didn't see iommu_fwspec_free()
called in the core code. Or I missed anything?


dev_iommu_free() cleans up param->fwspec directly (see b54240ad4943). 
FWIW the plan is that iommu_fwspec_free() should eventually go away - of 
the remaining uses after this, two are in fact similarly redundant 
already, since there's also a dev_iommu_free() in the probe failure 
path, and the other two should disappear in part 2 of fixing the bus 
probing mess (wherein the of_xlate step gets pulled into 
iommu_probe_device as well, and finally works correctly again).


Yes, it is. Thanks for the explanation.

Reviewed-by: Lu Baolu 

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 1/1] iommu/vt-d: Fix RID2PASID setup failure

2022-06-21 Thread Baolu Lu

On 2022/6/22 11:31, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Wednesday, June 22, 2022 11:28 AM

On 2022/6/22 11:06, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 21, 2022 5:04 PM

On 2022/6/21 13:48, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 21, 2022 12:28 PM

On 2022/6/21 11:46, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 21, 2022 11:39 AM

On 2022/6/21 10:54, Tian, Kevin wrote:

From: Lu Baolu
Sent: Monday, June 20, 2022 4:17 PM
@@ -2564,7 +2564,7 @@ static int domain_add_dev_info(struct
dmar_domain *domain, struct device *dev)
ret = intel_pasid_setup_second_level(iommu,
domain,
dev, PASID_RID2PASID);
spin_unlock_irqrestore(>lock, flags);
-   if (ret) {
+   if (ret && ret != -EBUSY) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
return ret;
--
2.25.1

It's cleaner to avoid this error at the first place, i.e. only do the
setup when the first device is attached to the pasid table.

The logic that identifies the first device might introduce additional
unnecessary complexity. Devices that share a pasid table are rare. I
even prefer to give up sharing tables so that the code can be
simpler.:-)


It's not that complex if you simply move device_attach_pasid_table()
out of intel_pasid_alloc_table(). Then do the setup if
list_empty(_table->dev) and then attach device to the
pasid table in domain_add_dev_info().

The pasid table is part of the device, hence a better place to
allocate/free the pasid table is in the device probe/release paths.
Things will become more complicated if we change the relationship

between

device and its pasid table when attaching/detaching a domain. That's
the reason why I thought it was additional complexity.


If you do want to follow current route it’s still cleaner to check
whether the pasid entry has pointed to the domain in the individual
setup function instead of blindly returning -EBUSY and then ignoring
it even if a real busy condition occurs. The setup functions can
just return zero for this benign alias case.

Kevin, how do you like this one?

diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index cb4c1d0cf25c..ecffd0129b2b 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -575,6 +575,16 @@ static inline int pasid_enable_wpe(struct
pasid_entry *pte)
return 0;
};

+/*
+ * Return true if @pasid is RID2PASID and the domain @did has already
+ * been setup to the @pte. Otherwise, return false.
+ */
+static inline bool
+rid2pasid_domain_valid(struct pasid_entry *pte, u32 pasid, u16 did)
+{
+   return pasid == PASID_RID2PASID && pasid_get_domain_id(pte) ==
did;
+}

better this is not restricted to RID2PASID only, e.g.

pasid_pte_match_domain()

and then read pasid from the pte to compare with the pasid argument.



The pasid value is not encoded in the pasid table entry. This validity
check is only for RID2PASID as alias devices share the single RID2PASID
entry. For other cases, we should always return -EBUSY as what the code
is doing now.



You are right.


Very appreciated for your input. I will update it with a v2.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 1/1] iommu/vt-d: Fix RID2PASID setup failure

2022-06-21 Thread Baolu Lu

On 2022/6/22 11:06, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 21, 2022 5:04 PM

On 2022/6/21 13:48, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 21, 2022 12:28 PM

On 2022/6/21 11:46, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 21, 2022 11:39 AM

On 2022/6/21 10:54, Tian, Kevin wrote:

From: Lu Baolu
Sent: Monday, June 20, 2022 4:17 PM
@@ -2564,7 +2564,7 @@ static int domain_add_dev_info(struct
dmar_domain *domain, struct device *dev)
ret = intel_pasid_setup_second_level(iommu,
domain,
dev, PASID_RID2PASID);
spin_unlock_irqrestore(>lock, flags);
-   if (ret) {
+   if (ret && ret != -EBUSY) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
return ret;
--
2.25.1

It's cleaner to avoid this error at the first place, i.e. only do the
setup when the first device is attached to the pasid table.

The logic that identifies the first device might introduce additional
unnecessary complexity. Devices that share a pasid table are rare. I
even prefer to give up sharing tables so that the code can be
simpler.:-)


It's not that complex if you simply move device_attach_pasid_table()
out of intel_pasid_alloc_table(). Then do the setup if
list_empty(_table->dev) and then attach device to the
pasid table in domain_add_dev_info().

The pasid table is part of the device, hence a better place to
allocate/free the pasid table is in the device probe/release paths.
Things will become more complicated if we change the relationship between
device and its pasid table when attaching/detaching a domain. That's
the reason why I thought it was additional complexity.


If you do want to follow current route it’s still cleaner to check
whether the pasid entry has pointed to the domain in the individual
setup function instead of blindly returning -EBUSY and then ignoring
it even if a real busy condition occurs. The setup functions can
just return zero for this benign alias case.

Kevin, how do you like this one?

diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index cb4c1d0cf25c..ecffd0129b2b 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -575,6 +575,16 @@ static inline int pasid_enable_wpe(struct
pasid_entry *pte)
return 0;
   };

+/*
+ * Return true if @pasid is RID2PASID and the domain @did has already
+ * been setup to the @pte. Otherwise, return false.
+ */
+static inline bool
+rid2pasid_domain_valid(struct pasid_entry *pte, u32 pasid, u16 did)
+{
+   return pasid == PASID_RID2PASID && pasid_get_domain_id(pte) ==
did;
+}

better this is not restricted to RID2PASID only, e.g. pasid_pte_match_domain()
and then read pasid from the pte to compare with the pasid argument.



The pasid value is not encoded in the pasid table entry. This validity
check is only for RID2PASID as alias devices share the single RID2PASID
entry. For other cases, we should always return -EBUSY as what the code
is doing now.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 1/1] iommu/vt-d: Fix RID2PASID setup failure

2022-06-21 Thread Baolu Lu

On 2022/6/22 10:56, Ethan Zhao wrote:

在 2022/6/20 16:17, Lu Baolu 写道:

The IOMMU driver shares the pasid table for PCI alias devices. When the
RID2PASID entry of the shared pasid table has been filled by the first
device, the subsequent devices will encounter the "DMAR: Setup RID2PASID
failed" failure as the pasid entry has already been marked as present. As
the result, the IOMMU probing process will be aborted.

This fixes it by skipping RID2PASID setting if the pasid entry has been
populated. This works because the IOMMU core ensures that only the same
IOMMU domain can be attached to all PCI alias devices at the same time.
Therefore the subsequent devices just try to setup the RID2PASID entry
with the same domain, which is negligible.
     We have two customers who reported the issue "DMAR: Setup RID2PASID 
failed",


Two ASPEED devices locate behind one PCIe-PCI bridge and iommu SM, PT 
mode is enabled.  Most


Interesting thing is the second device is only used by BIOS, and BIOS 
left it to OS without shutting down,


and it is useless for OS.


This sounds odd. Isn't this a bug?


Is there a practical case where multiple devices behind a
PCIe-PCI bridge share the same


PASID entry without any security concern ? these two customer's case is 
not.


The devices underneath the PCIe-PCI bridge are alias devices of the
bridge. PCI alias devices always sit in the same group (the minimal unit
that IOMMU guarantees isolation) and can only be attached with a same
domain (managed I/O address space). Hence, there's no security concern
if they further share the pasid table.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 3/3] iommu: Clean up release_device checks

2022-06-21 Thread Baolu Lu

On 2022/6/21 23:14, Robin Murphy wrote:

Since .release_device is now called through per-device ops, any call
which gets as far as a driver definitely*is*  for that driver, for a
device which has successfully passed .probe_device, so all the checks to
that effect are now redundant and can be removed. In the same vein we
can also skip freeing fwspecs which are now managed by core code.


Does this depend on any other series? I didn't see iommu_fwspec_free()
called in the core code. Or I missed anything?



Signed-off-by: Robin Murphy
---
  drivers/iommu/apple-dart.c  |  3 ---
  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  8 +---
  drivers/iommu/arm/arm-smmu/arm-smmu.c   | 14 +++---
  drivers/iommu/arm/arm-smmu/qcom_iommu.c | 11 ---
  drivers/iommu/exynos-iommu.c|  3 ---
  drivers/iommu/mtk_iommu.c   |  5 -
  drivers/iommu/mtk_iommu_v1.c|  5 -
  drivers/iommu/sprd-iommu.c  | 11 ---
  drivers/iommu/virtio-iommu.c|  8 +---
  9 files changed, 5 insertions(+), 63 deletions(-)


Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/3] iommu: Make .release_device optional

2022-06-21 Thread Baolu Lu

On 2022/6/21 23:14, Robin Murphy wrote:

Many drivers do nothing meaningful for .release_device, and it's neatly
abstracted to just two callsites in the core code, so let's make it
optional to implement.

Signed-off-by: Robin Murphy 
---
  drivers/iommu/fsl_pamu_domain.c | 5 -
  drivers/iommu/iommu.c   | 6 --
  drivers/iommu/msm_iommu.c   | 5 -
  drivers/iommu/sun50i-iommu.c| 3 ---
  drivers/iommu/tegra-gart.c  | 5 -
  drivers/iommu/tegra-smmu.c  | 3 ---
  6 files changed, 4 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 94b4589dc67c..011f9ab7f743 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -447,15 +447,10 @@ static struct iommu_device *fsl_pamu_probe_device(struct 
device *dev)
return _iommu;
  }
  
-static void fsl_pamu_release_device(struct device *dev)

-{
-}
-
  static const struct iommu_ops fsl_pamu_ops = {
.capable= fsl_pamu_capable,
.domain_alloc   = fsl_pamu_domain_alloc,
.probe_device   = fsl_pamu_probe_device,
-   .release_device = fsl_pamu_release_device,
.device_group   = fsl_pamu_device_group,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = fsl_pamu_attach_device,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 06d6989f07f6..8b4fc7e62b99 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -259,7 +259,8 @@ static int __iommu_probe_device(struct device *dev, struct 
list_head *group_list
return 0;
  
  out_release:

-   ops->release_device(dev);
+   if (ops->release_device)
+   ops->release_device(dev);
  
  out_module_put:

module_put(ops->owner);
@@ -337,7 +338,8 @@ void iommu_release_device(struct device *dev)
iommu_device_unlink(dev->iommu->iommu_dev, dev);
  
  	ops = dev_iommu_ops(dev);

-   ops->release_device(dev);
+   if (ops->release_device)
+   ops->release_device(dev);
  
  	iommu_group_remove_device(dev);

module_put(ops->owner);
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index f09aedfdd462..428919a474c1 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -394,10 +394,6 @@ static struct iommu_device *msm_iommu_probe_device(struct 
device *dev)
return >iommu;
  }
  
-static void msm_iommu_release_device(struct device *dev)

-{
-}
-
  static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device 
*dev)
  {
int ret = 0;
@@ -677,7 +673,6 @@ irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id)
  static struct iommu_ops msm_iommu_ops = {
.domain_alloc = msm_iommu_domain_alloc,
.probe_device = msm_iommu_probe_device,
-   .release_device = msm_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = MSM_IOMMU_PGSIZES,
.of_xlate = qcom_iommu_of_xlate,
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index c54ab477b8fd..a84c63518773 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -738,8 +738,6 @@ static struct iommu_device 
*sun50i_iommu_probe_device(struct device *dev)
return >iommu;
  }
  
-static void sun50i_iommu_release_device(struct device *dev) {}

-
  static struct iommu_group *sun50i_iommu_device_group(struct device *dev)
  {
struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev);
@@ -764,7 +762,6 @@ static const struct iommu_ops sun50i_iommu_ops = {
.domain_alloc   = sun50i_iommu_domain_alloc,
.of_xlate   = sun50i_iommu_of_xlate,
.probe_device   = sun50i_iommu_probe_device,
-   .release_device = sun50i_iommu_release_device,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = sun50i_iommu_attach_device,
.detach_dev = sun50i_iommu_detach_device,
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index a6700a40a6f8..e5ca3cf1a949 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -246,10 +246,6 @@ static struct iommu_device *gart_iommu_probe_device(struct 
device *dev)
return _handle->iommu;
  }
  
-static void gart_iommu_release_device(struct device *dev)

-{
-}
-
  static int gart_iommu_of_xlate(struct device *dev,
   struct of_phandle_args *args)
  {
@@ -273,7 +269,6 @@ static void gart_iommu_sync(struct iommu_domain *domain,
  static const struct iommu_ops gart_iommu_ops = {
.domain_alloc   = gart_iommu_domain_alloc,
.probe_device   = gart_iommu_probe_device,
-   .release_device = gart_iommu_release_device,
.device_group   = generic_device_group,
.pgsize_bitmap  = GART_IOMMU_PGSIZES,
.of_xlate   = gart_iommu_of_xlate,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 

Re: [PATCH 1/1] iommu/vt-d: Fix RID2PASID setup failure

2022-06-21 Thread Baolu Lu

On 2022/6/21 13:48, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Tuesday, June 21, 2022 12:28 PM

On 2022/6/21 11:46, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Tuesday, June 21, 2022 11:39 AM

On 2022/6/21 10:54, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Monday, June 20, 2022 4:17 PM
@@ -2564,7 +2564,7 @@ static int domain_add_dev_info(struct
dmar_domain *domain, struct device *dev)
ret = intel_pasid_setup_second_level(iommu,
domain,
dev, PASID_RID2PASID);
spin_unlock_irqrestore(>lock, flags);
-   if (ret) {
+   if (ret && ret != -EBUSY) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
return ret;
--
2.25.1


It's cleaner to avoid this error at the first place, i.e. only do the
setup when the first device is attached to the pasid table.


The logic that identifies the first device might introduce additional
unnecessary complexity. Devices that share a pasid table are rare. I
even prefer to give up sharing tables so that the code can be
simpler.:-)



It's not that complex if you simply move device_attach_pasid_table()
out of intel_pasid_alloc_table(). Then do the setup if
list_empty(_table->dev) and then attach device to the
pasid table in domain_add_dev_info().


The pasid table is part of the device, hence a better place to
allocate/free the pasid table is in the device probe/release paths.
Things will become more complicated if we change relationship between
device and its pasid table when attaching/detaching a domain. That's
the reason why I thought it was additional complexity.



If you do want to follow current route it’s still cleaner to check
whether the pasid entry has pointed to the domain in the individual
setup function instead of blindly returning -EBUSY and then ignoring
it even if a real busy condition occurs. The setup functions can
just return zero for this benign alias case.


Kevin, how do you like this one?

diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index cb4c1d0cf25c..ecffd0129b2b 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -575,6 +575,16 @@ static inline int pasid_enable_wpe(struct 
pasid_entry *pte)

return 0;
 };

+/*
+ * Return true if @pasid is RID2PASID and the domain @did has already
+ * been setup to the @pte. Otherwise, return false.
+ */
+static inline bool
+rid2pasid_domain_valid(struct pasid_entry *pte, u32 pasid, u16 did)
+{
+   return pasid == PASID_RID2PASID && pasid_get_domain_id(pte) == did;
+}
+
 /*
  * Set up the scalable mode pasid table entry for first only
  * translation type.
@@ -595,9 +605,8 @@ int intel_pasid_setup_first_level(struct intel_iommu 
*iommu,

if (WARN_ON(!pte))
return -EINVAL;

-   /* Caller must ensure PASID entry is not in use. */
if (pasid_pte_is_present(pte))
-   return -EBUSY;
+   return rid2pasid_domain_valid(pte, pasid, did) ? 0: -EBUSY;

pasid_clear_entry(pte);

@@ -698,9 +707,8 @@ int intel_pasid_setup_second_level(struct 
intel_iommu *iommu,

return -ENODEV;
}

-   /* Caller must ensure PASID entry is not in use. */
if (pasid_pte_is_present(pte))
-   return -EBUSY;
+   return rid2pasid_domain_valid(pte, pasid, did) ? 0: -EBUSY;

pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
@@ -738,9 +746,8 @@ int intel_pasid_setup_pass_through(struct 
intel_iommu *iommu,

return -ENODEV;
}

-   /* Caller must ensure PASID entry is not in use. */
if (pasid_pte_is_present(pte))
-   return -EBUSY;
+   return rid2pasid_domain_valid(pte, pasid, did) ? 0: -EBUSY;

pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);

--
Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 1/1] iommu/vt-d: Fix RID2PASID setup failure

2022-06-21 Thread Baolu Lu

On 2022/6/21 13:48, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Tuesday, June 21, 2022 12:28 PM

On 2022/6/21 11:46, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Tuesday, June 21, 2022 11:39 AM

On 2022/6/21 10:54, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Monday, June 20, 2022 4:17 PM
@@ -2564,7 +2564,7 @@ static int domain_add_dev_info(struct
dmar_domain *domain, struct device *dev)
ret = intel_pasid_setup_second_level(iommu,
domain,
dev, PASID_RID2PASID);
spin_unlock_irqrestore(>lock, flags);
-   if (ret) {
+   if (ret && ret != -EBUSY) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
return ret;
--
2.25.1


It's cleaner to avoid this error at the first place, i.e. only do the
setup when the first device is attached to the pasid table.


The logic that identifies the first device might introduce additional
unnecessary complexity. Devices that share a pasid table are rare. I
even prefer to give up sharing tables so that the code can be
simpler.:-)



It's not that complex if you simply move device_attach_pasid_table()
out of intel_pasid_alloc_table(). Then do the setup if
list_empty(_table->dev) and then attach device to the
pasid table in domain_add_dev_info().


The pasid table is part of the device, hence a better place to
allocate/free the pasid table is in the device probe/release paths.
Things will become more complicated if we change relationship between
device and its pasid table when attaching/detaching a domain. That's
the reason why I thought it was additional complexity.



If you do want to follow current route it’s still cleaner to check
whether the pasid entry has pointed to the domain in the individual
setup function instead of blindly returning -EBUSY and then ignoring
it even if a real busy condition occurs. The setup functions can
just return zero for this benign alias case.


Fair enough. Let me improve it.

--
Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 1/1] iommu/vt-d: Fix RID2PASID setup failure

2022-06-20 Thread Baolu Lu

On 2022/6/21 11:46, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Tuesday, June 21, 2022 11:39 AM

On 2022/6/21 10:54, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Monday, June 20, 2022 4:17 PM
@@ -2564,7 +2564,7 @@ static int domain_add_dev_info(struct
dmar_domain *domain, struct device *dev)
ret = intel_pasid_setup_second_level(iommu,
domain,
dev, PASID_RID2PASID);
spin_unlock_irqrestore(>lock, flags);
-   if (ret) {
+   if (ret && ret != -EBUSY) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
return ret;
--
2.25.1


It's cleaner to avoid this error at the first place, i.e. only do the
setup when the first device is attached to the pasid table.


The logic that identifies the first device might introduce additional
unnecessary complexity. Devices that share a pasid table are rare. I
even prefer to give up sharing tables so that the code can be
simpler.:-)



It's not that complex if you simply move device_attach_pasid_table()
out of intel_pasid_alloc_table(). Then do the setup if
list_empty(_table->dev) and then attach device to the
pasid table in domain_add_dev_info().


The pasid table is part of the device, hence a better place to
allocate/free the pasid table is in the device probe/release paths.
Things will become more complicated if we change relationship between
device and its pasid table when attaching/detaching a domain. That's
the reason why I thought it was additional complexity.

--
Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/1] iommu/vt-d: Fix RID2PASID setup failure

2022-06-20 Thread Baolu Lu

On 2022/6/21 10:54, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Monday, June 20, 2022 4:17 PM
@@ -2564,7 +2564,7 @@ static int domain_add_dev_info(struct
dmar_domain *domain, struct device *dev)
ret = intel_pasid_setup_second_level(iommu,
domain,
dev, PASID_RID2PASID);
spin_unlock_irqrestore(>lock, flags);
-   if (ret) {
+   if (ret && ret != -EBUSY) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
return ret;
--
2.25.1


It's cleaner to avoid this error at the first place, i.e. only do the
setup when the first device is attached to the pasid table.


The logic that identifies the first device might introduce additional
unnecessary complexity. Devices that share a pasid table are rare. I
even prefer to give up sharing tables so that the code can be
simpler.:-)

--
Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/1] iommu/vt-d: Fix RID2PASID setup failure

2022-06-20 Thread Baolu Lu

On 2022/6/20 16:31, Yi Liu wrote:

Hi Baolu,

On 2022/6/20 16:17, Lu Baolu wrote:

The IOMMU driver shares the pasid table for PCI alias devices. When the
RID2PASID entry of the shared pasid table has been filled by the first
device, the subsequent devices will encounter the "DMAR: Setup RID2PASID
failed" failure as the pasid entry has already been marke as present. As


s/marke/marked/


Updated. Thanks!


the result, the IOMMU probing process will be aborted.

This fixes it by skipping RID2PASID setting if the pasid entry has been
populated. This works because the IOMMU core ensures that only the same
IOMMU domain can be attached to all PCI alias devices at the same time.


nit. this sentence is a little bit hard to interpret. :-) I guess what you want
to describe is the PCI alias devices should be attached to the same domain
instead of different domain. right?


Yes.



also, does it apply to all domain types? e.g. the SVA domains introduced 
in "iommu: SVA and IOPF refactoring"


No. Only about the RID2PASID.




Therefore the subsequent devices just try to setup the RID2PASID entry
with the same domain, which is negligible.

Fixes: ef848b7e5a6a0 ("iommu/vt-d: Setup pasid entry for RID2PASID 
support")

Reported-by: Chenyi Qiang 
Cc: sta...@vger.kernel.org
Signed-off-by: Lu Baolu 
---
  drivers/iommu/intel/iommu.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 44016594831d..b9966c01a2a2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2564,7 +2564,7 @@ static int domain_add_dev_info(struct 
dmar_domain *domain, struct device *dev)

  ret = intel_pasid_setup_second_level(iommu, domain,
  dev, PASID_RID2PASID);
  spin_unlock_irqrestore(>lock, flags);
-    if (ret) {
+    if (ret && ret != -EBUSY) {
  dev_err(dev, "Setup RID2PASID failed\n");
  dmar_remove_one_dev_info(dev);
  return ret;




--
Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v8 05/11] iommu/vt-d: Add SVA domain support

2022-06-19 Thread Baolu Lu

On 2022/6/17 15:47, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Tuesday, June 7, 2022 9:50 AM

+
+static const struct iommu_domain_ops intel_svm_domain_ops = {
+   .set_dev_pasid  = intel_svm_attach_dev_pasid,
+   .block_dev_pasid= intel_svm_detach_dev_pasid,
+   .free   = intel_svm_domain_free,
+};
+


It's clearer to use set/block for intel callbacks.


Yes. That reads clearer.

--
Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 04/11] iommu: Add sva iommu_domain support

2022-06-19 Thread Baolu Lu

On 2022/6/17 15:43, Tian, Kevin wrote:

From: Baolu Lu
Sent: Friday, June 10, 2022 3:16 PM



+#define __IOMMU_DOMAIN_HOST_VA (1U << 5)  /* Host CPU virtual

address */


Do you mean general CPU VA? or Host CPU VA, I'm reading the latter as

2nd

stage?


Host CPU VA. In the near future, we will add another flag _GUEST_VA, so
that the shared page table types are distinguished.


How does the kernel knows that an user page table translates guest VA?
IMHO I don't think the kernel should care about it except managing
all the aspects related to the user page table itself...


Okay.








+
   /*
* This are the possible domain-types
*
@@ -86,15 +89,24 @@ struct iommu_domain_geometry {
   #define IOMMU_DOMAIN_DMA_FQ

(__IOMMU_DOMAIN_PAGING |\

 __IOMMU_DOMAIN_DMA_API |   \
 __IOMMU_DOMAIN_DMA_FQ)
+#define IOMMU_DOMAIN_SVA   (__IOMMU_DOMAIN_SHARED |

\

+__IOMMU_DOMAIN_HOST_VA)


Doesn't shared automatically mean CPU VA? Do we need another flag?


Yes. Shared means CPU VA, but there're many types. Besides above two, we
also see the shared KVM/EPT.



Will the two sharing scenarios share any common code? If not then
having a separate flag bit is meaningless.


So far, I haven't seen the need for common code. I've ever thought about
the common notifier callback for page table entry update of SVA and KVM.
But there has been no feasible plan.



It might be more straightforward to be:

#define IOMMU_DOMAIN_SVA__IOMMU_DOMAIN_SVA
#define IOMMU_DOMAIN_KVM __IOMMU_DOMAIN_KVM
#define IOMMU_DOMAIN_USER __IOMMU_DOMAIN_USER


I am okay with this and we can add some shared bits later if we need to
consolidate any code.

--
Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-15 Thread Baolu Lu

On 2022/6/15 23:40, Jason Gunthorpe wrote:

On Fri, Jun 10, 2022 at 01:37:20PM +0800, Baolu Lu wrote:

On 2022/6/9 20:49, Jason Gunthorpe wrote:

+void iommu_free_pgtbl_pages(struct iommu_domain *domain,
+   struct list_head *pages)
+{
+   struct page *page, *next;
+
+   if (!domain->concurrent_traversal) {
+   put_pages_list(pages);
+   return;
+   }
+
+   list_for_each_entry_safe(page, next, pages, lru) {
+   list_del(>lru);
+   call_rcu(>rcu_head, pgtble_page_free_rcu);
+   }

It seems OK, but I wonder if there is benifit to using
put_pages_list() from the rcu callback


The price is that we need to allocate a "struct list_head" and free it
in the rcu callback as well. Currently the list_head is sitting in the
stack.


You'd have to use a different struct page layout so that the list_head
was in the struct page and didn't overlap with the rcu_head


Okay, let me head this direction in the next version.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-15 Thread Baolu Lu

On 2022/6/16 08:03, Nicolin Chen wrote:

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 44016594831d..0dd13330fe12 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4323,7 +4323,7 @@ static int prepare_domain_attach_device(struct 
iommu_domain *domain,
return -ENODEV;
  
  	if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))

-   return -EOPNOTSUPP;
+   return -EMEDIUMTYPE;
  
  	/* check if this iommu agaw is sufficient for max mapped address */

addr_width = agaw_to_width(iommu->agaw);
@@ -4331,10 +4331,10 @@ static int prepare_domain_attach_device(struct 
iommu_domain *domain,
addr_width = cap_mgaw(iommu->cap);
  
  	if (dmar_domain->max_addr > (1LL << addr_width)) {

-   dev_err(dev, "%s: iommu width (%d) is not "
+   dev_dbg(dev, "%s: iommu width (%d) is not "
"sufficient for the mapped address (%llx)\n",
__func__, addr_width, dmar_domain->max_addr);
-   return -EFAULT;
+   return -EMEDIUMTYPE;
}
dmar_domain->gaw = addr_width;


Can we simply remove the dev_err()? As the return value has explicitly
explained the failure reason, putting a print statement won't help much.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 03/12] iommu/vt-d: Remove clearing translation data in disable_dmar_iommu()

2022-06-15 Thread Baolu Lu

On 2022/6/15 14:22, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Tuesday, June 14, 2022 3:21 PM

On 2022/6/14 14:49, Tian, Kevin wrote:

From: Lu Baolu
Sent: Tuesday, June 14, 2022 10:51 AM

The disable_dmar_iommu() is called when IOMMU initialization fails or
the IOMMU is hot-removed from the system. In both cases, there is no
need to clear the IOMMU translation data structures for devices.

On the initialization path, the device probing only happens after the
IOMMU is initialized successfully, hence there're no translation data
structures.

Out of curiosity. With kexec the IOMMU may contain stale mappings
from the old kernel. Then is it meaningful to disable IOMMU after the
new kernel fails to initialize it properly?


For kexec kernel, if the IOMMU is detected to be pre-enabled, the IOMMU
driver will try to copy tables from the old kernel. If copying table
fails, the IOMMU driver will disable IOMMU and do the normal
initialization.



What about an error occurred after copying table in the initialization
path? The new kernel will be in a state assuming iommu is disabled
but it is still enabled using an old mapping for certain devices...
  


If copying table failed, the translation will be disabled and a clean
root table will be used.

if (translation_pre_enabled(iommu)) {
pr_info("Translation already enabled - trying to copy 
translation structures\n");


ret = copy_translation_tables(iommu);
if (ret) {
/*
 * We found the IOMMU with translation
 * enabled - but failed to copy over the
 * old root-entry table. Try to proceed
 * by disabling translation now and
 * allocating a clean root-entry table.
 * This might cause DMAR faults, but
 * probably the dump will still succeed.
 */
pr_err("Failed to copy translation tables from previous 
kernel for %s\n",

   iommu->name);
iommu_disable_translation(iommu);
clear_translation_pre_enabled(iommu);
} else {
pr_info("Copied translation tables from previous kernel 
for %s\n",

iommu->name);
}
}

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 01/12] iommu/vt-d: debugfs: Remove device_domain_lock usage

2022-06-15 Thread Baolu Lu

On 2022/6/15 14:13, Tian, Kevin wrote:

From: Baolu Lu
Sent: Wednesday, June 15, 2022 9:54 AM

On 2022/6/14 14:43, Tian, Kevin wrote:

From: Lu Baolu
Sent: Tuesday, June 14, 2022 10:51 AM

The domain_translation_struct debugfs node is used to dump the DMAR
page
tables for the PCI devices. It potentially races with setting domains to
devices. The existing code uses a global spinlock device_domain_lock to
avoid the races, but this is problematical as this lock is only used to
protect the device tracking lists of each domain.

is it really problematic at this point? Before following patches are applied
using device_domain_lock should have similar effect as holding the group
lock.

Here it might make more sense to just focus on removing the use of
device_domain_lock outside of iommu.c. Just that using group lock is
cleaner and more compatible to following cleanups.

and it's worth mentioning that racing with page table updates is out
of the scope of this series. Probably also add a comment in the code
to clarify this point.


Hi Kevin,

How do you like below updated patch?

Yes, this is better.


  From cecc9a0623780a11c4ea4d0a15aa6187f01541c4 Mon Sep 17 00:00:00
2001
From: Lu Baolu
Date: Sun, 29 May 2022 10:18:56 +0800
Subject: [PATCH 1/1] iommu/vt-d: debugfs: Remove device_domain_lock
usage

The domain_translation_struct debugfs node is used to dump the DMAR
page
tables for the PCI devices. It potentially races with setting domains to
devices. The existing code uses the global spinlock device_domain_lock to
avoid the races.

This removes the use of device_domain_lock outside of iommu.c by replacing
it with the group mutex lock. Using the group mutex lock is cleaner and
more compatible to following cleanups.

Signed-off-by: Lu Baolu
---
   drivers/iommu/intel/debugfs.c | 42 +--
   drivers/iommu/intel/iommu.c   |  2 +-
   drivers/iommu/intel/iommu.h   |  1 -
   3 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index d927ef10641b..f4acd8993f60 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -342,13 +342,13 @@ static void pgtable_walk_level(struct seq_file *m,
struct dma_pte *pde,
}
   }

-static int show_device_domain_translation(struct device *dev, void *data)
+static int __show_device_domain_translation(struct device *dev, void *data)
   {
-   struct device_domain_info *info = dev_iommu_priv_get(dev);
-   struct dmar_domain *domain = info->domain;
+   struct dmar_domain *domain;
struct seq_file *m = data;
u64 path[6] = { 0 };

+   domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
if (!domain)
return 0;

@@ -359,20 +359,38 @@ static int show_device_domain_translation(struct
device *dev, void *data)
pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
seq_putc(m, '\n');

-   return 0;
+   return 1;
   }

-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int show_device_domain_translation(struct device *dev, void *data)
   {
-   unsigned long flags;
-   int ret;
+   struct iommu_group *group;

-   spin_lock_irqsave(_domain_lock, flags);
-   ret = bus_for_each_dev(_bus_type, NULL, m,
-  show_device_domain_translation);
-   spin_unlock_irqrestore(_domain_lock, flags);
+   group = iommu_group_get(dev);
+   if (group) {
+   /*
+* The group->mutex is held across the callback, which will
+* block calls to iommu_attach/detach_group/device. Hence,
+* the domain of the device will not change during traversal.
+*
+* All devices in an iommu group share a single domain,
hence
+* we only dump the domain of the first device. Even though,

bus_for_each_dev() will still lead to duplicated dump in the same group
but probably we can live with it for a debug interface.



Yes. This is what it was. Ideally we could walk the iommu groups and
dump the device names belonging to the group and it's domain mappings,
but I was not willing to add any helpers in the iommu core just for a
debugfs use.

---
Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 01/12] iommu/vt-d: debugfs: Remove device_domain_lock usage

2022-06-14 Thread Baolu Lu

On 2022/6/14 14:43, Tian, Kevin wrote:

From: Lu Baolu
Sent: Tuesday, June 14, 2022 10:51 AM

The domain_translation_struct debugfs node is used to dump the DMAR
page
tables for the PCI devices. It potentially races with setting domains to
devices. The existing code uses a global spinlock device_domain_lock to
avoid the races, but this is problematical as this lock is only used to
protect the device tracking lists of each domain.

is it really problematic at this point? Before following patches are applied
using device_domain_lock should have similar effect as holding the group
lock.

Here it might make more sense to just focus on removing the use of
device_domain_lock outside of iommu.c. Just that using group lock is
cleaner and more compatible to following cleanups.

and it's worth mentioning that racing with page table updates is out
of the scope of this series. Probably also add a comment in the code
to clarify this point.



Hi Kevin,

How do you like below updated patch?

From cecc9a0623780a11c4ea4d0a15aa6187f01541c4 Mon Sep 17 00:00:00 2001
From: Lu Baolu 
Date: Sun, 29 May 2022 10:18:56 +0800
Subject: [PATCH 1/1] iommu/vt-d: debugfs: Remove device_domain_lock usage

The domain_translation_struct debugfs node is used to dump the DMAR page
tables for the PCI devices. It potentially races with setting domains to
devices. The existing code uses the global spinlock device_domain_lock to
avoid the races.

This removes the use of device_domain_lock outside of iommu.c by replacing
it with the group mutex lock. Using the group mutex lock is cleaner and
more compatible to following cleanups.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/debugfs.c | 42 +--
 drivers/iommu/intel/iommu.c   |  2 +-
 drivers/iommu/intel/iommu.h   |  1 -
 3 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index d927ef10641b..f4acd8993f60 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -342,13 +342,13 @@ static void pgtable_walk_level(struct seq_file *m, 
struct dma_pte *pde,

}
 }

-static int show_device_domain_translation(struct device *dev, void *data)
+static int __show_device_domain_translation(struct device *dev, void *data)
 {
-   struct device_domain_info *info = dev_iommu_priv_get(dev);
-   struct dmar_domain *domain = info->domain;
+   struct dmar_domain *domain;
struct seq_file *m = data;
u64 path[6] = { 0 };

+   domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
if (!domain)
return 0;

@@ -359,20 +359,38 @@ static int show_device_domain_translation(struct 
device *dev, void *data)

pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
seq_putc(m, '\n');

-   return 0;
+   return 1;
 }

-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int show_device_domain_translation(struct device *dev, void *data)
 {
-   unsigned long flags;
-   int ret;
+   struct iommu_group *group;

-   spin_lock_irqsave(_domain_lock, flags);
-   ret = bus_for_each_dev(_bus_type, NULL, m,
-  show_device_domain_translation);
-   spin_unlock_irqrestore(_domain_lock, flags);
+   group = iommu_group_get(dev);
+   if (group) {
+   /*
+* The group->mutex is held across the callback, which will
+* block calls to iommu_attach/detach_group/device. Hence,
+* the domain of the device will not change during traversal.
+*
+* All devices in an iommu group share a single domain, hence
+* we only dump the domain of the first device. Even though,
+* this code still possibly races with the iommu_unmap()
+* interface. This could be solved by RCU-freeing the page
+* table pages in the iommu_unmap() path.
+*/
+   iommu_group_for_each_dev(group, data,
+__show_device_domain_translation);
+   iommu_group_put(group);
+   }

-   return ret;
+   return 0;
+}
+
+static int domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+   return bus_for_each_dev(_bus_type, NULL, m,
+   show_device_domain_translation);
 }
 DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 19024dc52735..a39d72a9d1cf 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -314,7 +314,7 @@ static int iommu_skip_te_disable;
 #define IDENTMAP_GFX   2
 #define IDENTMAP_AZALIA4

-DEFINE_SPINLOCK(device_domain_lock);
+static DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);

 /*
diff --git a/drivers/iommu/intel/iommu.h 

Re: [PATCH v2] iommu/vt-d: Make DMAR_UNITS_SUPPORTED a config setting

2022-06-14 Thread Baolu Lu

On 2022/6/15 05:12, Steve Wahl wrote:

On Tue, Jun 14, 2022 at 12:01:45PM -0700, Jerry Snitselaar wrote:

On Tue, Jun 14, 2022 at 11:45:35AM -0500, Steve Wahl wrote:

On Tue, Jun 14, 2022 at 10:21:29AM +0800, Baolu Lu wrote:

On 2022/6/14 09:54, Jerry Snitselaar wrote:

On Mon, Jun 13, 2022 at 6:51 PM Baolu Lu  wrote:


On 2022/6/14 09:44, Jerry Snitselaar wrote:

On Mon, Jun 13, 2022 at 6:36 PM Baolu Lu  wrote:

On 2022/6/14 04:57, Jerry Snitselaar wrote:

On Thu, May 12, 2022 at 10:13:09AM -0500, Steve Wahl wrote:

To support up to 64 sockets with 10 DMAR units each (640), make the
value of DMAR_UNITS_SUPPORTED adjustable by a config variable,
CONFIG_DMAR_UNITS_SUPPORTED, and make it's default 1024 when MAXSMP is
set.

If the available hardware exceeds DMAR_UNITS_SUPPORTED (previously set
to MAX_IO_APICS, or 128), it causes these messages: "DMAR: Failed to
allocate seq_id", "DMAR: Parse DMAR table failure.", and "x2apic: IRQ
remapping doesn't support X2APIC mode x2apic disabled"; and the system
fails to boot properly.

Signed-off-by: Steve Wahl
---

Note that we could not find a reason for connecting
DMAR_UNITS_SUPPORTED to MAX_IO_APICS as was done previously.  Perhaps
it seemed like the two would continue to match on earlier processors.
There doesn't appear to be kernel code that assumes that the value of
one is related to the other.

v2: Make this value a config option, rather than a fixed constant.  The default
values should match previous configuration except in the MAXSMP case.  Keeping 
the
value at a power of two was requested by Kevin Tian.

 drivers/iommu/intel/Kconfig | 6 ++
 include/linux/dmar.h| 6 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 247d0f2d5fdf..fdbda77ac21e 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -9,6 +9,12 @@ config DMAR_PERF
 config DMAR_DEBUG
bool

+config DMAR_UNITS_SUPPORTED
+int "Number of DMA Remapping Units supported"

Also, should there be a "depends on (X86 || IA64)" here?

Do you have any compilation errors or warnings?

Best regards,
baolu


I think it is probably harmless since it doesn't get used elsewhere,
but our tooling was complaining to me because DMAR_UNITS_SUPPORTED was
being autogenerated into the configs for the non-x86 architectures we
build (aarch64, s390x, ppcle64).
We have files corresponding to the config options that it looks at,
and I had one for x86 and not the others so it noticed the
discrepancy.


So with "depends on (X86 || IA64)", that tool doesn't complain anymore,
right?

Best regards,
baolu



Yes, with the depends it no longer happens.


The dmar code only exists on the X86 and IA64 arches. Adding this dependency
makes sense to me. I will add it if no objections.


I think that works after Baolu's patchset that makes intel-iommu.h
private.  I'm pretty sure it wouldn't have worked before that.

No objections.



Yes, I think applying it with the depends prior to Baolu's change would
still run into the issue from the KTR report if someone compiled without
INTEL_IOMMU enabled.

This was dealing with being able to do something like:

make allmodconfig ARCH=arm64 ; grep DMAR_UNITS .config

and finding CONFIG_DMAR_UNITS_SUPPORTED=64.

Thinking some more though, instead of the depends being on the arch
would depending on DMAR_TABLE or INTEL_IOMMU be more appropriate?


At least in my limited exploration, depending on INTEL_IOMMU yields
compile errors, but depending upon DMAR_TABLE appears to work fine.


DMAR_TABLE is used beyond INTEL_IOMMU, so depending on DMAR_TABLE seems
better.

Steve, do you mind posting a v3 with this fixed?

Best regards,
baolu

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/1] iommu/vt-d: Add set_dev_pasid callbacks for default domain

2022-06-14 Thread Baolu Lu

On 2022/6/14 15:19, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Tuesday, June 14, 2022 2:13 PM

On 2022/6/14 13:36, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 14, 2022 12:48 PM

On 2022/6/14 12:02, Tian, Kevin wrote:

From: Lu Baolu
Sent: Tuesday, June 14, 2022 11:44 AM

This allows the upper layers to set a domain to a PASID of a device
if the PASID feature is supported by the IOMMU hardware. The typical
use cases are, for example, kernel DMA with PASID and hardware
assisted mediated device drivers.


why is it not part of the series for those use cases? There is no consumer
of added callbacks in this patch...

It could be. I just wanted to maintain the integrity of Intel IOMMU
driver implementation.

but let's not add dead code. and this patch is actually a right step
simply from set_dev_pasid() p.o.v hence you should include in any
series which first tries to use that interface.



Yes, that's my intention. If it reviews well, we can include it in the
driver's implementation.



Then you should make it clear in the first place. otherwise a patch
like this implies a review for merge. 


Yeah! Will update this in the next version.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v2 11/12] iommu/vt-d: Use device_domain_lock accurately

2022-06-14 Thread Baolu Lu

On 2022/6/14 15:16, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Tuesday, June 14, 2022 10:52 AM

The device_domain_lock is used to protect the device tracking list of
a domain. Remove unnecessary spin_lock/unlock()'s and move the necessary
ones around the list access.

Signed-off-by: Lu Baolu 
---
  drivers/iommu/intel/iommu.c | 68 +++--
  1 file changed, 27 insertions(+), 41 deletions(-)


[...]

+iommu_support_dev_iotlb(struct dmar_domain *domain, struct
intel_iommu *iommu,
+   u8 bus, u8 devfn)
  {
-   struct device_domain_info *info;
-
-   assert_spin_locked(&device_domain_lock);
+   struct device_domain_info *info = NULL, *tmp;
+   unsigned long flags;

if (!iommu->qi)
return NULL;

-   list_for_each_entry(info, &domain->devices, link)
-   if (info->iommu == iommu && info->bus == bus &&
-   info->devfn == devfn) {
-   if (info->ats_supported && info->dev)
-   return info;
+   spin_lock_irqsave(&device_domain_lock, flags);
+   list_for_each_entry(tmp, &domain->devices, link) {
+   if (tmp->iommu == iommu && tmp->bus == bus &&
+   tmp->devfn == devfn) {
+   if (tmp->ats_supported)
+   info = tmp;


Directly returning with unlock here is clearer than adding
another tmp variable...


Sure.




@@ -2460,15 +2450,14 @@ static int domain_add_dev_info(struct
dmar_domain *domain, struct device *dev)
if (!iommu)
return -ENODEV;

-   spin_lock_irqsave(&device_domain_lock, flags);
-   info->domain = domain;
ret = domain_attach_iommu(domain, iommu);
-   if (ret) {
-   spin_unlock_irqrestore(&device_domain_lock, flags);
+   if (ret)
return ret;
-   }
+
+   spin_lock_irqsave(&device_domain_lock, flags);
	list_add(&info->link, &domain->devices);
	spin_unlock_irqrestore(&device_domain_lock, flags);
+   info->domain = domain;



This is incorrect. You need to fully initialize the object before adding
it to the list. Otherwise a search right after the above unlock and
before assigning info->domain will get wrong data.


Fair enough. Will fix it in the next version.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 10/12] iommu/vt-d: Fold __dmar_remove_one_dev_info() into its caller

2022-06-14 Thread Baolu Lu

On 2022/6/14 15:07, Tian, Kevin wrote:

From: Lu Baolu
Sent: Tuesday, June 14, 2022 10:52 AM

Fold __dmar_remove_one_dev_info() into dmar_remove_one_dev_info()
which
is its only caller. Make the spin lock critical range only cover the
device list change code and remove some unnecessary checks.

Signed-off-by: Lu Baolu
---
  drivers/iommu/intel/iommu.c | 34 +-
  1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index af22690f44c8..8345e0c0824c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -295,7 +295,6 @@ static LIST_HEAD(dmar_satc_units);
  static int g_num_of_iommus;

  static void dmar_remove_one_dev_info(struct device *dev);
-static void __dmar_remove_one_dev_info(struct device_domain_info *info);

  int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
  int intel_iommu_sm =
IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
@@ -4141,20 +4140,14 @@ static void domain_context_clear(struct
device_domain_info *info)
   _context_clear_one_cb, info);
  }

-static void __dmar_remove_one_dev_info(struct device_domain_info *info)
+static void dmar_remove_one_dev_info(struct device *dev)
  {
-   struct dmar_domain *domain;
-   struct intel_iommu *iommu;
-
-   assert_spin_locked(&device_domain_lock);
-
-   if (WARN_ON(!info))
-   return;
-
-   iommu = info->iommu;
-   domain = info->domain;
+   struct device_domain_info *info = dev_iommu_priv_get(dev);
+   struct dmar_domain *domain = info->domain;

this local variable is not required as there is just one reference
to info->domain.


Yes. It could be removed and use info->domain directly.



btw I didn't see info->domain is cleared in this path. Is it correct?



It's better to clear here. I will make this change in my in-process
blocking domain series.

But it doesn't cause any real problems because the Intel IOMMU driver
supports default domain, hence the logic here is info->domain is
replaced, but not cleared.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 06/12] iommu/vt-d: Acquiring lock in domain ID allocation helpers

2022-06-14 Thread Baolu Lu

On 2022/6/14 14:52, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Tuesday, June 14, 2022 10:52 AM

The iommu->lock is used to protect the per-IOMMU domain ID resource.
Moving the lock into the ID alloc/free helpers makes the code more
compact. At the same time, the device_domain_lock is irrelevant to
the domain ID resource, remove its assertion as well.

On the other hand, the iommu->lock is never used in interrupt context,
there's no need to use the irqsave variant of the spinlock calls.


I still prefer to separating reduction of lock ranges from changing irqsave.
Locking is tricky. From bisect p.o.v. it will be a lot easier if we just change
one logic in one patch.



Fair enough. I will do this in the next version.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 03/12] iommu/vt-d: Remove clearing translation data in disable_dmar_iommu()

2022-06-14 Thread Baolu Lu

On 2022/6/14 14:49, Tian, Kevin wrote:

From: Lu Baolu
Sent: Tuesday, June 14, 2022 10:51 AM

The disable_dmar_iommu() is called when IOMMU initialization fails or
the IOMMU is hot-removed from the system. In both cases, there is no
need to clear the IOMMU translation data structures for devices.

On the initialization path, the device probing only happens after the
IOMMU is initialized successfully, hence there're no translation data
structures.

Out of curiosity. With kexec the IOMMU may contain stale mappings
from the old kernel. Then is it meaningful to disable IOMMU after the
new kernel fails to initialize it properly?


For kexec kernel, if the IOMMU is detected to be pre-enabled, the IOMMU
driver will try to copy tables from the old kernel. If copying table
fails, the IOMMU driver will disable IOMMU and do the normal
initialization.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 01/12] iommu/vt-d: debugfs: Remove device_domain_lock usage

2022-06-14 Thread Baolu Lu

Hi Kevin,

Thanks for your reviewing.

On 2022/6/14 14:43, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Tuesday, June 14, 2022 10:51 AM

The domain_translation_struct debugfs node is used to dump the DMAR
page
tables for the PCI devices. It potentially races with setting domains to
devices. The existing code uses a global spinlock device_domain_lock to
avoid the races, but this is problematical as this lock is only used to
protect the device tracking lists of each domain.


is it really problematic at this point? Before following patches are applied
using device_domain_lock should have similar effect as holding the group
lock.


The device_domain_lock only protects the device tracking list of the
domain, it doesn't include the domain pointer stored in the dev_info
structure. That's really protected by the group->mutex.



Here it might make more sense to just focus on removing the use of
device_domain_lock outside of iommu.c. Just that using group lock is
cleaner and more compatible to following cleanups.


Fair enough. I will update the commit message with above statement.


and it's worth mentioning that racing with page table updates is out
of the scope of this series. Probably also add a comment in the code
to clarify this point.


Sure.





This replaces device_domain_lock with group->mutex to protect page tables
from setting a new domain. This also makes device_domain_lock static as
it is now only used inside the file.


s/the file/iommu.c/


Sure.





Signed-off-by: Lu Baolu 
---
  drivers/iommu/intel/iommu.h   |  1 -
  drivers/iommu/intel/debugfs.c | 49 +--
  drivers/iommu/intel/iommu.c   |  2 +-
  3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index a22adfbdf870..8a6d64d726c0 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -480,7 +480,6 @@ enum {
  #define VTD_FLAG_SVM_CAPABLE  (1 << 2)

  extern int intel_iommu_sm;
-extern spinlock_t device_domain_lock;

  #define sm_supported(iommu)   (intel_iommu_sm &&
ecap_smts((iommu)->ecap))
  #define pasid_supported(iommu)(sm_supported(iommu) &&
\
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index d927ef10641b..5ebfe32265d5 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -342,15 +342,23 @@ static void pgtable_walk_level(struct seq_file *m,
struct dma_pte *pde,
}
  }

-static int show_device_domain_translation(struct device *dev, void *data)
+struct show_domain_opaque {
+   struct device *dev;
+   struct seq_file *m;
+};


Sounds redundant as both bus_for_each_dev() and
iommu_group_for_each_dev() declares the same fn type which
accepts a device pointer and void *data.


+
+static int __show_device_domain_translation(struct device *dev, void *data)
  {
-   struct device_domain_info *info = dev_iommu_priv_get(dev);
-   struct dmar_domain *domain = info->domain;
-   struct seq_file *m = data;
+   struct show_domain_opaque *opaque = data;
+   struct device_domain_info *info;
+   struct seq_file *m = opaque->m;
+   struct dmar_domain *domain;
u64 path[6] = { 0 };

-   if (!domain)
+   if (dev != opaque->dev)
return 0;


not required.


Together with above comment.

The iommu group might have other devices. I only want to dump the domain
of the specific @opaque->dev. It reads a bit confusing, but it's the
only helper I can use outside of drivers/iommu/iommu.c.

Or, since all devices in the iommu group share the same domain, hence
only dump once?




+   info = dev_iommu_priv_get(dev);
+   domain = info->domain;

seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
   (u64)virt_to_phys(domain->pgd));
@@ -359,20 +367,33 @@ static int show_device_domain_translation(struct
device *dev, void *data)
pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
seq_putc(m, '\n');

-   return 0;
+   return 1;
  }

-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int show_device_domain_translation(struct device *dev, void *data)
  {
-   unsigned long flags;
-   int ret;
+   struct show_domain_opaque opaque = {dev, data};
+   struct iommu_group *group;

-   spin_lock_irqsave(&device_domain_lock, flags);
-   ret = bus_for_each_dev(&pci_bus_type, NULL, m,
-  show_device_domain_translation);
-   spin_unlock_irqrestore(&device_domain_lock, flags);
+   group = iommu_group_get(dev);
+   if (group) {
+   /*
+* The group->mutex is held across the callback, which will
+* block calls to iommu_attach/detach_group/device. Hence,
+* the domain of the device will not change during traversal.
+*/
+   iommu_group_for_each_dev(group, ,
+ 

Re: [PATCH 1/1] iommu/vt-d: Add set_dev_pasid callbacks for default domain

2022-06-14 Thread Baolu Lu

On 2022/6/14 13:36, Tian, Kevin wrote:

From: Baolu Lu
Sent: Tuesday, June 14, 2022 12:48 PM

On 2022/6/14 12:02, Tian, Kevin wrote:

From: Lu Baolu
Sent: Tuesday, June 14, 2022 11:44 AM

This allows the upper layers to set a domain to a PASID of a device
if the PASID feature is supported by the IOMMU hardware. The typical
use cases are, for example, kernel DMA with PASID and hardware
assisted mediated device drivers.


why is it not part of the series for those use cases? There is no consumer
of added callbacks in this patch...

It could be. I just wanted to maintain the integrity of Intel IOMMU
driver implementation.

but let's not add dead code. and this patch is actually a right step
simply from set_dev_pasid() p.o.v hence you should include in any
series which first tries to use that interface.



Yes, that's my intention. If it reviews well, we can include it in the
driver's implementation.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/1] iommu/vt-d: Add set_dev_pasid callbacks for default domain

2022-06-13 Thread Baolu Lu

On 2022/6/14 12:02, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Tuesday, June 14, 2022 11:44 AM

This allows the upper layers to set a domain to a PASID of a device
if the PASID feature is supported by the IOMMU hardware. The typical
use cases are, for example, kernel DMA with PASID and hardware
assisted mediated device drivers.



why is it not part of the series for those use cases? There is no consumer
of added callbacks in this patch...


It could be. I just wanted to maintain the integrity of Intel IOMMU
driver implementation.




+/* PCI domain-subdevice relationship */
+struct subdev_domain_info {
+   struct list_head link_domain;   /* link to domain siblings */
+   struct device *dev; /* physical device derived from */
+   ioasid_t pasid; /* PASID on physical device */
+};
+


It's not subdev. Just dev+pasid in iommu's context.


How about struct device_pasid_info?

Best regards,
baolu

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2] iommu/vt-d: Make DMAR_UNITS_SUPPORTED a config setting

2022-06-13 Thread Baolu Lu

On 2022/6/14 09:54, Jerry Snitselaar wrote:

On Mon, Jun 13, 2022 at 6:51 PM Baolu Lu  wrote:


On 2022/6/14 09:44, Jerry Snitselaar wrote:

On Mon, Jun 13, 2022 at 6:36 PM Baolu Lu  wrote:

On 2022/6/14 04:57, Jerry Snitselaar wrote:

On Thu, May 12, 2022 at 10:13:09AM -0500, Steve Wahl wrote:

To support up to 64 sockets with 10 DMAR units each (640), make the
value of DMAR_UNITS_SUPPORTED adjustable by a config variable,
CONFIG_DMAR_UNITS_SUPPORTED, and make it's default 1024 when MAXSMP is
set.

If the available hardware exceeds DMAR_UNITS_SUPPORTED (previously set
to MAX_IO_APICS, or 128), it causes these messages: "DMAR: Failed to
allocate seq_id", "DMAR: Parse DMAR table failure.", and "x2apic: IRQ
remapping doesn't support X2APIC mode x2apic disabled"; and the system
fails to boot properly.

Signed-off-by: Steve Wahl
---

Note that we could not find a reason for connecting
DMAR_UNITS_SUPPORTED to MAX_IO_APICS as was done previously.  Perhaps
it seemed like the two would continue to match on earlier processors.
There doesn't appear to be kernel code that assumes that the value of
one is related to the other.

v2: Make this value a config option, rather than a fixed constant.  The default
values should match previous configuration except in the MAXSMP case.  Keeping 
the
value at a power of two was requested by Kevin Tian.

drivers/iommu/intel/Kconfig | 6 ++
include/linux/dmar.h| 6 +-
2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 247d0f2d5fdf..fdbda77ac21e 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -9,6 +9,12 @@ config DMAR_PERF
config DMAR_DEBUG
   bool

+config DMAR_UNITS_SUPPORTED
+int "Number of DMA Remapping Units supported"

Also, should there be a "depends on (X86 || IA64)" here?

Do you have any compilation errors or warnings?

Best regards,
baolu


I think it is probably harmless since it doesn't get used elsewhere,
but our tooling was complaining to me because DMAR_UNITS_SUPPORTED was
being autogenerated into the configs for the non-x86 architectures we
build (aarch64, s390x, ppcle64).
We have files corresponding to the config options that it looks at,
and I had one for x86 and not the others so it noticed the
discrepancy.


So with "depends on (X86 || IA64)", that tool doesn't complain anymore,
right?

Best regards,
baolu



Yes, with the depends it no longer happens.


The dmar code only exists on X86 and IA64 arch's. Adding this depending
makes sense to me. I will add it if no objections.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2] iommu/vt-d: Make DMAR_UNITS_SUPPORTED a config setting

2022-06-13 Thread Baolu Lu

On 2022/6/14 09:44, Jerry Snitselaar wrote:

On Mon, Jun 13, 2022 at 6:36 PM Baolu Lu  wrote:

On 2022/6/14 04:57, Jerry Snitselaar wrote:

On Thu, May 12, 2022 at 10:13:09AM -0500, Steve Wahl wrote:

To support up to 64 sockets with 10 DMAR units each (640), make the
value of DMAR_UNITS_SUPPORTED adjustable by a config variable,
CONFIG_DMAR_UNITS_SUPPORTED, and make it's default 1024 when MAXSMP is
set.

If the available hardware exceeds DMAR_UNITS_SUPPORTED (previously set
to MAX_IO_APICS, or 128), it causes these messages: "DMAR: Failed to
allocate seq_id", "DMAR: Parse DMAR table failure.", and "x2apic: IRQ
remapping doesn't support X2APIC mode x2apic disabled"; and the system
fails to boot properly.

Signed-off-by: Steve Wahl
---

Note that we could not find a reason for connecting
DMAR_UNITS_SUPPORTED to MAX_IO_APICS as was done previously.  Perhaps
it seemed like the two would continue to match on earlier processors.
There doesn't appear to be kernel code that assumes that the value of
one is related to the other.

v2: Make this value a config option, rather than a fixed constant.  The default
values should match previous configuration except in the MAXSMP case.  Keeping 
the
value at a power of two was requested by Kevin Tian.

   drivers/iommu/intel/Kconfig | 6 ++
   include/linux/dmar.h| 6 +-
   2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 247d0f2d5fdf..fdbda77ac21e 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -9,6 +9,12 @@ config DMAR_PERF
   config DMAR_DEBUG
  bool

+config DMAR_UNITS_SUPPORTED
+int "Number of DMA Remapping Units supported"

Also, should there be a "depends on (X86 || IA64)" here?

Do you have any compilation errors or warnings?

Best regards,
baolu


I think it is probably harmless since it doesn't get used elsewhere,
but our tooling was complaining to me because DMAR_UNITS_SUPPORTED was
being autogenerated into the configs for the non-x86 architectures we
build (aarch64, s390x, ppcle64).
We have files corresponding to the config options that it looks at,
and I had one for x86 and not the others so it noticed the
discrepancy.


So with "depends on (X86 || IA64)", that tool doesn't complain anymore,
right?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2] iommu/vt-d: Make DMAR_UNITS_SUPPORTED a config setting

2022-06-13 Thread Baolu Lu

On 2022/6/14 04:57, Jerry Snitselaar wrote:

On Thu, May 12, 2022 at 10:13:09AM -0500, Steve Wahl wrote:

To support up to 64 sockets with 10 DMAR units each (640), make the
value of DMAR_UNITS_SUPPORTED adjustable by a config variable,
CONFIG_DMAR_UNITS_SUPPORTED, and make it's default 1024 when MAXSMP is
set.

If the available hardware exceeds DMAR_UNITS_SUPPORTED (previously set
to MAX_IO_APICS, or 128), it causes these messages: "DMAR: Failed to
allocate seq_id", "DMAR: Parse DMAR table failure.", and "x2apic: IRQ
remapping doesn't support X2APIC mode x2apic disabled"; and the system
fails to boot properly.

Signed-off-by: Steve Wahl 
---

Note that we could not find a reason for connecting
DMAR_UNITS_SUPPORTED to MAX_IO_APICS as was done previously.  Perhaps
it seemed like the two would continue to match on earlier processors.
There doesn't appear to be kernel code that assumes that the value of
one is related to the other.

v2: Make this value a config option, rather than a fixed constant.  The default
values should match previous configuration except in the MAXSMP case.  Keeping 
the
value at a power of two was requested by Kevin Tian.

  drivers/iommu/intel/Kconfig | 6 ++
  include/linux/dmar.h| 6 +-
  2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 247d0f2d5fdf..fdbda77ac21e 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -9,6 +9,12 @@ config DMAR_PERF
  config DMAR_DEBUG
bool
  
+config DMAR_UNITS_SUPPORTED

+   int "Number of DMA Remapping Units supported"


Also, should there be a "depends on (X86 || IA64)" here?


Do you have any compilation errors or warnings?

Best regards,
baolu




+   default 1024 if MAXSMP
+   default 128  if X86_64
+   default 64
+
  config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && (X86 || IA64)
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 45e903d84733..0c03c1845c23 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -18,11 +18,7 @@
  
  struct acpi_dmar_header;
  
-#ifdef	CONFIG_X86

-# define   DMAR_UNITS_SUPPORTEDMAX_IO_APICS
-#else
-# define   DMAR_UNITS_SUPPORTED64
-#endif
+#defineDMAR_UNITS_SUPPORTEDCONFIG_DMAR_UNITS_SUPPORTED
  
  /* DMAR Flags */

  #define DMAR_INTR_REMAP   0x1
--
2.26.2





___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2] iommu/vt-d: Make DMAR_UNITS_SUPPORTED a config setting

2022-06-13 Thread Baolu Lu

On 2022/6/14 04:38, Jerry Snitselaar wrote:

On Thu, May 12, 2022 at 10:13:09AM -0500, Steve Wahl wrote:

To support up to 64 sockets with 10 DMAR units each (640), make the
value of DMAR_UNITS_SUPPORTED adjustable by a config variable,
CONFIG_DMAR_UNITS_SUPPORTED, and make it's default 1024 when MAXSMP is
set.

If the available hardware exceeds DMAR_UNITS_SUPPORTED (previously set
to MAX_IO_APICS, or 128), it causes these messages: "DMAR: Failed to
allocate seq_id", "DMAR: Parse DMAR table failure.", and "x2apic: IRQ
remapping doesn't support X2APIC mode x2apic disabled"; and the system
fails to boot properly.

Signed-off-by: Steve Wahl 
---

Note that we could not find a reason for connecting
DMAR_UNITS_SUPPORTED to MAX_IO_APICS as was done previously.  Perhaps
it seemed like the two would continue to match on earlier processors.
There doesn't appear to be kernel code that assumes that the value of
one is related to the other.

v2: Make this value a config option, rather than a fixed constant.  The default
values should match previous configuration except in the MAXSMP case.  Keeping 
the
value at a power of two was requested by Kevin Tian.

  drivers/iommu/intel/Kconfig | 6 ++
  include/linux/dmar.h| 6 +-
  2 files changed, 7 insertions(+), 5 deletions(-)



Baolu do you have this queued up for v5.20? Also do you have a public repo where
you keep the vt-d changes before sending Joerg the patches for a release?


Yes. I have started to queue patches for v5.20. They could be found on
github:

https://github.com/LuBaolu/intel-iommu/commits/vtd-next-for-v5.20

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 02/11] iommu: Add max_pasids field in struct dev_iommu

2022-06-10 Thread Baolu Lu

On 2022/6/10 17:01, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Friday, June 10, 2022 2:47 PM

On 2022/6/10 03:01, Raj, Ashok wrote:

On Tue, Jun 07, 2022 at 09:49:33AM +0800, Lu Baolu wrote:

@@ -218,6 +219,30 @@ static void dev_iommu_free(struct device *dev)
kfree(param);
   }

+static u32 dev_iommu_get_max_pasids(struct device *dev)
+{
+   u32 max_pasids = dev->iommu->iommu_dev->max_pasids;
+   u32 num_bits;
+   int ret;
+
+   if (!max_pasids)
+   return 0;
+
+   if (dev_is_pci(dev)) {
+   ret = pci_max_pasids(to_pci_dev(dev));
+   if (ret < 0)
+   return 0;
+
+   return min_t(u32, max_pasids, ret);


Ah.. that answers my other question to consider device pasid-max. I guess
if we need any enforcement of restricting devices that aren't supporting
the full PASID, that will be done by some higher layer?


The mm->pasid style of SVA is explicitly enabled through
iommu_dev_enable_feature(IOMMU_DEV_FEAT_SVA). The IOMMU driver
specific
restriction might be put there?



too many returns in this function, maybe setup all returns to the end of
the function might be elegant?


I didn't find cleanup room after a quick scan of the code. But sure, let
me go through code again offline.



If we do care:

+static u32 dev_iommu_get_max_pasids(struct device *dev)
+{
+   u32 max_pasids = 0,
+   u32 num_bits = 0;
+   int ret;
+
+   if (dev_is_pci(dev)) {
+   ret = pci_max_pasids(to_pci_dev(dev));
+   if (ret > 0)
+   max_pasids = ret;
+   } else {
+   ret = device_property_read_u32(dev, "pasid-num-bits", &num_bits);
+   if (!ret)
+   max_pasids = 1UL << num_bits;
+   }
+
+   return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
+}


Great! Cleaner and more compact than mine. Thank you!

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 04/11] iommu: Add sva iommu_domain support

2022-06-10 Thread Baolu Lu

On 2022/6/10 04:25, Raj, Ashok wrote:

Hi Baolu


Hi Ashok,



some minor nits.


Thanks for your comments.



On Tue, Jun 07, 2022 at 09:49:35AM +0800, Lu Baolu wrote:

The sva iommu_domain represents a hardware pagetable that the IOMMU
hardware could use for SVA translation. This adds some infrastructure
to support SVA domain in the iommu common layer. It includes:

- Extend the iommu_domain to support a new IOMMU_DOMAIN_SVA domain
   type. The IOMMU drivers that support SVA should provide the sva
   domain specific iommu_domain_ops.
- Add a helper to allocate an SVA domain. The iommu_domain_free()
   is still used to free an SVA domain.
- Add helpers to attach an SVA domain to a device and the reverse
   operation.

Some buses, like PCI, route packets without considering the PASID value.
Thus a DMA target address with PASID might be treated as P2P if the
address falls into the MMIO BAR of other devices in the group. To make
things simple, the attach/detach interfaces only apply to devices
belonging to the singleton groups, and the singleton is immutable in
fabric i.e. not affected by hotplug.

The iommu_attach/detach_device_pasid() can be used for other purposes,
such as kernel DMA with pasid, mediation device, etc.

Suggested-by: Jean-Philippe Brucker 
Suggested-by: Jason Gunthorpe 
Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 
---
  include/linux/iommu.h | 45 -
  drivers/iommu/iommu.c | 93 +++
  2 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3fbad42c0bf8..9173c5741447 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -64,6 +64,9 @@ struct iommu_domain_geometry {
  #define __IOMMU_DOMAIN_PT (1U << 2)  /* Domain is identity mapped   */
  #define __IOMMU_DOMAIN_DMA_FQ (1U << 3)  /* DMA-API uses flush queue*/
  
+#define __IOMMU_DOMAIN_SHARED	(1U << 4)  /* Page table shared from CPU  */


s/from CPU/with CPU


Sure.




+#define __IOMMU_DOMAIN_HOST_VA (1U << 5)  /* Host CPU virtual address */


Do you mean general CPU VA? or Host CPU VA, I'm reading the latter as 2nd
stage?


Host CPU VA. In the near future, we will add another flag _GUEST_VA, so
that the shared page table types are distinguished.




+
  /*
   * This are the possible domain-types
   *
@@ -86,15 +89,24 @@ struct iommu_domain_geometry {
  #define IOMMU_DOMAIN_DMA_FQ   (__IOMMU_DOMAIN_PAGING |\
 __IOMMU_DOMAIN_DMA_API |   \
 __IOMMU_DOMAIN_DMA_FQ)
+#define IOMMU_DOMAIN_SVA   (__IOMMU_DOMAIN_SHARED |\
+__IOMMU_DOMAIN_HOST_VA)


Doesn't shared automatically mean CPU VA? Do we need another flag?


Yes. Shared means CPU VA, but there're many types. Besides above two, we
also see the shared KVM/EPT.



  
  struct iommu_domain {

unsigned type;
const struct iommu_domain_ops *ops;
unsigned long pgsize_bitmap;/* Bitmap of page sizes in use */
-   iommu_fault_handler_t handler;
-   void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+   union {
+   struct {/* IOMMU_DOMAIN_DMA */
+   iommu_fault_handler_t handler;
+   void *handler_token;
+   };
+   struct {/* IOMMU_DOMAIN_SVA */
+   struct mm_struct *mm;
+   };
+   };
  };
  
  static inline bool iommu_is_dma_domain(struct iommu_domain *domain)

@@ -262,6 +274,8 @@ struct iommu_ops {
   * struct iommu_domain_ops - domain specific operations
   * @attach_dev: attach an iommu domain to a device
   * @detach_dev: detach an iommu domain from a device
+ * @set_dev_pasid: set an iommu domain to a pasid of device
+ * @block_dev_pasid: block pasid of device from using iommu domain
   * @map: map a physically contiguous memory region to an iommu domain
   * @map_pages: map a physically contiguous set of pages of the same size to
   * an iommu domain.
@@ -282,6 +296,10 @@ struct iommu_ops {
  struct iommu_domain_ops {
int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+   int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
+ioasid_t pasid);
+   void (*block_dev_pasid)(struct iommu_domain *domain, struct device *dev,
+   ioasid_t pasid);
  
  	int (*map)(struct iommu_domain *domain, unsigned long iova,

   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
@@ -679,6 +697,12 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, 
void *owner);
  void iommu_group_release_dma_owner(struct iommu_group *group);
  bool iommu_group_dma_owner_claimed(struct iommu_group *group);
  
+struct 

Re: [PATCH v8 02/11] iommu: Add max_pasids field in struct dev_iommu

2022-06-10 Thread Baolu Lu

On 2022/6/10 03:01, Raj, Ashok wrote:

On Tue, Jun 07, 2022 at 09:49:33AM +0800, Lu Baolu wrote:

Use this field to save the number of PASIDs that a device is able to
consume. It is a generic attribute of a device and lifting it into the
per-device dev_iommu struct could help to avoid the boilerplate code
in various IOMMU drivers.

Signed-off-by: Lu Baolu 
---
  include/linux/iommu.h |  2 ++
  drivers/iommu/iommu.c | 26 ++
  2 files changed, 28 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 03fbb1b71536..d50afb2c9a09 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -364,6 +364,7 @@ struct iommu_fault_param {
   * @fwspec:IOMMU fwspec data
   * @iommu_dev: IOMMU device this device is linked to
   * @priv:  IOMMU Driver private data
+ * @max_pasids:  number of PASIDs device can consume
   *
   * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
   *struct iommu_group  *iommu_group;
@@ -375,6 +376,7 @@ struct dev_iommu {
struct iommu_fwspec *fwspec;
struct iommu_device *iommu_dev;
void*priv;
+   u32 max_pasids;
  };
  
  int iommu_device_register(struct iommu_device *iommu,

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 847ad47a2dfd..adac85ccde73 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -20,6 +20,7 @@
  #include 
  #include 
  #include 
+#include 


Is this needed for this patch?


Yes. It's for pci_max_pasids().




  #include 
  #include 
  #include 
@@ -218,6 +219,30 @@ static void dev_iommu_free(struct device *dev)
kfree(param);
  }
  
+static u32 dev_iommu_get_max_pasids(struct device *dev)

+{
+   u32 max_pasids = dev->iommu->iommu_dev->max_pasids;
+   u32 num_bits;
+   int ret;
+
+   if (!max_pasids)
+   return 0;
+
+   if (dev_is_pci(dev)) {
+   ret = pci_max_pasids(to_pci_dev(dev));
+   if (ret < 0)
+   return 0;
+
+   return min_t(u32, max_pasids, ret);


Ah.. that answers my other question to consider device pasid-max. I guess
if we need any enforcement of restricting devices that aren't supporting
the full PASID, that will be done by some higher layer?


The mm->pasid style of SVA is explicitly enabled through
iommu_dev_enable_feature(IOMMU_DEV_FEAT_SVA). The IOMMU driver specific
restriction might be put there?



too many returns in this function, maybe setup all returns to the end of
the function might be elegant?


I didn't find cleanup room after a quick scan of the code. But sure, let
me go through code again offline.




+   }
+
+   ret = device_property_read_u32(dev, "pasid-num-bits", &num_bits);
+   if (ret)
+   return 0;
+
+   return min_t(u32, max_pasids, 1UL << num_bits);
+}
+
  static int __iommu_probe_device(struct device *dev, struct list_head 
*group_list)
  {
const struct iommu_ops *ops = dev->bus->iommu_ops;
@@ -243,6 +268,7 @@ static int __iommu_probe_device(struct device *dev, struct 
list_head *group_list
}
  
  	dev->iommu->iommu_dev = iommu_dev;

+   dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
  
  	group = iommu_group_get_for_dev(dev);

if (IS_ERR(group)) {
--
2.25.1



Best regards,
Baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 01/11] iommu: Add max_pasids field in struct iommu_device

2022-06-10 Thread Baolu Lu

On 2022/6/10 01:25, Raj, Ashok wrote:

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4f29139bbfc3..e065cbe3c857 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -479,7 +479,6 @@ enum {
  #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED(1 << 1)
  #define VTD_FLAG_SVM_CAPABLE  (1 << 2)
  
-extern int intel_iommu_sm;

  extern spinlock_t device_domain_lock;
  
  #define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))

@@ -786,6 +785,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu 
*iommu, u8 bus,
  extern const struct iommu_ops intel_iommu_ops;
  
  #ifdef CONFIG_INTEL_IOMMU

+extern int intel_iommu_sm;
  extern int iommu_calculate_agaw(struct intel_iommu *iommu);
  extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
  extern int dmar_disabled;
@@ -802,6 +802,7 @@ static inline int iommu_calculate_max_sagaw(struct 
intel_iommu *iommu)
  }
  #define dmar_disabled (1)
  #define intel_iommu_enabled (0)
+#define intel_iommu_sm (0)

Is the above part of this patch? Or should be moved up somewhere?


This is to make pasid_supported() usable in dmar.c. It's only needed by
the change in this patch. I should make this clear in the commit
message. :-)

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-10 Thread Baolu Lu

On 2022/6/10 01:06, Raj, Ashok wrote:

On Thu, Jun 09, 2022 at 03:08:10PM +0800, Lu Baolu wrote:

The IOMMU page tables are updated using iommu_map/unmap() interfaces.
Currently, there is no mandatory requirement for drivers to use locks
to ensure concurrent updates to page tables, because it's assumed that
overlapping IOVA ranges do not have concurrent updates. Therefore the
IOMMU drivers only need to take care of concurrent updates to level
page table entries.


The last part doesn't read well..
s/updates to level page table entries/ updates to page-table entries at the
same level



But enabling new features challenges this assumption. For example, the
hardware assisted dirty page tracking feature requires scanning page
tables in interfaces other than mapping and unmapping. This might result
in a use-after-free scenario in which a level page table has been freed
by the unmap() interface, while another thread is scanning the next level
page table.

This adds RCU-protected page free support so that the pages are really
freed and reused after a RCU grace period. Hence, the page tables are
safe for scanning within a rcu_read_lock critical region. Considering
that scanning the page table is a rare case, this also adds a domain
flag and the RCU-protected page free is only used when this flat is set.


s/flat/flag


Above updated. Thank you!



Signed-off-by: Lu Baolu 
---
  include/linux/iommu.h |  9 +
  drivers/iommu/iommu.c | 23 +++
  2 files changed, 32 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e1afe169549..6f68eabb8567 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -95,6 +95,7 @@ struct iommu_domain {
void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+   unsigned long concurrent_traversal:1;


Does this need to be a bitfield? Even though you are needing just one bit
now, you can probably make have maskbits?



As discussed in another reply, I am about to drop the driver opt-in and
wrapper the helper around the iommu_iotlb_gather.




  };
  
  static inline bool iommu_is_dma_domain(struct iommu_domain *domain)

@@ -657,6 +658,12 @@ static inline void dev_iommu_priv_set(struct device *dev, 
void *priv)
dev->iommu->priv = priv;
  }
  
+static inline void domain_set_concurrent_traversal(struct iommu_domain *domain,

+  bool value)
+{
+   domain->concurrent_traversal = value;
+}
+
  int iommu_probe_device(struct device *dev);
  void iommu_release_device(struct device *dev);
  
@@ -677,6 +684,8 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);

  void iommu_group_release_dma_owner(struct iommu_group *group);
  bool iommu_group_dma_owner_claimed(struct iommu_group *group);
  
+void iommu_free_pgtbl_pages(struct iommu_domain *domain,

+   struct list_head *pages);
  #else /* CONFIG_IOMMU_API */
  
  struct iommu_ops {};

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 847ad47a2dfd..ceeb97ebe3e2 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3252,3 +3252,26 @@ bool iommu_group_dma_owner_claimed(struct iommu_group 
*group)
return user;
  }
  EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
+
+static void pgtble_page_free_rcu(struct rcu_head *rcu)


maybe the names can be consistent? pgtble_ vs pgtbl below.

vote to drop the 'e' :-)


Updated.




+{
+   struct page *page = container_of(rcu, struct page, rcu_head);
+
+   __free_pages(page, 0);
+}
+
+void iommu_free_pgtbl_pages(struct iommu_domain *domain,
+   struct list_head *pages)
+{
+   struct page *page, *next;
+
+   if (!domain->concurrent_traversal) {
+   put_pages_list(pages);
+   return;
+   }
+
+   list_for_each_entry_safe(page, next, pages, lru) {
+   list_del(&page->lru);
+   call_rcu(&page->rcu_head, pgtble_page_free_rcu);
+   }
+}
--
2.25.1



Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-10 Thread Baolu Lu

On 2022/6/9 21:32, Jason Gunthorpe wrote:

On Thu, Jun 09, 2022 at 02:19:06PM +0100, Robin Murphy wrote:


Is there a significant benefit to keeping both paths, or could we get away
with just always using RCU? Realistically, pagetable pages aren't likely to
be freed all that frequently, except perhaps at domain teardown, but that
shouldn't really be performance-critical, and I guess we could stick an RCU
sync point in iommu_domain_free() if we're really worried about releasing
larger quantities of pages back to the allocator ASAP?


I think you are right, anything that uses the iommu_iotlb_gather may
as well use RCU too.

IIRC the allocators already know that RCU is often sitting on
freed-memory and have some contigency to flush it out before OOMing,
so nothing special should be needed.


Fair enough. How about below code?

static void pgtble_page_free_rcu(struct rcu_head *rcu)
{
struct page *page = container_of(rcu, struct page, rcu_head);

__free_pages(page, 0);
}

/*
 * Free pages gathered in the freelist of iommu_iotlb_gather. Use RCU free
 * way so that it's safe for lock-free page table walk.
 */
void iommu_free_iotlb_gather_pages(struct iommu_iotlb_gather *iotlb_gather)
{
struct page *page, *next;

list_for_each_entry_safe(page, next, &iotlb_gather->freelist, 
lru) {

list_del(&page->lru);
call_rcu(&page->rcu_head, pgtble_page_free_rcu);
}
}

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-09 Thread Baolu Lu

On 2022/6/9 20:49, Jason Gunthorpe wrote:

+void iommu_free_pgtbl_pages(struct iommu_domain *domain,
+   struct list_head *pages)
+{
+   struct page *page, *next;
+
+   if (!domain->concurrent_traversal) {
+   put_pages_list(pages);
+   return;
+   }
+
+   list_for_each_entry_safe(page, next, pages, lru) {
+   list_del(&page->lru);
+   call_rcu(&page->rcu_head, pgtble_page_free_rcu);
+   }

It seems OK, but I wonder if there is benifit to using
put_pages_list() from the rcu callback


The price is that we need to allocate a "struct list_head" and free it
in the rcu callback as well. Currently the list_head is sitting in the
stack.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/5] Simplify vfio_iommu_type1 attach/detach routine

2022-06-07 Thread Baolu Lu

On 2022/6/7 19:58, Jason Gunthorpe wrote:

On Tue, Jun 07, 2022 at 03:44:43PM +0800, Baolu Lu wrote:

On 2022/6/6 14:19, Nicolin Chen wrote:

Worths mentioning the exact match for enforce_cache_coherency is removed
with this series, since there's very less value in doing that since KVM
won't be able to take advantage of it -- this just wastes domain memory.
Instead, we rely on Intel IOMMU driver taking care of that internally.


After reading this series, I don't see that Intel IOMMU driver needs any
further change to support the new scheme. Did I miss anything?


You already did it :)


Just as I thought. Thank you!

Best regards,
baolu

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/5] Simplify vfio_iommu_type1 attach/detach routine

2022-06-07 Thread Baolu Lu

On 2022/6/6 14:19, Nicolin Chen wrote:

Worths mentioning the exact match for enforce_cache_coherency is removed
with this series, since there's very less value in doing that since KVM
won't be able to take advantage of it -- this just wastes domain memory.
Instead, we rely on Intel IOMMU driver taking care of that internally.


After reading this series, I don't see that Intel IOMMU driver needs any
further change to support the new scheme. Did I miss anything?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group

2022-06-06 Thread Baolu Lu

On 2022/6/6 14:19, Nicolin Chen wrote:

+/**
+ * iommu_attach_group - Attach an IOMMU group to an IOMMU domain
+ * @domain: IOMMU domain to attach
+ * @dev: IOMMU group that will be attached


Nit: @group: ...


+ *
+ * Returns 0 on success and error code on failure
+ *
+ * Specifically, -EMEDIUMTYPE is returned if the domain and the group are
+ * incompatible in some way. This indicates that a caller should try another
+ * existing IOMMU domain or allocate a new one.
+ */
  int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
  {
int ret;


Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 09/12] iommu/vt-d: Check device list of domain in domain free path

2022-06-05 Thread Baolu Lu

On 2022/6/2 14:29, Tian, Kevin wrote:

From: Baolu Lu 
Sent: Wednesday, June 1, 2022 7:02 PM

On 2022/6/1 17:28, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Friday, May 27, 2022 2:30 PM

When the IOMMU domain is about to be freed, it should not be set on

any

device. Instead of silently dealing with some bug cases, it's better to
trigger a warning to report and fix any potential bugs at the first time.





   static void domain_exit(struct dmar_domain *domain)
   {
-
-   /* Remove associated devices and clear attached or cached domains
*/
-   domain_remove_dev_info(domain);
+   if (WARN_ON(!list_empty(&domain->devices)))
+   return;



warning is good but it doesn't mean the driver shouldn't deal with
that situation to make it safer e.g. blocking DMA from all attached
device...


I have ever thought the same thing. :-)

Blocking DMA from attached device should be done when setting blocking
domain to the device. It should not be part of freeing a domain.


yes but here we are talking about some bug scenario.



Here, the caller asks the driver to free the domain, but the driver
finds that something is wrong. Therefore, it warns and returns directly.
The domain will still be there in use until the next set_domain().



at least it'd look safer if we always try to unmap the entire domain i.e.:

static void domain_exit(struct dmar_domain *domain)
{
-
-   /* Remove associated devices and clear attached or cached domains */
-   domain_remove_dev_info(domain);

if (domain->pgd) {
LIST_HEAD(freelist);

domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
put_pages_list(&freelist);
}

+   if (WARN_ON(!list_empty(&domain->devices)))
+   return;

kfree(domain);
}


Fair enough. Removing all mappings is safer.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 09/12] iommu/vt-d: Check device list of domain in domain free path

2022-06-01 Thread Baolu Lu

On 2022/6/1 17:28, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Friday, May 27, 2022 2:30 PM

When the IOMMU domain is about to be freed, it should not be set on any
device. Instead of silently dealing with some bug cases, it's better to
trigger a warning to report and fix any potential bugs at the first time.





  static void domain_exit(struct dmar_domain *domain)
  {
-
-   /* Remove associated devices and clear attached or cached domains
*/
-   domain_remove_dev_info(domain);
+   if (WARN_ON(!list_empty(&domain->devices)))
+   return;



warning is good but it doesn't mean the driver shouldn't deal with
that situation to make it safer e.g. blocking DMA from all attached
device...


I have ever thought the same thing. :-)

Blocking DMA from attached device should be done when setting blocking
domain to the device. It should not be part of freeing a domain.

Here, the caller asks the driver to free the domain, but the driver
finds that something is wrong. Therefore, it warns and returns directly.
The domain will still be there in use until the next set_domain().

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 07/12] iommu/vt-d: Acquiring lock in pasid manipulation helpers

2022-06-01 Thread Baolu Lu

On 2022/6/1 17:18, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Friday, May 27, 2022 2:30 PM

The iommu->lock is used to protect the per-IOMMU pasid directory table
and pasid table. Move the spinlock acquisition/release into the helpers
to make the code self-contained.

Signed-off-by: Lu Baolu 


Reviewed-by: Kevin Tian , with one nit



-   /* Caller must ensure PASID entry is not in use. */
-   if (pasid_pte_is_present(pte))
-   return -EBUSY;
+   spin_lock(&iommu->lock);
+   pte = get_non_present_pasid_entry(dev, pasid);
+   if (!pte) {
+   spin_unlock(&iommu->lock);
+   return -ENODEV;
+   }


I don't think above is a good abstraction and it changes the error
code for an present entry from -EBUSY to -ENODEV.


Sure. I will roll it back to -EBUSY. I added this helper because the
same code appears at least three times in this file.

Best regards,
baolu


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 06/12] iommu/vt-d: Acquiring lock in domain ID allocation helpers

2022-06-01 Thread Baolu Lu

Hi Kevin,

Thank you for the comments.

On 2022/6/1 17:09, Tian, Kevin wrote:

From: Lu Baolu
Sent: Friday, May 27, 2022 2:30 PM

The iommu->lock is used to protect the per-IOMMU domain ID resource.
Move the spinlock acquisition/release into the helpers where domain
IDs are allocated and freed. The device_domain_lock is irrelevant to
domain ID resources, remove its assertion as well.

while moving the lock you also replace spin_lock_irqsave() with spin_lock().
It'd be cleaner to just do movement here and then replace all _irqsave()
in patch 8.


Yeah, that will be clearer.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 1/6] iommu: Add a per domain PASID for DMA API

2022-06-01 Thread Baolu Lu

On 2022/6/1 09:43, Tian, Kevin wrote:

From: Jacob Pan
Sent: Wednesday, June 1, 2022 1:30 AM

In both cases the pasid is stored in the attach data instead of the
domain.


So during IOTLB flush for the domain, do we loop through the attach data?

Yes and it's required.


What does the attach data mean here? Do you mean group->pasid_array?

Why not tracking the {device, pasid} info in the iommu driver when
setting domain to {device, pasid}? We have tracked device in a list when
setting a domain to device.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 01/12] iommu/vt-d: Use iommu_get_domain_for_dev() in debugfs

2022-06-01 Thread Baolu Lu

On 2022/6/1 02:51, Jason Gunthorpe wrote:

Oh, I've spent the last couple of weeks hacking up horrible things
manipulating entries in init_mm, and never realised that that was actually
the special case. Oh well, live and learn.

The init_mm is sort of different, it doesn't have zap in quite the
same way, for example. I was talking about the typical process mm.

Anyhow, the right solution is to use RCU as I described before, Baolu
do you want to try?


Yes, of course.

Your discussion with Robin gave me a lot of inspiration. Very
appreciated! I want to use a separate patch to solve this debugfs
problem, because it has exceeded the original intention of this series.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 01/12] iommu/vt-d: Use iommu_get_domain_for_dev() in debugfs

2022-05-31 Thread Baolu Lu

On 2022/5/31 23:59, Jason Gunthorpe wrote:

On Tue, May 31, 2022 at 02:52:28PM +0100, Robin Murphy wrote:


+    break;
+    pgtable_walk_level(m, phys_to_virt(phys_addr),


Also, obligatory reminder that pfn_valid() only means that pfn_to_page()
gets you a valid struct page. Whether that page is direct-mapped kernel
memory or not is a different matter.


Even though this is debugfs, if the operation is sketchy like that and
can theortically crash the kernel the driver should test capabilities,
CAP_SYS_RAWIO or something may be appropriate. I don't think we have a
better cap for 'userspace may crash the kernel'


Yes. We should test both CAP_SYS_ADMIN and CAP_SYS_RAWIO.

Best regards,
baolu

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 01/12] iommu/vt-d: Use iommu_get_domain_for_dev() in debugfs

2022-05-31 Thread Baolu Lu

Hi Robin,

Thank you for the comments.

On 2022/5/31 21:52, Robin Murphy wrote:

On 2022-05-31 04:02, Baolu Lu wrote:

On 2022/5/30 20:14, Jason Gunthorpe wrote:

On Sun, May 29, 2022 at 01:14:46PM +0800, Baolu Lu wrote:


[--snip--]

diff --git a/drivers/iommu/intel/debugfs.c 
b/drivers/iommu/intel/debugfs.c

index d927ef10641b..e6f4835b8d9f 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -333,25 +333,28 @@ static void pgtable_walk_level(struct seq_file 
*m, struct dma_pte *pde,

  continue;

  path[level] = pde->val;
-    if (dma_pte_superpage(pde) || level == 1)
+    if (dma_pte_superpage(pde) || level == 1) {
  dump_page_info(m, start, path);
-    else
-    pgtable_walk_level(m, phys_to_virt(dma_pte_addr(pde)),
+    } else {
+    unsigned long phys_addr;
+
+    phys_addr = (unsigned long)dma_pte_addr(pde);
+    if (!pfn_valid(__phys_to_pfn(phys_addr)))


Given that pte_present(pde) passed just above, it was almost certainly a 
valid entry, so it seems unlikely that the physical address it pointed 
to could have disappeared in the meantime. If you're worried about the 
potential case where we've been preempted during this walk for long 
enough that the page has already been freed by an unmap, reallocated, 


Yes. This is exactly what I am worried about and what this patch wants
to solve.

and filled with someone else's data that happens to look like valid 
PTEs, this still isn't enough, since that data could just as well happen 
to look like valid physical addresses too.
I imagine that if you want to safely walk pagetables concurrently with 
them potentially being freed, you'd probably need to get RCU involved.


I don't want to make the map/unmap interface more complex or inefficient
because of a debugfs feature. I hope that the debugfs and map/unmap
interfaces are orthogonal, just like the IOMMU hardware traversing the
page tables, as long as the accessed physical address is valid and
accessible. Otherwise, stop the traversal immediately. If we can't
achieve this, I'd rather stop supporting this debugfs node.




+    break;
+    pgtable_walk_level(m, phys_to_virt(phys_addr),


Also, obligatory reminder that pfn_valid() only means that pfn_to_page() 
gets you a valid struct page. Whether that page is direct-mapped kernel 
memory or not is a different matter.


Perhaps I can check this from the page flags?




 level - 1, start, path);
+    }
  path[level] = 0;
  }
  }

-static int show_device_domain_translation(struct device *dev, void 
*data)
+static int __show_device_domain_translation(struct device *dev, void 
*data)

  {
  struct device_domain_info *info = dev_iommu_priv_get(dev);
  struct dmar_domain *domain = info->domain;
  struct seq_file *m = data;
  u64 path[6] = { 0 };

-    if (!domain)
-    return 0;
-
  seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
 (u64)virt_to_phys(domain->pgd));
  seq_puts(m, 
"IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
@@ -359,20 +362,27 @@ static int show_device_domain_translation(struct 
device *dev, void *data)

  pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
  seq_putc(m, '\n');

-    return 0;
+    return 1;
  }

-static int domain_translation_struct_show(struct seq_file *m, void 
*unused)
+static int show_device_domain_translation(struct device *dev, void 
*data)

  {
-    unsigned long flags;
-    int ret;
+    struct iommu_group *group;

-    spin_lock_irqsave(&device_domain_lock, flags);
-    ret = bus_for_each_dev(&pci_bus_type, NULL, m,
-   show_device_domain_translation);
-    spin_unlock_irqrestore(&device_domain_lock, flags);
+    group = iommu_group_get(dev);
+    if (group) {
+    iommu_group_for_each_dev(group, data,
+ __show_device_domain_translation);


Why group_for_each_dev?


This will hold the group mutex when the callback is invoked. With the
group mutex hold, the domain could not get changed.

If there *are* multiple devices in the group 
then by definition they should be attached to the same domain, so 
dumping that domain's mappings more than once seems pointless. 
Especially given that the outer bus_for_each_dev iteration will already 
visit each individual device anyway, so this would only make the 
redundancy even worse than it already is.


__show_device_domain_translation() only dumps mappings once as it always
returns 1.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 01/12] iommu/vt-d: Use iommu_get_domain_for_dev() in debugfs

2022-05-31 Thread Baolu Lu

On 2022/5/31 21:10, Jason Gunthorpe wrote:

On Tue, May 31, 2022 at 11:02:06AM +0800, Baolu Lu wrote:


For case 2, it is a bit weird. I tried to add a rwsem lock to make the
iommu_unmap() and dumping tables in debugfs exclusive. This does not
work because debugfs may depend on the DMA of the devices to work. It
seems that what we can do is to allow this race, but when we traverse
the page table in debugfs, we will check the validity of the physical
address retrieved from the page table entry. Then, the worst case is to
print some useless information.


Sounds horrible, don't you have locking around the IOPTEs of some
kind? How does updating them work reliably?


There's no locking around updating the IOPTEs. The basic assumption is
that at any time, there's only a single thread manipulating the mappings
of the range specified in iommu_map/unmap() APIs. Therefore, the race
only exists when multiple ranges share some high-level IOPTEs. The IOMMU
driver updates those IOPTEs using the compare-and-exchange atomic
operation.



It is just debugfs, so maybe it is not the end of the world, but
still..


Fair enough. I think this is somewhat similar to that IOMMU hardware can
traverse the page table at any time without considering when the CPUs
update it. The IOMMU hardware will generate faults when it encounters
failure during the traverse of page table. Similarly, perhaps debugfs
could dump all-ones for an invalid IOPTE?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 1/6] iommu: Add a per domain PASID for DMA API

2022-05-31 Thread Baolu Lu

On 2022/5/31 18:12, Tian, Kevin wrote:

+++ b/include/linux/iommu.h
@@ -105,6 +105,8 @@ struct iommu_domain {
enum iommu_page_response_code (*iopf_handler)(struct

iommu_fault *fault,

  void *data);
void *fault_data;
+   ioasid_t pasid; /* Used for DMA requests with PASID */
+   atomic_t pasid_users;

These are poorly named, this is really the DMA API global PASID and
shouldn't be used for other things.



Perhaps I misunderstood, do you mind explaining more?

You still haven't really explained what this is for in this patch,
maybe it just needs a better commit message, or maybe something is
wrong.

I keep saying the DMA API usage is not special, so why do we need to
create a new global pasid and refcount? Realistically this is only
going to be used by IDXD, why can't we just allocate a PASID and
return it to the driver every time a driver asks for DMA API on PASI
mode? Why does the core need to do anything special?


Agree. I guess it was a mistake caused by treating ENQCMD as the
only user although the actual semantics of the invented interfaces
have already evolved to be quite general.

This is very similar to what we have been discussing for iommufd.
a PASID is just an additional routing info when attaching a device
to an I/O address space (DMA API in this context) and by default
it should be a per-device resource except when ENQCMD is
explicitly opt in.

Hence it's right time for us to develop common facility working
for both this DMA API usage and iommufd, i.e.:

for normal PASID attach to a domain, driver:

allocates a local pasid from device local space;
attaches the local pasid to a domain;

for PASID attach in particular for ENQCMD, driver:

allocates a global pasid in system-wide;
attaches the global pasid to a domain;
set the global pasid in PASID_MSR;

In both cases the pasid is stored in the attach data instead of the
domain.

DMA API pasid is no special from above except it needs to allow
one device attached to the same domain twice (one with RID
and the other with RID+PASID).

for iommufd those operations are initiated by userspace via
iommufd uAPI.


My understanding is that device driver owns its PASID policy. If ENQCMD
is supported on the device, the PASIDs should be allocated through
ioasid_alloc(). Otherwise, the whole PASID pool is managed by the device
driver.

For kernel DMA w/ PASID, after the driver has a PASID for this purpose,
it can just set the default domain to the PASID on device. There's no
need for enable/disable() interfaces.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH RFC 09/19] iommu/amd: Access/Dirty bit support in IOPTEs

2022-05-31 Thread Baolu Lu

Hi Suravee ,

On 2022/5/31 19:34, Suravee Suthikulpanit wrote:

On 4/29/22 4:09 AM, Joao Martins wrote:

.
+static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
+    bool enable)
+{
+    struct protection_domain *pdomain = to_pdomain(domain);
+    struct iommu_dev_data *dev_data;
+    bool dom_flush = false;
+
+    if (!amd_iommu_had_support)
+    return -EOPNOTSUPP;
+
+    list_for_each_entry(dev_data, &pdomain->dev_list, list) {


Since we iterate through device list for the domain, we would need to
call spin_lock_irqsave(&pdomain->lock, flags) here.


Not related, just out of curiosity. Does it really need to disable the
interrupt while holding this lock? Any case this list would be traversed
in any interrupt context? Perhaps I missed anything?

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 01/12] iommu/vt-d: Use iommu_get_domain_for_dev() in debugfs

2022-05-30 Thread Baolu Lu

On 2022/5/30 20:14, Jason Gunthorpe wrote:

On Sun, May 29, 2022 at 01:14:46PM +0800, Baolu Lu wrote:


 From 1e87b5df40c6ce9414cdd03988c3b52bfb17af5f Mon Sep 17 00:00:00 2001
From: Lu Baolu 
Date: Sun, 29 May 2022 10:18:56 +0800
Subject: [PATCH 1/1] iommu/vt-d: debugfs: Remove device_domain_lock usage

The domain_translation_struct debugfs node is used to dump static
mappings of PCI devices. It potentially races with setting new
domains to devices and the iommu_map/unmap() interfaces. The existing
code tries to use the global spinlock device_domain_lock to avoid the
races, but this is problematical as this lock is only used to protect
the device tracking lists of the domains.

Instead of using an immature lock to cover up the problem, it's better
to explicitly restrict the use of this debugfs node. This also makes
device_domain_lock static.


What does "explicitly restrict" mean?


I originally thought about adding restrictions on this interface to a
document. But obviously, this is a naive idea. :-) I went over the code
again. The races exist in two paths:

1. Dump the page table in use while setting a new page table to the
   device.
2. A high-level page table entry has been marked as non-present, but the
   dumping code has walked down to the low-level tables.

For case 1, we can try to solve it by dumping tables while holding the
group->mutex.

For case 2, it is a bit weird. I tried to add a rwsem lock to make the
iommu_unmap() and dumping tables in debugfs exclusive. This does not
work because debugfs may depend on the DMA of the devices to work. It
seems that what we can do is to allow this race, but when we traverse
the page table in debugfs, we will check the validity of the physical
address retrieved from the page table entry. Then, the worst case is to
print some useless information.

The real code looks like this:

From 3feb0727f9d7095729ef75ab1967270045b3a38c Mon Sep 17 00:00:00 2001
From: Lu Baolu 
Date: Sun, 29 May 2022 10:18:56 +0800
Subject: [PATCH 1/1] iommu/vt-d: debugfs: Remove device_domain_lock usage

The domain_translation_struct debugfs node is used to dump the DMAR page
tables for the PCI devices. It potentially races with setting domains to
devices and the iommu_unmap() interface. The existing code uses a global
spinlock device_domain_lock to avoid the races, but this is problematical
as this lock is only used to protect the device tracking lists of each
domain.

This replaces device_domain_lock with group->mutex to protect the traverse
of the page tables from setting a new domain and always check the physical
address retrieved from the page table entry before traversing to the next-
level page table.

As a cleanup, this also makes device_domain_lock static.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/debugfs.c | 42 ++-
 drivers/iommu/intel/iommu.c   |  2 +-
 drivers/iommu/intel/iommu.h   |  1 -
 3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index d927ef10641b..e6f4835b8d9f 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -333,25 +333,28 @@ static void pgtable_walk_level(struct seq_file *m, 
struct dma_pte *pde,

continue;

path[level] = pde->val;
-   if (dma_pte_superpage(pde) || level == 1)
+   if (dma_pte_superpage(pde) || level == 1) {
dump_page_info(m, start, path);
-   else
-   pgtable_walk_level(m, phys_to_virt(dma_pte_addr(pde)),
+   } else {
+   unsigned long phys_addr;
+
+   phys_addr = (unsigned long)dma_pte_addr(pde);
+   if (!pfn_valid(__phys_to_pfn(phys_addr)))
+   break;
+   pgtable_walk_level(m, phys_to_virt(phys_addr),
   level - 1, start, path);
+   }
path[level] = 0;
}
 }

-static int show_device_domain_translation(struct device *dev, void *data)
+static int __show_device_domain_translation(struct device *dev, void *data)
 {
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *domain = info->domain;
struct seq_file *m = data;
u64 path[6] = { 0 };

-   if (!domain)
-   return 0;
-
seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
   (u64)virt_to_phys(domain->pgd));
 	seq_puts(m, 
"IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
@@ -359,20 +362,27 @@ static int show_device_domain_translation(struct 
device *dev, void *data)

pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
seq_putc(m, '\n');

-   return 0;
+   return 1;
 }

-static int domain_translation_struct_show(struct seq_file *m, void *unused)

Re: [PATCH 03/12] iommu/vt-d: Remove clearing translation data in disable_dmar_iommu()

2022-05-28 Thread Baolu Lu

On 2022/5/27 23:01, Jason Gunthorpe wrote:

On Fri, May 27, 2022 at 02:30:10PM +0800, Lu Baolu wrote:

The disable_dmar_iommu() is called when IOMMU initialzation fails or
the IOMMU is hot-removed from the system. In both cases, there is no
need to clear the IOMMU translation data structures for devices.

On the initialization path, the device probing only happens after the
IOMMU is initialized successfully, hence there're no translation data
structures.

On the hot-remove path, there is no real use case where the IOMMU is
hot-removed, but the devices that it manages are still alive in the
system. The translation data structures were torn down during device
release, hence there's no need to repeat it in IOMMU hot-remove path
either.


Can you leave behind a 1 statement WARN_ON of some kind to check this?


Sure. As the default domain is the first domain allocated for a device
and the last one freed. We can WARN_ON the case where there's still
domain IDs in use. How do you like this?

+   /*
+* All iommu domains must have been detached from the devices,
+* hence there should be no domain IDs in use.
+*/
+   if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
+   != NUM_RESERVED_DID))
+   return;

Best regards,
baolu

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 01/12] iommu/vt-d: Use iommu_get_domain_for_dev() in debugfs

2022-05-28 Thread Baolu Lu

On 2022/5/27 22:59, Jason Gunthorpe wrote:

On Fri, May 27, 2022 at 02:30:08PM +0800, Lu Baolu wrote:

Retrieve the attached domain for a device through the generic interface
exposed by the iommu core. This also makes device_domain_lock static.

Signed-off-by: Lu Baolu 
  drivers/iommu/intel/iommu.h   |  1 -
  drivers/iommu/intel/debugfs.c | 20 
  drivers/iommu/intel/iommu.c   |  2 +-
  3 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index a22adfbdf870..8a6d64d726c0 100644
+++ b/drivers/iommu/intel/iommu.h
@@ -480,7 +480,6 @@ enum {
  #define VTD_FLAG_SVM_CAPABLE  (1 << 2)
  
  extern int intel_iommu_sm;

-extern spinlock_t device_domain_lock;
  
  #define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))

  #define pasid_supported(iommu)(sm_supported(iommu) && 
\
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index d927ef10641b..eea8727aa7bc 100644
+++ b/drivers/iommu/intel/debugfs.c
@@ -344,19 +344,21 @@ static void pgtable_walk_level(struct seq_file *m, struct 
dma_pte *pde,
  
  static int show_device_domain_translation(struct device *dev, void *data)

  {
-   struct device_domain_info *info = dev_iommu_priv_get(dev);
-   struct dmar_domain *domain = info->domain;
+   struct dmar_domain *dmar_domain;
+   struct iommu_domain *domain;
struct seq_file *m = data;
u64 path[6] = { 0 };
  
+	domain = iommu_get_domain_for_dev(dev);

if (!domain)
return 0;


The iommu_get_domain_for_dev() API should be called something like
'iommu_get_dma_api_domain()' and clearly documented that it is safe to
call only so long as a DMA API using driver is attached to the device,
which is most of the current callers.


Yes, agreed.


This use in random sysfs inside the iommu driver is not OK because it
doesn't have any locking protecting domain from concurrent free.


This is not sysfs, but debugfs. The description of this patch is
confusing. I should make it specific and straight-forward.

How about below one?

From 1e87b5df40c6ce9414cdd03988c3b52bfb17af5f Mon Sep 17 00:00:00 2001
From: Lu Baolu 
Date: Sun, 29 May 2022 10:18:56 +0800
Subject: [PATCH 1/1] iommu/vt-d: debugfs: Remove device_domain_lock usage

The domain_translation_struct debugfs node is used to dump static
mappings of PCI devices. It potentially races with setting new
domains to devices and the iommu_map/unmap() interfaces. The existing
code tries to use the global spinlock device_domain_lock to avoid the
races, but this is problematical as this lock is only used to protect
the device tracking lists of the domains.

Instead of using an immature lock to cover up the problem, it's better
to explicitly restrict the use of this debugfs node. This also makes
device_domain_lock static.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/debugfs.c | 17 -
 drivers/iommu/intel/iommu.c   |  2 +-
 drivers/iommu/intel/iommu.h   |  1 -
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index d927ef10641b..9642e3e9d6b0 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -362,17 +362,16 @@ static int show_device_domain_translation(struct 
device *dev, void *data)

return 0;
 }

+/*
+ * Dump the static mappings of PCI devices. This is only for DEBUGFS code,
+ * don't use it for other purposes.  It potentially races with setting new
+ * domains to devices and iommu_map/unmap(). Use the trace events under
+ * /sys/kernel/debug/tracing/events/iommu/ for dynamic debugging.
+ */
 static int domain_translation_struct_show(struct seq_file *m, void 
*unused)

 {
-   unsigned long flags;
-   int ret;
-
-   spin_lock_irqsave(&device_domain_lock, flags);
-   ret = bus_for_each_dev(&pci_bus_type, NULL, m,
-  show_device_domain_translation);
-   spin_unlock_irqrestore(&device_domain_lock, flags);
-
-   return ret;
+   return bus_for_each_dev(&pci_bus_type, NULL, m,
+   show_device_domain_translation);
 }
 DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 1af4b6562266..cacae8bdaa65 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -314,7 +314,7 @@ static int iommu_skip_te_disable;
 #define IDENTMAP_GFX   2
 #define IDENTMAP_AZALIA4

-DEFINE_SPINLOCK(device_domain_lock);
+static DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);

 /*
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index a22adfbdf870..8a6d64d726c0 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -480,7 +480,6 @@ enum {
 #define VTD_FLAG_SVM_CAPABLE   (1 << 2)

 extern int intel_iommu_sm;
-extern spinlock_t device_domain_lock;

Re: [PATCH v7 03/10] iommu/sva: Add iommu_sva_domain support

2022-05-25 Thread Baolu Lu

On 2022/5/25 23:25, Jason Gunthorpe wrote:

On Wed, May 25, 2022 at 01:19:08PM +0800, Baolu Lu wrote:

Hi Jason,

On 2022/5/24 21:44, Jason Gunthorpe wrote:

diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index 106506143896..210c376f6043 100644
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -69,3 +69,51 @@ struct mm_struct *iommu_sva_find(ioasid_t pasid)
	return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
   }
   EXPORT_SYMBOL_GPL(iommu_sva_find);
+
+/*
+ * IOMMU SVA driver-oriented interfaces
+ */
+struct iommu_domain *
+iommu_sva_alloc_domain(struct bus_type *bus, struct mm_struct *mm)

This should return the proper type



Can you please elaborate a bit on "return the proper type"? Did you mean
return iommu_sva_domain instead? That's a wrapper of iommu_domain and
only for iommu internal usage.


If you are exposing special SVA APIs then it is not internal usage
only anymore, so expose the type.


Ah, got you. Thanks for the explanation.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 03/10] iommu/sva: Add iommu_sva_domain support

2022-05-25 Thread Baolu Lu

On 2022/5/25 19:06, Jean-Philippe Brucker wrote:

On Wed, May 25, 2022 at 11:07:49AM +0100, Robin Murphy wrote:

Did you mean @handler and @handler_token staffs below?

struct iommu_domain {
      unsigned type;
      const struct iommu_domain_ops *ops;
      unsigned long pgsize_bitmap;    /* Bitmap of page sizes in use */
      iommu_fault_handler_t handler;
      void *handler_token;
      struct iommu_domain_geometry geometry;
      struct iommu_dma_cookie *iova_cookie;
};

Is it only for DMA domains? From the point view of IOMMU faults, it
seems to be generic.

Yes, it's the old common iommu_set_fault_handler() stuff (which arguably is
more of a "notifier" than a "handler"), but I assume that that's irrelevant
if SVA is using IOPF instead?

Yes IOMMU drivers call either the newer iommu_report_device_fault() or the
old report_iommu_fault(), and only the former can support IOPF/SVA. I've
tried to merge them before but never completed it. I think the main issue
was with finding the endpoint that caused the fault from the fault
handler. Some IOMMU drivers just pass the IOMMU device to
report_iommu_fault(). I'll probably pick that up at some point.


Thank you all for the comments and suggestions. Below is the refreshed
patch. Hope that I didn't miss anything.

From 463c04cada8e8640598f981d8d16157781b9de6f Mon Sep 17 00:00:00 2001
From: Lu Baolu 
Date: Wed, 11 May 2022 20:59:24 +0800
Subject: [PATCH 04/11] iommu: Add sva iommu_domain support

The sva iommu_domain represents a hardware pagetable that the IOMMU
hardware could use for SVA translation. This adds some infrastructure
to support SVA domain in the iommu common layer. It includes:

- Extend the iommu_domain to support a new IOMMU_DOMAIN_SVA domain
  type. The IOMMU drivers that support SVA should provide the sva
  domain specific iommu_domain_ops.
- Add a helper to allocate an SVA domain. The iommu_domain_free()
  is still used to free an SVA domain.
- Add helpers to attach an SVA domain to a device and the reverse
  operation.

Some buses, like PCI, route packets without considering the PASID value.
Thus a DMA target address with PASID might be treated as P2P if the
address falls into the MMIO BAR of other devices in the group. To make
things simple, the attach/detach interfaces only apply to devices
belonging to the singleton groups, and the singleton is immutable in
fabric i.e. not affected by hotplug.

The iommu_attach/detach_device_pasid() can be used for other purposes,
such as kernel DMA with pasid, mediation device, etc.

Suggested-by: Jean-Philippe Brucker 
Suggested-by: Jason Gunthorpe 
Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 
---
 drivers/iommu/iommu.c | 93 +++
 include/linux/iommu.h | 45 -
 2 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 63b64b4e8a38..b1a2ad64a413 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 

 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
@@ -39,6 +40,7 @@ struct iommu_group {
struct kobject kobj;
struct kobject *devices_kobj;
struct list_head devices;
+   struct xarray pasid_array;
struct mutex mutex;
void *iommu_data;
void (*iommu_data_release)(void *iommu_data);
@@ -666,6 +668,7 @@ struct iommu_group *iommu_group_alloc(void)
	mutex_init(&group->mutex);
	INIT_LIST_HEAD(&group->devices);
	INIT_LIST_HEAD(&group->entry);
+   xa_init(&group->pasid_array);

	ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL);
if (ret < 0) {
@@ -1961,6 +1964,8 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc);

 void iommu_domain_free(struct iommu_domain *domain)
 {
+   if (domain->type == IOMMU_DOMAIN_SVA)
+   mmdrop(domain->mm);
iommu_put_dma_cookie(domain);
domain->ops->free(domain);
 }
@@ -3277,3 +3282,91 @@ bool iommu_group_dma_owner_claimed(struct 
iommu_group *group)

return user;
 }
 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
+
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+   struct mm_struct *mm)
+{
+   const struct iommu_ops *ops = dev_iommu_ops(dev);
+   struct iommu_domain *domain;
+
+   domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
+   if (!domain)
+   return NULL;
+
+   domain->type = IOMMU_DOMAIN_SVA;
+   mmgrab(mm);
+   domain->mm = mm;
+
+   return domain;
+}
+
+static bool iommu_group_immutable_singleton(struct iommu_group *group,
+   struct device *dev)
+{
+   int count;
+
+   mutex_lock(&group->mutex);
+   count = iommu_group_device_count(group);
+   mutex_unlock(&group->mutex);
+
+   if (count != 1)
+   return false;
+
+   /*
+* The PCI device could be considered to 

Re: [PATCH 1/3] iommu: Add Visconti5 IOMMU driver

2022-05-25 Thread Baolu Lu

On 2022/5/25 09:31, Nobuhiro Iwamatsu wrote:

+static const struct iommu_ops visconti_atu_ops = {
+   .domain_alloc = visconti_atu_domain_alloc,
+   .probe_device = visconti_atu_probe_device,
+   .release_device = visconti_atu_release_device,
+   .device_group = generic_device_group,
+   .of_xlate = visconti_atu_of_xlate,
+   .pgsize_bitmap = ATU_IOMMU_PGSIZE_BITMAP,
+   .default_domain_ops = &(const struct iommu_domain_ops) {
+   .attach_dev = visconti_atu_attach_device,
+   .detach_dev = visconti_atu_detach_device,


The detach_dev callback is about to be deprecated. The new drivers
should implement the default domain and blocking domain instead.


+   .map = visconti_atu_map,
+   .unmap = visconti_atu_unmap,
+   .iova_to_phys = visconti_atu_iova_to_phys,
+   .free = visconti_atu_domain_free,
+   }
+};


Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 03/10] iommu/sva: Add iommu_sva_domain support

2022-05-25 Thread Baolu Lu

Hi Robin,

On 2022/5/24 22:36, Robin Murphy wrote:

On 2022-05-19 08:20, Lu Baolu wrote:
[...]
diff --git a/drivers/iommu/iommu-sva-lib.c 
b/drivers/iommu/iommu-sva-lib.c

index 106506143896..210c376f6043 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -69,3 +69,51 @@ struct mm_struct *iommu_sva_find(ioasid_t pasid)
  return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
  }
  EXPORT_SYMBOL_GPL(iommu_sva_find);
+
+/*
+ * IOMMU SVA driver-oriented interfaces
+ */
+struct iommu_domain *
+iommu_sva_alloc_domain(struct bus_type *bus, struct mm_struct *mm)


Argh, please no new bus-based external interfaces! Domain allocation 
needs to resolve to the right IOMMU instance to solve a number of 
issues, and cleaning up existing users of iommu_domain_alloc() to 
prepare for that is already hard enough. This is arguably even more 
relevant here than for other domain types, since SVA support is more 
likely to depend on specific features that can vary between IOMMU 
instances even with the same driver. Please make the external interface 
take a struct device, then resolve the ops through dev->iommu.


Further nit: the naming inconsistency bugs me a bit - 
iommu_sva_domain_alloc() seems more natural. Also I'd question the 
symmetry vs. usability dichotomy of whether we *really* want two 
different free functions for a struct iommu_domain pointer, where any 
caller which might mix SVA and non-SVA usage then has to remember how 
they allocated any particular domain :/



+{
+    struct iommu_sva_domain *sva_domain;
+    struct iommu_domain *domain;
+
+    if (!bus->iommu_ops || !bus->iommu_ops->sva_domain_ops)
+    return ERR_PTR(-ENODEV);
+
+    sva_domain = kzalloc(sizeof(*sva_domain), GFP_KERNEL);
+    if (!sva_domain)
+    return ERR_PTR(-ENOMEM);
+
+    mmgrab(mm);
+    sva_domain->mm = mm;
+
+    domain = &sva_domain->domain;
+    domain->type = IOMMU_DOMAIN_SVA;
+    domain->ops = bus->iommu_ops->sva_domain_ops;


I'd have thought it would be logical to pass IOMMU_DOMAIN_SVA to the 
normal domain_alloc call, so that driver-internal stuff like context 
descriptors can be still be hung off the domain as usual (rather than 
all drivers having to implement some extra internal lookup mechanism to 
handle all the SVA domain ops), but that's something we're free to come 


Agreed with above comments. Thanks! I will post an additional patch
for review later.

back and change later. FWIW I'd just stick the mm pointer in struct 
iommu_domain, in a union with the fault handler stuff and/or iova_cookie 
- those are mutually exclusive with SVA, right?


"iova_cookie" is mutually exclusive with SVA, but I am not sure about
the fault handler stuff.

Did you mean @handler and @handler_token staffs below?

struct iommu_domain {
unsigned type;
const struct iommu_domain_ops *ops;
unsigned long pgsize_bitmap;/* Bitmap of page sizes in use */
iommu_fault_handler_t handler;
void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
};

Is it only for DMA domains? From the point view of IOMMU faults, it
seems to be generic.

Best regards,
baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

  1   2   >