Re: [PATCH v3 3/4] iommu/vt-d: Add page request draining support

2020-04-29 Thread Jacob Pan
On Wed, 29 Apr 2020 14:00:05 +0800
Lu Baolu  wrote:

> Hi Jacob,
> 
> On 2020/4/29 11:36, Jacob Pan wrote:
> > On Wed, 22 Apr 2020 16:06:10 +0800
> > Lu Baolu  wrote:
> >   
> >> When a PASID is stopped or terminated, there can be pending PRQs
> >> (requests that haven't received responses) in remapping hardware.
> >> This adds the interface to drain page requests and call it when a
> >> PASID is terminated.
> >>
> >> Signed-off-by: Jacob Pan
> >> Signed-off-by: Liu Yi L
> >> Signed-off-by: Lu Baolu
> >> ---
> >>   drivers/iommu/intel-svm.c   | 103 ++--
> >>   include/linux/intel-iommu.h |   4 ++
> >>   2 files changed, 102 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> >> index 83dc4319f661..2534641ef707 100644
> >> --- a/drivers/iommu/intel-svm.c
> >> +++ b/drivers/iommu/intel-svm.c
> >> @@ -23,6 +23,7 @@
> >>   #include "intel-pasid.h"
> >>   
> >>   static irqreturn_t prq_event_thread(int irq, void *d);
> >> +static void intel_svm_drain_prq(struct device *dev, int pasid);
> >>   
> >>   #define PRQ_ORDER 0
> >>   
> >> @@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
> >>	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
> >>	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
> >> +	init_completion(&iommu->prq_complete);
> >> +
> >>return 0;
> >>   }
> >>   
> >> @@ -208,6 +211,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> >>	rcu_read_lock();
> >>	list_for_each_entry_rcu(sdev, &svm->devs, list) {
> >>		intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
> >> +		intel_svm_drain_prq(sdev->dev, svm->pasid);
> > mmu_notifier release is called in atomic context, but drain_prq needs
> > to wait for completion. I tested the exit path when a process crashes,
> > and got:
> > 
> > [  +0.696214] BUG: sleeping function called from invalid context at kernel/sched/completion.c:101
> > [  +0.68] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 3235, name: dsatest
> > [  +0.46] INFO: lockdep is turned off.
> > [  +0.02] CPU: 1 PID: 3235 Comm: dsatest Not tainted 5.7.0-rc1-z-svmtest+ #1637
> > [  +0.00] Hardware name: Intel Corporation Kabylake Client platform/Skylake Halo DDR4 RVP11, BIOS 04.1709050855 09/05/2017
> > [  +0.01] Call Trace:
> > [  +0.04]  dump_stack+0x68/0x9b
> > [  +0.03]  ___might_sleep+0x229/0x250
> > [  +0.03]  wait_for_completion_timeout+0x3c/0x110
> > [  +0.03]  intel_svm_drain_prq+0x12f/0x210
> > [  +0.03]  intel_mm_release+0x73/0x110
> > [  +0.03]  __mmu_notifier_release+0x94/0x220
> > [  +0.02]  ? do_munmap+0x10/0x10
> > [  +0.02]  ? prepare_ftrace_return+0x5c/0x80
> > [  +0.03]  exit_mmap+0x156/0x1a0
> > [  +0.02]  ? mmput+0x44/0x120
> > [  +0.03]  ? exit_mmap+0x5/0x1a0
> > [  +0.02]  ? ftrace_graph_caller+0xa0/0xa0
> > [  +0.01]  mmput+0x5e/0x120
> >   
> 
> Thanks a lot!
> 
> Actually, we can't drain page requests in this mm_notifier code path,
> right? The assumptions of page request draining are that 1) the device
> driver has drained DMA requests at the device end; and 2) the PASID
> entry has been marked non-present. So we can only drain page requests
> in the unbind path.
> 
> Thoughts?
> 
Right, we can skip the drain here; unbind will come soon after the mm
exits. So even if in-flight page requests come through, we can just
respond with "Invalid Response" once mm exit has started. The current
code already checks whether the mm is exiting via mmget_not_zero().
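
For illustration, a minimal sketch of that check as it sits in the page
request handler (variable names approximated from the driver, not an
exact quote):

	/* In prq_event_thread(): fail the fault once the mm starts exiting. */
	if (!svm->mm)
		goto bad_req;
	if (!mmget_not_zero(svm->mm)) {
		/* mm is exiting or already gone; the response to this page
		 * request becomes QI_RESP_INVALID instead of a fault fill. */
		goto bad_req;
	}
	/* ... otherwise service the fault via handle_mm_fault() ... */
	mmput(svm->mm);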

> Best regards,
> baolu

[Jacob Pan]


Re: [PATCH v3 3/4] iommu/vt-d: Add page request draining support

2020-04-29 Thread Lu Baolu

Hi Jacob,

On 2020/4/29 11:36, Jacob Pan wrote:

On Wed, 22 Apr 2020 16:06:10 +0800
Lu Baolu  wrote:


When a PASID is stopped or terminated, there can be pending PRQs
(requests that haven't received responses) in remapping hardware.
This adds the interface to drain page requests and call it when a
PASID is terminated.

Signed-off-by: Jacob Pan
Signed-off-by: Liu Yi L
Signed-off-by: Lu Baolu
---
  drivers/iommu/intel-svm.c   | 103 ++--
  include/linux/intel-iommu.h |   4 ++
  2 files changed, 102 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 83dc4319f661..2534641ef707 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -23,6 +23,7 @@
  #include "intel-pasid.h"
  
  static irqreturn_t prq_event_thread(int irq, void *d);

+static void intel_svm_drain_prq(struct device *dev, int pasid);
  
  #define PRQ_ORDER 0
  
@@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)

dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
+	init_completion(&iommu->prq_complete);
+
return 0;
  }
  
@@ -208,6 +211,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list) {
		intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
+		intel_svm_drain_prq(sdev->dev, svm->pasid);

mmu_notifier release is called in atomic context, but drain_prq needs to
wait for completion. I tested the exit path when a process crashes, and got:

[  +0.696214] BUG: sleeping function called from invalid context at kernel/sched/completion.c:101
[  +0.68] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 3235, name: dsatest
[  +0.46] INFO: lockdep is turned off.
[  +0.02] CPU: 1 PID: 3235 Comm: dsatest Not tainted 5.7.0-rc1-z-svmtest+ #1637
[  +0.00] Hardware name: Intel Corporation Kabylake Client platform/Skylake Halo DDR4 RVP11, BIOS 04.1709050855 09/05/2017
[  +0.01] Call Trace:
[  +0.04]  dump_stack+0x68/0x9b
[  +0.03]  ___might_sleep+0x229/0x250
[  +0.03]  wait_for_completion_timeout+0x3c/0x110
[  +0.03]  intel_svm_drain_prq+0x12f/0x210
[  +0.03]  intel_mm_release+0x73/0x110
[  +0.03]  __mmu_notifier_release+0x94/0x220
[  +0.02]  ? do_munmap+0x10/0x10
[  +0.02]  ? prepare_ftrace_return+0x5c/0x80
[  +0.03]  exit_mmap+0x156/0x1a0
[  +0.02]  ? mmput+0x44/0x120
[  +0.03]  ? exit_mmap+0x5/0x1a0
[  +0.02]  ? ftrace_graph_caller+0xa0/0xa0
[  +0.01]  mmput+0x5e/0x120




Thanks a lot!

Actually, we can't drain page requests in this mm_notifier code path,
right? The assumptions of page request draining are that 1) the device
driver has drained DMA requests at the device end; and 2) the PASID
entry has been marked non-present. So we can only drain page requests
in the unbind path.

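For illustration, the ordering this implies in the unbind path, using the
helpers already present in this patch (a sketch, not a new hunk):

	/* The PASID entry must be non-present before draining. */
	intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
	/* No new requests can now be taken; drain what is already pending. */
	intel_svm_drain_prq(dev, svm->pasid);
	/* Finally, flush caches before the PASID can be reused. */
	intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
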
Thoughts?

Best regards,
baolu


Re: [PATCH v3 3/4] iommu/vt-d: Add page request draining support

2020-04-28 Thread Jacob Pan
On Wed, 22 Apr 2020 16:06:10 +0800
Lu Baolu  wrote:

> When a PASID is stopped or terminated, there can be pending PRQs
> (requests that haven't received responses) in remapping hardware.
> This adds the interface to drain page requests and call it when a
> PASID is terminated.
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Liu Yi L 
> Signed-off-by: Lu Baolu 
> ---
>  drivers/iommu/intel-svm.c   | 103 ++--
>  include/linux/intel-iommu.h |   4 ++
>  2 files changed, 102 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 83dc4319f661..2534641ef707 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -23,6 +23,7 @@
>  #include "intel-pasid.h"
>  
>  static irqreturn_t prq_event_thread(int irq, void *d);
> +static void intel_svm_drain_prq(struct device *dev, int pasid);
>  
>  #define PRQ_ORDER 0
>  
> @@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
>   dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
> 	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
> +	init_completion(&iommu->prq_complete);
> +
>   return 0;
>  }
>  
> @@ -208,6 +211,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> 	rcu_read_lock();
> 	list_for_each_entry_rcu(sdev, &svm->devs, list) {
> 		intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
> + intel_svm_drain_prq(sdev->dev, svm->pasid);
mmu_notifier release is called in atomic context, but drain_prq needs to
wait for completion. I tested the exit path when a process crashes, and got:

[  +0.696214] BUG: sleeping function called from invalid context at kernel/sched/completion.c:101
[  +0.68] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 3235, name: dsatest
[  +0.46] INFO: lockdep is turned off.
[  +0.02] CPU: 1 PID: 3235 Comm: dsatest Not tainted 5.7.0-rc1-z-svmtest+ #1637
[  +0.00] Hardware name: Intel Corporation Kabylake Client platform/Skylake Halo DDR4 RVP11, BIOS 04.1709050855 09/05/2017
[  +0.01] Call Trace:
[  +0.04]  dump_stack+0x68/0x9b
[  +0.03]  ___might_sleep+0x229/0x250
[  +0.03]  wait_for_completion_timeout+0x3c/0x110
[  +0.03]  intel_svm_drain_prq+0x12f/0x210
[  +0.03]  intel_mm_release+0x73/0x110
[  +0.03]  __mmu_notifier_release+0x94/0x220
[  +0.02]  ? do_munmap+0x10/0x10
[  +0.02]  ? prepare_ftrace_return+0x5c/0x80
[  +0.03]  exit_mmap+0x156/0x1a0
[  +0.02]  ? mmput+0x44/0x120
[  +0.03]  ? exit_mmap+0x5/0x1a0
[  +0.02]  ? ftrace_graph_caller+0xa0/0xa0
[  +0.01]  mmput+0x5e/0x120

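For reference, the sleep appears to happen inside the rcu_read_lock()
critical section shown in the hunk above; roughly (a sketch of the
problematic pattern, not a proposed change):

	rcu_read_lock();                /* atomic context begins here */
	list_for_each_entry_rcu(sdev, &svm->devs, list) {
		intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
		/* sleeps in wait_for_completion_timeout(), hence the splat */
		intel_svm_drain_prq(sdev->dev, svm->pasid);
		intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
	}
	rcu_read_unlock();
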

>   intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
>   }
>   rcu_read_unlock();
> @@ -401,12 +405,8 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
> 	if (!sdev->users) {
> 		list_del_rcu(&sdev->list);
> 		intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
> + intel_svm_drain_prq(dev, svm->pasid);
> 		intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
> -		/* TODO: Drain in flight PRQ for the PASID since it
> -		 * may get reused soon, we don't want to
> -		 * confuse with its previous life.
> -		 * intel_svm_drain_prq(dev, pasid);
> -		 */
>   kfree_rcu(sdev, rcu);
>  
> 			if (list_empty(&svm->devs)) {
> @@ -644,6 +644,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
> 		 * large and has to be physically contiguous. So it's
> 		 * hard to be as defensive as we might like. */
> 		intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
> +		intel_svm_drain_prq(dev, svm->pasid);
> 		intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
> 		kfree_rcu(sdev, rcu);
>  
> @@ -722,6 +723,92 @@ static bool is_canonical_address(u64 addr)
>   return (((saddr << shift) >> shift) == saddr);
>  }
>  
> +/**
> + * intel_svm_drain_prq:
> + *
> + * Drain all pending page requests and responses related to a specific
> + * pasid in both software and hardware.
> + */
> +static void intel_svm_drain_prq(struct device *dev, int pasid)
> +{
> + struct device_domain_info *info;
> + struct dmar_domain *domain;
> + struct intel_iommu *iommu;
> + struct qi_desc desc[3];
> + struct pci_dev *pdev;
> + int head, tail;
> + u16 sid, did;
> + int qdep;
> +
> + info = get_domain_info(dev);
> + if (WARN_ON(!info || !dev_is_pci(dev)))
> + return;
> +
> + if (!info->ats_enabled)
> + return;
> +
> + iommu = info->iommu;
> + domain = info->domain;
> + pdev = to_pci_dev(dev);
> + sid = PCI_DEVID(info->bus, info->devfn);
> + did = domain->iommu_did[iommu->seq_id];
> + qdep = pci_ats_queue_depth(pdev);
> +

[PATCH v3 3/4] iommu/vt-d: Add page request draining support

2020-04-22 Thread Lu Baolu
When a PASID is stopped or terminated, there can be pending PRQs
(requests that haven't received responses) in remapping hardware.
This adds the interface to drain page requests and call it when a
PASID is terminated.

Signed-off-by: Jacob Pan 
Signed-off-by: Liu Yi L 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel-svm.c   | 103 ++--
 include/linux/intel-iommu.h |   4 ++
 2 files changed, 102 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 83dc4319f661..2534641ef707 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -23,6 +23,7 @@
 #include "intel-pasid.h"
 
 static irqreturn_t prq_event_thread(int irq, void *d);
+static void intel_svm_drain_prq(struct device *dev, int pasid);
 
 #define PRQ_ORDER 0
 
@@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

+	init_completion(&iommu->prq_complete);
+
return 0;
 }
 
@@ -208,6 +211,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list) {
intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
+   intel_svm_drain_prq(sdev->dev, svm->pasid);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
}
rcu_read_unlock();
@@ -401,12 +405,8 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
if (!sdev->users) {
		list_del_rcu(&sdev->list);
intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
+   intel_svm_drain_prq(dev, svm->pasid);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
-   /* TODO: Drain in flight PRQ for the PASID since it
-* may get reused soon, we don't want to
-* confuse with its previous life.
-* intel_svm_drain_prq(dev, pasid);
-*/
kfree_rcu(sdev, rcu);
 
		if (list_empty(&svm->devs)) {
@@ -644,6 +644,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 * large and has to be physically contiguous. So it's
 * hard to be as defensive as we might like. */
intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
+   intel_svm_drain_prq(dev, svm->pasid);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
kfree_rcu(sdev, rcu);
 
@@ -722,6 +723,92 @@ static bool is_canonical_address(u64 addr)
return (((saddr << shift) >> shift) == saddr);
 }
 
+/**
+ * intel_svm_drain_prq:
+ *
+ * Drain all pending page requests and responses related to a specific
+ * pasid in both software and hardware.
+ */
+static void intel_svm_drain_prq(struct device *dev, int pasid)
+{
+   struct device_domain_info *info;
+   struct dmar_domain *domain;
+   struct intel_iommu *iommu;
+   struct qi_desc desc[3];
+   struct pci_dev *pdev;
+   int head, tail;
+   u16 sid, did;
+   int qdep;
+
+   info = get_domain_info(dev);
+   if (WARN_ON(!info || !dev_is_pci(dev)))
+   return;
+
+   if (!info->ats_enabled)
+   return;
+
+   iommu = info->iommu;
+   domain = info->domain;
+   pdev = to_pci_dev(dev);
+   sid = PCI_DEVID(info->bus, info->devfn);
+   did = domain->iommu_did[iommu->seq_id];
+   qdep = pci_ats_queue_depth(pdev);
+
+   memset(desc, 0, sizeof(desc));
+   desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
+   QI_IWD_FENCE |
+   QI_IWD_TYPE;
+   desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
+   QI_EIOTLB_DID(did) |
+   QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+   QI_EIOTLB_TYPE;
+   desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
+   QI_DEV_EIOTLB_SID(sid) |
+   QI_DEV_EIOTLB_QDEP(qdep) |
+   QI_DEIOTLB_TYPE |
+   QI_DEV_IOTLB_PFSID(info->pfsid);
+
+	/*
+	 * Submit an invalidation wait descriptor with fence and page request
+	 * drain flags set to the invalidation queue. This ensures that all
+	 * requests submitted to the invalidation queue ahead of this wait
+	 * descriptor are processed and completed, and that all already issued
+	 * page requests from the device are put in the page request queue.
+	 */
+   qi_submit_sync(iommu, desc, 1, QI_OPT_WAIT_DRAIN);
+
+   /*
+* Check and wait until all pending page requests in the queue are
+* handled by the intr thread.
+*/
+prq_retry:
+   tail =
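
For orientation, a sketch of the head/tail re-check loop that a drain of
this shape typically uses; register names are from this driver, but the
loop body below (including the PRQ_RING_MASK masking) is an assumption
based on the surrounding code, not the exact patch text:

	/* Sketch: wait until the PRQ interrupt thread has consumed the queue. */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	if (head != tail) {
		/* Entries are still pending; wait for the handler thread to
		 * signal progress via iommu->prq_complete, then re-check. */
		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}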