Re: [patch] dma-pool: warn when coherent pool is depleted

2020-06-27 Thread David Rientjes via iommu
On Sun, 21 Jun 2020, Guenter Roeck wrote:

> > When a DMA coherent pool is depleted, allocation failures may or may not
> > get reported in the kernel log depending on the allocator.
> > 
> > The admin does have a workaround, however, by using coherent_pool= on the
> > kernel command line.
> > 
> > Provide some guidance on the failure and a recommended minimum size for
> > the pools (double the size).
> > 
> > Signed-off-by: David Rientjes 
> 
> Tested-by: Guenter Roeck 
> 
> Also confirmed that coherent_pool=256k works around the crash
> I had observed.
> 

Thanks Guenter.  Christoph, does it make sense to apply this patch since 
there may not be an artifact left behind in the kernel log on allocation 
failure by the caller?

> Guenter
> 
> > ---
> >  kernel/dma/pool.c | 6 +-
> >  1 file changed, 5 insertions(+), 1 deletion(-)
> > 
> > diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> > --- a/kernel/dma/pool.c
> > +++ b/kernel/dma/pool.c
> > @@ -239,12 +239,16 @@ void *dma_alloc_from_pool(struct device *dev, size_t 
> > size,
> > }
> >  
> > val = gen_pool_alloc(pool, size);
> > -   if (val) {
> > +   if (likely(val)) {
> > phys_addr_t phys = gen_pool_virt_to_phys(pool, val);
> >  
> > *ret_page = pfn_to_page(__phys_to_pfn(phys));
> > ptr = (void *)val;
> > memset(ptr, 0, size);
> > +   } else {
> > +   WARN_ONCE(1, "DMA coherent pool depleted, increase size "
> > +"(recommended min coherent_pool=%zuK)\n",
> > + gen_pool_size(pool) >> 9);
> > }
> > if (gen_pool_avail(pool) < atomic_pool_size)
> > schedule_work(_pool_work);
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/4] iommu/vt-d: Refactor device_to_iommu() helper

2020-06-27 Thread Lu Baolu
It is refactored in two ways:

- Make it global so that it could be used in other files.

- Make bus/devfn optional so that callers could ignore these two returned
values when they only want to get the corresponding iommu pointer.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/iommu.c | 55 +++--
 drivers/iommu/intel/svm.c   |  8 +++---
 include/linux/intel-iommu.h |  3 +-
 3 files changed, 21 insertions(+), 45 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..de17952ed133 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -778,16 +778,16 @@ is_downstream_to_pci_bridge(struct device *dev, struct 
device *bridge)
return false;
 }
 
-static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 
*devfn)
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 {
struct dmar_drhd_unit *drhd = NULL;
+   struct pci_dev *pdev = NULL;
struct intel_iommu *iommu;
struct device *tmp;
-   struct pci_dev *pdev = NULL;
u16 segment = 0;
int i;
 
-   if (iommu_dummy(dev))
+   if (!dev || iommu_dummy(dev))
return NULL;
 
if (dev_is_pci(dev)) {
@@ -818,8 +818,10 @@ static struct intel_iommu *device_to_iommu(struct device 
*dev, u8 *bus, u8 *devf
if (pdev && pdev->is_virtfn)
goto got_pdev;
 
-   *bus = drhd->devices[i].bus;
-   *devfn = drhd->devices[i].devfn;
+   if (bus && devfn) {
+   *bus = drhd->devices[i].bus;
+   *devfn = drhd->devices[i].devfn;
+   }
goto out;
}
 
@@ -829,8 +831,10 @@ static struct intel_iommu *device_to_iommu(struct device 
*dev, u8 *bus, u8 *devf
 
if (pdev && drhd->include_all) {
got_pdev:
-   *bus = pdev->bus->number;
-   *devfn = pdev->devfn;
+   if (bus && devfn) {
+   *bus = pdev->bus->number;
+   *devfn = pdev->devfn;
+   }
goto out;
}
}
@@ -5146,11 +5150,10 @@ static int aux_domain_add_dev(struct dmar_domain 
*domain,
  struct device *dev)
 {
int ret;
-   u8 bus, devfn;
unsigned long flags;
struct intel_iommu *iommu;
 
-   iommu = device_to_iommu(dev, , );
+   iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
 
@@ -5236,9 +5239,8 @@ static int prepare_domain_attach_device(struct 
iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
int addr_width;
-   u8 bus, devfn;
 
-   iommu = device_to_iommu(dev, , );
+   iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
 
@@ -5658,9 +5660,8 @@ static bool intel_iommu_capable(enum iommu_cap cap)
 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
 {
struct intel_iommu *iommu;
-   u8 bus, devfn;
 
-   iommu = device_to_iommu(dev, , );
+   iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return ERR_PTR(-ENODEV);
 
@@ -5673,9 +5674,8 @@ static struct iommu_device 
*intel_iommu_probe_device(struct device *dev)
 static void intel_iommu_release_device(struct device *dev)
 {
struct intel_iommu *iommu;
-   u8 bus, devfn;
 
-   iommu = device_to_iommu(dev, , );
+   iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return;
 
@@ -5825,37 +5825,14 @@ static struct iommu_group 
*intel_iommu_device_group(struct device *dev)
return generic_device_group(dev);
 }
 
-#ifdef CONFIG_INTEL_IOMMU_SVM
-struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
-{
-   struct intel_iommu *iommu;
-   u8 bus, devfn;
-
-   if (iommu_dummy(dev)) {
-   dev_warn(dev,
-"No IOMMU translation for device; cannot enable 
SVM\n");
-   return NULL;
-   }
-
-   iommu = device_to_iommu(dev, , );
-   if ((!iommu)) {
-   dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
-   return NULL;
-   }
-
-   return iommu;
-}
-#endif /* CONFIG_INTEL_IOMMU_SVM */
-
 static int intel_iommu_enable_auxd(struct device *dev)
 {
struct device_domain_info *info;
struct intel_iommu *iommu;
unsigned long flags;
-   u8 bus, devfn;
int ret;
 
-   iommu = device_to_iommu(dev, , );
+   iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu 

[PATCH 0/4] iommu/vt-d: Add prq report and response support

2020-06-27 Thread Lu Baolu
Hi,

This series adds page request event reporting and response support to
the Intel IOMMU driver. This is necessary when the page requests must
be processed by any component other than the vendor IOMMU driver. For
example, when a guest page table was bound to a PASID through the
iommu_ops->sva_bind_gpasid() api, the page requests should be routed to
the guest, and after the page is served, the device should be responded
with the result.

Your review comments are very appreciated.

Best regards,
baolu

Lu Baolu (4):
  iommu/vt-d: Refactor device_to_iommu() helper
  iommu/vt-d: Add a helper to get svm and sdev for pasid
  iommu/vt-d: Report page request faults for guest SVA
  iommu/vt-d: Add page response ops support

 drivers/iommu/intel/iommu.c |  56 +++
 drivers/iommu/intel/svm.c   | 285 
 include/linux/intel-iommu.h |   6 +-
 3 files changed, 246 insertions(+), 101 deletions(-)

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 4/4] iommu/vt-d: Add page response ops support

2020-06-27 Thread Lu Baolu
After a page request is handled, software must respond to the device that
raised the page request with the handling result. This is done through
the iommu ops.page_response if the request was reported to outside of
vendor iommu driver through iommu_report_device_fault(). This adds the
VT-d implementation of page_response ops.

Co-developed-by: Jacob Pan 
Signed-off-by: Jacob Pan 
Co-developed-by: Liu Yi L 
Signed-off-by: Liu Yi L 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/iommu.c |  1 +
 drivers/iommu/intel/svm.c   | 73 +
 include/linux/intel-iommu.h |  3 ++
 3 files changed, 77 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index de17952ed133..7eb29167e8f9 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
.sva_bind   = intel_svm_bind,
.sva_unbind = intel_svm_unbind,
.sva_get_pasid  = intel_svm_get_pasid,
+   .page_response  = intel_svm_page_response,
 #endif
 };
 
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 4800bb6f8794..003ea9579632 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -1092,3 +1092,76 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
 
return pasid;
 }
+
+int intel_svm_page_response(struct device *dev,
+   struct iommu_fault_event *evt,
+   struct iommu_page_response *msg)
+{
+   struct iommu_fault_page_request *prm;
+   struct intel_svm_dev *sdev;
+   struct intel_iommu *iommu;
+   struct intel_svm *svm;
+   bool private_present;
+   bool pasid_present;
+   bool last_page;
+   u8 bus, devfn;
+   int ret = 0;
+   u16 sid;
+
+   if (!dev || !dev_is_pci(dev))
+   return -ENODEV;
+
+   iommu = device_to_iommu(dev, , );
+   if (!iommu)
+   return -ENODEV;
+
+   if (!msg || !evt)
+   return -EINVAL;
+
+   mutex_lock(_mutex);
+
+   prm = >fault.prm;
+   sid = PCI_DEVID(bus, devfn);
+   pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+   private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+   last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+
+   if (pasid_present) {
+   /* VT-d supports devices with full 20 bit PASIDs only */
+   if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) {
+   ret = -EINVAL;
+   goto out;
+   }
+
+   ret = pasid_to_svm_sdev(dev, prm->pasid, , );
+   if (ret || !sdev)
+   goto out;
+   }
+
+   /*
+* Per VT-d spec. v3.0 ch7.7, system software must respond
+* with page group response if private data is present (PDP)
+* or last page in group (LPIG) bit is set. This is an
+* additional VT-d feature beyond PCI ATS spec.
+*/
+   if (last_page || private_present) {
+   struct qi_desc desc;
+
+   desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+   QI_PGRP_PASID_P(pasid_present) |
+   QI_PGRP_PDP(private_present) |
+   QI_PGRP_RESP_CODE(msg->code) |
+   QI_PGRP_RESP_TYPE;
+   desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+   desc.qw2 = 0;
+   desc.qw3 = 0;
+   if (private_present)
+   memcpy(, prm->private_data,
+  sizeof(prm->private_data));
+
+   qi_submit_sync(iommu, , 1, 0);
+   }
+out:
+   mutex_unlock(_mutex);
+   return ret;
+}
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index fc2cfc3db6e1..bf6009a344f5 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct 
mm_struct *mm,
 void *drvdata);
 void intel_svm_unbind(struct iommu_sva *handle);
 int intel_svm_get_pasid(struct iommu_sva *handle);
+int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
+   struct iommu_page_response *msg);
+
 struct svm_dev_ops;
 
 struct intel_svm_dev {
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/4] iommu/vt-d: Report page request faults for guest SVA

2020-06-27 Thread Lu Baolu
A pasid might be bound to a page table from a VM guest via the iommu
ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
on the physical IOMMU, we need to inject the page fault request into
the guest. After the guest completes handling the page fault, a page
response needs to be sent back via the iommu ops.page_response().

This adds support to report a page request fault. Any external module
which is interested in handling this fault should register a notifier
callback.

Co-developed-by: Jacob Pan 
Signed-off-by: Jacob Pan 
Co-developed-by: Liu Yi L 
Signed-off-by: Liu Yi L 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/svm.c | 83 +--
 1 file changed, 80 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index c23167877b2b..4800bb6f8794 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -815,6 +815,69 @@ static void intel_svm_drain_prq(struct device *dev, int 
pasid)
}
 }
 
+static int prq_to_iommu_prot(struct page_req_dsc *req)
+{
+   int prot = 0;
+
+   if (req->rd_req)
+   prot |= IOMMU_FAULT_PERM_READ;
+   if (req->wr_req)
+   prot |= IOMMU_FAULT_PERM_WRITE;
+   if (req->exe_req)
+   prot |= IOMMU_FAULT_PERM_EXEC;
+   if (req->pm_req)
+   prot |= IOMMU_FAULT_PERM_PRIV;
+
+   return prot;
+}
+
+static int
+intel_svm_prq_report(struct intel_iommu *iommu, struct page_req_dsc *desc)
+{
+   struct iommu_fault_event event;
+   struct pci_dev *pdev;
+   u8 bus, devfn;
+   int ret = 0;
+
+   memset(, 0, sizeof(struct iommu_fault_event));
+   bus = PCI_BUS_NUM(desc->rid);
+   devfn = desc->rid & 0xff;
+   pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn);
+
+   if (!pdev) {
+   pr_err("No PCI device found for PRQ [%02x:%02x.%d]\n",
+  bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+   return -ENODEV;
+   }
+
+   /* Fill in event data for device specific processing */
+   event.fault.type = IOMMU_FAULT_PAGE_REQ;
+   event.fault.prm.addr = desc->addr;
+   event.fault.prm.pasid = desc->pasid;
+   event.fault.prm.grpid = desc->prg_index;
+   event.fault.prm.perm = prq_to_iommu_prot(desc);
+
+   /*
+* Set last page in group bit if private data is present,
+* page response is required as it does for LPIG.
+*/
+   if (desc->lpig)
+   event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+   if (desc->pasid_present)
+   event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+   if (desc->priv_data_present) {
+   event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+   event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+   memcpy(event.fault.prm.private_data, desc->priv_data,
+  sizeof(desc->priv_data));
+   }
+
+   ret = iommu_report_device_fault(>dev, );
+   pci_dev_put(pdev);
+
+   return ret;
+}
+
 static irqreturn_t prq_event_thread(int irq, void *d)
 {
struct intel_iommu *iommu = d;
@@ -874,6 +937,19 @@ static irqreturn_t prq_event_thread(int irq, void *d)
if (!is_canonical_address(address))
goto bad_req;
 
+   /*
+* If prq is to be handled outside iommu driver via receiver of
+* the fault notifiers, we skip the page response here.
+*/
+   if (svm->flags & SVM_FLAG_GUEST_MODE) {
+   int res = intel_svm_prq_report(iommu, req);
+
+   if (!res)
+   goto prq_advance;
+   else
+   goto bad_req;
+   }
+
/* If the mm is already defunct, don't handle faults. */
if (!mmget_not_zero(svm->mm))
goto bad_req;
@@ -892,10 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
goto invalid;
 
result = QI_RESP_SUCCESS;
-   invalid:
+invalid:
mmap_read_unlock(svm->mm);
mmput(svm->mm);
-   bad_req:
+bad_req:
/* Accounting for major/minor faults? */
rcu_read_lock();
list_for_each_entry_rcu(sdev, >devs, list) {
@@ -920,7 +996,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
   and these can be NULL. Do not use them below this point! */
sdev = NULL;
svm = NULL;
-   no_pasid:
+no_pasid:
if (req->lpig || req->priv_data_present) {
/*
 * Per VT-d spec. v3.0 ch7.7, system software must
@@ -945,6 +1021,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
resp.qw3 = 0;
 

[PATCH 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid

2020-06-27 Thread Lu Baolu
There are several places in the code that need to get the pointers of
svm and sdev according to a pasid and device. Add a helper to achieve
this for code consolidation and readability.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/svm.c | 121 +-
 1 file changed, 68 insertions(+), 53 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 25dd74f27252..c23167877b2b 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -228,6 +228,50 @@ static LIST_HEAD(global_svm_list);
list_for_each_entry((sdev), &(svm)->devs, list) \
if ((d) != (sdev)->dev) {} else
 
+static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
+struct intel_svm **rsvm,
+struct intel_svm_dev **rsdev)
+{
+   struct intel_svm_dev *d, *sdev = NULL;
+   struct intel_svm *svm;
+
+   /* The caller should hold the pasid_mutex lock */
+   if (WARN_ON(!mutex_is_locked(_mutex)))
+   return -EINVAL;
+
+   if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
+   return -EINVAL;
+
+   svm = ioasid_find(NULL, pasid, NULL);
+   if (IS_ERR(svm))
+   return PTR_ERR(svm);
+
+   if (!svm)
+   goto out;
+
+   /*
+* If we found svm for the PASID, there must be at least one device
+* bond.
+*/
+   if (WARN_ON(list_empty(>devs)))
+   return -EINVAL;
+
+   rcu_read_lock();
+   list_for_each_entry_rcu(d, >devs, list) {
+   if (d->dev == dev) {
+   sdev = d;
+   break;
+   }
+   }
+   rcu_read_unlock();
+
+out:
+   *rsvm = svm;
+   *rsdev = sdev;
+
+   return 0;
+}
+
 int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
  struct iommu_gpasid_bind_data *data)
 {
@@ -261,39 +305,27 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, 
struct device *dev,
dmar_domain = to_dmar_domain(domain);
 
mutex_lock(_mutex);
-   svm = ioasid_find(NULL, data->hpasid, NULL);
-   if (IS_ERR(svm)) {
-   ret = PTR_ERR(svm);
+   ret = pasid_to_svm_sdev(dev, data->hpasid, , );
+   if (ret)
goto out;
-   }
 
-   if (svm) {
+   if (sdev) {
/*
-* If we found svm for the PASID, there must be at
-* least one device bond, otherwise svm should be freed.
+* For devices with aux domains, we should allow
+* multiple bind calls with the same PASID and pdev.
 */
-   if (WARN_ON(list_empty(>devs))) {
-   ret = -EINVAL;
-   goto out;
+   if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) {
+   sdev->users++;
+   } else {
+   dev_warn_ratelimited(dev,
+"Already bound with PASID %u\n",
+svm->pasid);
+   ret = -EBUSY;
}
+   goto out;
+   }
 
-   for_each_svm_dev(sdev, svm, dev) {
-   /*
-* For devices with aux domains, we should allow
-* multiple bind calls with the same PASID and pdev.
-*/
-   if (iommu_dev_feature_enabled(dev,
- IOMMU_DEV_FEAT_AUX)) {
-   sdev->users++;
-   } else {
-   dev_warn_ratelimited(dev,
-"Already bound with PASID 
%u\n",
-svm->pasid);
-   ret = -EBUSY;
-   }
-   goto out;
-   }
-   } else {
+   if (!svm) {
/* We come here when PASID has never been bond to a device. */
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
@@ -376,25 +408,17 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct intel_svm_dev *sdev;
struct intel_svm *svm;
-   int ret = -EINVAL;
+   int ret;
 
if (WARN_ON(!iommu))
return -EINVAL;
 
mutex_lock(_mutex);
-   svm = ioasid_find(NULL, pasid, NULL);
-   if (!svm) {
-   ret = -EINVAL;
-   goto out;
-   }
-
-   if (IS_ERR(svm)) {
-   ret = PTR_ERR(svm);
+   ret = pasid_to_svm_sdev(dev, pasid, , );
+   if (ret)
goto out;
-   }
 
-   for_each_svm_dev(sdev, svm, dev) {
-   ret = 0;
+  

Re: [GIT PULL] dma-mapping fixes for 5.8

2020-06-27 Thread pr-tracker-bot
The pull request you sent on Sat, 27 Jun 2020 09:49:02 +0200:

> git://git.infradead.org/users/hch/dma-mapping.git tags/dma-mapping-5.8-4

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/f05baa066d0f6a38f0624c28008fb2f53cd00e17

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 6/6] drm/msm/a6xx: Add support for per-instance pagetables

2020-06-27 Thread Rob Clark
On Sat, Jun 27, 2020 at 12:56 PM Rob Clark  wrote:
>
> On Fri, Jun 26, 2020 at 1:04 PM Jordan Crouse  wrote:
> >
> > Add support for using per-instance pagetables if all the dependencies are
> > available.
> >
> > Signed-off-by: Jordan Crouse 
> > ---
> >
> >  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 43 +++
> >  drivers/gpu/drm/msm/msm_ringbuffer.h  |  1 +
> >  2 files changed, 44 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
> > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> > index aa53f47b7e8b..95ed2ceac121 100644
> > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> > @@ -79,6 +79,34 @@ static void get_stats_counter(struct msm_ringbuffer 
> > *ring, u32 counter,
> > OUT_RING(ring, upper_32_bits(iova));
> >  }
> >
> > +static void a6xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer 
> > *ring,
> > +   struct msm_file_private *ctx)
> > +{
> > +   phys_addr_t ttbr;
> > +   u32 asid;
> > +
> > +   if (msm_iommu_pagetable_params(ctx->aspace->mmu, , ))
> > +   return;
> > +
> > +   /* Execute the table update */
> > +   OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
> > +   OUT_RING(ring, lower_32_bits(ttbr));
> > +   OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
> > +   /* CONTEXTIDR is currently unused */
> > +   OUT_RING(ring, 0);
> > +   /* CONTEXTBANK is currently unused */
> > +   OUT_RING(ring, 0);
> > +
> > +   /*
> > +* Write the new TTBR0 to the memstore. This is good for debugging.
> > +*/
> > +   OUT_PKT7(ring, CP_MEM_WRITE, 4);
> > +   OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0)));
> > +   OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0)));
> > +   OUT_RING(ring, lower_32_bits(ttbr));
> > +   OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
> > +}
> > +
> >  static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
> > struct msm_file_private *ctx)
> >  {
> > @@ -89,6 +117,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct 
> > msm_gem_submit *submit,
> > struct msm_ringbuffer *ring = submit->ring;
> > unsigned int i;
> >
> > +   a6xx_set_pagetable(gpu, ring, ctx);
> > +
> > get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
> > rbmemptr_stats(ring, index, cpcycles_start));
> >
> > @@ -872,6 +902,18 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
> > return (unsigned long)busy_time;
> >  }
> >
> > +struct msm_gem_address_space *a6xx_address_space_instance(struct msm_gpu 
> > *gpu)
> > +{
> > +   struct msm_mmu *mmu;
> > +
> > +   mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
> > +   if (IS_ERR(mmu))
> > +   return msm_gem_address_space_get(gpu->aspace);
> > +
> > +   return msm_gem_address_space_create(mmu,
> > +   "gpu", 0x1ULL, 0x1ULL);
> > +}
> > +
> >  static const struct adreno_gpu_funcs funcs = {
> > .base = {
> > .get_param = adreno_get_param,
> > @@ -895,6 +937,7 @@ static const struct adreno_gpu_funcs funcs = {
> > .gpu_state_put = a6xx_gpu_state_put,
> >  #endif
> > .create_address_space = adreno_iommu_create_address_space,
> > +   .address_space_instance = a6xx_address_space_instance,
>
> Hmm, maybe instead of .address_space_instance, something like
> .create_context_address_space?
>
> Since like .create_address_space, it is creating an address space..
> the difference is that it is a per context/process aspace..
>


or maybe just .create_pgtable and return the 'struct msm_mmu' (which
is itself starting to become less of a great name)..

The only other thing a6xx_address_space_instance() adds is knowing
where the split is between the kernel and user pgtables, and I suppose
that isn't a thing that would really be changing between gens?

BR,
-R

> BR,
> -R
>
> > },
> > .get_timestamp = a6xx_get_timestamp,
> >  };
> > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h 
> > b/drivers/gpu/drm/msm/msm_ringbuffer.h
> > index 7764373d0ed2..0987d6bf848c 100644
> > --- a/drivers/gpu/drm/msm/msm_ringbuffer.h
> > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
> > @@ -31,6 +31,7 @@ struct msm_rbmemptrs {
> > volatile uint32_t fence;
> >
> > volatile struct msm_gpu_submit_stats 
> > stats[MSM_GPU_SUBMIT_STATS_COUNT];
> > +   volatile u64 ttbr0;
> >  };
> >
> >  struct msm_ringbuffer {
> > --
> > 2.17.1
> >
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 6/6] drm/msm/a6xx: Add support for per-instance pagetables

2020-06-27 Thread Rob Clark
On Fri, Jun 26, 2020 at 1:04 PM Jordan Crouse  wrote:
>
> Add support for using per-instance pagetables if all the dependencies are
> available.
>
> Signed-off-by: Jordan Crouse 
> ---
>
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 43 +++
>  drivers/gpu/drm/msm/msm_ringbuffer.h  |  1 +
>  2 files changed, 44 insertions(+)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index aa53f47b7e8b..95ed2ceac121 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -79,6 +79,34 @@ static void get_stats_counter(struct msm_ringbuffer *ring, 
> u32 counter,
> OUT_RING(ring, upper_32_bits(iova));
>  }
>
> +static void a6xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer 
> *ring,
> +   struct msm_file_private *ctx)
> +{
> +   phys_addr_t ttbr;
> +   u32 asid;
> +
> +   if (msm_iommu_pagetable_params(ctx->aspace->mmu, , ))
> +   return;
> +
> +   /* Execute the table update */
> +   OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
> +   OUT_RING(ring, lower_32_bits(ttbr));
> +   OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
> +   /* CONTEXTIDR is currently unused */
> +   OUT_RING(ring, 0);
> +   /* CONTEXTBANK is currently unused */
> +   OUT_RING(ring, 0);
> +
> +   /*
> +* Write the new TTBR0 to the memstore. This is good for debugging.
> +*/
> +   OUT_PKT7(ring, CP_MEM_WRITE, 4);
> +   OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0)));
> +   OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0)));
> +   OUT_RING(ring, lower_32_bits(ttbr));
> +   OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
> +}
> +
>  static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
> struct msm_file_private *ctx)
>  {
> @@ -89,6 +117,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct 
> msm_gem_submit *submit,
> struct msm_ringbuffer *ring = submit->ring;
> unsigned int i;
>
> +   a6xx_set_pagetable(gpu, ring, ctx);
> +
> get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
> rbmemptr_stats(ring, index, cpcycles_start));
>
> @@ -872,6 +902,18 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
> return (unsigned long)busy_time;
>  }
>
> +struct msm_gem_address_space *a6xx_address_space_instance(struct msm_gpu 
> *gpu)
> +{
> +   struct msm_mmu *mmu;
> +
> +   mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
> +   if (IS_ERR(mmu))
> +   return msm_gem_address_space_get(gpu->aspace);
> +
> +   return msm_gem_address_space_create(mmu,
> +   "gpu", 0x1ULL, 0x1ULL);
> +}
> +
>  static const struct adreno_gpu_funcs funcs = {
> .base = {
> .get_param = adreno_get_param,
> @@ -895,6 +937,7 @@ static const struct adreno_gpu_funcs funcs = {
> .gpu_state_put = a6xx_gpu_state_put,
>  #endif
> .create_address_space = adreno_iommu_create_address_space,
> +   .address_space_instance = a6xx_address_space_instance,

Hmm, maybe instead of .address_space_instance, something like
.create_context_address_space?

Since like .create_address_space, it is creating an address space..
the difference is that it is a per context/process aspace..

BR,
-R

> },
> .get_timestamp = a6xx_get_timestamp,
>  };
> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h 
> b/drivers/gpu/drm/msm/msm_ringbuffer.h
> index 7764373d0ed2..0987d6bf848c 100644
> --- a/drivers/gpu/drm/msm/msm_ringbuffer.h
> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
> @@ -31,6 +31,7 @@ struct msm_rbmemptrs {
> volatile uint32_t fence;
>
> volatile struct msm_gpu_submit_stats 
> stats[MSM_GPU_SUBMIT_STATS_COUNT];
> +   volatile u64 ttbr0;
>  };
>
>  struct msm_ringbuffer {
> --
> 2.17.1
>
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Freedreno] [PATCH v9 6/7] drm/msm: Set the global virtual address range from the IOMMU domain

2020-06-27 Thread Rob Clark
On Fri, Jun 26, 2020 at 1:01 PM Jordan Crouse  wrote:
>
> Use the aperture settings from the IOMMU domain to set up the virtual
> address range for the GPU. This allows us to transparently deal with
> IOMMU side features (like split pagetables).
>
> Signed-off-by: Jordan Crouse 
> ---
>
>  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 13 +++--
>  drivers/gpu/drm/msm/msm_iommu.c |  7 +++
>  2 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
> b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index 5db06b590943..3e717c1ebb7f 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -192,9 +192,18 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
> struct iommu_domain *iommu = iommu_domain_alloc(_bus_type);
> struct msm_mmu *mmu = msm_iommu_new(>dev, iommu);
> struct msm_gem_address_space *aspace;
> +   u64 start, size;
>
> -   aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M,
> -   0x - SZ_16M);
> +   /*
> +* Use the aperture start or SZ_16M, whichever is greater. This will
> +* ensure that we align with the allocated pagetable range while still
> +* allowing room in the lower 32 bits for GMEM and whatnot
> +*/
> +   start = max_t(u64, SZ_16M, iommu->geometry.aperture_start);
> +   size = iommu->geometry.aperture_end - start + 1;
> +
> +   aspace = msm_gem_address_space_create(mmu, "gpu",
> +   start & GENMASK(48, 0), size);

hmm, I kinda think this isn't going to play well for the 32b gpus
(pre-a5xx).. possibly we should add address space size to 'struct
adreno_info'?

Or I guess it is always going to be the same for all devices within a
generation?  So it could just be passed in to adreno_gpu_init()

Hopefully that makes things smoother if we someday had more than 48bits..

BR,
-R

>
> if (IS_ERR(aspace) && !IS_ERR(mmu))
> mmu->funcs->destroy(mmu);
> diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
> index 3a381a9674c9..1b6635504069 100644
> --- a/drivers/gpu/drm/msm/msm_iommu.c
> +++ b/drivers/gpu/drm/msm/msm_iommu.c
> @@ -36,6 +36,10 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t 
> iova,
> struct msm_iommu *iommu = to_msm_iommu(mmu);
> size_t ret;
>
> +   /* The arm-smmu driver expects the addresses to be sign extended */
> +   if (iova & BIT_ULL(48))
> +   iova |= GENMASK_ULL(63, 49);
> +
> ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);
> WARN_ON(!ret);
>
> @@ -46,6 +50,9 @@ static int msm_iommu_unmap(struct msm_mmu *mmu, uint64_t 
> iova, size_t len)
>  {
> struct msm_iommu *iommu = to_msm_iommu(mmu);
>
> +   if (iova & BIT_ULL(48))
> +   iova |= GENMASK_ULL(63, 49);
> +
> iommu_unmap(iommu->domain, iova, len);
>
> return 0;
> --
> 2.17.1
>
> ___
> Freedreno mailing list
> freedr...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/freedreno
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2] dma-pool: Fix too large DMA pools on medium systems

2020-06-27 Thread Marion & Christophe JAILLET


Le 24/06/2020 à 09:38, Christoph Hellwig a écrit :

Hi Guenter,

can you try the patch below?  This just converts the huge allocations
in mptbase to use GFP_KERNEL.  Christophe (added to Cc) actually has
a scripted conversion for the rest that he hasn't posted yet, so I'll
aim for the minimal version here.


Hi,

I'm sorry, but I will not send pci_ --> dma_ conversion for this driver.
I'm a bit puzzled by some choice of GFP_KERNEL and GFP_ATOMIC that not 
all that obvious to me.


I'll try to send some patches for other easier drivers in the coming weeks.

Best regards,

CJ

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: Passing NULL dev to dma_alloc_coherent() allowed or not?

2020-06-27 Thread Dan Carpenter
On Sat, Jun 27, 2020 at 01:45:16PM +0200, Richard Weinberger wrote:
> Hi!
> 
> While porting on an old out-of-tree driver I noticed that dma_alloc_coherent()
> was used with dev being NULL.
> 
> commit 148a97d5a02a62f81b5d6176f871c94a65e1f3af
> Author: Dan Carpenter 
> Date:   Wed Apr 24 17:24:37 2019 +0300
> 
> dma-mapping: remove an unnecessary NULL check
> 
> We already dereferenced "dev" when we called get_dma_ops() so this NULL
> check is too late.  We're not supposed to pass NULL "dev" pointers to
> dma_alloc_attrs().
> 
> Signed-off-by: Dan Carpenter 
> Signed-off-by: Christoph Hellwig 
> 
> says that dma_alloc_attrs() with dev being NULL is not allowed, but in
> include/linux/dma-mapping.h we have:
> 
> static inline void *dma_alloc_coherent(struct device *dev, size_t size,
> dma_addr_t *dma_handle, gfp_t gfp)
> {
> 
> return dma_alloc_attrs(dev, size, dma_handle, gfp,
> (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0);
> }
> 
> In Linus' tree I see at least three callers of dma_alloc_coherent() with a 
> NULL device.
> drivers/staging/emxx_udc/emxx_udc.c:2596:   ep->virt_buf 
> = dma_alloc_coherent(NULL, PAGE_SIZE,
> drivers/tty/synclink.c:3667:info->buffer_list = 
> dma_alloc_coherent(NULL, BUFFERLISTSIZE, >buffer_list_dma_addr, 
> GFP_KERNEL);
> drivers/tty/synclink.c:3777:BufferList[i].virt_addr = 
> dma_alloc_coherent(NULL, DMABUFFERSIZE, [i].dma_addr, GFP_KERNEL);
> 
> I think these callers are wrong.
> Can you please clarify?

The are wrong.  It was slightly worse when I originally sent the patch
but we fixed comedi.


https://lkml.org/lkml/2019/4/24/668

regards,
dan carpenter

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Passing NULL dev to dma_alloc_coherent() allowed or not?

2020-06-27 Thread Richard Weinberger
Hi!

While porting on an old out-of-tree driver I noticed that dma_alloc_coherent()
was used with dev being NULL.

commit 148a97d5a02a62f81b5d6176f871c94a65e1f3af
Author: Dan Carpenter 
Date:   Wed Apr 24 17:24:37 2019 +0300

dma-mapping: remove an unnecessary NULL check

We already dereferenced "dev" when we called get_dma_ops() so this NULL
check is too late.  We're not supposed to pass NULL "dev" pointers to
dma_alloc_attrs().

Signed-off-by: Dan Carpenter 
Signed-off-by: Christoph Hellwig 

says that dma_alloc_attrs() with dev being NULL is not allowed, but in
include/linux/dma-mapping.h we have:

static inline void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp)
{

return dma_alloc_attrs(dev, size, dma_handle, gfp,
(gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0);
}

In Linus' tree I see at least three callers of dma_alloc_coherent() with a NULL 
device.
drivers/staging/emxx_udc/emxx_udc.c:2596:   ep->virt_buf = 
dma_alloc_coherent(NULL, PAGE_SIZE,
drivers/tty/synclink.c:3667:info->buffer_list = 
dma_alloc_coherent(NULL, BUFFERLISTSIZE, &info->buffer_list_dma_addr, 
GFP_KERNEL);
drivers/tty/synclink.c:3777:BufferList[i].virt_addr = 
dma_alloc_coherent(NULL, DMABUFFERSIZE, &BufferList[i].dma_addr, GFP_KERNEL);

I think these callers are wrong.
Can you please clarify?

Thanks,
//richard
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 3/5] irqchip: Allow QCOM_PDC to be loadable as a permanent module

2020-06-27 Thread Marc Zyngier
On Sat, 27 Jun 2020 02:34:25 +0100,
John Stultz  wrote:
> 
> On Fri, Jun 26, 2020 at 12:42 AM Stephen Boyd  wrote:
> >
> > Quoting John Stultz (2020-06-24 17:10:37)
> > > diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
> > > index 6ae9e1f0819d..3fee8b655da1 100644
> > > --- a/drivers/irqchip/qcom-pdc.c
> > > +++ b/drivers/irqchip/qcom-pdc.c
> > > @@ -430,4 +432,33 @@ static int qcom_pdc_init(struct device_node *node, 
> > > struct device_node *parent)
> > > return ret;
> > >  }
> > >
> > > +#ifdef MODULE
> > > +static int qcom_pdc_probe(struct platform_device *pdev)
> > > +{
> > > +   struct device_node *np = pdev->dev.of_node;
> > > +   struct device_node *parent = of_irq_find_parent(np);
> > > +
> > > +   return qcom_pdc_init(np, parent);
> > > +}
> > > +
> > > +static const struct of_device_id qcom_pdc_match_table[] = {
> > > +   { .compatible = "qcom,pdc" },
> > > +   {}
> > > +};
> > > +MODULE_DEVICE_TABLE(of, qcom_pdc_match_table);
> > > +
> > > +static struct platform_driver qcom_pdc_driver = {
> > > +   .probe = qcom_pdc_probe,
> > > +   .driver = {
> > > +   .name = "qcom-pdc",
> > > +   .of_match_table = qcom_pdc_match_table,
> > > +   .suppress_bind_attrs = true,
> > > +   },
> > > +};
> > > +module_platform_driver(qcom_pdc_driver);
> > > +#else
> > >  IRQCHIP_DECLARE(qcom_pdc, "qcom,pdc", qcom_pdc_init);
> >
> > Is there any reason to use IRQCHIP_DECLARE if this can work as a
> > platform device driver?
> >
> 
> Hey! Thanks so much for the review!
> 
> Mostly it was done this way to minimize the change in the non-module
> case. But if you'd rather avoid the #ifdefery I'll respin it without.

That would certainly be my own preference. In general, IRQCHIP_DECLARE
and platform drivers should be mutually exclusive in the same driver:
if you can delay the probing and have it as a proper platform device,
then this should be the one true way.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[GIT PULL] dma-mapping fixes for 5.8

2020-06-27 Thread Christoph Hellwig
The following changes since commit dbed452a078d56bc7f1abecc3edd6a75e8e4484e:

  dma-pool: decouple DMA_REMAP from DMA_COHERENT_POOL (2020-06-15 08:35:30 
+0200)

are available in the Git repository at:

  git://git.infradead.org/users/hch/dma-mapping.git tags/dma-mapping-5.8-4

for you to fetch changes up to 8e36baf97b252cdcafa53589e8227cbb1e85f0b0:

  dma-remap: align the size in dma_common_*_remap() (2020-06-23 14:14:41 +0200)


dma-mapping fixes for 5.8:

 - fix dma coherent mmap in nommu (me)
 - more AMD SEV fallout (David Rientjes, me)
 - fix alignment in dma_common_*_remap (Eric Auger)


Christoph Hellwig (3):
  dma-direct: re-enable mmap for !CONFIG_MMU
  dma-direct: mark __dma_direct_alloc_pages static
  dma-mapping: DMA_COHERENT_POOL should select GENERIC_ALLOCATOR

David Rientjes (4):
  dma-direct: always align allocation size in dma_direct_alloc_pages()
  dma-direct: re-encrypt memory if dma_direct_alloc_pages() fails
  dma-direct: check return value when encrypting or decrypting memory
  dma-direct: add missing set_memory_decrypted() for coherent mapping

Eric Auger (1):
  dma-remap: align the size in dma_common_*_remap()

 include/linux/dma-direct.h |  2 --
 kernel/dma/Kconfig |  3 ++-
 kernel/dma/direct.c| 59 ++
 kernel/dma/remap.c |  5 ++--
 4 files changed, 39 insertions(+), 30 deletions(-)
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH net] xsk: remove cheap_dma optimization

2020-06-27 Thread Christoph Hellwig
On Sat, Jun 27, 2020 at 01:00:19AM +0200, Daniel Borkmann wrote:
> Given there is roughly a ~5 weeks window at max where this removal could
> still be applied in the worst case, could we come up with a fix / proposal
> first that moves this into the DMA mapping core? If there is something that
> can be agreed upon by all parties, then we could avoid re-adding the 9%
> slowdown. :/

I'd rather turn it upside down - this abuse of the internals blocks work
that has basically just missed the previous window and I'm not going
to wait weeks to sort out the API misuse.  But we can add optimizations
back later if we find a sane way.

That being said I really can't see how this would make so much of a
difference.  What architecture and what dma_ops are you using for
those measurements?  What is the workload?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: the XSK buffer pool needs be to reverted

2020-06-27 Thread Christoph Hellwig
On Fri, Jun 26, 2020 at 01:54:12PM -0700, Jonathan Lemon wrote:
> On Fri, Jun 26, 2020 at 09:47:25AM +0200, Christoph Hellwig wrote:
> >
> > Note that this is somewhat urgent, as various of the APIs that the code
> > is abusing are slated to go away for Linux 5.9, so this addition comes
> > at a really bad time.
> 
> Could you elaborate on what is upcoming here?

Moving all these calls out of line, and adding a bypass flag to avoid
the indirect function call for IOMMUs in direct mapped mode.

> Also, on a semi-related note, are there limitations on how many pages
> can be left mapped by the iommu?  Some of the page pool work involves
> leaving the pages mapped instead of constantly mapping/unmapping them.

There are, but I think for all modern IOMMUs they are so big that they
don't matter.  Maintainers of the individual IOMMU drivers might know
more.

> On a heavily loaded box with iommu enabled, it seems that quite often
> there is contention on the iova_lock.  Are there known issues in this
> area?

I'll have to defer to the IOMMU maintainers, and for that you'll need
to say what code you are using.  Current mainline doesn't even have
an iova_lock anywhere.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH v3 02/14] iommu: Report domain nesting info

2020-06-27 Thread Liu, Yi L
Hi Robin,

> From: Robin Murphy 
> Sent: Saturday, June 27, 2020 12:05 AM
> 
> On 2020-06-26 08:47, Jean-Philippe Brucker wrote:
> > On Wed, Jun 24, 2020 at 01:55:15AM -0700, Liu Yi L wrote:
> >> IOMMUs that support nesting translation need to report the capability
> >> info to userspace, e.g. the format of first level/stage paging structures.
> >>
> >> This patch reports nesting info by DOMAIN_ATTR_NESTING. Caller can
> >> get nesting info after setting DOMAIN_ATTR_NESTING.
> >>
> >> v2 -> v3:
> >> *) remove cap/ecap_mask in iommu_nesting_info.
> >> *) reuse DOMAIN_ATTR_NESTING to get nesting info.
> >> *) return an empty iommu_nesting_info for SMMU drivers per Jean'
> >> suggestion.
> >>
> >> Cc: Kevin Tian 
> >> CC: Jacob Pan 
> >> Cc: Alex Williamson 
> >> Cc: Eric Auger 
> >> Cc: Jean-Philippe Brucker 
> >> Cc: Joerg Roedel 
> >> Cc: Lu Baolu 
> >> Signed-off-by: Liu Yi L 
> >> Signed-off-by: Jacob Pan 
> >> ---
> >>   drivers/iommu/arm-smmu-v3.c | 29 --
> >>   drivers/iommu/arm-smmu.c| 29 --
> >
> > Looks reasonable to me. Please move the SMMU changes to a separate
> > patch and Cc the SMMU maintainers:
> 
> Cheers Jean, I'll admit I've been skipping over a lot of these patches lately 
> :)
> 
> A couple of comments below...
> 
> >
> > Cc: Will Deacon 
> > Cc: Robin Murphy 
> >
> > Thanks,
> > Jean
> >
> >>   include/uapi/linux/iommu.h  | 59
> +
> >>   3 files changed, 113 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/iommu/arm-smmu-v3.c
> >> b/drivers/iommu/arm-smmu-v3.c index f578677..0c45d4d 100644
> >> --- a/drivers/iommu/arm-smmu-v3.c
> >> +++ b/drivers/iommu/arm-smmu-v3.c
> >> @@ -3019,6 +3019,32 @@ static struct iommu_group
> *arm_smmu_device_group(struct device *dev)
> >>return group;
> >>   }
> >>
> >> +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain
> *smmu_domain,
> >> +  void *data)
> >> +{
> >> +  struct iommu_nesting_info *info = (struct iommu_nesting_info *) data;
> >> +  u32 size;
> >> +
> >> +  if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> >> +  return -ENODEV;
> >> +
> >> +  size = sizeof(struct iommu_nesting_info);
> >> +
> >> +  /*
> >> +   * if provided buffer size is not equal to the size, should
> >> +   * return 0 and also the expected buffer size to caller.
> >> +   */
> >> +  if (info->size != size) {
> >> +  info->size = size;
> >> +  return 0;
> >> +  }
> >> +
> >> +  /* report an empty iommu_nesting_info for now */
> >> +  memset(info, 0x0, size);
> >> +  info->size = size;
> >> +  return 0;
> >> +}
> >> +
> >>   static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> >>enum iommu_attr attr, void *data)
> >>   {
> >> @@ -3028,8 +3054,7 @@ static int arm_smmu_domain_get_attr(struct
> iommu_domain *domain,
> >>case IOMMU_DOMAIN_UNMANAGED:
> >>switch (attr) {
> >>case DOMAIN_ATTR_NESTING:
> >> -  *(int *)data = (smmu_domain->stage ==
> ARM_SMMU_DOMAIN_NESTED);
> >> -  return 0;
> >> +  return arm_smmu_domain_nesting_info(smmu_domain,
> data);
> >>default:
> >>return -ENODEV;
> >>}
> >> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> >> index 243bc4c..908607d 100644
> >> --- a/drivers/iommu/arm-smmu.c
> >> +++ b/drivers/iommu/arm-smmu.c
> >> @@ -1506,6 +1506,32 @@ static struct iommu_group
> *arm_smmu_device_group(struct device *dev)
> >>return group;
> >>   }
> >>
> >> +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain
> *smmu_domain,
> >> +  void *data)
> >> +{
> >> +  struct iommu_nesting_info *info = (struct iommu_nesting_info *) data;
> >> +  u32 size;
> >> +
> >> +  if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> >> +  return -ENODEV;
> >> +
> >> +  size = sizeof(struct iommu_nesting_info);
> >> +
> >> +  /*
> >> +   * if provided buffer size is not equal to the size, should
> >> +   * return 0 and also the expected buffer size to caller.
> >> +   */
> >> +  if (info->size != size) {
> >> +  info->size = size;
> >> +  return 0;
> >> +  }
> >> +
> >> +  /* report an empty iommu_nesting_info for now */
> >> +  memset(info, 0x0, size);
> >> +  info->size = size;
> >> +  return 0;
> >> +}
> >> +
> >>   static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> >>enum iommu_attr attr, void *data)
> >>   {
> >> @@ -1515,8 +1541,7 @@ static int arm_smmu_domain_get_attr(struct
> iommu_domain *domain,
> >>case IOMMU_DOMAIN_UNMANAGED:
> >>switch (attr) {
> >>case DOMAIN_ATTR_NESTING:
> >> -  *(int *)data = (smmu_domain->stage ==
> ARM_SMMU_DOMAIN_NESTED);
> >> -  return 0;
> >> +  return arm_smmu_domain_nesting_info(smmu_domain, data);

RE: [PATCH v3 02/14] iommu: Report domain nesting info

2020-06-27 Thread Liu, Yi L
> From: Jean-Philippe Brucker 
> Sent: Friday, June 26, 2020 3:48 PM
> 
> On Wed, Jun 24, 2020 at 01:55:15AM -0700, Liu Yi L wrote:
> > IOMMUs that support nesting translation need to report the capability
> > info to userspace, e.g. the format of first level/stage paging structures.
> >
> > This patch reports nesting info by DOMAIN_ATTR_NESTING. Caller can get
> > nesting info after setting DOMAIN_ATTR_NESTING.
> >
> > v2 -> v3:
> > *) remove cap/ecap_mask in iommu_nesting_info.
> > *) reuse DOMAIN_ATTR_NESTING to get nesting info.
> > *) return an empty iommu_nesting_info for SMMU drivers per Jean'
> >suggestion.
> >
> > Cc: Kevin Tian 
> > CC: Jacob Pan 
> > Cc: Alex Williamson 
> > Cc: Eric Auger 
> > Cc: Jean-Philippe Brucker 
> > Cc: Joerg Roedel 
> > Cc: Lu Baolu 
> > Signed-off-by: Liu Yi L 
> > Signed-off-by: Jacob Pan 
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 29 --
> >  drivers/iommu/arm-smmu.c| 29 --
> 
> Looks reasonable to me. Please move the SMMU changes to a separate patch
> and Cc the SMMU maintainers:
> 
> Cc: Will Deacon 
> Cc: Robin Murphy 

got you. will do it.

Regards,
Yi Liu

> Thanks,
> Jean
> 
> >  include/uapi/linux/iommu.h  | 59
> > +
> >  3 files changed, 113 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index f578677..0c45d4d 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -3019,6 +3019,32 @@ static struct iommu_group
> *arm_smmu_device_group(struct device *dev)
> > return group;
> >  }
> >
> > +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain
> *smmu_domain,
> > +   void *data)
> > +{
> > +   struct iommu_nesting_info *info = (struct iommu_nesting_info *) data;
> > +   u32 size;
> > +
> > +   if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> > +   return -ENODEV;
> > +
> > +   size = sizeof(struct iommu_nesting_info);
> > +
> > +   /*
> > +* if provided buffer size is not equal to the size, should
> > +* return 0 and also the expected buffer size to caller.
> > +*/
> > +   if (info->size != size) {
> > +   info->size = size;
> > +   return 0;
> > +   }
> > +
> > +   /* report an empty iommu_nesting_info for now */
> > +   memset(info, 0x0, size);
> > +   info->size = size;
> > +   return 0;
> > +}
> > +
> >  static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> > enum iommu_attr attr, void *data)  { @@ -
> 3028,8 +3054,7 @@
> > static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> > case IOMMU_DOMAIN_UNMANAGED:
> > switch (attr) {
> > case DOMAIN_ATTR_NESTING:
> > -   *(int *)data = (smmu_domain->stage ==
> ARM_SMMU_DOMAIN_NESTED);
> > -   return 0;
> > +   return arm_smmu_domain_nesting_info(smmu_domain,
> data);
> > default:
> > return -ENODEV;
> > }
> > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index
> > 243bc4c..908607d 100644
> > --- a/drivers/iommu/arm-smmu.c
> > +++ b/drivers/iommu/arm-smmu.c
> > @@ -1506,6 +1506,32 @@ static struct iommu_group
> *arm_smmu_device_group(struct device *dev)
> > return group;
> >  }
> >
> > +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain
> *smmu_domain,
> > +   void *data)
> > +{
> > +   struct iommu_nesting_info *info = (struct iommu_nesting_info *) data;
> > +   u32 size;
> > +
> > +   if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> > +   return -ENODEV;
> > +
> > +   size = sizeof(struct iommu_nesting_info);
> > +
> > +   /*
> > +* if provided buffer size is not equal to the size, should
> > +* return 0 and also the expected buffer size to caller.
> > +*/
> > +   if (info->size != size) {
> > +   info->size = size;
> > +   return 0;
> > +   }
> > +
> > +   /* report an empty iommu_nesting_info for now */
> > +   memset(info, 0x0, size);
> > +   info->size = size;
> > +   return 0;
> > +}
> > +
> >  static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> > enum iommu_attr attr, void *data)  { @@ -
> 1515,8 +1541,7 @@
> > static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> > case IOMMU_DOMAIN_UNMANAGED:
> > switch (attr) {
> > case DOMAIN_ATTR_NESTING:
> > -   *(int *)data = (smmu_domain->stage ==
> ARM_SMMU_DOMAIN_NESTED);
> > -   return 0;
> > +   return arm_smmu_domain_nesting_info(smmu_domain,
> data);
> > default:
> > return -ENODEV;
> > }
> > diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> > index 1afc661..898c99a 100644
> > ---