Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-28 Thread Jacob Pan
Hi BaoLu,

On Fri, 18 Mar 2022 20:43:54 +0800, Lu Baolu 
wrote:

> On 2022/3/15 13:07, Jacob Pan wrote:
> > DMA mapping API is the de facto standard for in-kernel DMA. It operates
> > on a per device/RID basis which is not PASID-aware.
> > 
> > Some modern devices such as Intel Data Streaming Accelerator, PASID is
> > required for certain work submissions. To allow such devices use DMA
> > mapping API, we need the following functionalities:
> > 1. Provide device a way to retrieve a PASID for work submission within
> > the kernel
> > 2. Enable the kernel PASID on the IOMMU for the device
> > 3. Attach the kernel PASID to the device's default DMA domain, let it
> > be IOVA or physical address in case of pass-through.
> > 
> > This patch introduces a driver facing API that enables DMA API
> > PASID usage. Once enabled, device drivers can continue to use DMA APIs
> > as is. There is no difference in dma_handle between without PASID and
> > with PASID.
> > 
> > Signed-off-by: Jacob Pan 
> > ---
> >   drivers/iommu/dma-iommu.c | 65 +++
> >   include/linux/dma-iommu.h |  7 +
> >   include/linux/iommu.h |  9 ++
> >   3 files changed, 81 insertions(+)
> > 
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index b22034975301..d0ff1a34b1b6 100644
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -39,6 +39,8 @@ enum iommu_dma_cookie_type {
> > IOMMU_DMA_MSI_COOKIE,
> >   };
> >   
> > +static DECLARE_IOASID_SET(iommu_dma_pasid);
> > +
> >   struct iommu_dma_cookie {
> > enum iommu_dma_cookie_type  type;
> > union {
> > @@ -370,6 +372,69 @@ void iommu_put_dma_cookie(struct iommu_domain
> > *domain) domain->iova_cookie = NULL;
> >   }
> >   
> > +/**
> > + * iommu_enable_pasid_dma --Enable in-kernel DMA request with PASID
> > + * @dev:   Device to be enabled
> > + *
> > + * DMA request with PASID will be mapped the same way as the legacy
> > DMA.
> > + * If the device is in pass-through, PASID will also pass-through. If
> > the
> > + * device is in IOVA map, the supervisor PASID will point to the same
> > IOVA
> > + * page table.
> > + *
> > + * @return the kernel PASID to be used for DMA or INVALID_IOASID on
> > failure  
> 
> The comment on the return value should be rephrased according to the
> real code.
> 
yes, will do.

> > + */
> > +int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid)
> > +{
> > +   struct iommu_domain *dom;
> > +   ioasid_t id, max;
> > +   int ret;
> > +
> > +   dom = iommu_get_domain_for_dev(dev);
> > +   if (!dom || !dom->ops || !dom->ops->attach_dev_pasid)
> > +   return -ENODEV;
> > +   max = iommu_get_dev_pasid_max(dev);
> > +   if (!max)
> > +   return -EINVAL;
> > +
> > +   id = ioasid_alloc(_dma_pasid, 1, max, dev);
> > +   if (id == INVALID_IOASID)
> > +   return -ENOMEM;
> > +
> > +   ret = dom->ops->attach_dev_pasid(dom, dev, id);
> > +   if (ret) {
> > +   ioasid_put(id);
> > +   return ret;
> > +   }
> > +   *pasid = id;
> > +
> > +   return ret;
> > +}
> > +EXPORT_SYMBOL(iommu_enable_pasid_dma);
> > +
> > +/**
> > + * iommu_disable_pasid_dma --Disable in-kernel DMA request with PASID
> > + * @dev:   Device's PASID DMA to be disabled
> > + *
> > + * It is the device driver's responsibility to ensure no more incoming
> > DMA
> > + * requests with the kernel PASID before calling this function. IOMMU
> > driver
> > + * ensures PASID cache, IOTLBs related to the kernel PASID are cleared
> > and
> > + * drained.
> > + *
> > + * @return 0 on success or error code on failure  
> 
> Ditto.
> 
same

> > + */
> > +void iommu_disable_pasid_dma(struct device *dev, ioasid_t pasid)
> > +{
> > +   struct iommu_domain *dom;
> > +
> > +   /* TODO: check the given PASID is within the ioasid_set */
> > +   dom = iommu_get_domain_for_dev(dev);
> > +   if (!dom->ops->detach_dev_pasid)
> > +   return;
> > +   dom->ops->detach_dev_pasid(dom, dev, pasid);
> > +   ioasid_put(pasid);
> > +}
> > +EXPORT_SYMBOL(iommu_disable_pasid_dma);
> > +
> >   /**
> >* iommu_dma_get_resv_regions - Reserved region driver helper
> >* @dev: Device from iommu_get_resv_regions()
> > diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
> > index 24607dc3c2ac..e6cb9b52a420 100644
> > --- a/include/linux/dma-iommu.h
> > +++ b/include/linux/dma-iommu.h
> > @@ -18,6 +18,13 @@ int iommu_get_dma_cookie(struct iommu_domain
> > *domain); int iommu_get_msi_cookie(struct iommu_domain *domain,
> > dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain
> > *domain); 
> > +/*
> > + * For devices that can do DMA request with PASID, setup a system
> > PASID.
> > + * Address modes (IOVA, PA) are selected by the platform code.
> > + */  
> 
> No need for a comment here. Or move it to the function if need.
> 
right, this comment is obsolete. will remove.

> > +int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid);
> > +void 

Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-18 Thread Lu Baolu

On 2022/3/15 13:07, Jacob Pan wrote:

DMA mapping API is the de facto standard for in-kernel DMA. It operates
on a per device/RID basis which is not PASID-aware.

Some modern devices such as Intel Data Streaming Accelerator, PASID is
required for certain work submissions. To allow such devices use DMA
mapping API, we need the following functionalities:
1. Provide device a way to retrieve a PASID for work submission within
the kernel
2. Enable the kernel PASID on the IOMMU for the device
3. Attach the kernel PASID to the device's default DMA domain, let it
be IOVA or physical address in case of pass-through.

This patch introduces a driver facing API that enables DMA API
PASID usage. Once enabled, device drivers can continue to use DMA APIs as
is. There is no difference in dma_handle between without PASID and with
PASID.

Signed-off-by: Jacob Pan 
---
  drivers/iommu/dma-iommu.c | 65 +++
  include/linux/dma-iommu.h |  7 +
  include/linux/iommu.h |  9 ++
  3 files changed, 81 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index b22034975301..d0ff1a34b1b6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -39,6 +39,8 @@ enum iommu_dma_cookie_type {
IOMMU_DMA_MSI_COOKIE,
  };
  
+static DECLARE_IOASID_SET(iommu_dma_pasid);

+
  struct iommu_dma_cookie {
enum iommu_dma_cookie_type  type;
union {
@@ -370,6 +372,69 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
domain->iova_cookie = NULL;
  }
  
+/**

+ * iommu_enable_pasid_dma --Enable in-kernel DMA request with PASID
+ * @dev:   Device to be enabled
+ *
+ * DMA request with PASID will be mapped the same way as the legacy DMA.
+ * If the device is in pass-through, PASID will also pass-through. If the
+ * device is in IOVA map, the supervisor PASID will point to the same IOVA
+ * page table.
+ *
+ * @return the kernel PASID to be used for DMA or INVALID_IOASID on failure


The comment on the return value should be rephrased according to the
real code.


+ */
+int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid)
+{
+   struct iommu_domain *dom;
+   ioasid_t id, max;
+   int ret;
+
+   dom = iommu_get_domain_for_dev(dev);
+   if (!dom || !dom->ops || !dom->ops->attach_dev_pasid)
+   return -ENODEV;
+   max = iommu_get_dev_pasid_max(dev);
+   if (!max)
+   return -EINVAL;
+
+   id = ioasid_alloc(_dma_pasid, 1, max, dev);
+   if (id == INVALID_IOASID)
+   return -ENOMEM;
+
+   ret = dom->ops->attach_dev_pasid(dom, dev, id);
+   if (ret) {
+   ioasid_put(id);
+   return ret;
+   }
+   *pasid = id;
+
+   return ret;
+}
+EXPORT_SYMBOL(iommu_enable_pasid_dma);
+
+/**
+ * iommu_disable_pasid_dma --Disable in-kernel DMA request with PASID
+ * @dev:   Device's PASID DMA to be disabled
+ *
+ * It is the device driver's responsibility to ensure no more incoming DMA
+ * requests with the kernel PASID before calling this function. IOMMU driver
+ * ensures PASID cache, IOTLBs related to the kernel PASID are cleared and
+ * drained.
+ *
+ * @return 0 on success or error code on failure


Ditto.


+ */
+void iommu_disable_pasid_dma(struct device *dev, ioasid_t pasid)
+{
+   struct iommu_domain *dom;
+
+   /* TODO: check the given PASID is within the ioasid_set */
+   dom = iommu_get_domain_for_dev(dev);
+   if (!dom->ops->detach_dev_pasid)
+   return;
+   dom->ops->detach_dev_pasid(dom, dev, pasid);
+   ioasid_put(pasid);
+}
+EXPORT_SYMBOL(iommu_disable_pasid_dma);
+
  /**
   * iommu_dma_get_resv_regions - Reserved region driver helper
   * @dev: Device from iommu_get_resv_regions()
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 24607dc3c2ac..e6cb9b52a420 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -18,6 +18,13 @@ int iommu_get_dma_cookie(struct iommu_domain *domain);
  int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
  void iommu_put_dma_cookie(struct iommu_domain *domain);
  
+/*

+ * For devices that can do DMA request with PASID, setup a system PASID.
+ * Address modes (IOVA, PA) are selected by the platform code.
+ */


No need for a comment here. Or move it to the function if need.


+int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid);
+void iommu_disable_pasid_dma(struct device *dev, ioasid_t pasid);
+
  /* Setup call for arch DMA mapping code */
  void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
  int iommu_dma_init_fq(struct iommu_domain *domain);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index fde5b933dbe3..fb011722e4f8 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -395,6 +395,15 @@ static inline void iommu_set_dev_pasid_max(struct device 
*dev,
  
  	param->pasid_max = max;

  }
+static inline 

Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-16 Thread Jason Gunthorpe via iommu
On Wed, Mar 16, 2022 at 08:41:27AM +, Tian, Kevin wrote:

> 1) When the kernel wants a more scalable way of using IDXD e.g. having
> multiple CPUs simultaneously submitting works in a lockless way to a 
> shared work queue via a new instruction (ENQCMD) which carries
> PASID.

IMHO the misdesign is the CPU can't submit work with ENQCMD from
kernel space that will do DMA on the RID.

> 2) When the host wants to share a workqueue between multiple VMs.
> In that case the virtual IDXD device exposed to each VM will only support
> the shared workqueue mode. Only in this case the DMA API in the
> guest must be attached by a PASID as ENQCMD is the only way to submit
> works.

It is the same issue - if ENQCMD had 'excute on the RID' then the
virtualization layer could translate that to 'execute on this PASID
setup by the hypervisor' and the kernel would not see additional
differences between SIOV and physical devices. IMHO mandatory kernel
PASID support in the guest just to support the kernel doing DMA to a
device is not nice.

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-16 Thread Tian, Kevin
> From: Jacob Pan 
> Sent: Wednesday, March 16, 2022 5:24 AM
> 
> Hi Jason,
> 
> On Tue, 15 Mar 2022 14:05:07 -0300, Jason Gunthorpe 
> wrote:
> 
> > On Tue, Mar 15, 2022 at 09:31:35AM -0700, Jacob Pan wrote:
> >
> > > > IMHO it is a device mis-design of IDXD to require all DMA be PASID
> > > > tagged. Devices should be able to do DMA on their RID when the PCI
> >
> > > IDXD can do DMA w/ RID, the PASID requirement is only for shared WQ
> > > where ENQCMDS is used. ENQCMDS has the benefit of avoiding locking
> > > where work submission is done from multiple CPUs.
> > > Tony, Dave?
> >
> > This is what I mean, it has an operating mode you want to use from the
> > kernel driver that cannot do RID DMA. It is a HW mis-design, IMHO.

Well, that mode is configured per work queue and the device just provides
flexibility for the software to use it for a potential value (scalability). So 
in 
the end it is a software question whether we can find a clean way to manage
this mode (fortunately with your proposal it does ). From this angle I'd
not call it a mis-design because the software has other options if there is no
willing of using that mode. Even in the virtual IDXD case that I explained in
another thread, the admin should not share work queue between VMs unless
he knows that guest can support it. Otherwise the admin should just dedicate
a workqueue to the guest and then let the guest itself to decide whether to
use the shared mode capability for its own purpose.

Also in concept the IOVA space created with DMA API is not different from
other I/O address spaces. There is no architectural restriction why this space
cannot be attached by PASID.

> >
> > Something like PASID0 in the ENQCMDS should have triggered RID DMA.
> >
> That would simplify things a lot, it would be just a device change I think.
> From IA pov, only ENQCMD will #GP if PASID==0. I will bring this back to HW
> team to consider for future generations.
> 

Maybe you can have a quick try?

Per SDM CPU doesn't #GP on PASID0 for ENQCMDS.

Also I don't think the device should do such check since with RID2PASID
the actual PASID value used to mark RID requests in the IOMMU is 
configured by the iommu driver. In that regard it is not correct for any 
hardware outside of the iommu to assume that PASID0 is for RID.

Then the only uncertain thing is within VT-d. In a quick check I didn't 
find any VT-d fault specifically for a DMA request which contains
a value same as RID2PASID.  

There is one interest paragraph in 6.2.1 (Tagging of cached translations):

  In scalable mode, requests-without-PASID are treated as requests-with-
  PASID when looking up the paging-structure cache, and PASID-cache.
  Such lookups use the PASID value from the RID_PASID field in the
  scalable-mode context-entry used to process the request-withoutPASID.
  Refer to Section 9.4 for more details on scalable-mode context-entry.
  Additionally, after translation process when such requests fill into
  IOTLB, the entries are tagged with PASID value obtained from RID_PASID
  field but are still marked as entries for requests-without-PASID.
  Tagging of such entries with PASID value is required so that 
  PASID-selective P_IOTLB invalidation can correctly remove all stale
  mappings. Implementation may allow requests-with-PASID from a given
  Requester-ID to hit entries brought into IOTLB by requests-without-PASID
  from the same Requester-ID to improve performance.

The last sentence gives me the impression that there is no problem for
VT-d to receive a request which happens to contain RID2PASID except
there might be a performance issue (if duplicated entries are created).

Thanks
Kevin
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

RE: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-16 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Tuesday, March 15, 2022 10:22 PM
> 
> On Tue, Mar 15, 2022 at 11:16:41AM +, Robin Murphy wrote:
> > On 2022-03-15 05:07, Jacob Pan wrote:
> > > DMA mapping API is the de facto standard for in-kernel DMA. It operates
> > > on a per device/RID basis which is not PASID-aware.
> > >
> > > Some modern devices such as Intel Data Streaming Accelerator, PASID is
> > > required for certain work submissions. To allow such devices use DMA
> > > mapping API, we need the following functionalities:
> > > 1. Provide device a way to retrieve a PASID for work submission within
> > > the kernel
> > > 2. Enable the kernel PASID on the IOMMU for the device
> > > 3. Attach the kernel PASID to the device's default DMA domain, let it
> > > be IOVA or physical address in case of pass-through.
> > >
> > > This patch introduces a driver facing API that enables DMA API
> > > PASID usage. Once enabled, device drivers can continue to use DMA APIs
> as
> > > is. There is no difference in dma_handle between without PASID and with
> > > PASID.
> >
> > Surely the main point of PASIDs is to be able to use more than one
> > of them?
> 
> IMHO, not for the DMA API.
> 
> I can't think of good reasons why a single in-kernel device should
> require more than one iommu_domain for use by the DMA API. Even with
> the SIOV cases we have been looking at we don't really see a use case
> for more than one DMA API iommu_domain on a single physical device.

This is correct.

> Do you know of something on the horizon?
> 
> From my view the main point of PASIDs is to assign iommu_domains that
> are not used by the DMA API.
> 
> IMHO it is a device mis-design of IDXD to require all DMA be PASID
> tagged. Devices should be able to do DMA on their RID when the PCI
> function is controlled by a kernel driver. I see this driver facing
> API as addressing a device quirk by aliasing the DMA API of the RID
> into a PASID and that is really all it is good for.

One clarification as I don't agree it is a mis-design.

The IDXD device does support RID-based DMA as the base. PASID-based
DMA API comes to play in two scenarios:

1) When the kernel wants a more scalable way of using IDXD e.g. having
multiple CPUs simultaneously submitting works in a lockless way to a 
shared work queue via a new instruction (ENQCMD) which carries PASID.
Having PASID in the instruction is to identify multiple CPU address spaces 
attached to the shared queue (as required by user SVA). For kernel use 
of IDXD (e.g. page zeroing) in this series all CPUs are accessing the same
IOVA space associated with DMA API thus just requires one PASID attached
to DMA API to use ENQCMD in this optimized mode.

2) When the host wants to share a workqueue between multiple VMs.
In that case the virtual IDXD device exposed to each VM will only support
the shared workqueue mode. Only in this case the DMA API in the
guest must be attached by a PASID as ENQCMD is the only way to submit
works.

So it's just a trick to enable a more scalable use of the IDXD device. Other
than that IDXD does support normal RID-based DMA API when its work
queues are configured in dedicated mode. 

> 
> In any case I think we are better to wait for an actual user for multi
> DMA API iommu_domains to come forward before we try to build an API
> for it.
> 
> Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-15 Thread Jason Gunthorpe via iommu
On Tue, Mar 15, 2022 at 09:38:10AM -0700, Jacob Pan wrote:
> > > +int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid)
> > > +{
> > > + struct iommu_domain *dom;
> > > + ioasid_t id, max;
> > > + int ret;
> > > +
> > > + dom = iommu_get_domain_for_dev(dev);
> > > + if (!dom || !dom->ops || !dom->ops->attach_dev_pasid)
> > > + return -ENODEV;  
> > 
> > Given the purpose of this API I think it should assert that the device
> > has the DMA API in-use using the machinery from the other series.
> > 
> > ie this should not be used to clone non-DMA API iommu_domains..
> > 
> Let me try to confirm the specific here. I should check domain type and
> rejects all others except IOMMU_DOMAIN_DMA type, right? Should also allow
> IOMMU_DOMAIN_IDENTITY.
> 
> That makes sense.

That is one way, the other is to check the new group data that Lu's
patch added.

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-15 Thread Jacob Pan
Hi Jason,

On Tue, 15 Mar 2022 14:05:07 -0300, Jason Gunthorpe  wrote:

> On Tue, Mar 15, 2022 at 09:31:35AM -0700, Jacob Pan wrote:
> 
> > > IMHO it is a device mis-design of IDXD to require all DMA be PASID
> > > tagged. Devices should be able to do DMA on their RID when the PCI  
> 
> > IDXD can do DMA w/ RID, the PASID requirement is only for shared WQ
> > where ENQCMDS is used. ENQCMDS has the benefit of avoiding locking
> > where work submission is done from multiple CPUs.
> > Tony, Dave?  
> 
> This is what I mean, it has an operating mode you want to use from the
> kernel driver that cannot do RID DMA. It is a HW mis-design, IMHO.
> 
> Something like PASID0 in the ENQCMDS should have triggered RID DMA.
> 
That would simplify things a lot, it would be just a device change I think.
>From IA pov, only ENQCMD will #GP if PASID==0. I will bring this back to HW
team to consider for future generations.

> > > In any case I think we are better to wait for an actual user for multi
> > > DMA API iommu_domains to come forward before we try to build an API
> > > for it.  
> > 
> > What would you recommend in the interim?  
> 
> Oh, I mean this approach at a high level is fine - I was saying we
> shouldn't try to broaden it like Robin was suggesting without a driver
> that needs multiple iommu_domains for the DMA API.
> 
Got it. Thanks for the clarification.

> Jason


Thanks,

Jacob
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-15 Thread Jason Gunthorpe via iommu
On Tue, Mar 15, 2022 at 09:31:35AM -0700, Jacob Pan wrote:

> > IMHO it is a device mis-design of IDXD to require all DMA be PASID
> > tagged. Devices should be able to do DMA on their RID when the PCI

> IDXD can do DMA w/ RID, the PASID requirement is only for shared WQ where
> ENQCMDS is used. ENQCMDS has the benefit of avoiding locking where work
> submission is done from multiple CPUs.
> Tony, Dave?

This is what I mean, it has an operating mode you want to use from the
kernel driver that cannot do RID DMA. It is a HW mis-design, IMHO.

Something like PASID0 in the ENQCMDS should have triggered RID DMA.

> > In any case I think we are better to wait for an actual user for multi
> > DMA API iommu_domains to come forward before we try to build an API
> > for it.
> 
> What would you recommend in the interim?

Oh, I mean this approach at a high level is fine - I was saying we
shouldn't try to broaden it like Robin was suggesting without a driver
that needs multiple iommu_domains for the DMA API.

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-15 Thread Jacob Pan
Hi Jason,

On Tue, 15 Mar 2022 11:35:35 -0300, Jason Gunthorpe  wrote:

> On Mon, Mar 14, 2022 at 10:07:09PM -0700, Jacob Pan wrote:
> > DMA mapping API is the de facto standard for in-kernel DMA. It operates
> > on a per device/RID basis which is not PASID-aware.
> > 
> > Some modern devices such as Intel Data Streaming Accelerator, PASID is
> > required for certain work submissions. To allow such devices use DMA
> > mapping API, we need the following functionalities:
> > 1. Provide device a way to retrieve a PASID for work submission within
> > the kernel
> > 2. Enable the kernel PASID on the IOMMU for the device
> > 3. Attach the kernel PASID to the device's default DMA domain, let it
> > be IOVA or physical address in case of pass-through.
> > 
> > This patch introduces a driver facing API that enables DMA API
> > PASID usage. Once enabled, device drivers can continue to use DMA APIs
> > as is. There is no difference in dma_handle between without PASID and
> > with PASID.
> > 
> > Signed-off-by: Jacob Pan 
> >  drivers/iommu/dma-iommu.c | 65 +++
> >  include/linux/dma-iommu.h |  7 +
> >  include/linux/iommu.h |  9 ++
> >  3 files changed, 81 insertions(+)
> > 
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index b22034975301..d0ff1a34b1b6 100644
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -39,6 +39,8 @@ enum iommu_dma_cookie_type {
> > IOMMU_DMA_MSI_COOKIE,
> >  };
> >  
> > +static DECLARE_IOASID_SET(iommu_dma_pasid);
> > +
> >  struct iommu_dma_cookie {
> > enum iommu_dma_cookie_type  type;
> > union {
> > @@ -370,6 +372,69 @@ void iommu_put_dma_cookie(struct iommu_domain
> > *domain) domain->iova_cookie = NULL;
> >  }
> >  
> > +/**
> > + * iommu_enable_pasid_dma --Enable in-kernel DMA request with PASID
> > + * @dev:   Device to be enabled
> > + *
> > + * DMA request with PASID will be mapped the same way as the legacy
> > DMA.
> > + * If the device is in pass-through, PASID will also pass-through. If
> > the
> > + * device is in IOVA map, the supervisor PASID will point to the same
> > IOVA
> > + * page table.
> > + *
> > + * @return the kernel PASID to be used for DMA or INVALID_IOASID on
> > failure
> > + */
> > +int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid)
> > +{
> > +   struct iommu_domain *dom;
> > +   ioasid_t id, max;
> > +   int ret;
> > +
> > +   dom = iommu_get_domain_for_dev(dev);
> > +   if (!dom || !dom->ops || !dom->ops->attach_dev_pasid)
> > +   return -ENODEV;  
> 
> Given the purpose of this API I think it should assert that the device
> has the DMA API in-use using the machinery from the other series.
> 
> ie this should not be used to clone non-DMA API iommu_domains..
> 
Let me try to confirm the specific here. I should check domain type and
rejects all others except IOMMU_DOMAIN_DMA type, right? Should also allow
IOMMU_DOMAIN_IDENTITY.

That makes sense.

> > diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
> > index 24607dc3c2ac..e6cb9b52a420 100644
> > +++ b/include/linux/dma-iommu.h
> > @@ -18,6 +18,13 @@ int iommu_get_dma_cookie(struct iommu_domain
> > *domain); int iommu_get_msi_cookie(struct iommu_domain *domain,
> > dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain
> > *domain); 
> > +/*
> > + * For devices that can do DMA request with PASID, setup a system
> > PASID.
> > + * Address modes (IOVA, PA) are selected by the platform code.
> > + */
> > +int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid);
> > +void iommu_disable_pasid_dma(struct device *dev, ioasid_t pasid);  
> 
> The functions already have a kdoc, don't need two..
> 
Good point!

Thanks,

Jacob
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-15 Thread Jacob Pan
Hi Jason,

On Tue, 15 Mar 2022 11:22:16 -0300, Jason Gunthorpe  wrote:

> On Tue, Mar 15, 2022 at 11:16:41AM +, Robin Murphy wrote:
> > On 2022-03-15 05:07, Jacob Pan wrote:  
> > > DMA mapping API is the de facto standard for in-kernel DMA. It
> > > operates on a per device/RID basis which is not PASID-aware.
> > > 
> > > Some modern devices such as Intel Data Streaming Accelerator, PASID is
> > > required for certain work submissions. To allow such devices use DMA
> > > mapping API, we need the following functionalities:
> > > 1. Provide device a way to retrieve a PASID for work submission within
> > > the kernel
> > > 2. Enable the kernel PASID on the IOMMU for the device
> > > 3. Attach the kernel PASID to the device's default DMA domain, let it
> > > be IOVA or physical address in case of pass-through.
> > > 
> > > This patch introduces a driver facing API that enables DMA API
> > > PASID usage. Once enabled, device drivers can continue to use DMA
> > > APIs as is. There is no difference in dma_handle between without
> > > PASID and with PASID.  
> > 
> > Surely the main point of PASIDs is to be able to use more than one
> > of them?  
> 
> IMHO, not for the DMA API.
> 
Right, but we really need two here. One for DMA request w/o PASID (PASID 0)
and a kernel PASID for DMA request tagged w/ PASID.
Since DMA API is not per process, there is no need for more right now.

> I can't think of good reasons why a single in-kernel device should
> require more than one iommu_domain for use by the DMA API. Even with
> the SIOV cases we have been looking at we don't really see a use case
> for more than one DMA API iommu_domain on a single physical device.
> Do you know of something on the horizon?
> 
Not that I know.

> From my view the main point of PASIDs is to assign iommu_domains that
> are not used by the DMA API.
> 
Right, DMA API default to PASID 0. But IDXD device cannot use PASID 0 for
enqcmds.

> IMHO it is a device mis-design of IDXD to require all DMA be PASID
> tagged. Devices should be able to do DMA on their RID when the PCI
IDXD can do DMA w/ RID, the PASID requirement is only for shared WQ where
ENQCMDS is used. ENQCMDS has the benefit of avoiding locking where work
submission is done from multiple CPUs.
Tony, Dave?

> function is controlled by a kernel driver. I see this driver facing
> API as addressing a device quirk by aliasing the DMA API of the RID
> into a PASID and that is really all it is good for.
> 
> In any case I think we are better to wait for an actual user for multi
> DMA API iommu_domains to come forward before we try to build an API
> for it.
> 
What would you recommend in the interim?

Shall we let VT-d driver set up a special global PASID for DMA API? Then
IDXD driver can retrieve it somehow? But that still needs an API similar to
what I did in the previous version where PASID #1 was used.

Thanks,

Jacob
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-15 Thread Jason Gunthorpe via iommu
On Mon, Mar 14, 2022 at 10:07:09PM -0700, Jacob Pan wrote:
> DMA mapping API is the de facto standard for in-kernel DMA. It operates
> on a per device/RID basis which is not PASID-aware.
> 
> Some modern devices such as Intel Data Streaming Accelerator, PASID is
> required for certain work submissions. To allow such devices use DMA
> mapping API, we need the following functionalities:
> 1. Provide device a way to retrieve a PASID for work submission within
> the kernel
> 2. Enable the kernel PASID on the IOMMU for the device
> 3. Attach the kernel PASID to the device's default DMA domain, let it
> be IOVA or physical address in case of pass-through.
> 
> This patch introduces a driver facing API that enables DMA API
> PASID usage. Once enabled, device drivers can continue to use DMA APIs as
> is. There is no difference in dma_handle between without PASID and with
> PASID.
> 
> Signed-off-by: Jacob Pan 
>  drivers/iommu/dma-iommu.c | 65 +++
>  include/linux/dma-iommu.h |  7 +
>  include/linux/iommu.h |  9 ++
>  3 files changed, 81 insertions(+)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index b22034975301..d0ff1a34b1b6 100644
> +++ b/drivers/iommu/dma-iommu.c
> @@ -39,6 +39,8 @@ enum iommu_dma_cookie_type {
>   IOMMU_DMA_MSI_COOKIE,
>  };
>  
> +static DECLARE_IOASID_SET(iommu_dma_pasid);
> +
>  struct iommu_dma_cookie {
>   enum iommu_dma_cookie_type  type;
>   union {
> @@ -370,6 +372,69 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
>   domain->iova_cookie = NULL;
>  }
>  
> +/**
> + * iommu_enable_pasid_dma --Enable in-kernel DMA request with PASID
> + * @dev: Device to be enabled
> + *
> + * DMA request with PASID will be mapped the same way as the legacy DMA.
> + * If the device is in pass-through, PASID will also pass-through. If the
> + * device is in IOVA map, the supervisor PASID will point to the same IOVA
> + * page table.
> + *
> + * @return the kernel PASID to be used for DMA or INVALID_IOASID on failure
> + */
> +int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid)
> +{
> + struct iommu_domain *dom;
> + ioasid_t id, max;
> + int ret;
> +
> + dom = iommu_get_domain_for_dev(dev);
> + if (!dom || !dom->ops || !dom->ops->attach_dev_pasid)
> + return -ENODEV;

Given the purpose of this API I think it should assert that the device
has the DMA API in-use using the machinery from the other series.

ie this should not be used to clone non-DMA API iommu_domains..

> diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
> index 24607dc3c2ac..e6cb9b52a420 100644
> +++ b/include/linux/dma-iommu.h
> @@ -18,6 +18,13 @@ int iommu_get_dma_cookie(struct iommu_domain *domain);
>  int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
>  void iommu_put_dma_cookie(struct iommu_domain *domain);
>  
> +/*
> + * For devices that can do DMA request with PASID, setup a system PASID.
> + * Address modes (IOVA, PA) are selected by the platform code.
> + */
> +int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid);
> +void iommu_disable_pasid_dma(struct device *dev, ioasid_t pasid);

The functions already have a kdoc, don't need two..

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-15 Thread Jason Gunthorpe via iommu
On Tue, Mar 15, 2022 at 11:16:41AM +, Robin Murphy wrote:
> On 2022-03-15 05:07, Jacob Pan wrote:
> > DMA mapping API is the de facto standard for in-kernel DMA. It operates
> > on a per device/RID basis which is not PASID-aware.
> > 
> > Some modern devices such as Intel Data Streaming Accelerator, PASID is
> > required for certain work submissions. To allow such devices use DMA
> > mapping API, we need the following functionalities:
> > 1. Provide device a way to retrieve a PASID for work submission within
> > the kernel
> > 2. Enable the kernel PASID on the IOMMU for the device
> > 3. Attach the kernel PASID to the device's default DMA domain, let it
> > be IOVA or physical address in case of pass-through.
> > 
> > This patch introduces a driver facing API that enables DMA API
> > PASID usage. Once enabled, device drivers can continue to use DMA APIs as
> > is. There is no difference in dma_handle between without PASID and with
> > PASID.
> 
> Surely the main point of PASIDs is to be able to use more than one
> of them?

IMHO, not for the DMA API.

I can't think of good reasons why a single in-kernel device should
require more than one iommu_domain for use by the DMA API. Even with
the SIOV cases we have been looking at we don't really see a use case
for more than one DMA API iommu_domain on a single physical device.
Do you know of something on the horizon?

>From my view the main point of PASIDs is to assign iommu_domains that
are not used by the DMA API.

IMHO it is a device mis-design of IDXD to require all DMA be PASID
tagged. Devices should be able to do DMA on their RID when the PCI
function is controlled by a kernel driver. I see this driver facing
API as addressing a device quirk by aliasing the DMA API of the RID
into a PASID and that is really all it is good for.

In any case I think we are better to wait for an actual user for multi
DMA API iommu_domains to come forward before we try to build an API
for it.

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-15 Thread Robin Murphy

On 2022-03-15 05:07, Jacob Pan wrote:

DMA mapping API is the de facto standard for in-kernel DMA. It operates
on a per device/RID basis which is not PASID-aware.

Some modern devices such as Intel Data Streaming Accelerator, PASID is
required for certain work submissions. To allow such devices use DMA
mapping API, we need the following functionalities:
1. Provide device a way to retrieve a PASID for work submission within
the kernel
2. Enable the kernel PASID on the IOMMU for the device
3. Attach the kernel PASID to the device's default DMA domain, let it
be IOVA or physical address in case of pass-through.

This patch introduces a driver facing API that enables DMA API
PASID usage. Once enabled, device drivers can continue to use DMA APIs as
is. There is no difference in dma_handle between without PASID and with
PASID.


Surely the main point of PASIDs is to be able to use more than one of 
them? The way I expected this to work is that iommu_enable_pasid_dma() 
returns a *new* struct device representing the dev+PASID combination, 
and the driver can then pass that to subsequent DMA API and/or IOMMU API 
calls as normal, and they know to do the right thing. Automatically 
inferring a PASID for the original physical device clearly can't scale, 
and seems like a dead-end approach that only helps this one niche use-case.


Either way, I think this is also still fundamentally an IOMMU API 
operation that belongs in iommu.[ch] - since the iommu_dma_ops 
consolidation I'd prefer to continue working towards making dma-iommu.h 
a private header just for IOMMU API internal helpers.


Thanks,
Robin.


Signed-off-by: Jacob Pan 
---
  drivers/iommu/dma-iommu.c | 65 +++
  include/linux/dma-iommu.h |  7 +
  include/linux/iommu.h |  9 ++
  3 files changed, 81 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index b22034975301..d0ff1a34b1b6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -39,6 +39,8 @@ enum iommu_dma_cookie_type {
IOMMU_DMA_MSI_COOKIE,
  };
  
+static DECLARE_IOASID_SET(iommu_dma_pasid);

+
  struct iommu_dma_cookie {
enum iommu_dma_cookie_type  type;
union {
@@ -370,6 +372,69 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
domain->iova_cookie = NULL;
  }
  
+/**

+ * iommu_enable_pasid_dma --Enable in-kernel DMA request with PASID
+ * @dev:   Device to be enabled
+ *
+ * DMA request with PASID will be mapped the same way as the legacy DMA.
+ * If the device is in pass-through, PASID will also pass-through. If the
+ * device is in IOVA map, the supervisor PASID will point to the same IOVA
+ * page table.
+ *
+ * @return the kernel PASID to be used for DMA or INVALID_IOASID on failure
+ */
+int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid)
+{
+   struct iommu_domain *dom;
+   ioasid_t id, max;
+   int ret;
+
+   dom = iommu_get_domain_for_dev(dev);
+   if (!dom || !dom->ops || !dom->ops->attach_dev_pasid)
+   return -ENODEV;
+   max = iommu_get_dev_pasid_max(dev);
+   if (!max)
+   return -EINVAL;
+
+   id = ioasid_alloc(_dma_pasid, 1, max, dev);
+   if (id == INVALID_IOASID)
+   return -ENOMEM;
+
+   ret = dom->ops->attach_dev_pasid(dom, dev, id);
+   if (ret) {
+   ioasid_put(id);
+   return ret;
+   }
+   *pasid = id;
+
+   return ret;
+}
+EXPORT_SYMBOL(iommu_enable_pasid_dma);
+
+/**
+ * iommu_disable_pasid_dma --Disable in-kernel DMA request with PASID
+ * @dev:   Device's PASID DMA to be disabled
+ *
+ * It is the device driver's responsibility to ensure no more incoming DMA
+ * requests with the kernel PASID before calling this function. IOMMU driver
+ * ensures PASID cache, IOTLBs related to the kernel PASID are cleared and
+ * drained.
+ *
+ * @return 0 on success or error code on failure
+ */
+void iommu_disable_pasid_dma(struct device *dev, ioasid_t pasid)
+{
+   struct iommu_domain *dom;
+
+   /* TODO: check the given PASID is within the ioasid_set */
+   dom = iommu_get_domain_for_dev(dev);
+   if (!dom->ops->detach_dev_pasid)
+   return;
+   dom->ops->detach_dev_pasid(dom, dev, pasid);
+   ioasid_put(pasid);
+}
+EXPORT_SYMBOL(iommu_disable_pasid_dma);
+
  /**
   * iommu_dma_get_resv_regions - Reserved region driver helper
   * @dev: Device from iommu_get_resv_regions()
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 24607dc3c2ac..e6cb9b52a420 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -18,6 +18,13 @@ int iommu_get_dma_cookie(struct iommu_domain *domain);
  int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
  void iommu_put_dma_cookie(struct iommu_domain *domain);
  
+/*

+ * For devices that can do DMA request with PASID, setup a system PASID.
+ * Address modes (IOVA, PA) are selected by the