[PATCH AUTOSEL 3.18 8/8] iommu/amd: Set exclusion range correctly

2019-04-26 Thread Sasha Levin
From: Joerg Roedel 

[ Upstream commit 3c677d206210f53a4be972211066c0f1cd47fe12 ]

The exclusion range limit register needs to contain the
base-address of the last page that is part of the range, as
bits 0-11 of this register are treated as 0xfff by the
hardware for comparisons.

So correctly set the exclusion range in the hardware to the
last page which is _in_ the range.

Fixes: b2026aa2dce44 ('x86, AMD IOMMU: add functions for programming IOMMU MMIO 
space')
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 2f3475247f0f..127f9cc563e9 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -294,7 +294,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 
address, u32 val)
 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
u64 start = iommu->exclusion_start & PAGE_MASK;
-   u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
+   u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
u64 entry;
 
if (!iommu->exclusion_start)
-- 
2.19.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH AUTOSEL 4.4 9/9] iommu/amd: Set exclusion range correctly

2019-04-26 Thread Sasha Levin
From: Joerg Roedel 

[ Upstream commit 3c677d206210f53a4be972211066c0f1cd47fe12 ]

The exclusion range limit register needs to contain the
base-address of the last page that is part of the range, as
bits 0-11 of this register are treated as 0xfff by the
hardware for comparisons.

So correctly set the exclusion range in the hardware to the
last page which is _in_ the range.

Fixes: b2026aa2dce44 ('x86, AMD IOMMU: add functions for programming IOMMU MMIO 
space')
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 94f1bf772ec9..db85cc5791dc 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -295,7 +295,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 
address, u32 val)
 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
u64 start = iommu->exclusion_start & PAGE_MASK;
-   u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
+   u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
u64 entry;
 
if (!iommu->exclusion_start)
-- 
2.19.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH AUTOSEL 4.9 15/16] iommu/amd: Set exclusion range correctly

2019-04-26 Thread Sasha Levin
From: Joerg Roedel 

[ Upstream commit 3c677d206210f53a4be972211066c0f1cd47fe12 ]

The exclusion range limit register needs to contain the
base-address of the last page that is part of the range, as
bits 0-11 of this register are treated as 0xfff by the
hardware for comparisons.

So correctly set the exclusion range in the hardware to the
last page which is _in_ the range.

Fixes: b2026aa2dce44 ('x86, AMD IOMMU: add functions for programming IOMMU MMIO 
space')
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 157e93421fb8..13bbe5795e4e 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -318,7 +318,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 
address, u32 val)
 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
u64 start = iommu->exclusion_start & PAGE_MASK;
-   u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
+   u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
u64 entry;
 
if (!iommu->exclusion_start)
-- 
2.19.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH AUTOSEL 4.14 30/32] iommu/amd: Set exclusion range correctly

2019-04-26 Thread Sasha Levin
From: Joerg Roedel 

[ Upstream commit 3c677d206210f53a4be972211066c0f1cd47fe12 ]

The exclusion range limit register needs to contain the
base-address of the last page that is part of the range, as
bits 0-11 of this register are treated as 0xfff by the
hardware for comparisons.

So correctly set the exclusion range in the hardware to the
last page which is _in_ the range.

Fixes: b2026aa2dce44 ('x86, AMD IOMMU: add functions for programming IOMMU MMIO 
space')
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index b97984a5ddad..9b3d8fd5f7c1 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -355,7 +355,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 
address, u32 val)
 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
u64 start = iommu->exclusion_start & PAGE_MASK;
-   u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
+   u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
u64 entry;
 
if (!iommu->exclusion_start)
-- 
2.19.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH AUTOSEL 4.19 50/53] iommu/amd: Set exclusion range correctly

2019-04-26 Thread Sasha Levin
From: Joerg Roedel 

[ Upstream commit 3c677d206210f53a4be972211066c0f1cd47fe12 ]

The exclusion range limit register needs to contain the
base-address of the last page that is part of the range, as
bits 0-11 of this register are treated as 0xfff by the
hardware for comparisons.

So correctly set the exclusion range in the hardware to the
last page which is _in_ the range.

Fixes: b2026aa2dce44 ('x86, AMD IOMMU: add functions for programming IOMMU MMIO 
space')
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index e062ab9687c7..5689e07799e3 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -356,7 +356,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 
address, u32 val)
 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
u64 start = iommu->exclusion_start & PAGE_MASK;
-   u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
+   u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
u64 entry;
 
if (!iommu->exclusion_start)
-- 
2.19.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH AUTOSEL 5.0 75/79] iommu/amd: Set exclusion range correctly

2019-04-26 Thread Sasha Levin
From: Joerg Roedel 

[ Upstream commit 3c677d206210f53a4be972211066c0f1cd47fe12 ]

The exclusion range limit register needs to contain the
base-address of the last page that is part of the range, as
bits 0-11 of this register are treated as 0xfff by the
hardware for comparisons.

So correctly set the exclusion range in the hardware to the
last page which is _in_ the range.

Fixes: b2026aa2dce44 ('x86, AMD IOMMU: add functions for programming IOMMU MMIO 
space')
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 66123b911ec8..8ae6b350e64c 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -358,7 +358,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 
address, u32 val)
 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
u64 start = iommu->exclusion_start & PAGE_MASK;
-   u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
+   u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
u64 entry;
 
if (!iommu->exclusion_start)
-- 
2.19.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-04-26 Thread Thiago Jung Bauermann


Michael S. Tsirkin  writes:

> On Wed, Apr 24, 2019 at 10:01:56PM -0300, Thiago Jung Bauermann wrote:
>>
>> Michael S. Tsirkin  writes:
>>
>> > On Wed, Apr 17, 2019 at 06:42:00PM -0300, Thiago Jung Bauermann wrote:
>> >>
>> >> Michael S. Tsirkin  writes:
>> >>
>> >> > On Thu, Mar 21, 2019 at 09:05:04PM -0300, Thiago Jung Bauermann wrote:
>> >> >>
>> >> >> Michael S. Tsirkin  writes:
>> >> >>
>> >> >> > On Wed, Mar 20, 2019 at 01:13:41PM -0300, Thiago Jung Bauermann 
>> >> >> > wrote:
>> >> >> >> >From what I understand of the ACCESS_PLATFORM definition, the host 
>> >> >> >> >will
>> >> >> >> only ever try to access memory addresses that are supplied to it by 
>> >> >> >> the
>> >> >> >> guest, so all of the secure guest memory that the host cares about 
>> >> >> >> is
>> >> >> >> accessible:
>> >> >> >>
>> >> >> >> If this feature bit is set to 0, then the device has same 
>> >> >> >> access to
>> >> >> >> memory addresses supplied to it as the driver has. In 
>> >> >> >> particular,
>> >> >> >> the device will always use physical addresses matching addresses
>> >> >> >> used by the driver (typically meaning physical addresses used 
>> >> >> >> by the
>> >> >> >> CPU) and not translated further, and can access any address 
>> >> >> >> supplied
>> >> >> >> to it by the driver. When clear, this overrides any
>> >> >> >> platform-specific description of whether device access is 
>> >> >> >> limited or
>> >> >> >> translated in any way, e.g. whether an IOMMU may be present.
>> >> >> >>
>> >> >> >> All of the above is true for POWER guests, whether they are secure
>> >> >> >> guests or not.
>> >> >> >>
>> >> >> >> Or are you saying that a virtio device may want to access memory
>> >> >> >> addresses that weren't supplied to it by the driver?
>> >> >> >
>> >> >> > Your logic would apply to IOMMUs as well.  For your mode, there are
>> >> >> > specific encrypted memory regions that driver has access to but 
>> >> >> > device
>> >> >> > does not. that seems to violate the constraint.
>> >> >>
>> >> >> Right, if there's a pre-configured 1:1 mapping in the IOMMU such that
>> >> >> the device can ignore the IOMMU for all practical purposes I would
>> >> >> indeed say that the logic would apply to IOMMUs as well. :-)
>> >> >>
>> >> >> I guess I'm still struggling with the purpose of signalling to the
>> >> >> driver that the host may not have access to memory addresses that it
>> >> >> will never try to access.
>> >> >
>> >> > For example, one of the benefits is to signal to host that driver does
>> >> > not expect ability to access all memory. If it does, host can
>> >> > fail initialization gracefully.
>> >>
>> >> But why would the ability to access all memory be necessary or even
>> >> useful? When would the host access memory that the driver didn't tell it
>> >> to access?
>> >
>> > When I say all memory I mean even memory not allowed by the IOMMU.
>>
>> Yes, but why? How is that memory relevant?
>
> It's relevant when driver is not trusted to only supply correct
> addresses. The feature was originally designed to support userspace
> drivers within guests.

Ah, thanks for clarifying. I don't think that's a problem in our case.
If the guest provides an incorrect address, the hardware simply won't
allow the host to access it.

>> >> >> > Another idea is maybe something like virtio-iommu?
>> >> >>
>> >> >> You mean, have legacy guests use virtio-iommu to request an IOMMU
>> >> >> bypass? If so, it's an interesting idea for new guests but it doesn't
>> >> >> help with guests that are out today in the field, which don't have A
>> >> >> virtio-iommu driver.
>> >> >
>> >> > I presume legacy guests don't use encrypted memory so why do we
>> >> > worry about them at all?
>> >>
>> >> They don't use encrypted memory, but a host machine will run a mix of
>> >> secure and legacy guests. And since the hypervisor doesn't know whether
>> >> a guest will be secure or not at the time it is launched, legacy guests
>> >> will have to be launched with the same configuration as secure guests.
>> >
>> > OK and so I think the issue is that hosts generally fail if they set
>> > ACCESS_PLATFORM and guests do not negotiate it.
>> > So you can not just set ACCESS_PLATFORM for everyone.
>> > Is that the issue here?
>>
>> Yes, that is one half of the issue. The other is that even if hosts
>> didn't fail, existing legacy guests wouldn't "take the initiative" of
>> not negotiating ACCESS_PLATFORM to get the improved performance. They'd
>> have to be modified to do that.
>
> So there's a non-encrypted guest, hypervisor wants to set
> ACCESS_PLATFORM to allow encrypted guests but that will slow down legacy
> guests since their vIOMMU emulation is very slow.

Yes.

> So enabling support for encryption slows down non-encrypted guests. Not
> great but not the end of the world, considering even older guests that
> don't support ACCESS_PLATFORM are completely broken and you do not seem
> to be too worried by that.


Re: [PATCH v2 14/19] iommu: Add guest PASID bind function

2019-04-26 Thread Jacob Pan
On Fri, 26 Apr 2019 17:53:43 +0200
Auger Eric  wrote:

> Hi Jacob,
> 
> On 4/24/19 1:31 AM, Jacob Pan wrote:
> > Guest shared virtual address (SVA) may require host to shadow guest
> > PASID tables. Guest PASID can also be allocated from the host via
> > enlightened interfaces. In this case, guest needs to bind the guest
> > mm, i.e. cr3 in guest phisical address to the actual PASID table
> > in  
> physical
got it

> > the host IOMMU. Nesting will be turned on such that guest virtual
> > address can go through a two level translation:
> > - 1st level translates GVA to GPA
> > - 2nd level translates GPA to HPA
> > This patch introduces APIs to bind guest PASID data to the assigned
> > device entry in the physical IOMMU. See the diagram below for usage
> > explanation.
> > 
> > .-.  .---.
> > |   vIOMMU|  | Guest process mm, FL only |
> > | |  '---'
> > ./
> > | PASID Entry |--- PASID cache flush -
> > '-'   |
> > | |   V
> > | |
> > '-'
> > Guest
> > --| Shadow |--|
> >   vv  v
> > Host
> > .-.  .--.
> > |   pIOMMU|  | Bind FL for GVA-GPA  |
> > | |  '--'
> > ./  |
> > | PASID Entry | V (Nested xlate)
> > '\.-.
> > | |   |Set SL to GPA-HPA|
> > | |   '-'
> > '-'
> > 
> > Where:
> >  - FL = First level/stage one page tables
> >  - SL = Second level/stage two page tables
> > 
> > Signed-off-by: Jacob Pan 
> > Signed-off-by: Liu Yi L 
> > ---
> >  drivers/iommu/iommu.c  | 20 
> >  include/linux/iommu.h  | 10 ++
> >  include/uapi/linux/iommu.h | 15 ++-
> >  3 files changed, 44 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index 498c28a..072f8f3 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -1561,6 +1561,26 @@ int iommu_cache_invalidate(struct
> > iommu_domain *domain, struct device *dev, }
> >  EXPORT_SYMBOL_GPL(iommu_cache_invalidate);
> >  
> > +int iommu_sva_bind_gpasid(struct iommu_domain *domain,
> > +   struct device *dev, struct
> > gpasid_bind_data *data) +{
> > +   if (unlikely(!domain->ops->sva_bind_gpasid))
> > +   return -ENODEV;
> > +
> > +   return domain->ops->sva_bind_gpasid(domain, dev, data);
> > +}
> > +EXPORT_SYMBOL_GPL(iommu_sva_bind_gpasid);
> > +
> > +int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct
> > device *dev,
> > +   int pasid)
> > +{
> > +   if (unlikely(!domain->ops->sva_unbind_gpasid))
> > +   return -ENODEV;
> > +
> > +   return domain->ops->sva_unbind_gpasid(dev, pasid);
> > +}
> > +EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid);
> > +
> >  static void __iommu_detach_device(struct iommu_domain *domain,
> >   struct device *dev)
> >  {
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index 4b92e4b..611388e 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -231,6 +231,8 @@ struct iommu_sva_ops {
> >   * @detach_pasid_table: detach the pasid table
> >   * @cache_invalidate: invalidate translation caches
> >   * @pgsize_bitmap: bitmap of all possible supported page sizes
> > + * @sva_bind_gpasid: bind guest pasid and mm
> > + * @sva_unbind_gpasid: unbind guest pasid and mm
> >   */
> >  struct iommu_ops {
> > bool (*capable)(enum iommu_cap);
> > @@ -295,6 +297,10 @@ struct iommu_ops {
> >  
> > int (*cache_invalidate)(struct iommu_domain *domain,
> > struct device *dev, struct iommu_cache_invalidate_info *inv_info);
> > +   int (*sva_bind_gpasid)(struct iommu_domain *domain,
> > +   struct device *dev, struct
> > gpasid_bind_data *data); +
> > +   int (*sva_unbind_gpasid)(struct device *dev, int pasid);  
> So I am confused now. As the scalable mode PASID table entry contains
> both the FL and SL PT pointers, will you ever use the
> attach/detach_pasid_table or are we the only known users on ARM?
> 
In scalable mode, we will not use attach pasid table. So ARM will be
the only user for now.
Guest PASID table is shadowed, PASID cache flush in the guest will
trigger sva_bind_gpasid.
I introduced bind PASID table for the previous VT-d spec that has
extend context mode (deprecated), where SL is shared by all PASIDs on 
the same device.
> >  
> > unsigned long pgsize_bitmap;
> >  };
> > @@ -409,6 +415,10 @@ extern void iommu_detach_pasid_table(struct
> > iommu_domain *domain); extern int iommu_cache_invalidate(struct
> > iommu_domain *domain, struct device *dev,
> >   

Re: [PATCH v2 13/19] iommu/vt-d: Add nested translation support

2019-04-26 Thread Jacob Pan
On Fri, 26 Apr 2019 17:42:05 +0200
Auger Eric  wrote:

> Hi Jacob,
> 
> On 4/24/19 1:31 AM, Jacob Pan wrote:
> > Nested translation mode is supported in VT-d 3.0 Spec.CH 3.8.
> > With PASID granular translation type set to 0x11b, translation
> > result from the first level(FL) also subject to a second level(SL)
> > page table translation. This mode is used for SVA virtualization,
> > where FL performs guest virtual to guest physical translation and
> > SL performs guest physical to host physical translation.  
> 
> The title of the patch sounds a bit misleading to me as this patch
> "just" adds a helper to set the PASID table entry in nested mode.
> There is no caller yet.
right, will rename to "Add nested translation helper function"
> > 
> > Signed-off-by: Jacob Pan 
> > Signed-off-by: Liu, Yi L 
> > ---
> >  drivers/iommu/intel-pasid.c | 101
> > 
> > drivers/iommu/intel-pasid.h |  11 + 2 files changed, 112
> > insertions(+)
> > 
> > diff --git a/drivers/iommu/intel-pasid.c
> > b/drivers/iommu/intel-pasid.c index d339e8f..04127cf 100644
> > --- a/drivers/iommu/intel-pasid.c
> > +++ b/drivers/iommu/intel-pasid.c
> > @@ -688,3 +688,104 @@ int intel_pasid_setup_pass_through(struct
> > intel_iommu *iommu, 
> > return 0;
> >  }
> > +
> > +/**
> > + * intel_pasid_setup_nested() - Set up PASID entry for nested
> > translation
> > + * which is used for vSVA. The first level page tables are used for
> > + * GVA-GPA translation in the guest, second level page tables are
> > used
> > + * for GPA to HPA translation.
> > + *
> > + * @iommu:  Iommu which the device belong to
> > + * @dev:Device to be set up for translation
> > + * @pgd:First level PGD, treated as GPA  
> nit: @gpgd
> 
> spec naming could be used as well: FLPTPTR: First Level Page
> Translation Pointer
more precise. sounds good

> > + * @pasid:  PASID to be programmed in the device PASID table
> > + * @flags:  Additional info such as supervisor PASID
> > + * @domain: Domain info for setting up second level page tables
> > + * @addr_width: Address width of the first level (guest)
> > + */
> > +int intel_pasid_setup_nested(struct intel_iommu *iommu,
> > +   struct device *dev, pgd_t *gpgd,
> > +   int pasid, int flags,
> > +   struct dmar_domain *domain,
> > +   int addr_width)
> > +{
> > +   struct pasid_entry *pte;
> > +   struct dma_pte *pgd;
> > +   u64 pgd_val;
> > +   int agaw;
> > +   u16 did;
> > +
> > +   if (!ecap_nest(iommu->ecap)) {
> > +   pr_err("No nested translation support on %s\n",
> > +  iommu->name);  
> IOMMU: %s: ;-)
will do

> > +   return -EINVAL;
> > +   }
> > +
> > +   pte = intel_pasid_get_entry(dev, pasid);
> > +   if (WARN_ON(!pte))
> > +   return -EINVAL;
> > +
> > +   pasid_clear_entry(pte);
> > +
> > +   /* Sanity checking performed by caller to make sure address
> > +* width matching in two dimensions:
> > +* 1. CPU vs. IOMMU
> > +* 2. Guest vs. Host.
> > +*/
> > +   switch (addr_width) {
> > +   case 57:
> > +   pasid_set_flpm(pte, 1);
> > +   break;
> > +   case 48:
> > +   pasid_set_flpm(pte, 0);
> > +   break;
> > +   default:
> > +   dev_err(dev, "Invalid paging mode %d\n",
> > addr_width);
> > +   return -EINVAL;
> > +   }
> > +
> > +   /* Setup the first level page table pointer in GPA */
> > +   pasid_set_flptr(pte, (u64)gpgd);
> > +   if (flags & PASID_FLAG_SUPERVISOR_MODE) {
> > +   if (!ecap_srs(iommu->ecap)) {
> > +   pr_err("No supervisor request support on
> > %s\n",
> > +  iommu->name);
> > +   return -EINVAL;
> > +   }
> > +   pasid_set_sre(pte);
> > +   }
> > +
> > +   /* Setup the second level based on the given domain */
> > +   pgd = domain->pgd;
> > +
> > +   for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
> > +   pgd = phys_to_virt(dma_pte_addr(pgd));
> > +   if (!dma_pte_present(pgd)) {
> > +   dev_err(dev, "Invalid domain page
> > table\n");
> > +   return -EINVAL;
> > +   }
> > +   }
> > +   pgd_val = virt_to_phys(pgd);
> > +   pasid_set_slptr(pte, pgd_val);
> > +   pasid_set_fault_enable(pte);
> > +
> > +   did = domain->iommu_did[iommu->seq_id];
> > +   pasid_set_domain_id(pte, did);
> > +
> > +   pasid_set_address_width(pte, agaw);
> > +   pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
> > +
> > +   pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
> > +   pasid_set_present(pte);
> > +
> > +   if (!ecap_coherent(iommu->ecap))
> > +   clflush_cache_range(pte, sizeof(*pte));
> > +
> > +   if (cap_caching_mode(iommu->cap)) {
> > +   pasid_cache_invalidation_with_pasid(iommu, did,
> > pasid);
> > +   iotlb_invalidation_with_pasid(iommu, did, pasid);
> > +   } else
> > + 

Re: [PATCH v2 11/19] iommu/vt-d: Replace Intel specific PASID allocator with IOASID

2019-04-26 Thread Jacob Pan
On Thu, 25 Apr 2019 12:04:01 +0200
Auger Eric  wrote:

> Hi Jacob,
> 
> On 4/24/19 1:31 AM, Jacob Pan wrote:
> > Make use of generic IOASID code to manage PASID allocation,
> > free, and lookup.
> > 
> > Signed-off-by: Jacob Pan 
> > ---
> >  drivers/iommu/Kconfig   |  1 +
> >  drivers/iommu/intel-iommu.c |  9 -
> >  drivers/iommu/intel-pasid.c | 36
> >  drivers/iommu/intel-svm.c   |
> > 41 - 4 files changed, 29
> > insertions(+), 58 deletions(-)
> > 
> > diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> > index 6f07f3b..7f92009 100644
> > --- a/drivers/iommu/Kconfig
> > +++ b/drivers/iommu/Kconfig
> > @@ -204,6 +204,7 @@ config INTEL_IOMMU_SVM
> > bool "Support for Shared Virtual Memory with Intel IOMMU"
> > depends on INTEL_IOMMU && X86
> > select PCI_PASID
> > +   select IOASID
> > select MMU_NOTIFIER
> > help
> >   Shared Virtual Memory (SVM) provides a facility for
> > devices diff --git a/drivers/iommu/intel-iommu.c
> > b/drivers/iommu/intel-iommu.c index ec6f22d..785330a 100644
> > --- a/drivers/iommu/intel-iommu.c
> > +++ b/drivers/iommu/intel-iommu.c
> > @@ -5153,7 +5153,7 @@ static void auxiliary_unlink_device(struct
> > dmar_domain *domain, domain->auxd_refcnt--;
> >  
> > if (!domain->auxd_refcnt && domain->default_pasid > 0)
> > -   intel_pasid_free_id(domain->default_pasid);
> > +   ioasid_free(domain->default_pasid);
> >  }
> >  
> >  static int aux_domain_add_dev(struct dmar_domain *domain,
> > @@ -5171,9 +5171,8 @@ static int aux_domain_add_dev(struct
> > dmar_domain *domain, if (domain->default_pasid <= 0) {
> > int pasid;
> >  
> > -   pasid = intel_pasid_alloc_id(domain, PASID_MIN,
> > -
> > pci_max_pasids(to_pci_dev(dev)),
> > -GFP_KERNEL);
> > +   pasid = ioasid_alloc(NULL, PASID_MIN,
> > pci_max_pasids(to_pci_dev(dev)) - 1,
> > +   domain);
> > if (pasid <= 0) {  
> ioasid_t is a uint and returns INVALID_IOASID on error. Wouldn't it be
> simpler to make ioasid_alloc return an int?
Well, I think we still want the full uint range - 1(INVALID_IOASID).
Intel uses 20bit but I think SMMUs use 32 bits for streamID? I
should just check 
if (pasid == INVALID_IOASID) {

> > pr_err("Can't allocate default pasid\n");
> > return -ENODEV;
> > @@ -5210,7 +5209,7 @@ static int aux_domain_add_dev(struct
> > dmar_domain *domain, spin_unlock(>lock);
> > spin_unlock_irqrestore(_domain_lock, flags);
> > if (!domain->auxd_refcnt && domain->default_pasid > 0)
> > -   intel_pasid_free_id(domain->default_pasid);
> > +   ioasid_free(domain->default_pasid);
> >  
> > return ret;
> >  }
> > diff --git a/drivers/iommu/intel-pasid.c
> > b/drivers/iommu/intel-pasid.c index 5b1d3be..d339e8f 100644
> > --- a/drivers/iommu/intel-pasid.c
> > +++ b/drivers/iommu/intel-pasid.c
> > @@ -26,42 +26,6 @@
> >   */
> >  static DEFINE_SPINLOCK(pasid_lock);
> >  u32 intel_pasid_max_id = PASID_MAX;
> > -static DEFINE_IDR(pasid_idr);
> > -
> > -int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp)
> > -{
> > -   int ret, min, max;
> > -
> > -   min = max_t(int, start, PASID_MIN);
> > -   max = min_t(int, end, intel_pasid_max_id);
> > -
> > -   WARN_ON(in_interrupt());
> > -   idr_preload(gfp);
> > -   spin_lock(_lock);
> > -   ret = idr_alloc(_idr, ptr, min, max, GFP_ATOMIC);
> > -   spin_unlock(_lock);
> > -   idr_preload_end();
> > -
> > -   return ret;
> > -}
> > -
> > -void intel_pasid_free_id(int pasid)
> > -{
> > -   spin_lock(_lock);
> > -   idr_remove(_idr, pasid);
> > -   spin_unlock(_lock);
> > -}
> > -
> > -void *intel_pasid_lookup_id(int pasid)
> > -{
> > -   void *p;
> > -
> > -   spin_lock(_lock);
> > -   p = idr_find(_idr, pasid);
> > -   spin_unlock(_lock);
> > -
> > -   return p;
> > -}
> >  
> >  int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int
> > *pasid) {
> > diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> > index 8f87304..8fff212 100644
> > --- a/drivers/iommu/intel-svm.c
> > +++ b/drivers/iommu/intel-svm.c
> > @@ -25,6 +25,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  
> >  #include "intel-pasid.h"
> > @@ -211,7 +212,9 @@ static void intel_mm_release(struct
> > mmu_notifier *mn, struct mm_struct *mm) rcu_read_lock();
> > list_for_each_entry_rcu(sdev, >devs, list) {
> > intel_pasid_tear_down_entry(svm->iommu, sdev->dev,
> > svm->pasid);
> > -   intel_flush_svm_range_dev(svm, sdev, 0, -1,
> > 0, !svm->mm)> + /* for emulated iommu, PASID cache
> > invalidation implies IOTLB/DTLB */
> > +   if (!cap_caching_mode(svm->iommu->cap))
> > +   intel_flush_svm_range_dev(svm, sdev, 0,
> > -1, 0, !svm->mm);  
> This change is not documented in the commit 

Re: [PATCH v2 RFC/RFT 1/5] ARM: dma-mapping: Add fallback normal page allocations

2019-04-26 Thread Christoph Hellwig
On Fri, Apr 26, 2019 at 01:21:12PM -0700, Nicolin Chen wrote:
> What do you think of dma_free_contiguous() instead? I feel "free"
> is a bit more commonly used (in dma-mapping.h) and it's shorter.

Yeah, that sounds good.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 RFC/RFT 1/5] ARM: dma-mapping: Add fallback normal page allocations

2019-04-26 Thread Nicolin Chen
On Wed, Apr 24, 2019 at 05:06:38PM +0200, Christoph Hellwig wrote:
> > +   if (!dma_release_from_contiguous(dev, page, count))
> > +   __free_pages(page, get_order(size));
> 
> Same for dma_release_from_contiguous - drop the _from, pass the
> actual size, and handle the free_pages fallback.

What do you think of dma_free_contiguous() instead? I feel "free"
is a bit more commonly used (in dma-mapping.h) and it's shorter.

Thanks
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 10/19] iommu/vt-d: Add custom allocator for IOASID

2019-04-26 Thread Jacob Pan
On Wed, 24 Apr 2019 19:27:26 +0200
Auger Eric  wrote:

> Hi Jacob,
> 
> On 4/24/19 1:31 AM, Jacob Pan wrote:
> > When VT-d driver runs in the guest, PASID allocation must be
> > performed via virtual command interface. This patch register a  
> registers
will fix
> > custom IOASID allocator which takes precedence over the default
> > IDR based allocator.  
> nit: s/IDR based// . It is xarray based now.
right, changed code but not commit message :)
>  The resulting IOASID allocation will always
> > come from the host. This ensures that PASID namespace is system-
> > wide.
> > 
> > Signed-off-by: Lu Baolu 
> > Signed-off-by: Liu, Yi L 
> > Signed-off-by: Jacob Pan 
> > ---
> >  drivers/iommu/intel-iommu.c | 58
> > +
> > include/linux/intel-iommu.h |  2 ++ 2 files changed, 60
> > insertions(+)
> > 
> > diff --git a/drivers/iommu/intel-iommu.c
> > b/drivers/iommu/intel-iommu.c index d93c4bd..ec6f22d 100644
> > --- a/drivers/iommu/intel-iommu.c
> > +++ b/drivers/iommu/intel-iommu.c
> > @@ -1711,6 +1711,8 @@ static void free_dmar_iommu(struct
> > intel_iommu *iommu) if (ecap_prs(iommu->ecap))
> > intel_svm_finish_prq(iommu);
> > }
> > +   ioasid_unregister_allocator(>pasid_allocator);
> > +
> >  #endif
> >  }
> >  
> > @@ -4811,6 +4813,46 @@ static int __init
> > platform_optin_force_iommu(void) return 1;
> >  }
> >  
> > +static ioasid_t intel_ioasid_alloc(ioasid_t min, ioasid_t max,
> > void *data) +{
> > +   struct intel_iommu *iommu = data;
> > +   ioasid_t ioasid;
> > +
> > +   /*
> > +* VT-d virtual command interface always uses the full 20
> > bit
> > +* PASID range. Host can partition guest PASID range based
> > on
> > +* policies but it is out of guest's control.
> > +*/  
> The above comment does not exactly relate to the check below
> > +   if (min < PASID_MIN || max > PASID_MAX)
> > +   return -EINVAL;
> > +
> > +   if (vcmd_alloc_pasid(iommu, ))
> > +   return INVALID_IOASID;
> > +
> > +   return ioasid;
> > +}
> > +
> > +static int intel_ioasid_free(ioasid_t ioasid, void *data)
> > +{
> > +   struct iommu_pasid_alloc_info *svm;
> > +   struct intel_iommu *iommu = data;
> > +
> > +   if (!iommu || !cap_caching_mode(iommu->cap))
> > +   return -EINVAL;  
> can !cap_caching_mode(iommu->cap) be true as the allocator only is set
> if CM?
right, should never happen.
> > +   /*
> > +* Sanity check the ioasid owner is done at upper layer,
> > e.g. VFIO
> > +* We can only free the PASID when all the devices are
> > unbond.
> > +*/
> > +   svm = ioasid_find(NULL, ioasid, NULL);
> > +   if (!svm) {  
> you can avoid using the local svm variable.
> > +   pr_warn("Freeing unbond IOASID %d\n", ioasid);  
> unbound
> > +   return -EBUSY;  
> -EINVAL?
It meant the PASID is still being used, bond to a device doing DMA etc.
thus -EBUSY. But I will make the free a void function.
> > +   }
> > +   vcmd_free_pasid(iommu, ioasid);
> > +
> > +   return 0;
> > +}
> > +
> >  int __init intel_iommu_init(void)
> >  {
> > int ret = -ENODEV;
> > @@ -4912,6 +4954,22 @@ int __init intel_iommu_init(void)
> >"%s", iommu->name);
> > iommu_device_set_ops(>iommu,
> > _iommu_ops); iommu_device_register(>iommu);
> > +   if (cap_caching_mode(iommu->cap) &&
> > sm_supported(iommu)) {  
> so shouldn't you test VCCAP_REG as well?
> > +   /*
> > +* Register a custom ASID allocator if we
> > are running
> > +* in a guest, the purpose is to have a
> > system wide PASID
> > +* namespace among all PASID users.
> > +* There can be multiple vIOMMUs in each
> > guest but only
> > +* one allocator is active. All vIOMMU
> > allocators will
> > +* eventually be calling the same host
> > allocator.
> > +*/
> > +   iommu->pasid_allocator.alloc =
> > intel_ioasid_alloc;
> > +   iommu->pasid_allocator.free =
> > intel_ioasid_free;
> > +   iommu->pasid_allocator.pdata = (void
> > *)iommu;
> > +   ret =
> > ioasid_register_allocator(>pasid_allocator);
> > +   if (ret)
> > +   pr_warn("Custom PASID allocator
> > registeration failed\n");  
> registration
> > +   }
> > }
> >  
> > bus_set_iommu(_bus_type, _iommu_ops);
> > diff --git a/include/linux/intel-iommu.h
> > b/include/linux/intel-iommu.h index bff907b..c24c8aa 100644
> > --- a/include/linux/intel-iommu.h
> > +++ b/include/linux/intel-iommu.h
> > @@ -31,6 +31,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  
> >  #include 
> >  #include 
> > @@ -549,6 +550,7 @@ struct intel_iommu {
> >  #ifdef CONFIG_INTEL_IOMMU_SVM
> > struct page_req_dsc *prq;
> > unsigned char prq_name[16];/* Name for PRQ interrupt */

Re: [PATCH v2 19/19] iommu/vt-d: Add svm/sva invalidate function

2019-04-26 Thread Auger Eric
Hi Jacob,
On 4/24/19 1:31 AM, Jacob Pan wrote:
> When Shared Virtual Address (SVA) is enabled for a guest OS via
> vIOMMU, we need to provide invalidation support at IOMMU API and driver
> level. This patch adds Intel VT-d specific function to implement
> iommu passdown invalidate API for shared virtual address.
> 
> The use case is for supporting caching structure invalidation
> of assigned SVM capable devices. Emulated IOMMU exposes queue
> invalidation capability and passes down all descriptors from the guest
> to the physical IOMMU.
> 
> The assumption is that guest to host device ID mapping should be
> resolved prior to calling IOMMU driver. Based on the device handle,
> host IOMMU driver can replace certain fields before submit to the
> invalidation queue.
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Ashok Raj 
> Signed-off-by: Liu, Yi L 
> ---
>  drivers/iommu/intel-iommu.c | 159 
> 
>  1 file changed, 159 insertions(+)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 89989b5..54a3d22 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -5338,6 +5338,164 @@ static void intel_iommu_aux_detach_device(struct 
> iommu_domain *domain,
>   aux_domain_remove_dev(to_dmar_domain(domain), dev);
>  }
>  
> +/*
> + * 2D array for converting and sanitizing IOMMU generic TLB granularity to
> + * VT-d granularity. Invalidation is typically included in the unmap 
> operation
> + * as a result of DMA or VFIO unmap. However, for assigned device where guest
> + * could own the first level page tables without being shadowed by QEMU. In
> + * this case there is no pass down unmap to the host IOMMU as a result of 
> unmap
> + * in the guest. Only invalidations are trapped and passed down.
> + * In all cases, only first level TLB invalidation (request with PASID) can 
> be
> + * passed down, therefore we do not include IOTLB granularity for request
> + * without PASID (second level).
> + *
> + * For an example, to find the VT-d granularity encoding for IOTLB
> + * type and page selective granularity within PASID:
> + * X: indexed by iommu cache type
> + * Y: indexed by enum iommu_inv_granularity
> + * [IOMMU_INV_TYPE_TLB][IOMMU_INV_GRANU_PAGE_PASID]
> + *
> + * Granu_map array indicates validity of the table. 1: valid, 0: invalid
> + *
> + */
> +const static int 
> inv_type_granu_map[NR_IOMMU_CACHE_TYPE][NR_IOMMU_CACHE_INVAL_GRANU] = {
The size is frozen for a given uapi version so I guess you can hardcode
the limits for a given version.
> + /* PASID based IOTLB, support PASID selective and page selective */
> + {0, 1, 1},
> + /* PASID based dev TLBs, only support all PASIDs or single PASID */
> + {1, 1, 0},
> + /* PASID cache */
> + {1, 1, 0}
> +};
> +
> +const static u64 
> inv_type_granu_table[NR_IOMMU_CACHE_TYPE][NR_IOMMU_CACHE_INVAL_GRANU] = {
> + /* PASID based IOTLB */
> + {0, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
> + /* PASID based dev TLBs */
> + {QI_DEV_IOTLB_GRAN_ALL, QI_DEV_IOTLB_GRAN_PASID_SEL, 0},
> + /* PASID cache */
> + {QI_PC_ALL_PASIDS, QI_PC_PASID_SEL, 0},
> +};
Can't you use a single matrix instead, ie. inv_type_granu_table

> +
> +static inline int to_vtd_granularity(int type, int granu, u64 *vtd_granu)
> +{
> + if (type >= NR_IOMMU_CACHE_TYPE || granu >= NR_IOMMU_CACHE_INVAL_GRANU 
> ||
> + !inv_type_granu_map[type][granu])
> + return -EINVAL;
> +
> + *vtd_granu = inv_type_granu_table[type][granu];
> +
> + return 0;
> +}
> +
> +static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
> +{
> + u64 nr_pages;
direct initialization?
> + /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
> +  * IOMMU cache invalidate API passes granu_size in bytes, and number of
> +  * granu size in contiguous memory.
> +  */
> +
> + nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
> + return order_base_2(nr_pages);
> +}
> +
> +static int intel_iommu_sva_invalidate(struct iommu_domain *domain,
> + struct device *dev, struct iommu_cache_invalidate_info 
> *inv_info)
> +{
> + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> + struct device_domain_info *info;
> + struct intel_iommu *iommu;
> + unsigned long flags;
> + int cache_type;
> + u8 bus, devfn;
> + u16 did, sid;
> + int ret = 0;
> + u64 granu;
> + u64 size;
> +
> + if (!inv_info || !dmar_domain ||
> + inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
> + return -EINVAL;
> +
> + if (!dev || !dev_is_pci(dev))
> + return -ENODEV;
> +
> + iommu = device_to_iommu(dev, , );
> + if (!iommu)
> + return -ENODEV;
> +
> + spin_lock(>lock);
> + spin_lock_irqsave(_domain_lock, flags);
mix of _irqsave and non _irqsave looks suspicious to me.
> + info = 

Re: [PATCH v2 06/19] drivers core: Add I/O ASID allocator

2019-04-26 Thread Jacob Pan
On Fri, 26 Apr 2019 05:21:15 -0700
Christoph Hellwig  wrote:

> On Fri, Apr 26, 2019 at 12:47:43PM +0100, Jean-Philippe Brucker wrote:
> > >> On Tue, Apr 23, 2019 at 04:31:06PM -0700, Jacob Pan wrote:  
> > >>> The allocator doesn't really belong in drivers/iommu because
> > >>> some drivers would like to allocate PASIDs for devices that
> > >>> aren't managed by an IOMMU, using the same ID space as IOMMU.
> > >>> It doesn't really belong in drivers/pci either since platform
> > >>> device also support PASID. Add the allocator in
> > >>> drivers/base.
> > >>
> > >> I'd still add it to drivers/iommu, just selectable separately
> > >> from the core iommu code..  
> > > Perhaps I misunderstood. If a driver wants to use IOASIDs w/o
> > > iommu subsystem even turned on, how could selecting from the core
> > > iommu code help? Could you elaborate on "selectable"?  
> > 
> > How about doing the same as CONFIG_IOMMU_IOVA? The code is in
> > drivers/iommu but can be selected by non-IOMMU_API users,
> > independently of CONFIG_IOMMU_SUPPORT. It's true that this
> > allocator will mostly be used by IOMMU drivers.  
> 
> That is exactly what I meant!

Make sense, will do that in the next round. Thanks!
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 17/19] iommu: Add max num of cache and granu types

2019-04-26 Thread Auger Eric
Hi Jacob,

On 4/24/19 1:31 AM, Jacob Pan wrote:
> To convert to/from cache types and granularities between generic and
> VT-d specific counterparts, a 2D arrary is used. Introduce the limits
array
> to help define the converstion array size.
conversion
> 
> Signed-off-by: Jacob Pan 
> ---
>  include/uapi/linux/iommu.h | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> index 5c95905..2d8fac8 100644
> --- a/include/uapi/linux/iommu.h
> +++ b/include/uapi/linux/iommu.h
> @@ -197,6 +197,7 @@ struct iommu_inv_addr_info {
>   __u64   granule_size;
>   __u64   nb_granules;
>  };
> +#define NR_IOMMU_CACHE_INVAL_GRANU   (3)
>  
>  /**
>   * First level/stage invalidation information
> @@ -235,6 +236,7 @@ struct iommu_cache_invalidate_info {
>   struct iommu_inv_addr_info addr_info;
>   };
>  };
> +#define NR_IOMMU_CACHE_TYPE  (3)
>  /**
>   * struct gpasid_bind_data - Information about device and guest PASID binding
>   * @gcr3:Guest CR3 value from guest mm
> 
Is it really something that needs to be exposed in the uapi?

Thanks

Eric
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 16/19] iommu/vtd: Clean up for SVM device list

2019-04-26 Thread Auger Eric
Hi

On 4/24/19 1:31 AM, Jacob Pan wrote:
> Use combined macro for_each_svm_dev() to simplify SVM device iteration.
> 
> Suggested-by: Andy Shevchenko 
> Signed-off-by: Jacob Pan 
Reviewed-by: Eric Auger 

Thanks

Eric

> ---
>  drivers/iommu/intel-svm.c | 76 
> ++-
>  1 file changed, 36 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 0a973c2..39dfb2e 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -447,15 +447,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, 
> int flags, struct svm_dev_
>   goto out;
>   }
>  
> - list_for_each_entry(sdev, >devs, list) {
> - if (dev == sdev->dev) {
> - if (sdev->ops != ops) {
> - ret = -EBUSY;
> - goto out;
> - }
> - sdev->users++;
> - goto success;
> + for_each_svm_dev() {
> + if (sdev->ops != ops) {
> + ret = -EBUSY;
> + goto out;
>   }
> + sdev->users++;
> + goto success;
>   }
>  
>   break;
> @@ -585,40 +583,38 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
>   if (!svm)
>   goto out;
>  
> - list_for_each_entry(sdev, >devs, list) {
> - if (dev == sdev->dev) {
> - ret = 0;
> - sdev->users--;
> - if (!sdev->users) {
> - list_del_rcu(>list);
> - /* Flush the PASID cache and IOTLB for this 
> device.
> -  * Note that we do depend on the hardware *not* 
> using
> -  * the PASID any more. Just as we depend on 
> other
> -  * devices never using PASIDs that they have no 
> right
> -  * to use. We have a *shared* PASID table, 
> because it's
> -  * large and has to be physically contiguous. 
> So it's
> -  * hard to be as defensive as we might like. */
> - intel_pasid_tear_down_entry(iommu, dev, 
> svm->pasid);
> - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, 
> !svm->mm);
> - kfree_rcu(sdev, rcu);
> -
> - if (list_empty(>devs)) {
> - ioasid_free(svm->pasid);
> - if (svm->mm)
> - 
> mmu_notifier_unregister(>notifier, svm->mm);
> -
> - list_del(>list);
> -
> - /* We mandate that no page faults may 
> be outstanding
> -  * for the PASID when 
> intel_svm_unbind_mm() is called.
> -  * If that is not obeyed, subtle errors 
> will happen.
> -  * Let's make them less subtle... */
> - memset(svm, 0x6b, sizeof(*svm));
> - kfree(svm);
> - }
> + for_each_svm_dev() {
> + ret = 0;
> + sdev->users--;
> + if (!sdev->users) {
> + list_del_rcu(>list);
> + /* Flush the PASID cache and IOTLB for this device.
> +  * Note that we do depend on the hardware *not* using
> +  * the PASID any more. Just as we depend on other
> +  * devices never using PASIDs that they have no right
> +  * to use. We have a *shared* PASID table, because it's
> +  * large and has to be physically contiguous. So it's
> +  * hard to be as defensive as we might like. */
> + intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
> + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, 
> !svm->mm);
> + kfree_rcu(sdev, rcu);
> +
> + if (list_empty(>devs)) {
> + ioasid_free(svm->pasid);
> + if (svm->mm)
> + mmu_notifier_unregister(>notifier, 
> svm->mm);
> +
> + list_del(>list);
> +
> + /* We mandate that no page faults may be 
> outstanding
> +  * for 

Re: "iommu/amd: Set exclusion range correctly" causes smartpqi offline

2019-04-26 Thread Qian Cai
On Fri, 2019-04-26 at 17:26 +0200, Joerg Roedel wrote:
> On Fri, Apr 26, 2019 at 10:52:28AM -0400, Qian Cai wrote:
> > Applying some memory pressure would causes smartpqi offline even in today's
> > linux-next. This can always be reproduced by a LTP test cases [1] or
> > sometimes
> > just compiling kernels.
> > 
> > Reverting the commit "iommu/amd: Set exclusion range correctly" fixed the
> > issue.
> > 
> > [  213.437112] smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
> > domain=0x address=0x1000 flags=0x]
> > [  213.447659] smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
> > domain=0x address=0x1800 flags=0x]
> > [  233.362013] smartpqi :23:00.0: controller is offline: status code
> > 0x14803
> > [  233.369359] smartpqi :23:00.0: controller offline
> > [  233.388915] print_req_error: I/O error, dev sdb, sector 3317352 flags
> > 201
> > [  233.388921] sd 0:0:0:0: [sdb] tag#95 UNKNOWN(0x2003) Result:
> > hostbyte=0x01
> > driverbyte=0x00
> > [  233.388931] sd 0:0:0:0: [sdb] tag#95 CDB: opcode=0x2a 2a 00 00 55 89 00
> > 00 01
> > 08 00
> > [  233.389003] Write-error on swap-device (254:1:4474640)
> > [  233.389015] Write-error on swap-device (254:1:2190776)
> > [  233.389023] Write-error on swap-device (254:1:8351936)
> > 
> > [1] /opt/ltp/testcases/bin/mtest01 -p80 -w
> 
> I can't explain that, can you please boot with 'amd_iommu_dump' on the
> kernel command line and send me dmesg after boot?

https://git.sr.ht/~cai/linux-debug/blob/master/dmesg
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v2 14/19] iommu: Add guest PASID bind function

2019-04-26 Thread Auger Eric
Hi Jacob,

On 4/24/19 1:31 AM, Jacob Pan wrote:
> Guest shared virtual address (SVA) may require host to shadow guest
> PASID tables. Guest PASID can also be allocated from the host via
> enlightened interfaces. In this case, guest needs to bind the guest
> mm, i.e. cr3 in guest phisical address to the actual PASID table in
physical
> the host IOMMU. Nesting will be turned on such that guest virtual
> address can go through a two level translation:
> - 1st level translates GVA to GPA
> - 2nd level translates GPA to HPA
> This patch introduces APIs to bind guest PASID data to the assigned
> device entry in the physical IOMMU. See the diagram below for usage
> explaination.
> 
> .-.  .---.
> |   vIOMMU|  | Guest process mm, FL only |
> | |  '---'
> ./
> | PASID Entry |--- PASID cache flush -
> '-'   |
> | |   V
> | |
> '-'
> Guest
> --| Shadow |--|
>   vv  v
> Host
> .-.  .--.
> |   pIOMMU|  | Bind FL for GVA-GPA  |
> | |  '--'
> ./  |
> | PASID Entry | V (Nested xlate)
> '\.-.
> | |   |Set SL to GPA-HPA|
> | |   '-'
> '-'
> 
> Where:
>  - FL = First level/stage one page tables
>  - SL = Second level/stage two page tables
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Liu Yi L 
> ---
>  drivers/iommu/iommu.c  | 20 
>  include/linux/iommu.h  | 10 ++
>  include/uapi/linux/iommu.h | 15 ++-
>  3 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 498c28a..072f8f3 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1561,6 +1561,26 @@ int iommu_cache_invalidate(struct iommu_domain 
> *domain, struct device *dev,
>  }
>  EXPORT_SYMBOL_GPL(iommu_cache_invalidate);
>  
> +int iommu_sva_bind_gpasid(struct iommu_domain *domain,
> + struct device *dev, struct gpasid_bind_data *data)
> +{
> + if (unlikely(!domain->ops->sva_bind_gpasid))
> + return -ENODEV;
> +
> + return domain->ops->sva_bind_gpasid(domain, dev, data);
> +}
> +EXPORT_SYMBOL_GPL(iommu_sva_bind_gpasid);
> +
> +int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev,
> + int pasid)
> +{
> + if (unlikely(!domain->ops->sva_unbind_gpasid))
> + return -ENODEV;
> +
> + return domain->ops->sva_unbind_gpasid(dev, pasid);
> +}
> +EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
> struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 4b92e4b..611388e 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -231,6 +231,8 @@ struct iommu_sva_ops {
>   * @detach_pasid_table: detach the pasid table
>   * @cache_invalidate: invalidate translation caches
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
> + * @sva_bind_gpasid: bind guest pasid and mm
> + * @sva_unbind_gpasid: unbind guest pasid and mm
>   */
>  struct iommu_ops {
>   bool (*capable)(enum iommu_cap);
> @@ -295,6 +297,10 @@ struct iommu_ops {
>  
>   int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev,
>   struct iommu_cache_invalidate_info *inv_info);
> + int (*sva_bind_gpasid)(struct iommu_domain *domain,
> + struct device *dev, struct gpasid_bind_data *data);
> +
> + int (*sva_unbind_gpasid)(struct device *dev, int pasid);
So I am confused now. As the scalable mode PASID table entry contains
both the FL and SL PT pointers, will you ever use the
attach/detach_pasid_table or are we the only known users on ARM?

>  
>   unsigned long pgsize_bitmap;
>  };
> @@ -409,6 +415,10 @@ extern void iommu_detach_pasid_table(struct iommu_domain 
> *domain);
>  extern int iommu_cache_invalidate(struct iommu_domain *domain,
> struct device *dev,
> struct iommu_cache_invalidate_info *inv_info);
> +extern int iommu_sva_bind_gpasid(struct iommu_domain *domain,
> + struct device *dev, struct gpasid_bind_data *data);
> +extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
> + struct device *dev, int pasid);
definition in !CONFIG_IOMMU_API case?
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct 

Re: [PATCH] iommu/dmar: Use struct_size() helper

2019-04-26 Thread Gustavo A. R. Silva



On 4/26/19 9:44 AM, Joerg Roedel wrote:
> On Thu, Apr 18, 2019 at 01:46:24PM -0500, Gustavo A. R. Silva wrote:
>> Make use of the struct_size() helper instead of an open-coded version
>> in order to avoid any potential type mistakes, in particular in the
>> context in which this code is being used.
>>
>> So, replace code of the following form:
>>
>> size = sizeof(*info) + level * sizeof(info->path[0]);
>>
>> with:
>>
>> size = struct_size(info, path, level);
>>
>> Signed-off-by: Gustavo A. R. Silva 
> 
> Applied, thanks.
> 

Great. :)

Thanks, Joerg.
--
Gustavo
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: iommu/vt-d: drop mm use count if address is not canonical

2019-04-26 Thread Jacob Pan
On Fri, 26 Apr 2019 15:38:46 +0200
Joerg Roedel  wrote:

> [ Adding more people. ]
> 
> On Wed, Apr 17, 2019 at 05:12:57PM +0800, Pan Bian wrote:
> > The use count of svm->mm is incremented by mmget_not_zero. However,
> > it is not dropped when the address is not canonical. This patch
> > fixes the bug.
> > 
> > Fixes: 9d8c3af31607("iommu/vt-d: IOMMU Page Request needs to check
> > if address is canonical.")
> > 
> > Signed-off-by: Pan Bian 
> > ---
> >  drivers/iommu/intel-svm.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> > index 3a4b09a..2630d2e 100644
> > --- a/drivers/iommu/intel-svm.c
> > +++ b/drivers/iommu/intel-svm.c
> > @@ -574,8 +574,10 @@ static irqreturn_t prq_event_thread(int irq,
> > void *d) goto bad_req;
> >  
> > /* If address is not canonical, return invalid
> > response */
> > -   if (!is_canonical_address(address))
> > +   if (!is_canonical_address(address)) {
> > +   mmput(svm->mm);
> > goto bad_req;
> > +   }
> >  
> > down_read(>mm->mmap_sem);
> > vma = find_extend_vma(svm->mm, address);  
> 
> I think it is better to move the canonical address check before
> mmget_not_zero() like in the diff below. But I let David and Jacob
> comment on this.
> 
Looks good to me. Tested with PRQ and a fake non-canonical address, then
sent an invalid page response.

 Tested-by: Jacob Pan 


> Regards,
> 
>   Joerg
> 
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 8f87304f915c..5e9e1eee8336 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -586,14 +586,15 @@ static irqreturn_t prq_event_thread(int irq,
> void *d)
>* any faults on kernel addresses. */
>   if (!svm->mm)
>   goto bad_req;
> - /* If the mm is already defunct, don't handle
> faults. */
> - if (!mmget_not_zero(svm->mm))
> - goto bad_req;
>  
>   /* If address is not canonical, return invalid
> response */ if (!is_canonical_address(address))
>   goto bad_req;
>  
> + /* If the mm is already defunct, don't handle
> faults. */
> + if (!mmget_not_zero(svm->mm))
> + goto bad_req;
> +
>   down_read(>mm->mmap_sem);
>   vma = find_extend_vma(svm->mm, address);
>   if (!vma || address < vma->vm_start)

[Jacob Pan]
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 13/19] iommu/vt-d: Add nested translation support

2019-04-26 Thread Auger Eric
Hi Jacob,

On 4/24/19 1:31 AM, Jacob Pan wrote:
> Nested translation mode is supported in VT-d 3.0 Spec.CH 3.8.
> With PASID granular translation type set to 0x11b, translation
> result from the first level(FL) also subject to a second level(SL)
> page table translation. This mode is used for SVA virtualization,
> where FL performs guest virtual to guest physical translation and
> SL performs guest physical to host physical translation.

The title of the patch sounds a bit misleading to me as this patch
"just" adds a helper to set the PASID table entry in nested mode. There
is no caller yet.
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Liu, Yi L 
> ---
>  drivers/iommu/intel-pasid.c | 101 
> 
>  drivers/iommu/intel-pasid.h |  11 +
>  2 files changed, 112 insertions(+)
> 
> diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
> index d339e8f..04127cf 100644
> --- a/drivers/iommu/intel-pasid.c
> +++ b/drivers/iommu/intel-pasid.c
> @@ -688,3 +688,104 @@ int intel_pasid_setup_pass_through(struct intel_iommu 
> *iommu,
>  
>   return 0;
>  }
> +
> +/**
> + * intel_pasid_setup_nested() - Set up PASID entry for nested translation
> + * which is used for vSVA. The first level page tables are used for
> + * GVA-GPA translation in the guest, second level page tables are used
> + * for GPA to HPA translation.
> + *
> + * @iommu:  Iommu which the device belong to
> + * @dev:Device to be set up for translation
> + * @pgd:First level PGD, treated as GPA
nit: @gpgd

spec naming could be used as well: FLPTPTR: First Level Page
Translation Pointer
> + * @pasid:  PASID to be programmed in the device PASID table
> + * @flags:  Additional info such as supervisor PASID
> + * @domain: Domain info for setting up second level page tables
> + * @addr_width: Address width of the first level (guest)
> + */
> +int intel_pasid_setup_nested(struct intel_iommu *iommu,
> + struct device *dev, pgd_t *gpgd,
> + int pasid, int flags,
> + struct dmar_domain *domain,
> + int addr_width)
> +{
> + struct pasid_entry *pte;
> + struct dma_pte *pgd;
> + u64 pgd_val;
> + int agaw;
> + u16 did;
> +
> + if (!ecap_nest(iommu->ecap)) {
> + pr_err("No nested translation support on %s\n",
> +iommu->name);
IOMMU: %s: ;-)
> + return -EINVAL;
> + }
> +
> + pte = intel_pasid_get_entry(dev, pasid);
> + if (WARN_ON(!pte))
> + return -EINVAL;
> +
> + pasid_clear_entry(pte);
> +
> + /* Sanity checking performed by caller to make sure address
> +  * width matching in two dimensions:
> +  * 1. CPU vs. IOMMU
> +  * 2. Guest vs. Host.
> +  */
> + switch (addr_width) {
> + case 57:
> + pasid_set_flpm(pte, 1);
> + break;
> + case 48:
> + pasid_set_flpm(pte, 0);
> + break;
> + default:
> + dev_err(dev, "Invalid paging mode %d\n", addr_width);
> + return -EINVAL;
> + }
> +
> + /* Setup the first level page table pointer in GPA */
> + pasid_set_flptr(pte, (u64)gpgd);
> + if (flags & PASID_FLAG_SUPERVISOR_MODE) {
> + if (!ecap_srs(iommu->ecap)) {
> + pr_err("No supervisor request support on %s\n",
> +iommu->name);
> + return -EINVAL;
> + }
> + pasid_set_sre(pte);
> + }
> +
> + /* Setup the second level based on the given domain */
> + pgd = domain->pgd;
> +
> + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
> + pgd = phys_to_virt(dma_pte_addr(pgd));
> + if (!dma_pte_present(pgd)) {
> + dev_err(dev, "Invalid domain page table\n");
> + return -EINVAL;
> + }
> + }
> + pgd_val = virt_to_phys(pgd);
> + pasid_set_slptr(pte, pgd_val);
> + pasid_set_fault_enable(pte);
> +
> + did = domain->iommu_did[iommu->seq_id];
> + pasid_set_domain_id(pte, did);
> +
> + pasid_set_address_width(pte, agaw);
> + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
> +
> + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
> + pasid_set_present(pte);
> +
> + if (!ecap_coherent(iommu->ecap))
> + clflush_cache_range(pte, sizeof(*pte));
> +
> + if (cap_caching_mode(iommu->cap)) {
> + pasid_cache_invalidation_with_pasid(iommu, did, pasid);
> + iotlb_invalidation_with_pasid(iommu, did, pasid);
> + } else
> + iommu_flush_write_buffer(iommu);
a bunch of that code is duplicated from
intel_pasid_setup_second_level(). I wonder if you could devise a common
helper function?

Thanks

Eric
> +
> + return 0;
> +}
> diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
> index 

"iommu/amd: Set exclusion range correctly" causes smartpqi offline

2019-04-26 Thread Qian Cai
Applying some memory pressure would cause smartpqi to go offline even in today's
linux-next. This can always be reproduced by an LTP test case [1] or sometimes
just by compiling kernels.

Reverting the commit "iommu/amd: Set exclusion range correctly" fixed the issue.

[  213.437112] smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
domain=0x address=0x1000 flags=0x]
[  213.447659] smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
domain=0x address=0x1800 flags=0x]
[  233.362013] smartpqi :23:00.0: controller is offline: status code 0x14803
[  233.369359] smartpqi :23:00.0: controller offline
[  233.388915] print_req_error: I/O error, dev sdb, sector 3317352 flags 201
[  233.388921] sd 0:0:0:0: [sdb] tag#95 UNKNOWN(0x2003) Result: hostbyte=0x01
driverbyte=0x00
[  233.388931] sd 0:0:0:0: [sdb] tag#95 CDB: opcode=0x2a 2a 00 00 55 89 00 00 01
08 00
[  233.389003] Write-error on swap-device (254:1:4474640)
[  233.389015] Write-error on swap-device (254:1:2190776)
[  233.389023] Write-error on swap-device (254:1:8351936)

[1] /opt/ltp/testcases/bin/mtest01 -p80 -w
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: "iommu/amd: Set exclusion range correctly" causes smartpqi offline

2019-04-26 Thread Joerg Roedel
On Fri, Apr 26, 2019 at 10:52:28AM -0400, Qian Cai wrote:
> Applying some memory pressure would causes smartpqi offline even in today's
> linux-next. This can always be reproduced by a LTP test cases [1] or sometimes
> just compiling kernels.
> 
> Reverting the commit "iommu/amd: Set exclusion range correctly" fixed the 
> issue.
> 
> [  213.437112] smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
> domain=0x address=0x1000 flags=0x]
> [  213.447659] smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT
> domain=0x address=0x1800 flags=0x]
> [  233.362013] smartpqi :23:00.0: controller is offline: status code 
> 0x14803
> [  233.369359] smartpqi :23:00.0: controller offline
> [  233.388915] print_req_error: I/O error, dev sdb, sector 3317352 flags 
> 201
> [  233.388921] sd 0:0:0:0: [sdb] tag#95 UNKNOWN(0x2003) Result: hostbyte=0x01
> driverbyte=0x00
> [  233.388931] sd 0:0:0:0: [sdb] tag#95 CDB: opcode=0x2a 2a 00 00 55 89 00 00 
> 01
> 08 00
> [  233.389003] Write-error on swap-device (254:1:4474640)
> [  233.389015] Write-error on swap-device (254:1:2190776)
> [  233.389023] Write-error on swap-device (254:1:8351936)
> 
> [1] /opt/ltp/testcases/bin/mtest01 -p80 -w

I can't explain that, can you please boot with 'amd_iommu_dump' on the
kernel command line and send me dmesg after boot?

Thanks,

Joerg
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 08/19] ioasid: Add custom IOASID allocator

2019-04-26 Thread Jacob Pan
On Fri, 26 Apr 2019 11:06:54 +0200
Auger Eric  wrote:

> Hi Jacob,
> 
> On 4/25/19 11:29 PM, Jacob Pan wrote:
> > Hi Eric,
> > 
> > Thanks for the review.
> > 
> > On Thu, 25 Apr 2019 12:03:42 +0200
> > Auger Eric  wrote:
> >   
> >> Hi Jacob,
> >>
> >> On 4/24/19 1:31 AM, Jacob Pan wrote:  
> >>> Sometimes, IOASID allocation must be handled by platform specific
> >>> code. The use cases are guest vIOMMU and pvIOMMU where IOASIDs
> >>> need to be allocated by the host via enlightened or paravirt
> >>> interfaces.
> >>>
> >>> This patch adds an extension to the IOASID allocator APIs such
> >>> that platform drivers can register a custom allocator, possibly
> >>> at boot time, to take over the allocation. Xarray is still used
> >>> for tracking and searching purposes internal to the IOASID code.
> >>> Private data of an IOASID can also be set after the allocation.
> >>>
> >>> There can be multiple custom allocators registered but only one is
> >>> used at a time. In case of hot removal of devices that provides
> >>> the allocator, all IOASIDs must be freed prior to unregistering
> >>> the allocator. Default XArray based allocator cannot be mixed with
> >>> custom allocators, i.e. custom allocators will not be used if
> >>> there are outstanding IOASIDs allocated by the default XA
> >>> allocator.
> >>
> >> What's the exact use case behind allowing several custom IOASID
> >> allocators to be registered?  
> > It is mainly for supporting multiple PCI segments thus multiple
> > vIOMMUs. Even though, all allocators will end up calling the host to
> > allocate PASIDs.  
> 
> Yes that was my understanding actually.
> 
> Another question is how do you handle the reserved RID_PASID
> requirement?
> 
We always use PASID 0 for requests w/o PASID, so they do not go through
the allocator.
 #define PASID_RID2PASID0x0

>  QEMU does not support multiple PCI segments/domains
> > afaik but others might.  
>  [...]  
>  [...]  
> > Yes, more clear this way.
> >   
>  [...]  
> >> is already in use?  
>  [...]  
> >> s/The reason is that custom allocator/The reason is that custom
> >> allocators  
>  [...]  
> >> This last sentence may be moved to the unregister() kerneldoc  
>  [...]  
> >> The fact the first registered custom allocator gets automatically
> >> active was not obvious to me and may deserve a comment.  
>  [...]  
>  [...]  
>  [...]  
>  [...]  
>  [...]  
>  [...]  
>  [...]  
> >> At this stage it is difficult to assess whether using a BUG_ON() is
> >> safe here. Who is responsible for freeing the IOASIDs?  
>  [...]  
>  [...]  
> >> s/data also sanitiy check/data, also sanity check  
> >>> +  */
> >>> + min = id;
> >>> + max = id + 1;
> >>> + } else
> >>> + default_allocator_used = 1;
> >> shouldn't default_allocator_used be protected as well?  
>  [...]  
> >> wouldn't it be possible to integrate the default io asid allocator
> >> as any custom allocator, ie. implement an alloc callback using
> >> xa_alloc. Then the active io allocator could be either a custom or
> >> a default one.  
> > That is an interesting idea. I think it is possible.
> > But since default xa allocator is internal to ioasid infrastructure,
> > why implement it as a callback?  
> 
> I mean your could directly define a static const default_allocator in
> ioasid.c and assign it by default. Do I miss something?
> 
Got it, seems cleaner. Let me give it a try.

Thanks

Jacob
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem

2019-04-26 Thread Christoph Hellwig
On Thu, Apr 25, 2019 at 11:30:54AM +, Laurentiu Tudor wrote:
> > I think the first step is to move the two USB controller that can only
> > DMA to their own BAR off the existing DMA coherent infrastructure.  The
> > controllers are already identified using the HCD_LOCAL_MEM flag, so we
> > just need to key off that in the hcd_buffer_* routines and call into a
> 
> So if HCD_LOCAL_MEM is set I should call into the gen_pool api instead 
> of existing dma_{alloc,free}_coherent().

Yes.

> > genalloc that has been fed using the bar, replacing the current
> > dma_declare_coherent usage.  Take a look at drivers/pci/p2pdma.c
> > for another example of allocating bits of a BAR using genalloc.
> 
> Where should I place the reference to the gen_pool? How does local_pool 
> in 'struct hcd_usb' sound?

Sounds fine to me, but in the end the usb maintainers will have
to decide.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 02/10] swiotlb: Factor out slot allocation and free

2019-04-26 Thread Christoph Hellwig
On Thu, Apr 25, 2019 at 10:07:19AM +0800, Lu Baolu wrote:
> This is not VT-d specific. It's just how generic IOMMU works.
>
> Normally, IOMMU works in paging mode. So if a driver issues DMA with
> IOVA  0x0123, IOMMU can remap it with a physical address 0x0123.
> But we should never expect IOMMU to remap 0x0123 with physical
> address of 0x. That's the reason why I said that IOMMU will not
> work there.

Well, with the iommu it doesn't happen.  With swiotlb it obviosuly
can happen, so drivers are fine with it.  Why would that suddenly
become an issue when swiotlb is called from the iommu code?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 09/19] iommu/vt-d: Enlightened PASID allocation

2019-04-26 Thread Jacob Pan
On Fri, 26 Apr 2019 09:24:29 +0200
Auger Eric  wrote:

> > Agreed  
> >>> +#define VCMD_VRSP_RESULE(e)  (((e) >> 8) &
> >>> 0xf)
> >> nit: s/RESULE/RSLT?  
> > yes. Also the mask bits should be 8 to 63
> > s/0xf/GENMASK_ULL(63, 8))/  
> Well the macro definition looks correct as 63:28 is RsvdZ

you are right, I misread the spec.

Thanks,

Jacob
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] Remove old no iommu direct mapping code

2019-04-26 Thread Joerg Roedel
On Tue, Apr 23, 2019 at 09:03:34PM +0100, Tom Murphy wrote:
> These checks were intended to handle devices not mapped by the IOMMU.
> Since the AMD IOMMU driver uses per-device dma_ops these functions can
> no longer be called by direct mapped devices. So these checks aren't
> needed anymore.
> 
> Signed-off-by: Tom Murphy 
> ---
>  drivers/iommu/amd_iommu.c | 10 ++
>  1 file changed, 2 insertions(+), 8 deletions(-)

This has already been done by Christoph, see


https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/commit/?id=7a5dbf3ab2f04905cf8468c66fcdbfb643068bcb

Regards,

Joerg
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH next 13/25] iommu/omap: Use dev_get_drvdata()

2019-04-26 Thread Joerg Roedel
On Tue, Apr 23, 2019 at 03:50:08PM +0800, Kefeng Wang wrote:
>  drivers/iommu/omap-iommu.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)

Applied, thanks.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/1] iommu/vt-d: Don't request page request irq under dmar_global_lock

2019-04-26 Thread Joerg Roedel
On Fri, Apr 19, 2019 at 02:43:29PM +0800, Lu Baolu wrote:
>  drivers/iommu/intel-iommu.c | 6 ++
>  1 file changed, 6 insertions(+)

Applied, thanks.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/dmar: Use struct_size() helper

2019-04-26 Thread Joerg Roedel
On Thu, Apr 18, 2019 at 01:46:24PM -0500, Gustavo A. R. Silva wrote:
> Make use of the struct_size() helper instead of an open-coded version
> in order to avoid any potential type mistakes, in particular in the
> context in which this code is being used.
> 
> So, replace code of the following form:
> 
> size = sizeof(*info) + level * sizeof(info->path[0]);
> 
> with:
> 
> size = struct_size(info, path, level);
> 
> Signed-off-by: Gustavo A. R. Silva 

Applied, thanks.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH v1 04/15] iommu: Add DOMAIN_ATTR_PTBASE

2019-04-26 Thread Joerg Roedel
Hi Jordan,

On Mon, Mar 18, 2019 at 08:19:12AM -0600, Jordan Crouse wrote:
> Adreno GPUs have an internal mechanism to switch the pagetable address in the
> attached arm-smmu v2 IOMMU so that each individual rendering process can have
> their own pagetable. The driver uses iommu_map and iommu_unmap to write
> the pagetable but the address for each individual pagetable needs to be 
> queried
> so it can be sent to the hardware. You can see the driver specific code that
> does this here:

Okay, thanks for the explanation. I still don't like it, but it is
probably better putting gpu-specfic context-switch logic into the iommu
driver, so I guess this is okay.


Regards,

Joerg
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v1] iommu/amd: flush not present cache in iommu_map_page

2019-04-26 Thread Joerg Roedel
On Wed, Apr 24, 2019 at 05:50:51PM +0100, Tom Murphy wrote:
> check if there is a not-present cache present and flush it if there is.
> 
> Signed-off-by: Tom Murphy 
> ---
>  drivers/iommu/amd_iommu.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index f7cdd2ab7f11..91fe5cb10f50 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -1637,6 +1637,11 @@ static int iommu_map_page(struct protection_domain 
> *dom,
>  
>   update_domain(dom);
>  
> + if (unlikely(amd_iommu_np_cache && !dom->updated)) {
> + domain_flush_pages(dom, bus_addr, page_size);
> + domain_flush_complete(dom);
> + }
> +

The iommu_map_page function is called once per physical page that is
mapped, so in the worst case for every 4k mapping established. So it is
not the right place to put this check in.

From a quick glance this check belongs into the map_sg() and the
amd_iommu_map() function, but without the dom->updated check.

Besides, to really support systems with np-cache in a way that doesn't
destroy all performance, the driver also needs range-flushes for IOTLBs.
Currently it can only flush a 4k page of the full address space of a
domain. But that doesn't mean we shouldn't fix the missing flushes now.

So please re-send the patch with the check at the two places I pointed
out above.

Thanks,

Joerg
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [GIT PULL] iommu/arm-smmu: Updates for 5.2

2019-04-26 Thread Joerg Roedel
On Wed, Apr 24, 2019 at 12:52:52PM +0100, Will Deacon wrote:
>   git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git 
> for-joerg/arm-smmu/updates

Pulled, thanks Will.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 7/9] iommu/vt-d: use helper pci_dev_id

2019-04-26 Thread Joerg Roedel
On Wed, Apr 24, 2019 at 09:16:10PM +0200, Heiner Kallweit wrote:
> Use new helper pci_dev_id() to simplify the code.
> 
> Signed-off-by: Heiner Kallweit 

Reviewed-by: Joerg Roedel 

> ---
>  drivers/iommu/intel-iommu.c | 2 +-
>  drivers/iommu/intel_irq_remapping.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index d93c4bd7d..3f475a23a 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -1391,7 +1391,7 @@ static void iommu_enable_dev_iotlb(struct 
> device_domain_info *info)
>  
>   /* pdev will be returned if device is not a vf */
>   pf_pdev = pci_physfn(pdev);
> - info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
> + info->pfsid = pci_dev_id(pf_pdev);
>   }
>  
>  #ifdef CONFIG_INTEL_IOMMU_SVM
> diff --git a/drivers/iommu/intel_irq_remapping.c 
> b/drivers/iommu/intel_irq_remapping.c
> index 634d8f059..4160aa9f3 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -424,7 +424,7 @@ static int set_msi_sid(struct irte *irte, struct pci_dev 
> *dev)
>   set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias);
>   else
>   set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
> -  PCI_DEVID(dev->bus->number, dev->devfn));
> +  pci_dev_id(dev));
>  
>   return 0;
>  }
> -- 
> 2.21.0
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 6/9] iommu/amd: use helper pci_dev_id

2019-04-26 Thread Joerg Roedel
On Wed, Apr 24, 2019 at 09:15:25PM +0200, Heiner Kallweit wrote:
> Use new helper pci_dev_id() to simplify the code.
> 
> Signed-off-by: Heiner Kallweit 

Reviewed-by: Joerg Roedel 

> ---
>  drivers/iommu/amd_iommu.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index f467cc4b4..5cb201422 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -165,7 +165,7 @@ static inline u16 get_pci_device_id(struct device *dev)
>  {
>   struct pci_dev *pdev = to_pci_dev(dev);
>  
> - return PCI_DEVID(pdev->bus->number, pdev->devfn);
> + return pci_dev_id(pdev);
>  }
>  
>  static inline int get_acpihid_device_id(struct device *dev,
> -- 
> 2.21.0
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: iommu/vt-d: drop mm use count if address is not canonical

2019-04-26 Thread Joerg Roedel
[ Adding more people. ]

On Wed, Apr 17, 2019 at 05:12:57PM +0800, Pan Bian wrote:
> The use count of svm->mm is incremented by mmget_not_zero. However, it
> is not dropped when the address is not canonical. This patch fixes the
> bug.
> 
> Fixes: 9d8c3af31607("iommu/vt-d: IOMMU Page Request needs to check if
> address is canonical.")
> 
> Signed-off-by: Pan Bian 
> ---
>  drivers/iommu/intel-svm.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 3a4b09a..2630d2e 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -574,8 +574,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>   goto bad_req;
>  
>   /* If address is not canonical, return invalid response */
> - if (!is_canonical_address(address))
> + if (!is_canonical_address(address)) {
> + mmput(svm->mm);
>   goto bad_req;
> + }
>  
>   down_read(&svm->mm->mmap_sem);
>   vma = find_extend_vma(svm->mm, address);

I think it is better to move the canonical address check before
mmget_not_zero() like in the diff below. But I let David and Jacob
comment on this.

Regards,

Joerg

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 8f87304f915c..5e9e1eee8336 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -586,14 +586,15 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 * any faults on kernel addresses. */
if (!svm->mm)
goto bad_req;
-   /* If the mm is already defunct, don't handle faults. */
-   if (!mmget_not_zero(svm->mm))
-   goto bad_req;
 
/* If address is not canonical, return invalid response */
if (!is_canonical_address(address))
goto bad_req;
 
+   /* If the mm is already defunct, don't handle faults. */
+   if (!mmget_not_zero(svm->mm))
+   goto bad_req;
+
	down_read(&svm->mm->mmap_sem);
vma = find_extend_vma(svm->mm, address);
if (!vma || address < vma->vm_start)
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/mediatek: fix leaked of_node references

2019-04-26 Thread Joerg Roedel
On Wed, Apr 17, 2019 at 10:41:19AM +0800, Wen Yang wrote:
>  drivers/iommu/mtk_iommu.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)

Applied, thanks.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/amd: Remove amd_iommu_pd_list

2019-04-26 Thread Joerg Roedel
From: Joerg Roedel 

This variable hold a global list of allocated protection
domains in the AMD IOMMU driver. By now this list is never
traversed anymore, so the list and the lock protecting it
can be removed.

Cc: Tom Murphy 
Signed-off-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c   | 33 -
 drivers/iommu/amd_iommu_init.c  |  8 
 drivers/iommu/amd_iommu_types.h |  6 --
 3 files changed, 47 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f467cc4b498e..3e0696cf965c 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1723,31 +1723,6 @@ static void dma_ops_free_iova(struct dma_ops_domain 
*dma_dom,
  *
  /
 
-/*
- * This function adds a protection domain to the global protection domain list
- */
-static void add_domain_to_list(struct protection_domain *domain)
-{
-   unsigned long flags;
-
-   spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-   list_add(&domain->list, &amd_iommu_pd_list);
-   spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
-/*
- * This function removes a protection domain to the global
- * protection domain list
- */
-static void del_domain_from_list(struct protection_domain *domain)
-{
-   unsigned long flags;
-
-   spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-   list_del(&domain->list);
-   spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
 static u16 domain_id_alloc(void)
 {
int id;
@@ -1838,8 +1813,6 @@ static void dma_ops_domain_free(struct dma_ops_domain 
*dom)
if (!dom)
return;
 
-   del_domain_from_list(&dom->domain);
-
	put_iova_domain(&dom->iovad);
 
	free_pagetable(&dom->domain);
@@ -1880,8 +1853,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
/* Initialize reserved ranges */
	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
-   add_domain_to_list(&dma_dom->domain);
-
return dma_dom;
 
 free_dma_dom:
@@ -2834,8 +2805,6 @@ static void protection_domain_free(struct 
protection_domain *domain)
if (!domain)
return;
 
-   del_domain_from_list(domain);
-
if (domain->id)
domain_id_free(domain->id);
 
@@ -2865,8 +2834,6 @@ static struct protection_domain 
*protection_domain_alloc(void)
if (protection_domain_init(domain))
goto out_err;
 
-   add_domain_to_list(domain);
-
return domain;
 
 out_err:
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 1b1378619fc9..4497c6bb9e70 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -188,12 +188,6 @@ static bool amd_iommu_pc_present __read_mostly;
 
 bool amd_iommu_force_isolation __read_mostly;
 
-/*
- * List of protection domains - used during resume
- */
-LIST_HEAD(amd_iommu_pd_list);
-spinlock_t amd_iommu_pd_lock;
-
 /*
  * Pointer to the device table which is shared by all AMD IOMMUs
  * it is indexed by the PCI device id or the HT unit id and contains
@@ -2526,8 +2520,6 @@ static int __init early_amd_iommu_init(void)
 */
__set_bit(0, amd_iommu_pd_alloc_bitmap);
 
-   spin_lock_init(&amd_iommu_pd_lock);
-
/*
 * now the data structures are allocated and basically initialized
 * start the real acpi table scan
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 87965e4d9647..85c488b8daea 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -674,12 +674,6 @@ extern struct list_head amd_iommu_list;
  */
 extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
 
-/*
- * Declarations for the global list of all protection domains
- */
-extern spinlock_t amd_iommu_pd_lock;
-extern struct list_head amd_iommu_pd_list;
-
 /*
  * Structure defining one entry in the device table
  */
-- 
2.16.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/amd: Add allocated domain to global list earlier

2019-04-26 Thread Joerg Roedel
Hi Tom,

On Mon, Apr 15, 2019 at 07:05:56PM +0100, Tom Murphy wrote:
> dma_ops_domain_free() expects domain to be in a global list.
> Arguably, could be called before protection_domain_init().

This could in theory create new races because a not fully initialized
domain becomes visible in the global list. In practice this never
happens because that global list is unused by now.

So it is better to remove this global list like the attached diff does.
I'll send out a patch separatly and put it into the iommu-tree.

Regards,

Joerg

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f467cc4b498e..3e0696cf965c 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1723,31 +1723,6 @@ static void dma_ops_free_iova(struct dma_ops_domain 
*dma_dom,
  *
  /
 
-/*
- * This function adds a protection domain to the global protection domain list
- */
-static void add_domain_to_list(struct protection_domain *domain)
-{
-   unsigned long flags;
-
-   spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-   list_add(&domain->list, &amd_iommu_pd_list);
-   spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
-/*
- * This function removes a protection domain to the global
- * protection domain list
- */
-static void del_domain_from_list(struct protection_domain *domain)
-{
-   unsigned long flags;
-
-   spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-   list_del(&domain->list);
-   spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
 static u16 domain_id_alloc(void)
 {
int id;
@@ -1838,8 +1813,6 @@ static void dma_ops_domain_free(struct dma_ops_domain 
*dom)
if (!dom)
return;
 
-   del_domain_from_list(&dom->domain);
-
	put_iova_domain(&dom->iovad);
 
	free_pagetable(&dom->domain);
@@ -1880,8 +1853,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
/* Initialize reserved ranges */
	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
-   add_domain_to_list(&dma_dom->domain);
-
return dma_dom;
 
 free_dma_dom:
@@ -2834,8 +2805,6 @@ static void protection_domain_free(struct 
protection_domain *domain)
if (!domain)
return;
 
-   del_domain_from_list(domain);
-
if (domain->id)
domain_id_free(domain->id);
 
@@ -2865,8 +2834,6 @@ static struct protection_domain 
*protection_domain_alloc(void)
if (protection_domain_init(domain))
goto out_err;
 
-   add_domain_to_list(domain);
-
return domain;
 
 out_err:
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 1b1378619fc9..4497c6bb9e70 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -188,12 +188,6 @@ static bool amd_iommu_pc_present __read_mostly;
 
 bool amd_iommu_force_isolation __read_mostly;
 
-/*
- * List of protection domains - used during resume
- */
-LIST_HEAD(amd_iommu_pd_list);
-spinlock_t amd_iommu_pd_lock;
-
 /*
  * Pointer to the device table which is shared by all AMD IOMMUs
  * it is indexed by the PCI device id or the HT unit id and contains
@@ -2526,8 +2520,6 @@ static int __init early_amd_iommu_init(void)
 */
__set_bit(0, amd_iommu_pd_alloc_bitmap);
 
-   spin_lock_init(&amd_iommu_pd_lock);
-
/*
 * now the data structures are allocated and basically initialized
 * start the real acpi table scan
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 87965e4d9647..85c488b8daea 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -674,12 +674,6 @@ extern struct list_head amd_iommu_list;
  */
 extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
 
-/*
- * Declarations for the global list of all protection domains
- */
-extern spinlock_t amd_iommu_pd_lock;
-extern struct list_head amd_iommu_pd_list;
-
 /*
  * Structure defining one entry in the device table
  */
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 06/19] drivers core: Add I/O ASID allocator

2019-04-26 Thread Christoph Hellwig
On Fri, Apr 26, 2019 at 12:47:43PM +0100, Jean-Philippe Brucker wrote:
> >> On Tue, Apr 23, 2019 at 04:31:06PM -0700, Jacob Pan wrote:
> >>> The allocator doesn't really belong in drivers/iommu because some
> >>> drivers would like to allocate PASIDs for devices that aren't
> >>> managed by an IOMMU, using the same ID space as IOMMU. It doesn't
> >>> really belong in drivers/pci either since platform device also
> >>> support PASID. Add the allocator in drivers/base.  
> >>
> >> I'd still add it to drivers/iommu, just selectable separately from the
> >> core iommu code..
> > Perhaps I misunderstood. If a driver wants to use IOASIDs w/o iommu
> > subsystem even turned on, how could selecting from the core iommu code
> > help? Could you elaborate on "selectable"?
> 
> How about doing the same as CONFIG_IOMMU_IOVA? The code is in
> drivers/iommu but can be selected by non-IOMMU_API users, independently
> of CONFIG_IOMMU_SUPPORT. It's true that this allocator will mostly be
> used by IOMMU drivers.

That is exactly what I meant!
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 06/19] drivers core: Add I/O ASID allocator

2019-04-26 Thread Jean-Philippe Brucker
On 25/04/2019 19:19, Jacob Pan wrote:
> Hi Christoph,
> 
> On Tue, 23 Apr 2019 23:19:03 -0700
> Christoph Hellwig  wrote:
> 
>> On Tue, Apr 23, 2019 at 04:31:06PM -0700, Jacob Pan wrote:
>>> The allocator doesn't really belong in drivers/iommu because some
>>> drivers would like to allocate PASIDs for devices that aren't
>>> managed by an IOMMU, using the same ID space as IOMMU. It doesn't
>>> really belong in drivers/pci either since platform device also
>>> support PASID. Add the allocator in drivers/base.  
>>
>> I'd still add it to drivers/iommu, just selectable separately from the
>> core iommu code..
> Perhaps I misunderstood. If a driver wants to use IOASIDs w/o iommu
> subsystem even turned on, how could selecting from the core iommu code
> help? Could you elaborate on "selectable"?

How about doing the same as CONFIG_IOMMU_IOVA? The code is in
drivers/iommu but can be selected by non-IOMMU_API users, independently
of CONFIG_IOMMU_SUPPORT. It's true that this allocator will mostly be
used by IOMMU drivers.

> From VT-d's perspective, PASIDs are only used with IOMMU on. Jean
> knows other use cases.

I know of one: the AMD GPU driver may use IOASID for context IDs, even
if IOMMU is disabled. As I understand it, if IOMMU is enabled they need
to use the same allocator as IOMMU since it's the same ID space. And I
think it's more convenient to use the same allocation code in the GPU
driver regardless of CONFIG_IOMMU_SUPPORT.

See the previous discussion at
https://www.spinics.net/lists/iommu/msg31200.html

Thanks,
Jean
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 08/19] ioasid: Add custom IOASID allocator

2019-04-26 Thread Auger Eric
Hi Jacob,

On 4/25/19 11:29 PM, Jacob Pan wrote:
> Hi Eric,
> 
> Thanks for the review.
> 
> On Thu, 25 Apr 2019 12:03:42 +0200
> Auger Eric  wrote:
> 
>> Hi Jacob,
>>
>> On 4/24/19 1:31 AM, Jacob Pan wrote:
>>> Sometimes, IOASID allocation must be handled by platform specific
>>> code. The use cases are guest vIOMMU and pvIOMMU where IOASIDs need
>>> to be allocated by the host via enlightened or paravirt interfaces.
>>>
>>> This patch adds an extension to the IOASID allocator APIs such that
>>> platform drivers can register a custom allocator, possibly at boot
>>> time, to take over the allocation. Xarray is still used for tracking
>>> and searching purposes internal to the IOASID code. Private data of
>>> an IOASID can also be set after the allocation.
>>>
>>> There can be multiple custom allocators registered but only one is
>>> used at a time. In case of hot removal of devices that provides the
>>> allocator, all IOASIDs must be freed prior to unregistering the
>>> allocator. Default XArray based allocator cannot be mixed with
>>> custom allocators, i.e. custom allocators will not be used if there
>>> are outstanding IOASIDs allocated by the default XA allocator.  
>>
>> What's the exact use case behind allowing several custom IOASID
>> allocators to be registered?
> It is mainly for supporting multiple PCI segments thus multiple
> vIOMMUs. Even though, all allocators will end up calling the host to
> allocate PASIDs.

Yes that was my understanding actually.

Another question is how do you handle the reserved RID_PASID requirement?

 QEMU does not support multiple PCI segments/domains
> afaik but others might.
>>>
>>> Signed-off-by: Jacob Pan 
>>> ---
>>>  drivers/base/ioasid.c  | 182
>>> ++---
>>> include/linux/ioasid.h |  15 +++- 2 files changed, 187
>>> insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/base/ioasid.c b/drivers/base/ioasid.c
>>> index c4012aa..5cb36a4 100644
>>> --- a/drivers/base/ioasid.c
>>> +++ b/drivers/base/ioasid.c
>>> @@ -17,6 +17,120 @@ struct ioasid_data {
>>>  };
>>>  
>>>  static DEFINE_XARRAY_ALLOC(ioasid_xa);
>>> +static DEFINE_MUTEX(ioasid_allocator_lock);
>>> +static struct ioasid_allocator *ioasid_allocator;  
>> A more explicit name may be chosen. If I understand correctly that's
>> the active_custom_allocator
> Yes, more clear this way.
> 
>>> +
>>> +static LIST_HEAD(custom_allocators);
>>> +/*
>>> + * A flag to track if ioasid default allocator already been used,
>>> this will  
>> is already in use?
>>> + * prevent custom allocator from being used. The reason is that
>>> custom allocator  
>> s/The reason is that custom allocator/The reason is that custom
>> allocators
>>> + * must have unadulterated space to track private data with
>>> xarray, there cannot
>>> + * be a mix been default and custom allocated IOASIDs.
>>> + */
>>> +static int default_allocator_used;
>>> +
>>> +/**
>>> + * ioasid_register_allocator - register a custom allocator
>>> + * @allocator: the custom allocator to be registered
>>> + *
>>> + * Custom allocator take precedence over the default xarray based
>>> allocator.
>>> + * Private data associated with the ASID are managed by ASID
>>> common code
>>> + * similar to data stored in xa.
>>> + *
>>> + * There can be multiple allocators registered but only one is
>>> active. In case
>>> + * of runtime removal of an custom allocator, the next one is
>>> activated based
>>> + * on the registration ordering.  
>> This last sentence may be moved to the unregister() kerneldoc
>>> + */
>>> +int ioasid_register_allocator(struct ioasid_allocator *allocator)
>>> +{
>>> +   struct ioasid_allocator *pallocator;
>>> +   int ret = 0;
>>> +
>>> +   if (!allocator)
>>> +   return -EINVAL;
>>> +
>>> +   mutex_lock(&ioasid_allocator_lock);
>>> +   if (list_empty(&custom_allocators))
>>> +   ioasid_allocator = allocator;  
>> The fact the first registered custom allocator gets automatically
>> active was not obvious to me and may deserve a comment.
> Will do. I will add:
> "No particular preference since all custom allocators end up calling
> the host to allocate IOASIDs. We activate the first allocator and keep
> the later ones in a list in case the first one gets removed due to
> hotplug."
> 
>>> +   else {
>>> +   /* Check if the allocator is already registered */
>>> +   list_for_each_entry(pallocator,
>>> &custom_allocators, list) {
>>> +   if (pallocator == allocator) {
>>> +   pr_err("IOASID allocator already
>>> exist\n");  
>> s/exist/registered?
> make sense.
>>> +   ret = -EEXIST;
>>> +   goto out_unlock;
>>> +   }
>>> +   }
>>> +   }
>>> +   list_add_tail(&allocator->list, &custom_allocators);
>>> +
>>> +out_unlock:
>>> +   mutex_unlock(&ioasid_allocator_lock);
>>> +   return ret;
>>> +}
>>> +EXPORT_SYMBOL_GPL(ioasid_register_allocator);
>>> +
>>> +/**
>>> + * ioasid_unregister_allocator - Remove a 

Re: [PATCH v2 09/19] iommu/vt-d: Enlightened PASID allocation

2019-04-26 Thread Auger Eric



On 4/26/19 1:40 AM, Jacob Pan wrote:
> On Wed, 24 Apr 2019 19:27:52 +0200
> Auger Eric  wrote:
> 
>> Hi Jacob,
>>
>> On 4/24/19 1:31 AM, Jacob Pan wrote:
>>> From: Lu Baolu 
>>>
>>> If Intel IOMMU runs in caching mode, a.k.a. virtual IOMMU, the
>>> IOMMU driver should rely on the emulation software to allocate
>>> and free PASID IDs.  
>> Do we make the decision depending on the CM or depending on the
>> VCCAP_REG?
>>
>> VCCAP_REG description says:
>>
>> If Set, software must use Virtual Command Register interface to
>> allocate and free PASIDs.
>>
>>  The Intel vt-d spec revision 3.0 defines a
>>> register set to support this. This includes a capability register,
>>> a virtual command register and a virtual response register. Refer
>>> to section 10.4.42, 10.4.43, 10.4.44 for more information.
>>>
>>> This patch adds the enlightened PASID allocation/free interfaces  
>> For mu curiosity why is it called "enlightened"?
> I don't know the origin but "enlightened" means guest is tipped with
> information that it is not running on real HW.
> 
>>> via the virtual command register.
>>>
>>> Cc: Ashok Raj 
>>> Cc: Jacob Pan 
>>> Cc: Kevin Tian 
>>> Signed-off-by: Liu Yi L 
>>> Signed-off-by: Lu Baolu 
>>> ---
>>>  drivers/iommu/intel-pasid.c | 70
>>> +
>>> drivers/iommu/intel-pasid.h | 13 -
>>> include/linux/intel-iommu.h |  2 ++ 3 files changed, 84
>>> insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/iommu/intel-pasid.c
>>> b/drivers/iommu/intel-pasid.c index 03b12d2..5b1d3be 100644
>>> --- a/drivers/iommu/intel-pasid.c
>>> +++ b/drivers/iommu/intel-pasid.c
>>> @@ -63,6 +63,76 @@ void *intel_pasid_lookup_id(int pasid)
>>> return p;
>>>  }
>>>  
>>> +int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int
>>> *pasid) +{
>>> +   u64 res;
>>> +   u64 cap;
>>> +   u8 err_code;
>>> +   unsigned long flags;
>>> +   int ret = 0;
>>> +
>>> +   if (!ecap_vcs(iommu->ecap)) {
>>> +   pr_warn("IOMMU: %s: Hardware doesn't support
>>> virtual command\n",
>>> +   iommu->name);  
>> nit: other pr_* messages don't have the "IOMMU: %s:" prefix.
> Are you suggesting just use the prefix defined in pr_fmt? I guess i can
> remove "IOMMU" if Allen is OK with it :).
I aimed to signal the trace formats are not homogeneous in this .c file
but that's not a big deal. In the future you may use the "IOMMU: %s"
prefix for all pr_* traces.

> 
>>> +   return -ENODEV;
>>> +   }
>>> +
>>> +   cap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
>>> +   if (!(cap & DMA_VCS_PAS)) {
>>> +   pr_warn("IOMMU: %s: Emulation software doesn't
>>> support PASID allocation\n",
>>> +   iommu->name);
>>> +   return -ENODEV;
>>> +   }
>>> +
>>> +   raw_spin_lock_irqsave(&iommu->register_lock, flags);
>>> +   dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
>>> +   IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
>>> + !(res & VCMD_VRSP_IP), res);
>>> +   raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
>>> +
>>> +   err_code = VCMD_VRSP_EC(res);
>>> +   switch (err_code) {
>>> +   case VCMD_VRSP_EC_SUCCESS:
>>> +   *pasid = VCMD_VRSP_RESULE(res);
>>> +   break;
>>> +   case VCMD_VRSP_EC_UNAVAIL:
>>> +   pr_info("IOMMU: %s: No PASID available\n",
>>> iommu->name);
>>> +   ret = -ENOMEM;
>>> +   break;
>>> +   default:
>>> +   ret = -ENODEV;
>>> +   pr_warn("IOMMU: %s: Unkonwn error code %d\n",  
>> unknown
>>> +   iommu->name, err_code);
>>> +   }
>>> +
>>> +   return ret;
>>> +}
>>> +
>>> +void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid)
>>> +{
>>> +   u64 res;
>>> +   u8 err_code;
>>> +   unsigned long flags;  
>> Shall we check as well the cap is set?
> yes, good point.
> 
>>> +
>>> +   raw_spin_lock_irqsave(&iommu->register_lock, flags);
>>> +   dmar_writeq(iommu->reg + DMAR_VCMD_REG, (pasid << 8) |
>>> VCMD_CMD_FREE);
>>> +   IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
>>> + !(res & VCMD_VRSP_IP), res);
>>> +   raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
>>> +
>>> +   err_code = VCMD_VRSP_EC(res);
>>> +   switch (err_code) {
>>> +   case VCMD_VRSP_EC_SUCCESS:
>>> +   break;
>>> +   case VCMD_VRSP_EC_INVAL:
>>> +   pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
>>> +   break;
>>> +   default:
>>> +   pr_warn("IOMMU: %s: Unkonwn error code %d\n",  
>> unknown
>>> +   iommu->name, err_code);
>>> +   }
>>> +}
>>> +
>>>  /*
>>>   * Per device pasid table management:
>>>   */
>>> diff --git a/drivers/iommu/intel-pasid.h
>>> b/drivers/iommu/intel-pasid.h index 23537b3..0999dfe 100644
>>> --- a/drivers/iommu/intel-pasid.h
>>> +++ b/drivers/iommu/intel-pasid.h
>>> @@ -19,6 +19,16 @@
>>>  #define PASID_PDE_SHIFT6
>>>  #define MAX_NR_PASID_BITS  20
>>>  
>>> +/* Virtual command interface for enlightened pasid management. */
>>>