[PATCH 2/2] iommu/vt-d: tylersburg isoch identity map check is done too late.

2017-01-30 Thread Ashok Raj
The check to set identity map for tylersburg is done too late. It needs
to be done before the check for identity_map domain is done.

To: Joerg Roedel <j...@8bytes.org>
To: David Woodhouse <dw...@infradead.org>
Cc: iommu@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org
Cc: sta...@vger.kernel.org
Cc: Ashok Raj <ashok@intel.com>

Fixes: 86080ccc22 ("iommu/vt-d: Allocate si_domain in init_dmars()")
Signed-off-by: Ashok Raj <ashok@intel.com>
Reported-by: Yunhong Jiang <yunhong.ji...@intel.com>
---
 drivers/iommu/intel-iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 8a18525..23eead3 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3325,13 +3325,14 @@ static int __init init_dmars(void)
iommu_identity_mapping |= IDENTMAP_GFX;
 #endif
 
+   check_tylersburg_isoch();
+
if (iommu_identity_mapping) {
ret = si_domain_init(hw_pass_through);
if (ret)
goto free_iommu;
}
 
-   check_tylersburg_isoch();
 
/*
 * If we copied translations from a previous kernel in the kdump
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] iommu/vt-d: Fix some macros that are incorrectly specified in intel-iommu

2017-01-30 Thread Ashok Raj
From: CQ Tang <cq.t...@intel.com>

Some of the macros are incorrect with wrong bit-shifts resulting in picking
the incorrect invalidation granularity. Incorrect Source-ID in extended
devtlb invalidation caused device side errors.

To: Joerg Roedel <j...@8bytes.org>
To: David Woodhouse <dw...@infradead.org>
Cc: iommu@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org
Cc: sta...@vger.kernel.org
Cc: CQ Tang <cq.t...@intel.com>
Cc: Ashok Raj <ashok@intel.com>

Fixes: 2f26e0a9 ("iommu/vt-d: Add basic SVM PASID support")
Signed-off-by: CQ Tang <cq.t...@intel.com>
Signed-off-by: Ashok Raj <ashok@intel.com>
Tested-by: CQ Tang <cq.t...@intel.com>
---
 include/linux/intel-iommu.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d49e26c..23e129e 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -153,8 +153,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
 #define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
 #define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
-#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
-#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 3)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 3)
 #define DMA_TLB_READ_DRAIN (((u64)1) << 49)
 #define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
 #define DMA_TLB_DID(id)(((u64)((id) & 0xffff)) << 32)
@@ -164,9 +164,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 
 /* INVALID_DESC */
 #define DMA_CCMD_INVL_GRANU_OFFSET  61
-#define DMA_ID_TLB_GLOBAL_FLUSH(((u64)1) << 3)
-#define DMA_ID_TLB_DSI_FLUSH   (((u64)2) << 3)
-#define DMA_ID_TLB_PSI_FLUSH   (((u64)3) << 3)
+#define DMA_ID_TLB_GLOBAL_FLUSH(((u64)1) << 4)
+#define DMA_ID_TLB_DSI_FLUSH   (((u64)2) << 4)
+#define DMA_ID_TLB_PSI_FLUSH   (((u64)3) << 4)
 #define DMA_ID_TLB_READ_DRAIN  (((u64)1) << 7)
 #define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
 #define DMA_ID_TLB_DID(id) (((u64)((id & 0x) << 16)))
@@ -316,8 +316,8 @@ enum {
 #define QI_DEV_EIOTLB_SIZE (((u64)1) << 11)
 #define QI_DEV_EIOTLB_GLOB(g)  ((u64)g)
 #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32)
-#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
-#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16)
+#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16)
+#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4)
 #define QI_DEV_EIOTLB_MAX_INVS 32
 
 #define QI_PGRP_IDX(idx)   (((u64)(idx)) << 55)
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/2] iommu/vt-d: tylersburg isoch identity map check is done too late.

2017-01-27 Thread Ashok Raj
The check to set identity map for tylersburg is done too late. It needs
to be done before the check for identity_map domain is done.

To: Joerg Roedel <j...@8bytes.org>
To: David Woodhouse <dw...@infradead.org>
Cc: iommu@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org
Cc: sta...@vger.kernel.org
Cc: Ashok Raj <ashok@intel.com>

Signed-off-by: Ashok Raj <ashok@intel.com>
Reported-by: Yunhong Jiang <yunhong.ji...@intel.com>
---
 drivers/iommu/intel-iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 8a18525..23eead3 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3325,13 +3325,14 @@ static int __init init_dmars(void)
iommu_identity_mapping |= IDENTMAP_GFX;
 #endif
 
+   check_tylersburg_isoch();
+
if (iommu_identity_mapping) {
ret = si_domain_init(hw_pass_through);
if (ret)
goto free_iommu;
}
 
-   check_tylersburg_isoch();
 
/*
 * If we copied translations from a previous kernel in the kdump
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] iommu/vt-d: Fix some macros that are incorrectly specified in intel-iommu

2017-01-27 Thread Ashok Raj
From: CQ Tang <cq.t...@intel.com>

Some of the macros are incorrect with wrong bit-shifts resulting in picking
the incorrect invalidation granularity. Incorrect Source-ID in extended
devtlb invalidation caused device side errors.

To: Joerg Roedel <j...@8bytes.org>
To: David Woodhouse <dw...@infradead.org>
Cc: iommu@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org
Cc: sta...@vger.kernel.org
Cc: CQ Tang <cq.t...@intel.com>
Cc: Ashok Raj <ashok@intel.com>

Signed-off-by: CQ Tang <cq.t...@intel.com>
Signed-off-by: Ashok Raj <ashok@intel.com>
Tested-by: CQ Tang <cq.t...@intel.com>
---
 include/linux/intel-iommu.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d49e26c..23e129e 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -153,8 +153,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
 #define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
 #define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
-#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
-#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 3)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 3)
 #define DMA_TLB_READ_DRAIN (((u64)1) << 49)
 #define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
 #define DMA_TLB_DID(id)(((u64)((id) & 0xffff)) << 32)
@@ -164,9 +164,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 
 /* INVALID_DESC */
 #define DMA_CCMD_INVL_GRANU_OFFSET  61
-#define DMA_ID_TLB_GLOBAL_FLUSH(((u64)1) << 3)
-#define DMA_ID_TLB_DSI_FLUSH   (((u64)2) << 3)
-#define DMA_ID_TLB_PSI_FLUSH   (((u64)3) << 3)
+#define DMA_ID_TLB_GLOBAL_FLUSH(((u64)1) << 4)
+#define DMA_ID_TLB_DSI_FLUSH   (((u64)2) << 4)
+#define DMA_ID_TLB_PSI_FLUSH   (((u64)3) << 4)
 #define DMA_ID_TLB_READ_DRAIN  (((u64)1) << 7)
 #define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
 #define DMA_ID_TLB_DID(id) (((u64)((id & 0x) << 16)))
@@ -316,8 +316,8 @@ enum {
 #define QI_DEV_EIOTLB_SIZE (((u64)1) << 11)
 #define QI_DEV_EIOTLB_GLOB(g)  ((u64)g)
 #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32)
-#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
-#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16)
+#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16)
+#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4)
 #define QI_DEV_EIOTLB_MAX_INVS 32
 
 #define QI_PGRP_IDX(idx)   (((u64)(idx)) << 55)
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/2] iommu/vt-d: tylersburg isoch identity map check is done too late.

2017-01-26 Thread Ashok Raj
The check to set identity map for tylersburg is done too late. It needs
to be done before the check for identity_map domain is done.

To: Joerg Roedel <j...@8bytes.org>
To: David Woodhouse <dw...@infradead.org>
Cc: iommu@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org
Cc: sta...@vger.kernel.org
Cc: Ashok Raj <ashok@intel.com>

Signed-off-by: Ashok Raj <ashok@intel.com>
Reported-by: Yunhong Jiang <yunhong.ji...@intel.com>
---
 drivers/iommu/intel-iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 8a18525..23eead3 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3325,13 +3325,14 @@ static int __init init_dmars(void)
iommu_identity_mapping |= IDENTMAP_GFX;
 #endif
 
+   check_tylersburg_isoch();
+
if (iommu_identity_mapping) {
ret = si_domain_init(hw_pass_through);
if (ret)
goto free_iommu;
}
 
-   check_tylersburg_isoch();
 
/*
 * If we copied translations from a previous kernel in the kdump
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] iommu/vt-d: Fix some macros that are incorrectly specified in intel-iommu

2017-01-26 Thread Ashok Raj
From: CQ Tang <cq.t...@intel.com>

Some of the macros are incorrect with wrong bit-shifts resulting in picking
the incorrect invalidation granularity. Incorrect Source-ID in extended
devtlb invalidation caused device side errors.

To: Joerg Roedel <j...@8bytes.org>
To: David Woodhouse <dw...@infradead.org>
Cc: iommu@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org
Cc: sta...@vger.kernel.org
Cc: CQ Tang <cq.t...@intel.com>
Cc: Ashok Raj <ashok@intel.com>

Signed-off-by: CQ Tang <cq.t...@intel.com>
Signed-off-by: Ashok Raj <ashok@intel.com>
Tested-by: CQ Tang <cq.t...@intel.com>
---
 include/linux/intel-iommu.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d49e26c..23e129e 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -153,8 +153,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
 #define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
 #define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
-#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
-#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 3)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 3)
 #define DMA_TLB_READ_DRAIN (((u64)1) << 49)
 #define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
 #define DMA_TLB_DID(id)(((u64)((id) & 0xffff)) << 32)
@@ -164,9 +164,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 
 /* INVALID_DESC */
 #define DMA_CCMD_INVL_GRANU_OFFSET  61
-#define DMA_ID_TLB_GLOBAL_FLUSH(((u64)1) << 3)
-#define DMA_ID_TLB_DSI_FLUSH   (((u64)2) << 3)
-#define DMA_ID_TLB_PSI_FLUSH   (((u64)3) << 3)
+#define DMA_ID_TLB_GLOBAL_FLUSH(((u64)1) << 4)
+#define DMA_ID_TLB_DSI_FLUSH   (((u64)2) << 4)
+#define DMA_ID_TLB_PSI_FLUSH   (((u64)3) << 4)
 #define DMA_ID_TLB_READ_DRAIN  (((u64)1) << 7)
 #define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
 #define DMA_ID_TLB_DID(id) (((u64)((id & 0x) << 16)))
@@ -316,8 +316,8 @@ enum {
 #define QI_DEV_EIOTLB_SIZE (((u64)1) << 11)
 #define QI_DEV_EIOTLB_GLOB(g)  ((u64)g)
 #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32)
-#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
-#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16)
+#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16)
+#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4)
 #define QI_DEV_EIOTLB_MAX_INVS 32
 
 #define QI_PGRP_IDX(idx)   (((u64)(idx)) << 55)
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/vt-d: Fix IOMMU lookup for VF's

2016-10-21 Thread Ashok Raj
IOMMU driver must pick the same IOMMU as that of a Physical Function (PF) for
any of its Virtual Functions (VF). It is not practical to list all the VF's
in the DMAR scope, as this list could be quite large. Linux also ignores any
VF's listed in DMAR. See dmar_pci_bus_notifier() for virtfn handling.

Since the driver is looking for the bdf of the VF, it will not find one from
searching the DRHD listed in BIOS.  As a result, the IOMMU driver associates
the VF's under the INCLUDE_ALL iommu incorrectly.

This patch looks up the IOMMU of the PF when handling VF's.

Cc: David Woodhouse <dw...@infradead.org>
Cc: Joerg Roedel <j...@8bytes.org>
Cc: Ashok Raj <ashok@intel.com>
Cc: Sainath Grandhi <sainath.gran...@intel.com>
Cc: iommu@lists.linux-foundation.org
Cc: sta...@vger.kernel.org

Signed-off-by: Sainath Grandhi <sainath.gran...@intel.com>
Signed-off-by: Ashok Raj <ashok@intel.com>
---
 drivers/iommu/intel-iommu.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a4407ea..c2e4b5c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -885,6 +885,7 @@ static struct intel_iommu *device_to_iommu(struct device 
*dev, u8 *bus, u8 *devf
struct intel_iommu *iommu;
struct device *tmp;
struct pci_dev *ptmp, *pdev = NULL;
+   struct pci_dev *pf_pdev = NULL;
u16 segment = 0;
int i;
 
@@ -893,6 +894,11 @@ static struct intel_iommu *device_to_iommu(struct device 
*dev, u8 *bus, u8 *devf
 
if (dev_is_pci(dev)) {
pdev = to_pci_dev(dev);
+   /*
+* Always lookup the PF's IOMMU when handling VF's
+*/
+   pf_pdev = pci_physfn(pdev);
+   dev = &pf_pdev->dev;
segment = pci_domain_nr(pdev->bus);
} else if (has_acpi_companion(dev))
dev = &ACPI_COMPANION(dev)->dev;
@@ -905,6 +911,9 @@ static struct intel_iommu *device_to_iommu(struct device 
*dev, u8 *bus, u8 *devf
for_each_active_dev_scope(drhd->devices,
  drhd->devices_cnt, i, tmp) {
if (tmp == dev) {
+   if (pdev->is_virtfn)
+   goto got_pdev;
+
*bus = drhd->devices[i].bus;
*devfn = drhd->devices[i].devfn;
goto out;
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/4] Patches to support ring0 SVM and devtlb

2017-08-08 Thread Ashok Raj
Hi

Sorry for resending.. iommu list email was mistyped :-(

The first 2 patches in the series fix some simple bugs in the Intel VT-d driver.
The 3rd patch adds the kernel MMU notifier (kmem notify) support required for ring0 SVM.
The 4th patch uses those hooks to perform device TLB invalidations.

Ashok Raj (3):
  iommu/vt-d: IOMMU Page Request needs to check if address is canonical.
  iommu/vt-d: Avoid calling virt_to_phys() on null pointer
  iommu/vt-d: Hooks to invalidate iotlb/devtlb when using supervisor
PASID's.

Huang Ying (1):
  mm: Add kernel MMU notifier to manage remote TLB

 arch/x86/include/asm/tlbflush.h |  1 +
 arch/x86/mm/tlb.c   |  1 +
 drivers/iommu/intel-iommu.c |  3 ++-
 drivers/iommu/intel-svm.c   | 43 +++--
 include/linux/intel-iommu.h |  5 -
 include/linux/mmu_notifier.h| 33 +++
 mm/mmu_notifier.c   | 25 
 7 files changed, 107 insertions(+), 4 deletions(-)

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/4] iommu/vt-d: IOMMU Page Request needs to check if address is canonical.

2017-08-08 Thread Ashok Raj
Page Requests from devices that support device-TLB ask for translations so
that they can be pre-cached in the device, avoiding the overhead of IOMMU
lookups.

The IOMMU needs to check that the address is canonical before performing
page-fault processing.

To: Joerg Roedel <j...@8bytes.org>
To: linux-ker...@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: David Woodhouse <dw...@infradead.org>
Cc: Jacob Pan <jacob.jun....@intel.com>
Cc: Ashok Raj <ashok@intel.com>

Signed-off-by: Ashok Raj <ashok@intel.com>
Reported-by: Sudeep Dutt <sudeep.d...@intel.com>
---
 drivers/iommu/intel-svm.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index f167c0d..0c9f077 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static irqreturn_t prq_event_thread(int irq, void *d);
 
@@ -555,6 +556,14 @@ static bool access_error(struct vm_area_struct *vma, 
struct page_req_dsc *req)
return (requested & ~vma->vm_flags) != 0;
 }
 
+static bool is_canonical_address(u64 addr)
+{
+   int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
+   long saddr = (long) addr;
+
+   return (((saddr << shift) >> shift) == saddr);
+}
+
 static irqreturn_t prq_event_thread(int irq, void *d)
 {
struct intel_iommu *iommu = d;
@@ -612,6 +621,11 @@ static irqreturn_t prq_event_thread(int irq, void *d)
/* If the mm is already defunct, don't handle faults. */
if (!mmget_not_zero(svm->mm))
goto bad_req;
+
+   /* If address is not canonical, return invalid response */
+   if (!is_canonical_address(address))
+   goto bad_req;
+
down_read(&svm->mm->mmap_sem);
vma = find_extend_vma(svm->mm, address);
if (!vma || address < vma->vm_start)
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/4] iommu/vt-d: Avoid calling virt_to_phys() on null pointer

2017-08-08 Thread Ashok Raj
Newer kernels with debug checks enabled hit a panic() from the __phys_addr()
checks. Avoid calling virt_to_phys() when the pasid_state_table pointer is NULL.

To: Joerg Roedel <j...@8bytes.org>
To: linux-ker...@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: David Woodhouse <dw...@infradead.org>
Cc: Jacob Pan <jacob.jun....@intel.com>
Cc: Ashok Raj <ashok@intel.com>

Signed-off-by: Ashok Raj <ashok@intel.com>
---
 drivers/iommu/intel-iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 687f18f..5c6118d 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5341,7 +5341,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, 
struct intel_svm_dev *sd
sdev->sid = PCI_DEVID(info->bus, info->devfn);
 
if (!(ctx_lo & CONTEXT_PASIDE)) {
-   context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
+   if (iommu->pasid_state_table)
+   context[1].hi = 
(u64)virt_to_phys(iommu->pasid_state_table);
context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
intel_iommu_get_pts(iommu);
 
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/4] mm: Add kernel MMU notifier to manage remote TLB

2017-08-08 Thread Ashok Raj
From: Huang Ying <ying.hu...@intel.com>

Shared Virtual Memory (SVM) devices have TLBs that cache entries from
the CPU's page tables.  We need SVM device drivers to flush them at
the same time that we flush the CPU TLBs.  We can use the existing MMU
notifiers for userspace updates, but we lack a mechanism to get
notified when kernel page tables are updated.

To implement the MMU notification mechanism for the kernel address
space, a kernel MMU notifier chain is defined, and will be called when
the CPU TLB is flushed for the kernel address space.  The IOMMU SVM
driver can register on the notifier chain to flush the device TLBs
when necessary.

To: linux-ker...@vger.kernel.org
To: Joerg Roedel <j...@8bytes.org>
Cc: Ashok Raj <ashok@intel.com>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: CQ Tang <cq.t...@intel.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Andy Lutomirski <l...@kernel.org>
Cc: Rik van Riel <r...@redhat.com>
Cc: Kees Cook <keesc...@chromium.org>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: "Kirill A. Shutemov" <kirill.shute...@linux.intel.com>
Cc: Michal Hocko <mho...@suse.com>
Cc: "Paul E. McKenney" <paul...@linux.vnet.ibm.com>
Cc: Vegard Nossum <vegard.nos...@oracle.com>
Cc: x...@kernel.org
Cc: linux...@kvack.org
Cc: iommu@lists.linux-foundation.org
Cc: David Woodhouse <dw...@infradead.org>
CC: Jean-Phillipe Brucker <jean-philippe.bruc...@arm.com>
Signed-off-by: "Huang, Ying" <ying.hu...@intel.com>
---
 arch/x86/include/asm/tlbflush.h |  1 +
 arch/x86/mm/tlb.c   |  1 +
 include/linux/mmu_notifier.h| 33 +
 mm/mmu_notifier.c   | 25 +
 4 files changed, 60 insertions(+)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50ea348..f5fd0b8 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -3,6 +3,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 #include 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 014d07a..6dea8e9 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -314,6 +314,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned 
long end)
info.end = end;
on_each_cpu(do_kernel_range_flush, &info, 1);
}
+   kernel_mmu_notifier_invalidate_range(start, end);
 }
 
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index c91b3bc..4a96089 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -418,6 +418,25 @@ extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
   void (*func)(struct rcu_head *rcu));
 extern void mmu_notifier_synchronize(void);
 
+struct kernel_mmu_address_range {
+   unsigned long start;
+   unsigned long end;
+};
+
+/*
+ * Before the virtual address range managed by kernel (vmalloc/kmap)
+ * is reused, that is, remapped to the new physical addresses, the
+ * kernel MMU notifier will be called with KERNEL_MMU_INVALIDATE_RANGE
+ * and struct kernel_mmu_address_range as parameters.  This is used to
+ * manage the remote TLB.
+ */
+#define KERNEL_MMU_INVALIDATE_RANGE1
+extern int kernel_mmu_notifier_register(struct notifier_block *nb);
+extern int kernel_mmu_notifier_unregister(struct notifier_block *nb);
+
+extern int kernel_mmu_notifier_invalidate_range(unsigned long start,
+   unsigned long end);
+
 #else /* CONFIG_MMU_NOTIFIER */
 
 static inline void mmu_notifier_release(struct mm_struct *mm)
@@ -479,6 +498,20 @@ static inline void mmu_notifier_mm_destroy(struct 
mm_struct *mm)
 #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 #define set_pte_at_notify set_pte_at
 
+static inline int kernel_mmu_notifier_register(struct notifier_block *nb)
+{
+   return 0;
+}
+
+static inline int kernel_mmu_notifier_unregister(struct notifier_block *nb)
+{
+   return 0;
+}
+
+static inline void kernel_mmu_notifier_invalidate_range(unsigned long start,
+   unsigned long end)
+{
+}
 #endif /* CONFIG_MMU_NOTIFIER */
 
 #endif /* _LINUX_MMU_NOTIFIER_H */
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 54ca545..a919038 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -400,3 +400,28 @@ void mmu_notifier_unregister_no_release(struct 
mmu_notifier *mn,
mmdrop(mm);
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
+
+static ATOMIC_NOTIFIER_HEAD(kernel_mmu_notifier_list);
+
+int kernel_mmu_notifier_register(struct notifier_block *nb)
+{
+   return atomic_notifier_chain_register(&kernel_mmu_notifier_list, nb);
+}
+
+int

[PATCH 4/4] iommu/vt-d: Hooks to invalidate iotlb/devtlb when using supervisor PASID's.

2017-08-08 Thread Ashok Raj
When a kernel client uses intel_svm_bind_mm() and requests a supervisor
PASID, the IOMMU needs to track changes to these addresses. Otherwise the
device TLB will be stale compared to what's on the CPU for kernel mappings.
This is similar to what's done for user space registrations via the
mmu_notifier_register() API.

To: linux-ker...@vger.kernel.org
To: Joerg Roedel <j...@8bytes.org>
Cc: Ashok Raj <ashok@intel.com>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: Huang Ying <ying.hu...@intel.com>
Cc: CQ Tang <cq.t...@intel.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Andy Lutomirski <l...@kernel.org>
Cc: Rik van Riel <r...@redhat.com>
Cc: Kees Cook <keesc...@chromium.org>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Michal Hocko <mho...@suse.com>
Cc: "Paul E. McKenney" <paul...@linux.vnet.ibm.com>
Cc: Vegard Nossum <vegard.nos...@oracle.com>
Cc: x...@kernel.org
Cc: linux...@kvack.org
Cc: iommu@lists.linux-foundation.org
Cc: David Woodhouse <dw...@infradead.org>
CC: Jean-Phillipe Brucker <jean-philippe.bruc...@arm.com>

Signed-off-by: Ashok Raj <ashok@intel.com>
---
 drivers/iommu/intel-svm.c   | 29 +++--
 include/linux/intel-iommu.h |  5 -
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 0c9f077..1758814 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -292,6 +292,26 @@ static const struct mmu_notifier_ops intel_mmuops = {
 
 static DEFINE_MUTEX(pasid_mutex);
 
+static int intel_init_mm_inval_range(struct notifier_block *nb,
+   unsigned long action, void *data)
+{
+   struct kernel_mmu_address_range *range;
+   struct intel_svm *svm = container_of(nb, struct intel_svm, init_mm_nb);
+   unsigned long start, end;
+   struct intel_iommu *iommu;
+
+   if (action == KERNEL_MMU_INVALIDATE_RANGE) {
+   range = data;
+   start = range->start;
+   end = range->end;
+   iommu = svm->iommu;
+
+   intel_flush_svm_range(svm, start,
+   (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
+   }
+   return 0;
+}
+
 int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct 
svm_dev_ops *ops)
 {
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
@@ -391,12 +411,12 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
flags, struct svm_dev_
goto out;
}
svm->pasid = ret;
-   svm->notifier.ops = &intel_mmuops;
svm->mm = mm;
svm->flags = flags;
INIT_LIST_HEAD_RCU(>devs);
ret = -ENOMEM;
if (mm) {
+   svm->notifier.ops = &intel_mmuops;
ret = mmu_notifier_register(&svm->notifier, mm);
if (ret) {
idr_remove(>iommu->pasid_idr, svm->pasid);
@@ -405,8 +425,11 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
flags, struct svm_dev_
goto out;
}
iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) 
| 1;
-   } else
+   } else {
+   svm->init_mm_nb.notifier_call = 
intel_init_mm_inval_range;
+   kernel_mmu_notifier_register(&svm->init_mm_nb);
iommu->pasid_table[svm->pasid].val = 
(u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
+   }
wmb();
/* In caching mode, we still have to flush with PASID 0 when
 * a PASID table entry becomes present. Not entirely clear
@@ -471,6 +494,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
idr_remove(>iommu->pasid_idr, 
svm->pasid);
if (svm->mm)

mmu_notifier_unregister(>notifier, svm->mm);
+   else
+   
kernel_mmu_notifier_unregister(>init_mm_nb);
 
/* We mandate that no page faults may 
be outstanding
 * for the PASID when 
intel_svm_unbind_mm() is called.
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 485a5b4..d6019b4 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -477,7 +477,10 @@ struct intel_svm_dev {
 };
 
 struct intel_svm {
-   struct mmu_notifier notifier;
+   union {
+   struct mmu_notifier notifier

[PATCH 2/2] iommu/vt-d: Helper function to query if a pasid has any active users

2017-05-10 Thread Ashok Raj
From: CQ Tang <cq.t...@intel.com>

A driver would need to know if there are any active references to a
a PASID before cleaning up its resources. This function helps check
if there are any active users of a PASID before it can perform any
recovery on that device.

To: Joerg Roedel <j...@8bytes.org>
To: linux-ker...@vger.kernel.org
To: David Woodhouse <d...@infradead.org>
Cc: Jean-Phillipe Brucker <jean-philippe.bruc...@arm.com>
Cc: iommu@lists.linux-foundation.org

Signed-off-by: CQ Tang <cq.t...@intel.com>
Signed-off-by: Ashok Raj <ashok@intel.com>
---
 drivers/iommu/intel-svm.c | 30 ++
 include/linux/intel-svm.h | 20 
 2 files changed, 50 insertions(+)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 23c4276..f167c0d 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -489,6 +489,36 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 }
 EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
 
+int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+   struct intel_iommu *iommu;
+   struct intel_svm *svm;
+   int ret = -EINVAL;
+
+   mutex_lock(&pasid_mutex);
+   iommu = intel_svm_device_to_iommu(dev);
+   if (!iommu || !iommu->pasid_table)
+   goto out;
+
+   svm = idr_find(&iommu->pasid_idr, pasid);
+   if (!svm)
+   goto out;
+
+   /* init_mm is used in this case */
+   if (!svm->mm)
+   ret = 1;
+   else if (atomic_read(&svm->mm->mm_users) > 0)
+   ret = 1;
+   else
+   ret = 0;
+
+ out:
+   mutex_unlock(&pasid_mutex);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
+
 /* Page request queue descriptor */
 struct page_req_dsc {
u64 srr:1;
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 3c25794..99bc5b3 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -102,6 +102,21 @@ extern int intel_svm_bind_mm(struct device *dev, int 
*pasid, int flags,
  */
 extern int intel_svm_unbind_mm(struct device *dev, int pasid);
 
+/**
+ * intel_svm_is_pasid_valid() - check if pasid is valid
+ * @dev:   Device for which PASID was allocated
+ * @pasid: PASID value to be checked
+ *
+ * This function checks if the specified pasid is still valid. A
+ * valid pasid means the backing mm is still having a valid user.
+ * For kernel callers init_mm is always valid. for other mm, if mm->mm_users
+ * is non-zero, it is valid.
+ *
+ * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid
+ * 1 if pasid is valid.
+ */
+extern int intel_svm_is_pasid_valid(struct device *dev, int pasid);
+
 #else /* CONFIG_INTEL_IOMMU_SVM */
 
 static inline int intel_svm_bind_mm(struct device *dev, int *pasid,
@@ -114,6 +129,11 @@ static inline int intel_svm_unbind_mm(struct device *dev, 
int pasid)
 {
BUG();
 }
+
+static int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+   return -EINVAL;
+}
 #endif /* CONFIG_INTEL_IOMMU_SVM */
 
 #define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL))
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/2] iommu/vt-d: Helper function to query if a pasid has any active users

2017-05-10 Thread Ashok Raj
From: CQ Tang <cq.t...@intel.com>

A driver needs to know whether there are any active references to a
PASID before cleaning up its resources. This function checks whether
a PASID still has active users, so the driver can tell before it
performs any recovery on that device.

To: Joerg Roedel <j...@8bytes.org>
To: linux-ker...@vger.kernel.org
To: David Woodhouse <dw...@infradead.org>
Cc: Jean-Phillipe Brucker <jean-philippe.bruc...@arm.com>
Cc: iommu@lists.linux-foundation.org

Signed-off-by: CQ Tang <cq.t...@intel.com>
Signed-off-by: Ashok Raj <ashok@intel.com>
---
 drivers/iommu/intel-svm.c | 30 ++
 include/linux/intel-svm.h | 20 
 2 files changed, 50 insertions(+)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 23c4276..f167c0d 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -489,6 +489,36 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 }
 EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
 
+int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+   struct intel_iommu *iommu;
+   struct intel_svm *svm;
+   int ret = -EINVAL;
+
+   mutex_lock(&pasid_mutex);
+   iommu = intel_svm_device_to_iommu(dev);
+   if (!iommu || !iommu->pasid_table)
+   goto out;
+
+   svm = idr_find(&iommu->pasid_idr, pasid);
+   if (!svm)
+   goto out;
+
+   /* init_mm is used in this case */
+   if (!svm->mm)
+   ret = 1;
+   else if (atomic_read(&svm->mm->mm_users) > 0)
+   ret = 1;
+   else
+   ret = 0;
+
+ out:
+   mutex_unlock(&pasid_mutex);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
+
 /* Page request queue descriptor */
 struct page_req_dsc {
u64 srr:1;
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 3c25794..99bc5b3 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -102,6 +102,21 @@ extern int intel_svm_bind_mm(struct device *dev, int 
*pasid, int flags,
  */
 extern int intel_svm_unbind_mm(struct device *dev, int pasid);
 
+/**
+ * intel_svm_is_pasid_valid() - check if pasid is valid
+ * @dev:   Device for which PASID was allocated
+ * @pasid: PASID value to be checked
+ *
+ * This function checks if the specified pasid is still valid. A
+ * valid pasid means the backing mm is still having a valid user.
+ * For kernel callers init_mm is always valid. for other mm, if mm->mm_users
+ * is non-zero, it is valid.
+ *
+ * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid
+ * 1 if pasid is valid.
+ */
+extern int intel_svm_is_pasid_valid(struct device *dev, int pasid);
+
 #else /* CONFIG_INTEL_IOMMU_SVM */
 
 static inline int intel_svm_bind_mm(struct device *dev, int *pasid,
@@ -114,6 +129,11 @@ static inline int intel_svm_unbind_mm(struct device *dev, 
int pasid)
 {
BUG();
 }
+
+static int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+   return -EINVAL;
+}
 #endif /* CONFIG_INTEL_IOMMU_SVM */
 
 #define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL))
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] PCI: Save properties required to handle FLR for replay purposes.

2017-05-10 Thread Ashok Raj
From: CQ Tang <cq.t...@intel.com>

Requires: https://patchwork.kernel.org/patch/9593891

After an FLR, PCI state needs to be restored. This patch caches the enabled
PASID features and the PRI request allocation so they can be restored.

Cc: Jean-Phillipe Brucker <jean-philippe.bruc...@arm.com>
Cc: David Woodhouse <dw...@infradead.org>
Cc: iommu@lists.linux-foundation.org

Signed-off-by: CQ Tang <cq.t...@intel.com>
Signed-off-by: Ashok Raj <ashok@intel.com>
---
 drivers/pci/ats.c   | 65 +
 drivers/pci/pci.c   |  3 +++
 include/linux/pci-ats.h | 10 
 include/linux/pci.h |  6 +
 4 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 2126497..a769955 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -160,17 +160,16 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
if (!pos)
return -EINVAL;
 
-   pci_read_config_word(pdev, pos + PCI_PRI_CTRL, );
pci_read_config_word(pdev, pos + PCI_PRI_STATUS, );
-   if ((control & PCI_PRI_CTRL_ENABLE) ||
-   !(status & PCI_PRI_STATUS_STOPPED))
+   if (!(status & PCI_PRI_STATUS_STOPPED))
return -EBUSY;
 
pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, _requests);
reqs = min(max_requests, reqs);
+   pdev->pri_reqs_alloc = reqs;
pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
 
-   control |= PCI_PRI_CTRL_ENABLE;
+   control = PCI_PRI_CTRL_ENABLE;
pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
pdev->pri_enabled = 1;
@@ -206,6 +205,29 @@ void pci_disable_pri(struct pci_dev *pdev)
 EXPORT_SYMBOL_GPL(pci_disable_pri);
 
 /**
+ * pci_restore_pri_state - Restore PRI
+ * @pdev: PCI device structure
+ *
+ */
+void pci_restore_pri_state(struct pci_dev *pdev)
+{
+   u16 control = PCI_PRI_CTRL_ENABLE;
+   u32 reqs = pdev->pri_reqs_alloc;
+   int pos;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+   if (!pos)
+   return;
+
+   if (!pdev->pri_enabled)
+   return;
+
+   pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
+   pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
+}
+EXPORT_SYMBOL_GPL(pci_restore_pri_state);
+
+/**
  * pci_reset_pri - Resets device's PRI state
  * @pdev: PCI device structure
  *
@@ -224,12 +246,7 @@ int pci_reset_pri(struct pci_dev *pdev)
if (!pos)
return -EINVAL;
 
-   pci_read_config_word(pdev, pos + PCI_PRI_CTRL, );
-   if (control & PCI_PRI_CTRL_ENABLE)
-   return -EBUSY;
-
-   control |= PCI_PRI_CTRL_RESET;
-
+   control = PCI_PRI_CTRL_RESET;
pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
return 0;
@@ -259,12 +276,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
if (!pos)
return -EINVAL;
 
-   pci_read_config_word(pdev, pos + PCI_PASID_CTRL, );
pci_read_config_word(pdev, pos + PCI_PASID_CAP, );
-
-   if (control & PCI_PASID_CTRL_ENABLE)
-   return -EINVAL;
-
supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
/* User wants to enable anything unsupported? */
@@ -272,6 +284,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
return -EINVAL;
 
control = PCI_PASID_CTRL_ENABLE | features;
+   pdev->pasid_features = features;
 
pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 
@@ -305,6 +318,28 @@ void pci_disable_pasid(struct pci_dev *pdev)
 EXPORT_SYMBOL_GPL(pci_disable_pasid);
 
 /**
+ * pci_restore_pasid_state - Restore PASID capabilities.
+ * @pdev: PCI device structure
+ *
+ */
+void pci_restore_pasid_state(struct pci_dev *pdev)
+{
+   u16 control;
+   int pos;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
+   if (!pos)
+   return;
+
+   if (!pdev->pasid_enabled)
+   return;
+
+   control = PCI_PASID_CTRL_ENABLE | pdev->pasid_features;
+   pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
+}
+EXPORT_SYMBOL_GPL(pci_restore_pasid_state);
+
+/**
  * pci_pasid_features - Check which PASID features are supported
  * @pdev: PCI device structure
  *
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7904d02..c9a6510 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1171,6 +1172,8 @@ void pci_restore_state(struct pci_dev *dev)
 
/* PCI Express register must be restored first */
pci_restore_pcie_state(dev);
+   pci_restore_pasid_state(dev);
+   pci_restore_pri_state(dev);
pci_restore_ats_state(dev);
pci_restore_vc_state(dev);
 
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 57e0b82..782fb8e 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -7,6 +7,7 @@
 
 int pci_enabl

[PATCH 2/2] PCI: Save properties required to handle FLR for replay purposes.

2017-05-30 Thread Ashok Raj
From: CQ Tang <cq.t...@intel.com>

Requires: https://patchwork.kernel.org/patch/9593891


After an FLR, PCI state needs to be restored. This patch caches the enabled
PASID features and the PRI request allocation so they can be restored.

To: Bjorn Helgaas <bhelg...@google.com>
To: Joerg Roedel <j...@8bytes.org>
To: linux-...@vger.kernel.org
To: linux-ker...@vger.kernel.org
Cc: Jean-Phillipe Brucker <jean-philippe.bruc...@arm.com>
Cc: David Woodhouse <dw...@infradead.org>
Cc: iommu@lists.linux-foundation.org

Signed-off-by: CQ Tang <cq.t...@intel.com>
Signed-off-by: Ashok Raj <ashok@intel.com>
---
 drivers/pci/ats.c   | 65 +
 drivers/pci/pci.c   |  3 +++
 include/linux/pci-ats.h | 10 
 include/linux/pci.h |  6 +
 4 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 2126497..a769955 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -160,17 +160,16 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
if (!pos)
return -EINVAL;
 
-   pci_read_config_word(pdev, pos + PCI_PRI_CTRL, );
pci_read_config_word(pdev, pos + PCI_PRI_STATUS, );
-   if ((control & PCI_PRI_CTRL_ENABLE) ||
-   !(status & PCI_PRI_STATUS_STOPPED))
+   if (!(status & PCI_PRI_STATUS_STOPPED))
return -EBUSY;
 
pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, _requests);
reqs = min(max_requests, reqs);
+   pdev->pri_reqs_alloc = reqs;
pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
 
-   control |= PCI_PRI_CTRL_ENABLE;
+   control = PCI_PRI_CTRL_ENABLE;
pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
pdev->pri_enabled = 1;
@@ -206,6 +205,29 @@ void pci_disable_pri(struct pci_dev *pdev)
 EXPORT_SYMBOL_GPL(pci_disable_pri);
 
 /**
+ * pci_restore_pri_state - Restore PRI
+ * @pdev: PCI device structure
+ *
+ */
+void pci_restore_pri_state(struct pci_dev *pdev)
+{
+   u16 control = PCI_PRI_CTRL_ENABLE;
+   u32 reqs = pdev->pri_reqs_alloc;
+   int pos;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+   if (!pos)
+   return;
+
+   if (!pdev->pri_enabled)
+   return;
+
+   pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
+   pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
+}
+EXPORT_SYMBOL_GPL(pci_restore_pri_state);
+
+/**
  * pci_reset_pri - Resets device's PRI state
  * @pdev: PCI device structure
  *
@@ -224,12 +246,7 @@ int pci_reset_pri(struct pci_dev *pdev)
if (!pos)
return -EINVAL;
 
-   pci_read_config_word(pdev, pos + PCI_PRI_CTRL, );
-   if (control & PCI_PRI_CTRL_ENABLE)
-   return -EBUSY;
-
-   control |= PCI_PRI_CTRL_RESET;
-
+   control = PCI_PRI_CTRL_RESET;
pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
return 0;
@@ -259,12 +276,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
if (!pos)
return -EINVAL;
 
-   pci_read_config_word(pdev, pos + PCI_PASID_CTRL, );
pci_read_config_word(pdev, pos + PCI_PASID_CAP, );
-
-   if (control & PCI_PASID_CTRL_ENABLE)
-   return -EINVAL;
-
supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
/* User wants to enable anything unsupported? */
@@ -272,6 +284,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
return -EINVAL;
 
control = PCI_PASID_CTRL_ENABLE | features;
+   pdev->pasid_features = features;
 
pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 
@@ -305,6 +318,28 @@ void pci_disable_pasid(struct pci_dev *pdev)
 EXPORT_SYMBOL_GPL(pci_disable_pasid);
 
 /**
+ * pci_restore_pasid_state - Restore PASID capabilities.
+ * @pdev: PCI device structure
+ *
+ */
+void pci_restore_pasid_state(struct pci_dev *pdev)
+{
+   u16 control;
+   int pos;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
+   if (!pos)
+   return;
+
+   if (!pdev->pasid_enabled)
+   return;
+
+   control = PCI_PASID_CTRL_ENABLE | pdev->pasid_features;
+   pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
+}
+EXPORT_SYMBOL_GPL(pci_restore_pasid_state);
+
+/**
  * pci_pasid_features - Check which PASID features are supported
  * @pdev: PCI device structure
  *
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7904d02..c9a6510 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1171,6 +1172,8 @@ void pci_restore_state(struct pci_dev *dev)
 
/* PCI Express register must be restored first */
pci_restore_pcie_state(dev);
+   pci_restore_pasid_state(dev);
+   pci_restore_pri_state(dev);
pci_restore_ats_state(dev);
pci_restore_vc_state(dev);
 
diff --git a/include/linux/pci-ats.

[PATCH 0/2] Save and restore pci properties to support FLR

2017-05-30 Thread Ashok Raj
Resending Jean's patch so it can be included earlier than his large
SVM commits. Original patch https://patchwork.kernel.org/patch/9593891
was ack'ed by Bjorn. Let's commit these separately since we need
functionality earlier.

Resending this series as requested by Jean.

CQ Tang (1):
  PCI: Save properties required to handle FLR for replay purposes.

Jean-Philippe Brucker (1):
  PCI: Cache PRI and PASID bits in pci_dev

 drivers/pci/ats.c   | 88 -
 drivers/pci/pci.c   |  3 ++
 include/linux/pci-ats.h | 10 ++
 include/linux/pci.h |  8 +
 4 files changed, 94 insertions(+), 15 deletions(-)

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] vfio/pci: Some buggy virtual functions incorrectly report 1 for intx.

2018-08-09 Thread Ashok Raj
PCI_INTERRUPT_PIN should always read  0 for SRIOV Virtual Functions.

Some SRIOV devices have some bugs in RTL and VF's end up reading 1
instead of 0 for the PIN.

Since this is a spec required value, rather than having a device specific
quirk, we could fix it permanently in vfio.

Reworked suggestions from Alex https://lkml.org/lkml/2018/7/16/1052

Reported-by: Gage Eads 
Tested-by: Gage Eads 
Signed-off-by: Ashok Raj 
Cc: k...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: Joerg Roedel 
Cc: Bjorn Helgaas 
Cc: Gage Eads 
---
 drivers/vfio/pci/vfio_pci.c| 12 +---
 drivers/vfio/pci/vfio_pci_config.c |  3 ++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index b423a30..32943dd 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -433,10 +433,16 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device 
*vdev, int irq_type)
 {
if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
u8 pin;
-   pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, );
-   if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin)
-   return 1;
+   /*
+* INTx must be 0 for all VF's. Enforce that for all
+* VF's since this is a spec requirement.
+*/
+   if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx ||
+   vdev->pdev->is_virtfn)
+   return 0;
 
+   pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, );
+   return (pin ? 1 : 0);
} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
u8 pos;
u16 flags;
diff --git a/drivers/vfio/pci/vfio_pci_config.c 
b/drivers/vfio/pci/vfio_pci_config.c
index 115a36f..e36b7c3 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1676,7 +1676,8 @@ int vfio_config_init(struct vfio_pci_device *vdev)
*(__le16 *)[PCI_DEVICE_ID] = cpu_to_le16(pdev->device);
}
 
-   if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx)
+   if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx ||
+   pdev->is_virtfn)
vconfig[PCI_INTERRUPT_PIN] = 0;
 
ret = vfio_cap_init(vdev);
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] x86/pci: Some buggy virtual functions incorrectly report 1 for intx.

2018-07-16 Thread Ashok Raj
PCI_INTERRUPT_PIN should always read  0 for SRIOV Virtual Functions.

Some SRIOV devices have some bugs in RTL and VF's end up reading 1
instead of 0 for the PIN.

We could enforce it by default in vfio_pci_nointx.

Reported-by: Gage Eads 
Tested-by: Gage Eads 
Signed-off-by: Ashok Raj 
Cc: linux-ker...@vger.kernel.org
Cc: sta...@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: Joerg Roedel 
Cc: Bjorn Helgaas 
Cc: Gage Eads 
---
 drivers/vfio/pci/vfio_pci.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index b423a30..bc3f4fa 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -192,6 +192,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev);
  */
 static bool vfio_pci_nointx(struct pci_dev *pdev)
 {
+   /*
+* Per PCI, no VF's should have INTx
+* Simply disable it here
+*/
+   if (pdev->is_virtfn)
+   return true;
+
switch (pdev->vendor) {
case PCI_VENDOR_ID_INTEL:
switch (pdev->device) {
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu: Relax ACS requirement for RCiEP devices.

2020-05-04 Thread Ashok Raj
The PCIe spec allows the ACS requirement to be relaxed for RCiEP devices.

PCIe 5.0 Specification.
6.12 Access Control Services (ACS)
Implementation of ACS in RCiEPs is permitted but not required. It is
explicitly permitted that, within a single Root Complex, some RCiEPs
implement ACS and some do not. It is strongly recommended that Root Complex
implementations ensure that all accesses originating from RCiEPs
(PFs and VFs) without ACS capability are first subjected to processing by
the Translation Agent (TA) in the Root Complex before further decoding and
processing. The details of such Root Complex handling are outside the scope
of this specification.

Since Linux didn't give special treatment to allow this exception, certain
RCiEP MFD devices are getting grouped in a single iommu group. This
doesn't permit a single device to be assigned to a guest for instance.

In one vendor system: Device 14.x were grouped in a single IOMMU group.

/sys/kernel/iommu_groups/5/devices/0000:00:14.0
/sys/kernel/iommu_groups/5/devices/0000:00:14.2
/sys/kernel/iommu_groups/5/devices/0000:00:14.3

After the patch:
/sys/kernel/iommu_groups/5/devices/0000:00:14.0
/sys/kernel/iommu_groups/5/devices/0000:00:14.2
/sys/kernel/iommu_groups/6/devices/0000:00:14.3 <<< new group

14.0 and 14.2 are integrated devices, but legacy end points.
Whereas 14.3 was a PCIe compliant RCiEP.

00:14.3 Network controller: Intel Corporation Device 9df0 (rev 30)
Capabilities: [40] Express (v2) Root Complex Integrated Endpoint, MSI 00

This permits assigning this device to a guest VM.

Fixes: f096c061f552 ("iommu: Rework iommu_group_get_for_pci_dev()")
Signed-off-by: Ashok Raj 
To: Joerg Roedel 
To: Bjorn Helgaas 
Cc: linux-ker...@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: Lu Baolu 
Cc: Alex Williamson 
Cc: Darrel Goeddel 
Cc: Mark Scott ,
Cc: Romil Sharma 
Cc: Ashok Raj 
---
 drivers/iommu/iommu.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2b471419e26c..5744bd65f3e2 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1187,7 +1187,20 @@ static struct iommu_group 
*get_pci_function_alias_group(struct pci_dev *pdev,
struct pci_dev *tmp = NULL;
struct iommu_group *group;
 
-   if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
+   /*
+* PCI Spec 5.0, Section 6.12 Access Control Service
+* Implementation of ACS in RCiEPs is permitted but not required.
+* It is explicitly permitted that, within a single Root
+* Complex, some RCiEPs implement ACS and some do not. It is
+* strongly recommended that Root Complex implementations ensure
+* that all accesses originating from RCiEPs (PFs and VFs) without
+* ACS capability are first subjected to processing by the Translation
+* Agent (TA) in the Root Complex before further decoding and
+* processing.
+*/
+   if (!pdev->multifunction ||
+   (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END) ||
+pci_acs_enabled(pdev, REQ_ACS_FLAGS))
return NULL;
 
for_each_pci_dev(tmp) {
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V8 0/3] iommu: Add support to change default domain of an iommu group

2020-09-25 Thread Ashok Raj
Presently, the default domain of an iommu group is allocated during boot time
and it cannot be changed later. So, the device would typically be either in
identity (pass_through) mode or the device would be in DMA mode as long as the
system is up and running. There is no way to change the default domain type
dynamically i.e. after booting, a device cannot switch between identity mode and
DMA mode.

Assume a use case wherein the privileged user would want to use the device in
pass-through mode when the device is used for host so that it would be high
performing. Presently, this is not supported. Hence add support to change the
default domain of an iommu group dynamically.

Support this by writing to a sysfs file, namely
"/sys/kernel/iommu_groups/<grp_id>/type".

Testing:

Tested by dynamically changing storage device (nvme) from
1. identity mode to DMA and making sure file transfer works
2. DMA mode to identity mode and making sure file transfer works
Tested only for intel_iommu/vt-d. Would appreciate if someone could test on AMD
and ARM based machines.

Based on iommu maintainer's 'next' branch.

Changes from V6,v7:

1. None except for version bump.
https://lore.kernel.org/linux-iommu/20200925073423.gt27...@8bytes.org/

Changes from V5:

1. None except for version bump because Joerg had asked to resend the patches
   after the merge window closes.

Changes from V4:

1. Created device direct mappings before attaching the device to the domain
2. Used list_first_entry() instead of list_for_each_entry() to get the first
   element of a linked list.
3. Used get_device() and put_device() before and after device_lock()
4. Passed device as an argument to iommu_change_dev_def_domain() to check that
   the device hasn't changed between calls.
5. Changed error message from "Group assigned to user level for direct access"
   to "Group not assigned to default domain".
6. Changed error message from "Cannot change default domain of a group with two
   or more devices" to "Cannot change default domain: Group has more than one
   device".
7. Removed printing error message "'def_domain_type' call back isn't registered"

Changes from V3:

1. Made changes to commit message as suggested by Baolu.
2. Don't pass "prev_dom" and "dev" as parameters to
   iommu_change_dev_def_domain(). Instead get them from group.
3. Sanitize the logic to validate user default domain type request. The logic
   remains the same but is implemented differently.
4. Push lot of error checking into iommu_change_dev_def_domain() from
   iommu_group_store_type().
5. iommu_change_dev_def_domain() takes/releases group mutex as needed. So, it
   shouldn't be called holding a group mutex.
6. Use pr_err_ratelimited() instead of pr_err() to avoid DOS attack.

Changes from V2:

1. Change the logic of updating default domain from V2 because
   ops->probe_finalize() could be used to update dma_ops.
2. Drop 1st and 2nd patch of V2 series because they are no longer needed on
   iommu maintainer's 'next' branch.
3. Limit this feature to iommu groups with only one device.
4. Hold device_lock and group mutex until the default domain is changed.

Changes from V1:

1. V1 patch set wasn't updating dma_ops for some vendors (Eg: AMD), hence,
   change the logic of updating default domain as below (because adding a device
   to iommu_group automatically updates dma_ops)
   a. Allocate a new domain
   b. For every device in the group
i. Remove the device from the group
ii. Add the device back to the group
   c. Free previous domain
2. Drop 1st patch of V1 (iommu/vt-d: Modify device_def_domain_type() to use at
   runtime) because "iommu=pt" has no effect on this function anymore.
3. Added a patch to take/release lock while reading 
iommu_group->default_domain->type
   because it can be changed any time by user.
4. Before changing default domain type of a group, check if the group is
   directly assigned for user level access. If so, abort.
5. Sanitize return path (using ternary operator) in iommu_group_store_type()
6. Split 2nd patch of V1 (iommu: Add device_def_domain_type() call back function
   to iommu_ops) into two patches such that iommu generic changes are now in 1st
   patch of V2 and vt-d specific changes are in 2nd patch of V2.
7. Rename device_def_domain_type() to dev_def_domain_type()
8. Remove example from documentation
9. Change the value written to file "/sys/kernel/iommu_groups/<grp_id>/type"
   from "dma" to "DMA".

Changes from RFC:
-
1. Added support for "auto" type, so that kernel selects one among identity or
   dma mode.
2. Use "system_state" in device_def_domain_type() instead of an argument.

Sai Praneeth Prakhya (3):
  iommu: Add support to change default domain of an iommu group
  iommu: Take lock before reading iommu group default domain type
  iommu: Document usage of "/sys/kernel/iommu_groups//type" file


Sai Praneeth Prakhya (3):
  

[Patch V8 3/3] iommu: Document usage of "/sys/kernel/iommu_groups/<grp_id>/type" file

2020-09-25 Thread Ashok Raj
From: Sai Praneeth Prakhya 

The default domain type of an iommu group can be changed by writing to
"/sys/kernel/iommu_groups/<grp_id>/type" file. Hence, document its usage
and more importantly spell out its limitations.

Cc: Christoph Hellwig 
Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Will Deacon 
Cc: Lu Baolu 
Cc: Sohil Mehta 
Cc: Robin Murphy 
Cc: Jacob Pan 
Reviewed-by: Lu Baolu 
Signed-off-by: Sai Praneeth Prakhya 
---
 .../ABI/testing/sysfs-kernel-iommu_groups  | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups 
b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
index 017f5bc3920c..effde9d23f4f 100644
--- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups
+++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
@@ -33,3 +33,33 @@ Description:In case an RMRR is used only by graphics or 
USB devices
it is now exposed as "direct-relaxable" instead of "direct".
In device assignment use case, for instance, those RMRR
are considered to be relaxable and safe.
+
+What:  /sys/kernel/iommu_groups/<grp_id>/type
+Date:  September 2020
+KernelVersion: v5.10
+Contact:   Sai Praneeth Prakhya 
+Description:   Let the user know the type of default domain in use by iommu
+   for this group. A privileged user could request kernel to change
+   the group type by writing to this file. Presently, only three
+   types are supported
+   1. DMA: All the DMA transactions from the device in this group
+   are translated by the iommu.
+   2. identity: All the DMA transactions from the device in this
+group are *not* translated by the iommu.
+   3. auto: Change to the type the device was booted with. When the
+user reads the file he would never see "auto". This is
+just a write only value.
+   Note:
+   -
+   A group type could be modified only when
+   1. The group has *only* one device
+   2. The device in the group is not bound to any device driver.
+  So, the user must first unbind the appropriate driver and
+  then change the default domain type.
+   Caution:
+   
+   Unbinding a device driver will take away the driver's control
+   over the device and if done on devices that host root file
+   system could lead to catastrophic effects (the user might
+   need to reboot the machine to get it to normal state). So, it's
+   expected that the user understands what he is doing.
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V8 2/3] iommu: Take lock before reading iommu group default domain type

2020-09-25 Thread Ashok Raj
From: Sai Praneeth Prakhya 

"/sys/kernel/iommu_groups//type" file could be read to find out the
default domain type of an iommu group. The default domain of an iommu group
doesn't change after booting and hence could be read directly. But,
after adding support to dynamically change iommu group default domain, the
above assumption no longer stays valid.

iommu group default domain type could be changed at any time by writing to
"/sys/kernel/iommu_groups/<grp_id>/type". So, take group mutex before
reading iommu group default domain type so that the user wouldn't see stale
values or iommu_group_show_type() doesn't try to dereference stale pointers.

Cc: Christoph Hellwig 
Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Will Deacon 
Cc: Lu Baolu 
Cc: Sohil Mehta 
Cc: Robin Murphy 
Cc: Jacob Pan 
Reviewed-by: Lu Baolu 
Signed-off-by: Sai Praneeth Prakhya 
---
 drivers/iommu/iommu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2e93c48ce248..b540ae1e679d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -501,6 +501,7 @@ static ssize_t iommu_group_show_type(struct iommu_group 
*group,
 {
char *type = "unknown\n";
 
+   mutex_lock(>mutex);
if (group->default_domain) {
switch (group->default_domain->type) {
case IOMMU_DOMAIN_BLOCKED:
@@ -517,6 +518,7 @@ static ssize_t iommu_group_show_type(struct iommu_group 
*group,
break;
}
}
+   mutex_unlock(>mutex);
strcpy(buf, type);
 
return strlen(type);
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V8 1/3] iommu: Add support to change default domain of an iommu group

2020-09-25 Thread Ashok Raj
From: Sai Praneeth Prakhya 

Presently, the default domain of an iommu group is allocated during boot
time and it cannot be changed later. So, the device would typically be
either in identity (also known as pass_through) mode or the device would be
in DMA mode as long as the machine is up and running. There is no way to
change the default domain type dynamically i.e. after booting, a device
cannot switch between identity mode and DMA mode.

But, assume a use case wherein the user trusts the device and believes that
the OS is secure enough and hence wants *only* this device to bypass IOMMU
(so that it could be high performing) whereas all the other devices to go
through IOMMU (so that the system is protected). Presently, this use case
is not supported. It will be helpful if there is some way to change the
default domain of an iommu group dynamically. Hence, add such support.

A privileged user could request the kernel to change the default domain
type of an iommu group by writing to
"/sys/kernel/iommu_groups/<grp_id>/type" file. Presently, only three values
are supported
1. identity: all the DMA transactions from the device in this group are
 *not* translated by the iommu
2. DMA: all the DMA transactions from the device in this group are
translated by the iommu
3. auto: change to the type the device was booted with

Note:
1. Default domain of an iommu group with two or more devices cannot be
   changed.
2. The device in the iommu group shouldn't be bound to any driver.
3. The device shouldn't be assigned to user for direct access.
4. The vendor iommu driver is required to add def_domain_type() callback.
   The change request will fail if the request type conflicts with that
   returned from the callback.

Please see "Documentation/ABI/testing/sysfs-kernel-iommu_groups" for more
information.

Cc: Christoph Hellwig 
Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Will Deacon 
Cc: Lu Baolu 
Cc: Sohil Mehta 
Cc: Robin Murphy 
Cc: Jacob Pan 
Reviewed-by: Lu Baolu 
Signed-off-by: Sai Praneeth Prakhya 
---
 drivers/iommu/iommu.c | 225 +-
 1 file changed, 224 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 6c14c88cd525..2e93c48ce248 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -93,6 +93,8 @@ static void __iommu_detach_group(struct iommu_domain *domain,
 static int iommu_create_device_direct_mappings(struct iommu_group *group,
   struct device *dev);
 static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
+static ssize_t iommu_group_store_type(struct iommu_group *group,
+ const char *buf, size_t count);
 
 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)  \
 struct iommu_group_attribute iommu_group_attr_##_name =\
@@ -525,7 +527,8 @@ static IOMMU_GROUP_ATTR(name, S_IRUGO, 
iommu_group_show_name, NULL);
 static IOMMU_GROUP_ATTR(reserved_regions, 0444,
iommu_group_show_resv_regions, NULL);
 
-static IOMMU_GROUP_ATTR(type, 0444, iommu_group_show_type, NULL);
+static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
+   iommu_group_store_type);
 
 static void iommu_group_release(struct kobject *kobj)
 {
@@ -2849,3 +2852,223 @@ int iommu_sva_get_pasid(struct iommu_sva *handle)
return ops->sva_get_pasid(handle);
 }
 EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
+
+/*
+ * Changes the default domain of an iommu group that has *only* one device
+ *
+ * @group: The group for which the default domain should be changed
+ * @prev_dev: The device in the group (this is used to make sure that the 
device
+ *  hasn't changed after the caller has called this function)
+ * @type: The type of the new default domain that gets associated with the 
group
+ *
+ * Returns 0 on success and error code on failure
+ *
+ * Note:
+ * 1. Presently, this function is called only when user requests to change the
+ *group's default domain type through 
/sys/kernel/iommu_groups//type
+ *Please take a closer look if intended to use for other purposes.
+ */
+static int iommu_change_dev_def_domain(struct iommu_group *group,
+  struct device *prev_dev, int type)
+{
+   struct iommu_domain *prev_dom;
+   struct group_device *grp_dev;
+   const struct iommu_ops *ops;
+   int ret, dev_def_dom;
+   struct device *dev;
+
+   if (!group)
+   return -EINVAL;
+
+   mutex_lock(>mutex);
+
+   if (group->default_domain != group->domain) {
+   pr_err_ratelimited("Group not assigned to default domain\n");
+   ret = -EBUSY;
+   goto out;
+   }
+
+   /*
+* iommu group wasn't locked while acquiring device lock in
+* iommu_group_store_type(). So, make sure that the device count hasn't
+   

[PATCH 1/1] pci: pciehp: Handle MRL interrupts to enable slot for hotplug.

2020-09-25 Thread Ashok Raj
When Mechanical Retention Lock (MRL) is present, Linux doesn't process
those change events.

The following changes need to be enabled when MRL is present.

1. Subscribe to MRL change events in SlotControl.
2. When MRL is closed,
   - If there is no ATTN button, then POWER on the slot.
   - If there is ATTN button, and an MRL event pending, ignore
 Presence Detect. Since we want ATTN button to drive the
 hotplug event.


Signed-off-by: Ashok Raj 
Co-developed-by: Kuppuswamy Sathyanarayanan 

---
 drivers/pci/hotplug/pciehp.h  |  1 +
 drivers/pci/hotplug/pciehp_ctrl.c | 69 +++
 drivers/pci/hotplug/pciehp_hpc.c  | 27 ++-
 3 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 4fd200d8b0a9..24a1c9c8ac78 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -155,6 +155,7 @@ void pciehp_request(struct controller *ctrl, int action);
 void pciehp_handle_button_press(struct controller *ctrl);
 void pciehp_handle_disable_request(struct controller *ctrl);
 void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 
events);
+void pciehp_handle_mrl_change(struct controller *ctrl);
 int pciehp_configure_device(struct controller *ctrl);
 void pciehp_unconfigure_device(struct controller *ctrl, bool presence);
 void pciehp_queue_pushbutton_work(struct work_struct *work);
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c 
b/drivers/pci/hotplug/pciehp_ctrl.c
index 9f85815b4f53..c4310ee3678b 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -227,6 +227,7 @@ void pciehp_handle_disable_request(struct controller *ctrl)
 void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
 {
int present, link_active;
+   u8 getstatus = 0;
 
/*
 * If the slot is on and presence or link has changed, turn it off.
@@ -275,6 +276,16 @@ void pciehp_handle_presence_or_link_change(struct 
controller *ctrl, u32 events)
if (link_active)
ctrl_info(ctrl, "Slot(%s): Link Up\n",
  slot_name(ctrl));
+   if (MRL_SENS(ctrl)) {
+   pciehp_get_latch_status(ctrl, );
+   /*
+* If slot is closed && ATTN button exists
+* don't continue, let the ATTN button
+* drive the hot-plug
+*/
+   if (!getstatus && ATTN_BUTTN(ctrl))
+   return;
+   }
ctrl->request_result = pciehp_enable_slot(ctrl);
break;
default:
@@ -283,6 +294,64 @@ void pciehp_handle_presence_or_link_change(struct 
controller *ctrl, u32 events)
}
 }
 
+void pciehp_handle_mrl_change(struct controller *ctrl)
+{
+   u8 getstatus = 0;
+   int present, link_active;
+
+   pciehp_get_latch_status(ctrl, );
+
+   present = pciehp_card_present(ctrl);
+   link_active = pciehp_check_link_active(ctrl);
+
+   ctrl_info(ctrl, "Slot(%s): Card %spresent\n",
+ slot_name(ctrl), present ? "" : "not ");
+
+   ctrl_info(ctrl, "Slot(%s): Link %s\n",
+ slot_name(ctrl), link_active ? "Up" : "Down");
+
+   ctrl_info(ctrl, "Slot(%s): Latch %s\n",
+ slot_name(ctrl), getstatus ? "Open" : "Closed");
+
+   /*
+* Need to handle only MRL Open. When MRL is closed with
+* a Card Present, either the ATTN button, or the PDC indication
+* should power the slot and add the card in the slot
+*/
+   if (getstatus) {
+   /*
+* If slot was powered on, time to power off
+* and remove the card
+*/
+   mutex_lock(>state_lock);
+   if (ctrl->state == ON_STATE) {
+   mutex_unlock(>state_lock);
+   pciehp_handle_disable_request(ctrl);
+   } else
+   mutex_unlock(>state_lock);
+   } else {
+   /*
+* If latch is closed, and previous state is OFF
+* Then enable the slot
+*/
+   mutex_lock(>state_lock);
+   if (ctrl->state == OFF_STATE) {
+   /*
+* Only continue to power on the slot when the
+* Attention button is not present. When button
+* present, button press event will process the
+* hot-add part of the flow.
+*/
+   if ((present || link_active) && !ATTN_BUTTN(ctrl)) {
+  

[PATCH] PCI: Relax ACS requirement for Intel RCiEP devices.

2020-05-28 Thread Ashok Raj
All Intel platforms guarantee that all root complex implementations
must send transactions up to the IOMMU for address translation. Hence
RCiEP devices with the Intel Vendor ID can claim an exception for the
lack of ACS support.


3.16 Root-Complex Peer to Peer Considerations
When DMA remapping is enabled, peer-to-peer requests through the
Root-Complex must be handled
as follows:
• The input address in the request is translated (through first-level,
  second-level or nested translation) to a host physical address (HPA).
  The address decoding for peer addresses must be done only on the
  translated HPA. Hardware implementations are free to further limit
  peer-to-peer accesses to specific host physical address regions
  (or to completely disallow peer-forwarding of translated requests).
• Since address translation changes the contents (address field) of
  the PCI Express Transaction Layer Packet (TLP), for PCI Express
  peer-to-peer requests with ECRC, the Root-Complex hardware must use
  the new ECRC (re-computed with the translated address) if it
  decides to forward the TLP as a peer request.
• Root-ports, and multi-function root-complex integrated endpoints, may
  support additional peer-to-peer control features by supporting PCI Express
  Access Control Services (ACS) capability. Refer to ACS capability in
  PCI Express specifications for details.

Since Linux didn't give special treatment to allow this exception, certain
RCiEP MFD devices are getting grouped in a single iommu group. This
doesn't permit a single device to be assigned to a guest for instance.

In one vendor system: Device 14.x were grouped in a single IOMMU group.

/sys/kernel/iommu_groups/5/devices/:00:14.0
/sys/kernel/iommu_groups/5/devices/:00:14.2
/sys/kernel/iommu_groups/5/devices/:00:14.3

After the patch:
/sys/kernel/iommu_groups/5/devices/:00:14.0
/sys/kernel/iommu_groups/5/devices/:00:14.2
/sys/kernel/iommu_groups/6/devices/:00:14.3 <<< new group

14.0 and 14.2 are integrated devices, but legacy end points.
Whereas 14.3 was a PCIe compliant RCiEP.

00:14.3 Network controller: Intel Corporation Device 9df0 (rev 30)
Capabilities: [40] Express (v2) Root Complex Integrated Endpoint, MSI 00

This permits assigning this device to a guest VM.

Fixes: f096c061f552 ("iommu: Rework iommu_group_get_for_pci_dev()")
Signed-off-by: Ashok Raj 
To: Joerg Roedel 
To: Bjorn Helgaas 
Cc: linux-ker...@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: Lu Baolu 
Cc: Alex Williamson 
Cc: Darrel Goeddel 
Cc: Mark Scott ,
Cc: Romil Sharma 
Cc: Ashok Raj 
---
v2: Moved functionality from iommu to pci quirks - Alex Williamson

 drivers/pci/quirks.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 28c9a2409c50..63373ca0a3fe 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4682,6 +4682,20 @@ static int pci_quirk_mf_endpoint_acs(struct pci_dev 
*dev, u16 acs_flags)
PCI_ACS_CR | PCI_ACS_UF | PCI_ACS_DT);
 }
 
+static int pci_quirk_rciep_acs(struct pci_dev *dev, u16 acs_flags)
+{
+   /*
+* RCiEP's are required to allow p2p only on translated addresses.
+* Refer to Intel VT-d specification Section 3.16 Root-Complex Peer
+* to Peer Considerations
+*/
+   if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_END)
+   return -ENOTTY;
+
+   return pci_acs_ctrl_enabled(acs_flags,
+   PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+}
+
 static int pci_quirk_brcm_acs(struct pci_dev *dev, u16 acs_flags)
 {
/*
@@ -4764,6 +4778,7 @@ static const struct pci_dev_acs_enabled {
/* I219 */
{ PCI_VENDOR_ID_INTEL, 0x15b7, pci_quirk_mf_endpoint_acs },
{ PCI_VENDOR_ID_INTEL, 0x15b8, pci_quirk_mf_endpoint_acs },
+   { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_rciep_acs },
/* QCOM QDF2xxx root ports */
{ PCI_VENDOR_ID_QCOM, 0x0400, pci_quirk_qcom_rp_acs },
{ PCI_VENDOR_ID_QCOM, 0x0401, pci_quirk_qcom_rp_acs },
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH] iommu: Relax ACS requirement for Intel RCiEP devices.

2020-05-26 Thread Ashok Raj
All Intel platforms guarantee that all root complex implementations
must send transactions up to the IOMMU for address translation. Hence
RCiEP devices with the Intel Vendor ID can claim an exception for the
lack of ACS support.


3.16 Root-Complex Peer to Peer Considerations
When DMA remapping is enabled, peer-to-peer requests through the
Root-Complex must be handled
as follows:
• The input address in the request is translated (through first-level,
  second-level or nested translation) to a host physical address (HPA).
  The address decoding for peer addresses must be done only on the
  translated HPA. Hardware implementations are free to further limit
  peer-to-peer accesses to specific host physical address regions
  (or to completely disallow peer-forwarding of translated requests).
• Since address translation changes the contents (address field) of
  the PCI Express Transaction Layer Packet (TLP), for PCI Express
  peer-to-peer requests with ECRC, the Root-Complex hardware must use
  the new ECRC (re-computed with the translated address) if it
  decides to forward the TLP as a peer request.
• Root-ports, and multi-function root-complex integrated endpoints, may
  support additional peer-to-peer control features by supporting PCI Express
  Access Control Services (ACS) capability. Refer to ACS capability in
  PCI Express specifications for details.

Since Linux didn't give special treatment to allow this exception, certain
RCiEP MFD devices are getting grouped in a single iommu group. This
doesn't permit a single device to be assigned to a guest for instance.

In one vendor system: Device 14.x were grouped in a single IOMMU group.

/sys/kernel/iommu_groups/5/devices/:00:14.0
/sys/kernel/iommu_groups/5/devices/:00:14.2
/sys/kernel/iommu_groups/5/devices/:00:14.3

After the patch:
/sys/kernel/iommu_groups/5/devices/:00:14.0
/sys/kernel/iommu_groups/5/devices/:00:14.2
/sys/kernel/iommu_groups/6/devices/:00:14.3 <<< new group

14.0 and 14.2 are integrated devices, but legacy end points.
Whereas 14.3 was a PCIe compliant RCiEP.

00:14.3 Network controller: Intel Corporation Device 9df0 (rev 30)
Capabilities: [40] Express (v2) Root Complex Integrated Endpoint, MSI 00

This permits assigning this device to a guest VM.

Fixes: f096c061f552 ("iommu: Rework iommu_group_get_for_pci_dev()")
Signed-off-by: Ashok Raj 
To: Joerg Roedel 
To: Bjorn Helgaas 
Cc: linux-ker...@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: Lu Baolu 
Cc: Alex Williamson 
Cc: Darrel Goeddel 
Cc: Mark Scott ,
Cc: Romil Sharma 
Cc: Ashok Raj 
---
 drivers/iommu/iommu.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2b471419e26c..31b595dfedde 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1187,7 +1187,18 @@ static struct iommu_group 
*get_pci_function_alias_group(struct pci_dev *pdev,
struct pci_dev *tmp = NULL;
struct iommu_group *group;
 
-   if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
+   /*
+* Intel VT-d Specification Section 3.16, Root-Complex Peer to Peer
+* Considerations mandate that all transactions in RCiEP's and
+* even Integrated MFD's *must* be sent up to the IOMMU. P2P is
+* only possible on translated addresses. This gives enough
+* guarantee that such devices can be forgiven for lack of ACS
+* support.
+*/
+   if (!pdev->multifunction ||
+   (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END) ||
+pci_acs_enabled(pdev, REQ_ACS_FLAGS))
return NULL;
 
for_each_pci_dev(tmp) {
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH] PCI/ATS: PASID and PRI are only enumerated in PF devices.

2020-07-21 Thread Ashok Raj
PASID and PRI capabilities are only enumerated in PF devices. VF devices
do not enumerate these capabilities. IOMMU drivers also need to enumerate
them before enabling features in the IOMMU. Extending the same support as
PASID feature discovery (pci_pasid_features) for PRI.

Signed-off-by: Ashok Raj 

v2: Fixed build failure from lkp when CONFIG_PRI=n
Almost all the PRI functions were called only when CONFIG_PASID is
set. Except the new pci_pri_supported().

previous version sent in error because i missed dry-run before recompile
:-(

To: Bjorn Helgaas 
To: Joerg Roedel 
To: Lu Baolu 
Cc: sta...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
Cc: Ashok Raj 
Cc: iommu@lists.linux-foundation.org
---
 drivers/iommu/intel/iommu.c |  2 +-
 drivers/pci/ats.c   | 13 +
 include/linux/pci-ats.h |  4 
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..276452f5e6a7 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2560,7 +2560,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
 
if (info->ats_supported && ecap_prs(iommu->ecap) &&
-   pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+   pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b761c1f72f67..2e6cf0c700f7 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -325,6 +325,19 @@ int pci_prg_resp_pasid_required(struct pci_dev *pdev)
 
return pdev->pasid_required;
 }
+
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if PRI capability is present, false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+   /* VFs share the PF PRI configuration */
+   return !!(pci_physfn(pdev)->pri_cap);
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
 #endif /* CONFIG_PCI_PRI */
 
 #ifdef CONFIG_PCI_PASID
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index f75c307f346d..df54cd5b15db 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -28,6 +28,10 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
 void pci_disable_pri(struct pci_dev *pdev);
 int pci_reset_pri(struct pci_dev *pdev);
 int pci_prg_resp_pasid_required(struct pci_dev *pdev);
+bool pci_pri_supported(struct pci_dev *pdev);
+#else
+static inline bool pci_pri_supported(struct pci_dev *pdev)
+{ return false; }
 #endif /* CONFIG_PCI_PRI */
 
 #ifdef CONFIG_PCI_PASID
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] PCI/ATS: PASID and PRI are only enumerated in PF devices.

2020-07-21 Thread Ashok Raj
PASID and PRI capabilities are only enumerated in PF devices. VF devices
do not enumerate these capabilities. IOMMU drivers also need to enumerate
them before enabling features in the IOMMU. Extending the same support as
PASID feature discovery (pci_pasid_features) for PRI.

Signed-off-by: Ashok Raj 

v2: Fixed build failure from lkp when CONFIG_PRI=n
Almost all the PRI functions were called only when CONFIG_PASID is
set. Except the new pci_pri_supported().

To: Bjorn Helgaas 
To: Joerg Roedel 
To: Lu Baolu 
Cc: sta...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
Cc: Ashok Raj 
Cc: iommu@lists.linux-foundation.org
---
 drivers/iommu/intel/iommu.c |  2 +-
 drivers/pci/ats.c   | 14 ++
 include/linux/pci-ats.h |  4 
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..276452f5e6a7 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2560,7 +2560,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
 
if (info->ats_supported && ecap_prs(iommu->ecap) &&
-   pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+   pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b761c1f72f67..ffb4de8c5a77 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -461,6 +461,20 @@ int pci_pasid_features(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL_GPL(pci_pasid_features);
 
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if the PRI capability is present on the device (or, for a
+ * VF, on its PF); returns false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+   /* VFs share the PF PRI configuration */
+   return !!(pci_physfn(pdev)->pri_cap);
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
+
 #define PASID_NUMBER_SHIFT 8
 #define PASID_NUMBER_MASK  (0x1f << PASID_NUMBER_SHIFT)
 /**
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index f75c307f346d..fc989295daf3 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -28,6 +28,10 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
 void pci_disable_pri(struct pci_dev *pdev);
 int pci_reset_pri(struct pci_dev *pdev);
 int pci_prg_resp_pasid_required(struct pci_dev *pdev);
+bool pci_pri_supported(struct pci_dev *pdev);
+#else
+bool pci_pri_supported(struct pci_dev *pdev)
+{ return false; }
 #endif /* CONFIG_PCI_PRI */
 
 #ifdef CONFIG_PCI_PASID
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 1/1] PCI/ATS: Check PRI supported on the PF device when SRIOV is enabled

2020-07-23 Thread Ashok Raj
PASID and PRI capabilities are only enumerated in PF devices. VF devices
do not enumerate these capabilities. IOMMU drivers also need to enumerate
them before enabling features in the IOMMU. Extending the same support as
PASID feature discovery (pci_pasid_features) for PRI.

Fixes: b16d0cb9e2fc ("iommu/vt-d: Always enable PASID/PRI PCI capabilities 
before ATS")
Signed-off-by: Ashok Raj 

To: Bjorn Helgaas 
To: Joerg Roedel 
To: Lu Baolu 
Cc: sta...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
Cc: Ashok Raj 
Cc: iommu@lists.linux-foundation.org
---
v3: Added Fixes tag
v2: Fixed build failure reported from lkp when CONFIG_PRI=n

 drivers/iommu/intel/iommu.c |  2 +-
 drivers/pci/ats.c   | 13 +
 include/linux/pci-ats.h |  4 
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..276452f5e6a7 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2560,7 +2560,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
 
if (info->ats_supported && ecap_prs(iommu->ecap) &&
-   pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+   pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b761c1f72f67..2e6cf0c700f7 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -325,6 +325,19 @@ int pci_prg_resp_pasid_required(struct pci_dev *pdev)
 
return pdev->pasid_required;
 }
+
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if PRI capability is present, false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+   /* VFs share the PF PRI configuration */
+   return !!(pci_physfn(pdev)->pri_cap);
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
 #endif /* CONFIG_PCI_PRI */
 
 #ifdef CONFIG_PCI_PASID
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index f75c307f346d..df54cd5b15db 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -28,6 +28,10 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
 void pci_disable_pri(struct pci_dev *pdev);
 int pci_reset_pri(struct pci_dev *pdev);
 int pci_prg_resp_pasid_required(struct pci_dev *pdev);
+bool pci_pri_supported(struct pci_dev *pdev);
+#else
+static inline bool pci_pri_supported(struct pci_dev *pdev)
+{ return false; }
 #endif /* CONFIG_PCI_PRI */
 
 #ifdef CONFIG_PCI_PASID
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] PCI/ATS: PASID and PRI are only enumerated in PF devices.

2020-07-20 Thread Ashok Raj
PASID and PRI capabilities are only enumerated in PF devices. VF devices
do not enumerate these capabilities. IOMMU drivers also need to enumerate
them before enabling features in the IOMMU. Extending the same support as
PASID feature discovery (pci_pasid_features) for PRI.

Signed-off-by: Ashok Raj 

To: Bjorn Helgaas 
To: Joerg Roedel 
To: Lu Baolu 
Cc: sta...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
Cc: Ashok Raj 
Cc: iommu@lists.linux-foundation.org
---
 drivers/iommu/intel/iommu.c |  2 +-
 drivers/pci/ats.c   | 14 ++
 include/linux/pci-ats.h |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..276452f5e6a7 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2560,7 +2560,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
 
if (info->ats_supported && ecap_prs(iommu->ecap) &&
-   pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+   pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b761c1f72f67..ffb4de8c5a77 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -461,6 +461,20 @@ int pci_pasid_features(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL_GPL(pci_pasid_features);
 
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if the PRI capability is present on the device (or, for a
+ * VF, on its PF); returns false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+   /* VFs share the PF PRI configuration */
+   return !!(pci_physfn(pdev)->pri_cap);
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
+
 #define PASID_NUMBER_SHIFT 8
 #define PASID_NUMBER_MASK  (0x1f << PASID_NUMBER_SHIFT)
 /**
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index f75c307f346d..073d57292445 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -28,6 +28,7 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
 void pci_disable_pri(struct pci_dev *pdev);
 int pci_reset_pri(struct pci_dev *pdev);
 int pci_prg_resp_pasid_required(struct pci_dev *pdev);
+bool pci_pri_supported(struct pci_dev *pdev);
 #endif /* CONFIG_PCI_PRI */
 
 #ifdef CONFIG_PCI_PASID
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu