[PATCH v2] memory: mtk-smi: Handle return value of clk_prepare_enable

2017-08-09 Thread Arvind Yadav
clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav 
---
changes in v2:
 Rebase on patch [1] https://lkml.org/lkml/2017/8/3/968
 and apply this change on top of it; otherwise there will be a merge conflict.

 drivers/memory/mtk-smi.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index 2b798bb..583fb8d 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -315,6 +315,7 @@ static int mtk_smi_common_probe(struct platform_device 
*pdev)
struct mtk_smi *common;
struct resource *res;
enum mtk_smi_gen smi_gen;
+   int ret;
 
if (!dev->pm_domain)
return -EPROBE_DEFER;
@@ -349,7 +350,9 @@ static int mtk_smi_common_probe(struct platform_device 
*pdev)
if (IS_ERR(common->clk_async))
return PTR_ERR(common->clk_async);
 
-   clk_prepare_enable(common->clk_async);
+   ret = clk_prepare_enable(common->clk_async);
+   if (ret)
+   return ret;
}
pm_runtime_enable(dev);
platform_set_drvdata(pdev, common);
-- 
1.9.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/2] iommu/tegra*: Add support for struct iommu_device

2017-08-09 Thread Joerg Roedel
Hi,

here are two patches to add support for 'struct iommu_device'
to the tegra iommu-drivers. This will make the iommu-core
code aware of the hardware iommus that a driver manages.

It will also add the iommus to sysfs and link them to the
devices managed by them.

The patches apply on-top of Robin's iommu-group patches.

Please review.

Regards,

Joerg

Joerg Roedel (2):
  iommu/tegra: Add support for struct iommu_device
  iommu/tegra-gart: Add support for struct iommu_device

 drivers/iommu/tegra-gart.c | 26 ++
 drivers/iommu/tegra-smmu.c | 25 +
 2 files changed, 51 insertions(+)

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/2] iommu/tegra-gart: Add support for struct iommu_device

2017-08-09 Thread Joerg Roedel
From: Joerg Roedel 

Add a struct iommu_device to each tegra-gart and register it
with the iommu-core. Also link devices added to the driver
to their respective hardware iommus.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/tegra-gart.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 29bafc6..b62f790 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -61,6 +61,8 @@ struct gart_device {
struct list_headclient;
spinlock_t  client_lock;/* for client list */
struct device   *dev;
+
+   struct iommu_device iommu;  /* IOMMU Core handle */
 };
 
 struct gart_domain {
@@ -342,12 +344,16 @@ static int gart_iommu_add_device(struct device *dev)
return PTR_ERR(group);
 
iommu_group_put(group);
+
+   iommu_device_link(&gart_handle->iommu, dev);
+
return 0;
 }
 
 static void gart_iommu_remove_device(struct device *dev)
 {
iommu_group_remove_device(dev);
+   iommu_device_unlink(&gart_handle->iommu, dev);
 }
 
 static const struct iommu_ops gart_iommu_ops = {
@@ -397,6 +403,7 @@ static int tegra_gart_probe(struct platform_device *pdev)
struct resource *res, *res_remap;
void __iomem *gart_regs;
struct device *dev = &pdev->dev;
+   int ret;
 
if (gart_handle)
return -EIO;
@@ -423,6 +430,22 @@ static int tegra_gart_probe(struct platform_device *pdev)
return -ENXIO;
}
 
+   ret = iommu_device_sysfs_add(&gart->iommu, &pdev->dev, NULL,
+dev_name(&pdev->dev));
+   if (ret) {
+   dev_err(dev, "Failed to register IOMMU in sysfs\n");
+   return ret;
+   }
+
+   iommu_device_set_ops(&gart->iommu, &gart_iommu_ops);
+
+   ret = iommu_device_register(&gart->iommu);
+   if (ret) {
+   dev_err(dev, "Failed to register IOMMU\n");
+   iommu_device_sysfs_remove(&gart->iommu);
+   return ret;
+   }
+
gart->dev = &pdev->dev;
spin_lock_init(&gart->pte_lock);
spin_lock_init(&gart->client_lock);
@@ -449,6 +472,9 @@ static int tegra_gart_remove(struct platform_device *pdev)
 {
struct gart_device *gart = platform_get_drvdata(pdev);
 
+   iommu_device_unregister(&gart->iommu);
+   iommu_device_sysfs_remove(&gart->iommu);
+
writel(0, gart->regs + GART_CONFIG);
if (gart->savedata)
vfree(gart->savedata);
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] iommu/tegra: Add support for struct iommu_device

2017-08-09 Thread Joerg Roedel
From: Joerg Roedel 

Add a struct iommu_device to each tegra-smmu and register it
with the iommu-core. Also link devices added to the driver
to their respective hardware iommus.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/tegra-smmu.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index faa9c1e..2802e12 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -36,6 +36,8 @@ struct tegra_smmu {
struct list_head list;
 
struct dentry *debugfs;
+
+   struct iommu_device iommu;  /* IOMMU Core code handle */
 };
 
 struct tegra_smmu_as {
@@ -720,6 +722,9 @@ static int tegra_smmu_add_device(struct device *dev)
 * first match.
 */
dev->archdata.iommu = smmu;
+
+   iommu_device_link(&smmu->iommu, dev);
+
break;
}
 
@@ -737,6 +742,11 @@ static int tegra_smmu_add_device(struct device *dev)
 
 static void tegra_smmu_remove_device(struct device *dev)
 {
+   struct tegra_smmu *smmu = dev->archdata.iommu;
+
+   if (smmu)
+   iommu_device_unlink(&smmu->iommu, dev);
+
dev->archdata.iommu = NULL;
iommu_group_remove_device(dev);
 }
@@ -943,6 +953,18 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
if (err < 0)
return ERR_PTR(err);
 
+   err = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, dev_name(dev));
+   if (err)
+   return ERR_PTR(err);
+
+   iommu_device_set_ops(&smmu->iommu, &tegra_smmu_ops);
+
+   err = iommu_device_register(&smmu->iommu);
+   if (err) {
+   iommu_device_sysfs_remove(&smmu->iommu);
+   return ERR_PTR(err);
+   }
+
if (IS_ENABLED(CONFIG_DEBUG_FS))
tegra_smmu_debugfs_init(smmu);
 
@@ -951,6 +973,9 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
 
 void tegra_smmu_remove(struct tegra_smmu *smmu)
 {
+   iommu_device_unregister(&smmu->iommu);
+   iommu_device_sysfs_remove(&smmu->iommu);
+
if (IS_ENABLED(CONFIG_DEBUG_FS))
tegra_smmu_debugfs_exit(smmu);
 }
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2] iommu/arm-smmu: fix null-pointer dereference in arm_smmu_add_device

2017-08-09 Thread David Daney

On 08/08/2017 04:21 AM, Will Deacon wrote:

[+ Joerg]

On Tue, Aug 08, 2017 at 11:37:40AM +0100, Robin Murphy wrote:

On 08/08/17 11:26, Artem Savkov wrote:

Commit c54451a "iommu/arm-smmu: Fix the error path in arm_smmu_add_device"
removed fwspec assignment in legacy_binding path as redundant which is
wrong. It needs to be updated after fwspec initialisation in
arm_smmu_register_legacy_master() as it is dereferenced later. Without
this there is a NULL-pointer dereference panic during boot on some hosts.


Reviewed-by: Robin Murphy 

Thanks for fixing it up, and sorry for failing to document the
unfortunately subtle logic in the first place!


Well, I was the one that messed it up:

Acked-by: Will Deacon 

Joerg, can you pick this up as a fix for 4.13, please?


I hit the Oops as well.  This patch fixes it for me on a Cavium CN88xx 
system, so:


Acked-by: David Daney 

Thanks for working on this.





Will


Signed-off-by: Artem Savkov 
---
  drivers/iommu/arm-smmu.c | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index b97188a..2d80fa8 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1519,6 +1519,13 @@ static int arm_smmu_add_device(struct device *dev)
  
  	if (using_legacy_binding) {

ret = arm_smmu_register_legacy_master(dev, &smmu);
+
+   /*
+* If dev->iommu_fwspec is initially NULL, 
arm_smmu_register_legacy_master()
+* will allocate/initialise a new one. Thus we need to update 
fwspec for
+* later use.
+*/
+   fwspec = dev->iommu_fwspec;
if (ret)
goto out_free;
} else if (fwspec && fwspec->ops == &arm_smmu_ops) {





___
linux-arm-kernel mailing list
linux-arm-ker...@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel



___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: Support SVM without PASID

2017-08-09 Thread Jean-Philippe Brucker
On 08/08/17 01:51, Bob Liu wrote:
> On 2017/8/7 20:52, Jean-Philippe Brucker wrote:
>> Hi Bob,
>>
>> On 07/08/17 13:18, Bob Liu wrote:
>>> On 2017/8/7 18:31, Jean-Philippe Brucker wrote:
 On 05/08/17 06:14, valmiki wrote:
 [...]
> Hi Jean, Thanks a lot, now i understood the flow. From vfio kernel
> documentation we fill vaddr and iova in struct vfio_iommu_type1_dma_map
> and pass them to VFIO. But if we use dynamic allocation in application
> (say malloc), do we need to use dma API to get iova and then call
> VFIO_IOMMU_MAP ioctl ?
> If application needs multiple such dynamic allocations, then it need to
> allocate large chunk and program it via VFIO_IOMMU_MAP ioctl and then
> manage rest allocations requirements from this buffer ?

 Yes, without SVM, the application allocates large buffers, allocates IOVAs
 itself, and maps them with VFIO_IOMMU_MAP. Userspace doesn't rely on the
 DMA API at all, it manages IOVAs as it wants. Sizes passed to
 VFIO_IOMMU_MAP have to be multiples of the MMU or IOMMU page granularity
 (that is at least 4kB), and both iova and vaddr have to be aligned on that
 granularity as well. So malloc isn't really suitable in this case, you'll
 need mmap. The application can then implement a small allocator to manage
 the DMA pool created with VFIO_IOMMU_MAP.

 With SVM the application binds its address space to the device, and then
 uses malloc for all DMA buffers, no need for VFIO_IOMMU_MAP.

>>>
>>> Hi Jean,
>>>
>>> I think there is another way to support SVM without PASID.
>>>
>>> Suppose there is a device in the same SOC-chip, the device access memory 
>>> through SMMU(using internal bus instead of PCIe)
>>> Once page fault, the device send an event with (vaddr, substreamID) to 
>>> SMMU, then SMMU triggers an event interrupt.
>>>
>>> In the event interrupt handler, we can implement the same logic as PRI 
>>> interrupt in your patch.
>>> What do you think about that?
>> What you're describing is the SMMU stall model for platform devices. From
>> the driver perspective it's the same as PRI and PASID (SubstreamID=PASID).
>>
> 
> Indeed!
> 
>> When a stall-capable device accesses unmapped memory, the SMMU parks the
>> transaction and sends an event marked "stall" on the event queue, with a
>> stall tag (STAG, roughly equivalent to PRG Index). The OS handles the
>> fault and sends a CMD_RESUME command with the status and the STAG. Then
>> the SMMU completes the access or terminates it.
>>
>> In a prototype I have, the stall implementation reuses most of the
> 
> Glad to hear that.
> Would you mind to share me the prototype patch?
> 
>> PASID/PRI code. The main difficulty is defining SSID and stall capability
>> in firmware, as there are no standard capability probing for platform
>> devices. Stall-capable devices must be able to wait an indefinite amount
>> of time for their DMA transactions to return, therefore the stall model
>> cannot work with PCI, only with some integrated devices.
>>
> 
> I happen to have a board with such devices and like to do the test.
> Will re-post a full version patch upstream once completing the verification.

Cool! You can find the prototype here:
git://linux-arm.org/linux-jpb.git svm/stall

Please let me know if you get anywhere with it, I'd like to get the series
moving again.

Thanks,
Jean
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH 0/4] iommu/pamu: Support generic iommu-device handles

2017-08-09 Thread Joerg Roedel
Hi,

Here is a patch-set to support the iommu_device_register()
interface in the fsl-pamu iommu driver. To make it work, a
few fixes (Patch 1 and 2) and an additional check (Patch 3)
were necessary.

Please review.

Regards,

Joerg

Joerg Roedel (4):
  iommu/pamu: Let PAMU depend on PCI
  iommu/pamu: Make driver depend on CONFIG_PHYS_64BIT
  iommu/pamu: WARN when fsl_pamu_probe() is called more than once
  iommu/pamu: Add support for generic iommu-device

 drivers/iommu/Kconfig   |  2 ++
 drivers/iommu/fsl_pamu.c| 24 
 drivers/iommu/fsl_pamu.h|  3 +++
 drivers/iommu/fsl_pamu_domain.c |  5 -
 drivers/iommu/fsl_pamu_domain.h |  2 ++
 5 files changed, 35 insertions(+), 1 deletion(-)

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/4] iommu/pamu: WARN when fsl_pamu_probe() is called more than once

2017-08-09 Thread Joerg Roedel
From: Joerg Roedel 

The function probes the PAMU hardware from device-tree
specifications. It initializes global variables and can thus
be only safely called once.

Add a check that prints a warning when it is called more
than once.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/fsl_pamu.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index a34355f..9ee8e9e 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -42,6 +42,8 @@ struct pamu_isr_data {
 static struct paace *ppaact;
 static struct paace *spaact;
 
+static bool probed;/* Has PAMU been probed? */
+
 /*
  * Table for matching compatible strings, for device tree
  * guts node, for QorIQ SOCs.
@@ -1033,6 +1035,9 @@ static int fsl_pamu_probe(struct platform_device *pdev)
 * NOTE : All PAMUs share the same LIODN tables.
 */
 
+   if (WARN_ON(probed))
+   return -EBUSY;
+
pamu_regs = of_iomap(dev->of_node, 0);
if (!pamu_regs) {
dev_err(dev, "ioremap of PAMU node failed\n");
@@ -1172,6 +1177,8 @@ static int fsl_pamu_probe(struct platform_device *pdev)
 
setup_liodns();
 
+   probed = true;
+
return 0;
 
 error_genpool:
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 4/4] iommu/pamu: Add support for generic iommu-device

2017-08-09 Thread Joerg Roedel
From: Joerg Roedel 

This patch adds a global iommu-handle to the pamu driver and
initializes it at probe time. Also link devices added to the
iommu to this handle.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/fsl_pamu.c| 17 +
 drivers/iommu/fsl_pamu.h|  3 +++
 drivers/iommu/fsl_pamu_domain.c |  5 -
 drivers/iommu/fsl_pamu_domain.h |  2 ++
 4 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index 9ee8e9e..9238a85 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -44,6 +44,8 @@ static struct paace *spaact;
 
 static bool probed;/* Has PAMU been probed? */
 
+struct iommu_device pamu_iommu;/* IOMMU core code handle */
+
 /*
  * Table for matching compatible strings, for device tree
  * guts node, for QorIQ SOCs.
@@ -1154,6 +1156,18 @@ static int fsl_pamu_probe(struct platform_device *pdev)
if (ret)
goto error_genpool;
 
+   ret = iommu_device_sysfs_add(&pamu_iommu, dev, NULL, "iommu0");
+   if (ret)
+   goto error_genpool;
+
+   iommu_device_set_ops(&pamu_iommu, &fsl_pamu_ops);
+
+   ret = iommu_device_register(&pamu_iommu);
+   if (ret) {
+   dev_err(dev, "Can't register iommu device\n");
+   goto error_sysfs;
+   }
+
pamubypenr = in_be32(&guts_regs->pamubypenr);
 
for (pamu_reg_off = 0, pamu_counter = 0x8000; pamu_reg_off < size;
@@ -1181,6 +1195,9 @@ static int fsl_pamu_probe(struct platform_device *pdev)
 
return 0;
 
+error_sysfs:
+   iommu_device_sysfs_remove(&pamu_iommu);
+
 error_genpool:
gen_pool_destroy(spaace_pool);
 
diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h
index c3434f2..fa48222 100644
--- a/drivers/iommu/fsl_pamu.h
+++ b/drivers/iommu/fsl_pamu.h
@@ -391,6 +391,9 @@ struct ome {
 #define EOE_WWSAOL  0x1e/* Write with stash allocate only and lock */
 #define EOE_VALID   0x80
 
+extern const struct iommu_ops fsl_pamu_ops;
+extern struct iommu_device pamu_iommu; /* IOMMU core code handle */
+
 /* Function prototypes */
 int pamu_domain_init(void);
 int pamu_enable_liodn(int liodn);
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index da0e1e3..914953b 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -983,11 +983,14 @@ static int fsl_pamu_add_device(struct device *dev)
 
iommu_group_put(group);
 
+   iommu_device_link(&pamu_iommu, dev);
+
return 0;
 }
 
 static void fsl_pamu_remove_device(struct device *dev)
 {
+   iommu_device_unlink(&pamu_iommu, dev);
iommu_group_remove_device(dev);
 }
 
@@ -1047,7 +1050,7 @@ static u32 fsl_pamu_get_windows(struct iommu_domain 
*domain)
return dma_domain->win_cnt;
 }
 
-static const struct iommu_ops fsl_pamu_ops = {
+const struct iommu_ops fsl_pamu_ops = {
.capable= fsl_pamu_capable,
.domain_alloc   = fsl_pamu_domain_alloc,
.domain_free= fsl_pamu_domain_free,
diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h
index f2b0f74..6d8661e 100644
--- a/drivers/iommu/fsl_pamu_domain.h
+++ b/drivers/iommu/fsl_pamu_domain.h
@@ -21,6 +21,8 @@
 
 #include "fsl_pamu.h"
 
+const struct iommu_ops fsl_pamu_ops;
+
 struct dma_window {
phys_addr_t paddr;
u64 size;
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/4] iommu/pamu: Let PAMU depend on PCI

2017-08-09 Thread Joerg Roedel
From: Joerg Roedel 

The driver does not compile when PCI is not selected, so
make it depend on it.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f73ff28..e73b7c5 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -76,6 +76,7 @@ config IOMMU_DMA
 
 config FSL_PAMU
bool "Freescale IOMMU support"
+   depends on PCI
depends on PPC_E500MC || (COMPILE_TEST && PPC)
select IOMMU_API
select GENERIC_ALLOCATOR
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/4] iommu/pamu: Make driver depend on CONFIG_PHYS_64BIT

2017-08-09 Thread Joerg Roedel
From: Joerg Roedel 

Certain address calculations in the driver make the
assumption that phys_addr_t and dma_addr_t are 64 bit wide.
Force this by depending on CONFIG_PHYS_64BIT to be set.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index e73b7c5..e163b02 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -77,6 +77,7 @@ config IOMMU_DMA
 config FSL_PAMU
bool "Freescale IOMMU support"
depends on PCI
+   depends on PHYS_64BIT
depends on PPC_E500MC || (COMPILE_TEST && PPC)
select IOMMU_API
select GENERIC_ALLOCATOR
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH 3/4] iommu: add qcom_iommu

2017-08-09 Thread Rob Clark
An iommu driver for Qualcomm "B" family devices which do implement the
ARM SMMU spec, but not in a way that is compatible with how the arm-smmu
driver is designed.  It seems SMMU_SCR1.GASRAE=1 so the global register
space is not accessible.  This means it needs to get configuration from
devicetree instead of setting it up dynamically.

In the end, other than register definitions, there is not much code to
share with arm-smmu (other than what has already been refactored out
into the pgtable helpers).

Signed-off-by: Rob Clark 
Tested-by: Riku Voipio 
---
 drivers/iommu/Kconfig  |  10 +
 drivers/iommu/Makefile |   1 +
 drivers/iommu/qcom_iommu.c | 868 +
 3 files changed, 879 insertions(+)
 create mode 100644 drivers/iommu/qcom_iommu.c

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f73ff28f77e2..92f5fd2e0e4b 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -367,4 +367,14 @@ config MTK_IOMMU_V1
 
  if unsure, say N here.
 
+config QCOM_IOMMU
+   # Note: iommu drivers cannot (yet?) be built as modules
+   bool "Qualcomm IOMMU Support"
+   depends on ARCH_QCOM || COMPILE_TEST
+   select IOMMU_API
+   select IOMMU_IO_PGTABLE_LPAE
+   select ARM_DMA_USE_IOMMU
+   help
+ Support for IOMMU on certain Qualcomm SoCs.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 195f7b997d8e..b910aea813a1 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -27,3 +27,4 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
+obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
new file mode 100644
index ..860cad1cb167
--- /dev/null
+++ b/drivers/iommu/qcom_iommu.c
@@ -0,0 +1,868 @@
+/*
+ * IOMMU API for QCOM secure IOMMUs.  Somewhat based on arm-smmu.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (C) 2013 ARM Limited
+ * Copyright (C) 2017 Red Hat
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "io-pgtable.h"
+#include "arm-smmu-regs.h"
+
+#define SMMU_INTR_SEL_NS 0x2000
+
+struct qcom_iommu_ctx;
+
+struct qcom_iommu_dev {
+   /* IOMMU core code handle */
+   struct iommu_device  iommu;
+   struct device   *dev;
+   struct clk  *iface_clk;
+   struct clk  *bus_clk;
+   void __iomem*local_base;
+   u32  sec_id;
+   u8   num_ctxs;
+   struct qcom_iommu_ctx   *ctxs[0];   /* indexed by asid-1 */
+};
+
+struct qcom_iommu_ctx {
+   struct device   *dev;
+   void __iomem*base;
+   bool secure_init;
+   u8   asid;  /* asid and ctx bank # are 1:1 */
+};
+
+struct qcom_iommu_domain {
+   struct io_pgtable_ops   *pgtbl_ops;
+   spinlock_t   pgtbl_lock;
+   struct mutex init_mutex; /* Protects iommu pointer */
+   struct iommu_domain  domain;
+   struct qcom_iommu_dev   *iommu;
+};
+
+static struct qcom_iommu_domain *to_qcom_iommu_domain(struct iommu_domain *dom)
+{
+   return container_of(dom, struct qcom_iommu_domain, domain);
+}
+
+static const struct iommu_ops qcom_iommu_ops;
+
+static struct qcom_iommu_dev * to_iommu(struct iommu_fwspec *fwspec)
+{
+   if (!fwspec || fwspec->ops != &qcom_iommu_ops)
+   return NULL;
+   return fwspec->iommu_priv;
+}
+
+static struct qcom_iommu_ctx * to_ctx(struct iommu_fwspec *fwspec, unsigned 
asid)
+{
+   struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
+   if (!qcom_iommu)
+   return NULL;
+   return qcom_iommu->ctxs[asid - 1];
+}
+
+static inline void
+iommu_writel(struct qcom_iommu_ctx *ctx, unsigned reg, u32 val)
+{
+   writel_relaxed(val, ctx->base + reg);
+}
+
+static inline void
+iommu_writeq(struct qcom_iommu_ctx *ctx, unsigned reg, u64 val)
+{
+   writeq_relaxed(val, ctx->base + reg);
+}
+
+static inline u3

[RESEND PATCH 4/4] iommu: qcom: initialize secure page table

2017-08-09 Thread Rob Clark
From: Stanimir Varbanov 

This basically gets the secure page table size, allocates memory for
secure pagetables and passes the physical address to the trusted zone.

Signed-off-by: Stanimir Varbanov 
Signed-off-by: Rob Clark 
---
 drivers/iommu/qcom_iommu.c | 64 ++
 1 file changed, 64 insertions(+)

diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 860cad1cb167..48b62aa52787 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -604,6 +604,51 @@ static void qcom_iommu_disable_clocks(struct 
qcom_iommu_dev *qcom_iommu)
clk_disable_unprepare(qcom_iommu->iface_clk);
 }
 
+static int qcom_iommu_sec_ptbl_init(struct device *dev)
+{
+   size_t psize = 0;
+   unsigned int spare = 0;
+   void *cpu_addr;
+   dma_addr_t paddr;
+   unsigned long attrs;
+   static bool allocated = false;
+   int ret;
+
+   if (allocated)
+   return 0;
+
+   ret = qcom_scm_iommu_secure_ptbl_size(spare, &psize);
+   if (ret) {
+   dev_err(dev, "failed to get iommu secure pgtable size (%d)\n",
+   ret);
+   return ret;
+   }
+
+   dev_info(dev, "iommu sec: pgtable size: %zu\n", psize);
+
+   attrs = DMA_ATTR_NO_KERNEL_MAPPING;
+
+   cpu_addr = dma_alloc_attrs(dev, psize, &paddr, GFP_KERNEL, attrs);
+   if (!cpu_addr) {
+   dev_err(dev, "failed to allocate %zu bytes for pgtable\n",
+   psize);
+   return -ENOMEM;
+   }
+
+   ret = qcom_scm_iommu_secure_ptbl_init(paddr, psize, spare);
+   if (ret) {
+   dev_err(dev, "failed to init iommu pgtable (%d)\n", ret);
+   goto free_mem;
+   }
+
+   allocated = true;
+   return 0;
+
+free_mem:
+   dma_free_attrs(dev, psize, cpu_addr, paddr, attrs);
+   return ret;
+}
+
 static int get_asid(const struct device_node *np)
 {
u32 reg;
@@ -700,6 +745,17 @@ static struct platform_driver qcom_iommu_ctx_driver = {
.remove = qcom_iommu_ctx_remove,
 };
 
+static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu)
+{
+   struct device_node *child;
+
+   for_each_child_of_node(qcom_iommu->dev->of_node, child)
+   if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec"))
+   return true;
+
+   return false;
+}
+
 static int qcom_iommu_device_probe(struct platform_device *pdev)
 {
struct device_node *child;
@@ -744,6 +800,14 @@ static int qcom_iommu_device_probe(struct platform_device 
*pdev)
return -ENODEV;
}
 
+   if (qcom_iommu_has_secure_context(qcom_iommu)) {
+   ret = qcom_iommu_sec_ptbl_init(dev);
+   if (ret) {
+   dev_err(dev, "cannot init secure pg table(%d)\n", ret);
+   return ret;
+   }
+   }
+
platform_set_drvdata(pdev, qcom_iommu);
 
pm_runtime_enable(dev);
-- 
2.13.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH 1/4] Docs: dt: document qcom iommu bindings

2017-08-09 Thread Rob Clark
Cc: devicet...@vger.kernel.org
Signed-off-by: Rob Clark 
Reviewed-by: Rob Herring 
---
 .../devicetree/bindings/iommu/qcom,iommu.txt   | 121 +
 1 file changed, 121 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iommu/qcom,iommu.txt

diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.txt 
b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt
new file mode 100644
index ..b2641ceb2b40
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt
@@ -0,0 +1,121 @@
+* QCOM IOMMU v1 Implementation
+
+Qualcomm "B" family devices which are not compatible with arm-smmu have
+a similar looking IOMMU but without access to the global register space,
+and optionally requiring additional configuration to route context irqs
+to non-secure vs secure interrupt line.
+
+** Required properties:
+
+- compatible   : Should be one of:
+
+"qcom,msm8916-iommu"
+
+ Followed by "qcom,msm-iommu-v1".
+
+- clock-names  : Should be a pair of "iface" (required for IOMMUs
+ register group access) and "bus" (required for
+ the IOMMUs underlying bus access).
+
+- clocks   : Phandles for respective clocks described by
+ clock-names.
+
+- #address-cells   : must be 1.
+
+- #size-cells  : must be 1.
+
+- #iommu-cells : Must be 1.  Index identifies the context-bank #.
+
+- ranges   : Base address and size of the iommu context banks.
+
+- qcom,iommu-secure-id  : secure-id.
+
+- List of sub-nodes, one per translation context bank.  Each sub-node
+  has the following required properties:
+
+  - compatible : Should be one of:
+- "qcom,msm-iommu-v1-ns"  : non-secure context bank
+- "qcom,msm-iommu-v1-sec" : secure context bank
+  - reg: Base address and size of context bank within the iommu
+  - interrupts : The context fault irq.
+
+** Optional properties:
+
+- reg  : Base address and size of the SMMU local base, should
+ be only specified if the iommu requires configuration
+ for routing of context bank irq's to secure vs non-
+ secure lines.  (Ie. if the iommu contains secure
+ context banks)
+
+
+** Examples:
+
+   apps_iommu: iommu@1e2 {
+   #address-cells = <1>;
+   #size-cells = <1>;
+   #iommu-cells = <1>;
+   compatible = "qcom,msm8916-iommu", "qcom,msm-iommu-v1";
+   ranges = <0 0x1e2 0x4>;
+   reg = <0x1ef 0x3000>;
+   clocks = <&gcc GCC_SMMU_CFG_CLK>,
+<&gcc GCC_APSS_TCU_CLK>;
+   clock-names = "iface", "bus";
+   qcom,iommu-secure-id = <17>;
+
+   // mdp_0:
+   iommu-ctx@4000 {
+   compatible = "qcom,msm-iommu-v1-ns";
+   reg = <0x4000 0x1000>;
+   interrupts = ;
+   };
+
+   // venus_ns:
+   iommu-ctx@5000 {
+   compatible = "qcom,msm-iommu-v1-sec";
+   reg = <0x5000 0x1000>;
+   interrupts = ;
+   };
+   };
+
+   gpu_iommu: iommu@1f08000 {
+   #address-cells = <1>;
+   #size-cells = <1>;
+   #iommu-cells = <1>;
+   compatible = "qcom,msm8916-iommu", "qcom,msm-iommu-v1";
+   ranges = <0 0x1f08000 0x1>;
+   clocks = <&gcc GCC_SMMU_CFG_CLK>,
+<&gcc GCC_GFX_TCU_CLK>;
+   clock-names = "iface", "bus";
+   qcom,iommu-secure-id = <18>;
+
+   // gfx3d_user:
+   iommu-ctx@1000 {
+   compatible = "qcom,msm-iommu-v1-ns";
+   reg = <0x1000 0x1000>;
+   interrupts = ;
+   };
+
+   // gfx3d_priv:
+   iommu-ctx@2000 {
+   compatible = "qcom,msm-iommu-v1-ns";
+   reg = <0x2000 0x1000>;
+   interrupts = ;
+   };
+   };
+
+   ...
+
+   venus: video-codec@1d0 {
+   ...
+   iommus = <&apps_iommu 5>;
+   };
+
+   mdp: mdp@1a01000 {
+   ...
+   iommus = <&apps_iommu 4>;
+   };
+
+   gpu@01c0 {
+   ...
+   iommus = <&gpu_iommu 1>, <&gpu_iommu 2>;
+   };
-- 
2.13.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH 2/4] iommu: arm-smmu: split out register defines

2017-08-09 Thread Rob Clark
I want to re-use some of these for qcom_iommu, which has (roughly) the
same context-bank registers.

Signed-off-by: Rob Clark 
---
 drivers/iommu/arm-smmu-regs.h | 220 ++
 drivers/iommu/arm-smmu.c  | 211 ++--
 2 files changed, 229 insertions(+), 202 deletions(-)
 create mode 100644 drivers/iommu/arm-smmu-regs.h

diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
new file mode 100644
index ..a1226e4ab5f8
--- /dev/null
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -0,0 +1,220 @@
+/*
+ * IOMMU API for ARM architected SMMU implementations.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * Author: Will Deacon 
+ */
+
+#ifndef _ARM_SMMU_REGS_H
+#define _ARM_SMMU_REGS_H
+
+/* Configuration registers */
+#define ARM_SMMU_GR0_sCR0  0x0
+#define sCR0_CLIENTPD  (1 << 0)
+#define sCR0_GFRE  (1 << 1)
+#define sCR0_GFIE  (1 << 2)
+#define sCR0_EXIDENABLE(1 << 3)
+#define sCR0_GCFGFRE   (1 << 4)
+#define sCR0_GCFGFIE   (1 << 5)
+#define sCR0_USFCFG(1 << 10)
+#define sCR0_VMIDPNE   (1 << 11)
+#define sCR0_PTM   (1 << 12)
+#define sCR0_FB(1 << 13)
+#define sCR0_VMID16EN  (1 << 31)
+#define sCR0_BSU_SHIFT 14
+#define sCR0_BSU_MASK  0x3
+
+/* Auxiliary Configuration register */
+#define ARM_SMMU_GR0_sACR  0x10
+
+/* Identification registers */
+#define ARM_SMMU_GR0_ID0   0x20
+#define ARM_SMMU_GR0_ID1   0x24
+#define ARM_SMMU_GR0_ID2   0x28
+#define ARM_SMMU_GR0_ID3   0x2c
+#define ARM_SMMU_GR0_ID4   0x30
+#define ARM_SMMU_GR0_ID5   0x34
+#define ARM_SMMU_GR0_ID6   0x38
+#define ARM_SMMU_GR0_ID7   0x3c
+#define ARM_SMMU_GR0_sGFSR 0x48
+#define ARM_SMMU_GR0_sGFSYNR0  0x50
+#define ARM_SMMU_GR0_sGFSYNR1  0x54
+#define ARM_SMMU_GR0_sGFSYNR2  0x58
+
+#define ID0_S1TS   (1 << 30)
+#define ID0_S2TS   (1 << 29)
+#define ID0_NTS(1 << 28)
+#define ID0_SMS(1 << 27)
+#define ID0_ATOSNS (1 << 26)
+#define ID0_PTFS_NO_AARCH32(1 << 25)
+#define ID0_PTFS_NO_AARCH32S   (1 << 24)
+#define ID0_CTTW   (1 << 14)
+#define ID0_NUMIRPT_SHIFT  16
+#define ID0_NUMIRPT_MASK   0xff
+#define ID0_NUMSIDB_SHIFT  9
+#define ID0_NUMSIDB_MASK   0xf
+#define ID0_EXIDS  (1 << 8)
+#define ID0_NUMSMRG_SHIFT  0
+#define ID0_NUMSMRG_MASK   0xff
+
+#define ID1_PAGESIZE   (1 << 31)
+#define ID1_NUMPAGENDXB_SHIFT  28
+#define ID1_NUMPAGENDXB_MASK   7
+#define ID1_NUMS2CB_SHIFT  16
+#define ID1_NUMS2CB_MASK   0xff
+#define ID1_NUMCB_SHIFT0
+#define ID1_NUMCB_MASK 0xff
+
+#define ID2_OAS_SHIFT  4
+#define ID2_OAS_MASK   0xf
+#define ID2_IAS_SHIFT  0
+#define ID2_IAS_MASK   0xf
+#define ID2_UBS_SHIFT  8
+#define ID2_UBS_MASK   0xf
+#define ID2_PTFS_4K(1 << 12)
+#define ID2_PTFS_16K   (1 << 13)
+#define ID2_PTFS_64K   (1 << 14)
+#define ID2_VMID16 (1 << 15)
+
+#define ID7_MAJOR_SHIFT4
+#define ID7_MAJOR_MASK 0xf
+
+/* Global TLB invalidation */
+#define ARM_SMMU_GR0_TLBIVMID  0x64
+#define ARM_SMMU_GR0_TLBIALLNSNH   0x68
+#define ARM_SMMU_GR0_TLBIALLH  0x6c
+#define ARM_SMMU_GR0_sTLBGSYNC 0x70
+#define ARM_SMMU_GR0_sTLBGSTATUS   0x74
+#define sTLBGSTATUS_GSACTIVE   (1 << 0)
+
+/* Stream mapping registers */
+#define ARM_SMMU_GR0_SMR(n)(0x800 + ((n) << 2))
+#define SMR_VALID  (1 << 31)
+#define SMR_MASK_SHIFT 16
+#define SMR_ID_SHIFT   0
+
+#define ARM_SMMU_GR0_S2CR(n

[RESEND PATCH 0/4] iommu: add qcom_iommu for early "B" family devices

2017-08-09 Thread Rob Clark
An iommu driver for Qualcomm "B" family devices which do implement the
ARM SMMU spec, but not in a way that arm-smmu can support.

(I initially added support to arm-smmu, but it was decided that approach
was too intrusive and it would be cleaner to have a separate driver.)

I should note that all the dependencies for this driver have been merged
since 4.12, and it is the last thing needed for having another fully-
enabled (gpu/display/video codec/etc) ARM device that is fully upstream.

These patches can also be found at:

  git://people.freedesktop.org/~robclark/linux qcom_iommu-next

  https://cgit.freedesktop.org/~robclark/linux/log/?h=qcom_iommu-next

No change since last time.

Rob Clark (3):
  Docs: dt: document qcom iommu bindings
  iommu: arm-smmu: split out register defines
  iommu: add qcom_iommu

Stanimir Varbanov (1):
  iommu: qcom: initialize secure page table

 .../devicetree/bindings/iommu/qcom,iommu.txt   | 121 +++
 drivers/iommu/Kconfig  |  10 +
 drivers/iommu/Makefile |   1 +
 drivers/iommu/arm-smmu-regs.h  | 220 +
 drivers/iommu/arm-smmu.c   | 211 +
 drivers/iommu/qcom_iommu.c | 932 +
 6 files changed, 1293 insertions(+), 202 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/iommu/qcom,iommu.txt
 create mode 100644 drivers/iommu/arm-smmu-regs.h
 create mode 100644 drivers/iommu/qcom_iommu.c

-- 
2.13.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH 0/4] iommu: add qcom_iommu for early "B" family devices

2017-08-09 Thread Rob Clark
An iommu driver for Qualcomm "B" family devices which do implement the
ARM SMMU spec, but not in a way that arm-smmu can support.

(I initially added support to arm-smmu, but it was decided that approach
was too intrusive and it would be cleaner to have a separate driver.)

I should note that all the dependencies for this driver have been merged
since 4.12, and it is the last thing needed for having another fully-
enabled (gpu/display/video codec/etc) ARM device that is fully upstream.

These patches can also be found at:

  git://people.freedesktop.org/~robclark/linux qcom_iommu-next

  https://cgit.freedesktop.org/~robclark/linux/log/?h=qcom_iommu-next

No change since last time.

Rob Clark (3):
  Docs: dt: document qcom iommu bindings
  iommu: arm-smmu: split out register defines
  iommu: add qcom_iommu

Stanimir Varbanov (1):
  iommu: qcom: initialize secure page table

 .../devicetree/bindings/iommu/qcom,iommu.txt   | 121 +++
 drivers/iommu/Kconfig  |  10 +
 drivers/iommu/Makefile |   1 +
 drivers/iommu/arm-smmu-regs.h  | 220 +
 drivers/iommu/arm-smmu.c   | 211 +
 drivers/iommu/qcom_iommu.c | 932 +
 6 files changed, 1293 insertions(+), 202 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/iommu/qcom,iommu.txt
 create mode 100644 drivers/iommu/arm-smmu-regs.h
 create mode 100644 drivers/iommu/qcom_iommu.c

-- 
2.13.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/2] iommu/omap: Rework cache functionality with DMA Streaming API

2017-08-09 Thread Laurent Pinchart
Hi Josue,

Thank you for the patches.

On Friday 28 Jul 2017 15:49:12 Josue Albarran wrote:
> Hi Joerg,
> 
> This series adapts the OMAP IOMMU driver to use the DMA API to flush
> the page table/directory table entries from the CPU caches instead of
> the ARM assembly calls. The patches are baselined on 4.13-rc1.
> 
> Following is the patch summary:
>  1. Patch 1 disables the OMAP IOMMU fault interrupts instead of
> disabling the MMU upon a fault, and resulted in recurring bus
> errors during remoteproc recovery on OMAP4. The MMU fault itself
> is triggered due to the missing PL310 L2 cache operations, and
> this patch fixes the recurring bus errors.
>  2. The second patch makes the adaptation to the DMA API for flushing
> the caches. This fixes the MMU fault triggering issues in the
> first place on OMAP4.
> 
> I have tested these patches on DRA7, OMAP5, and OMAP4 platforms using
> both OMAP IOMMU unit tests and some out-of-tree patches for exercising
> the MMUs using the OMAP remoteproc driver.
> 
> Laurent,
> Appreciate it if you can check the OMAP3ISP functionality with these
> patches once.

I apologize for the delay, I had to resurrect my Beagleboard-xM, which 
involved updating and then debugging U-Boot.

Tested-by: Laurent Pinchart 

-- 
Regards,

Laurent Pinchart

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3] iommu/s390: Add support for iommu_device handling

2017-08-09 Thread Joerg Roedel
From: Joerg Roedel 

Add support for the iommu_device_register interface to make
the s390 hardware iommus visible to the iommu core and in
sysfs.

Signed-off-by: Joerg Roedel 
---
 arch/s390/include/asm/pci.h |  7 +++
 arch/s390/pci/pci.c |  9 -
 drivers/iommu/s390-iommu.c  | 35 +++
 3 files changed, 50 insertions(+), 1 deletion(-)

v2->v3:
- Rebased to v4.13-rc4
- Fixed error path in zpci_create_device() to
  destroy the iommu instance too (noted by
  Gerald Schaefer)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index f36b4b726057..386df9adef0a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -122,6 +123,8 @@ struct zpci_dev {
unsigned long   iommu_pages;
unsigned intnext_bit;
 
+   struct iommu_device iommu_dev;  /* IOMMU core handle */
+
char res_name[16];
struct zpci_bar_struct bars[PCI_BAR_COUNT];
 
@@ -174,6 +177,10 @@ int clp_enable_fh(struct zpci_dev *, u8);
 int clp_disable_fh(struct zpci_dev *);
 int clp_get_state(u32 fid, enum zpci_state *state);
 
+/* IOMMU Interface */
+int zpci_init_iommu(struct zpci_dev *zdev);
+void zpci_destroy_iommu(struct zpci_dev *zdev);
+
 #ifdef CONFIG_PCI
 /* Error handling and recovery */
 void zpci_event_error(void *);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 7b30af5da222..001ca80fa2fe 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -776,6 +776,7 @@ void pcibios_remove_bus(struct pci_bus *bus)
 
zpci_exit_slot(zdev);
zpci_cleanup_bus_resources(zdev);
+   zpci_destroy_iommu(zdev);
zpci_free_domain(zdev);
 
spin_lock(&zpci_list_lock);
@@ -848,11 +849,15 @@ int zpci_create_device(struct zpci_dev *zdev)
if (rc)
goto out;
 
+   rc = zpci_init_iommu(zdev);
+   if (rc)
+   goto out_free;
+
mutex_init(&zdev->lock);
if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
rc = zpci_enable_device(zdev);
if (rc)
-   goto out_free;
+   goto out_destroy_iommu;
}
rc = zpci_scan_bus(zdev);
if (rc)
@@ -869,6 +874,8 @@ int zpci_create_device(struct zpci_dev *zdev)
 out_disable:
if (zdev->state == ZPCI_FN_STATE_ONLINE)
zpci_disable_device(zdev);
+out_destroy_iommu:
+   zpci_destroy_iommu(zdev);
 out_free:
zpci_free_domain(zdev);
 out:
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 8788640756a7..85f3bc52efc2 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -18,6 +18,8 @@
  */
 #define S390_IOMMU_PGSIZES (~0xFFFUL)
 
+static struct iommu_ops s390_iommu_ops;
+
 struct s390_domain {
struct iommu_domain domain;
struct list_headdevices;
@@ -166,11 +168,13 @@ static void s390_iommu_detach_device(struct iommu_domain 
*domain,
 static int s390_iommu_add_device(struct device *dev)
 {
struct iommu_group *group = iommu_group_get_for_dev(dev);
+   struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
 
if (IS_ERR(group))
return PTR_ERR(group);
 
iommu_group_put(group);
+   iommu_device_link(&zdev->iommu_dev, dev);
 
return 0;
 }
@@ -197,6 +201,7 @@ static void s390_iommu_remove_device(struct device *dev)
s390_iommu_detach_device(domain, dev);
}
 
+   iommu_device_unlink(&zdev->iommu_dev, dev);
iommu_group_remove_device(dev);
 }
 
@@ -327,6 +332,36 @@ static size_t s390_iommu_unmap(struct iommu_domain *domain,
return size;
 }
 
+int zpci_init_iommu(struct zpci_dev *zdev)
+{
+   int rc = 0;
+
+   rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
+   "s390-iommu.%08x", zdev->fid);
+   if (rc)
+   goto out_err;
+
+   iommu_device_set_ops(&zdev->iommu_dev, &s390_iommu_ops);
+
+   rc = iommu_device_register(&zdev->iommu_dev);
+   if (rc)
+   goto out_sysfs;
+
+   return 0;
+
+out_sysfs:
+   iommu_device_sysfs_remove(&zdev->iommu_dev);
+
+out_err:
+   return rc;
+}
+
+void zpci_destroy_iommu(struct zpci_dev *zdev)
+{
+   iommu_device_unregister(&zdev->iommu_dev);
+   iommu_device_sysfs_remove(&zdev->iommu_dev);
+}
+
 static struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
.domain_alloc = s390_domain_alloc,
-- 
2.12.3

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 3/3] iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801

2017-08-09 Thread Shameer Kolothum
The HiSilicon erratum 161010801 describes the limitation of HiSilicon
platforms Hip06/Hip07 to support the SMMU mappings for MSI transactions.

On these platforms GICv3 ITS translator is presented with the deviceID
by extending the MSI payload data to 64 bits to include the deviceID.
Hence, the PCIe controller on these platforms has to differentiate the
MSI payload from other DMA payloads and has to modify the MSI payload.
This basically makes it difficult for these platforms to have an SMMU
translation for MSI.

This patch implements an ACPI table based quirk to reserve the HW MSI
regions in the smmu-v3 driver, which means these address regions will
not be translated and will be excluded from IOVA allocations.

Signed-off-by: Shameer Kolothum 
---
 drivers/iommu/arm-smmu-v3.c | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 568c400..6f21dd7 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -608,6 +608,7 @@ struct arm_smmu_device {
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY   (1 << 1)
+#define ARM_SMMU_OPT_RESV_HW_MSI   (1 << 2)
u32 options;
 
struct arm_smmu_cmdqcmdq;
@@ -1934,14 +1935,29 @@ static void arm_smmu_get_resv_regions(struct device 
*dev,
  struct list_head *head)
 {
struct iommu_resv_region *region;
+   struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
+   struct arm_smmu_device *smmu = master->smmu;
int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+   int resv = 0;
 
-   region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
-prot, IOMMU_RESV_SW_MSI);
-   if (!region)
-   return;
+   if ((smmu->options & ARM_SMMU_OPT_RESV_HW_MSI)) {
 
-   list_add_tail(&region->list, head);
+   resv = iommu_dma_get_msi_resv_regions(dev, head);
+
+   if (resv < 0) {
+   dev_warn(dev, "HW MSI region resv failed: %d\n", resv);
+   return;
+   }
+   }
+
+   if (!resv) {
+   region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
+prot, IOMMU_RESV_SW_MSI);
+   if (!region)
+   return;
+
+   list_add_tail(&region->list, head);
+   }
 
iommu_dma_get_resv_regions(dev, head);
 }
@@ -2667,6 +2683,7 @@ static void acpi_smmu_get_options(u32 model, struct 
arm_smmu_device *smmu)
break;
case ACPI_IORT_SMMU_HISILICON_HI161X:
smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
+   smmu->options |= ARM_SMMU_OPT_RESV_HW_MSI;
break;
}
 
-- 
1.9.1


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 0/3] iommu/smmu-v3: Workaround for hisilicon 161010801 erratum(reserve HW MSI)

2017-08-09 Thread Shameer Kolothum
On certain HiSilicon platforms (Hip06/Hip07) the GIC ITS and
PCIe RC deviate from the standard implementation, and this breaks
PCIe MSI functionality when the SMMU is enabled.

The HiSilicon erratum 161010801 describes this limitation of certain
HiSilicon platforms to support the SMMU mappings for MSI transactions.
On these platforms GICv3 ITS translator is presented with the deviceID
by extending the MSI payload data to 64 bits to include the deviceID.
Hence, the PCIe controller on these platforms has to differentiate the
MSI payload from other DMA payloads and has to modify the MSI payload.
This basically makes it difficult for these platforms to have an SMMU
translation for MSI.

To implement this quirk, the following changes are incorporated:

1. Added a generic helper function to IORT code to retrieve and
   reserve the HW ITS address regions.
2. Added quirk to SMMUv3 to reserve HW ITS address regions based
on IORT SMMUv3 model with the help of a generic iommu helper
function.

Thanks,
Shameer

Changelog:

v5 --> v6
Addressed comments from Robin and Lorenzo:
-No change to patch#1 .
-Reverted v5 patch#2 as this might break the platforms where this quirk
is not applicable. Provided a generic function in iommu code and
added back the quirk implementation in SMMU v3 driver(patch#3)
 
v4 --> v5
Addressed comments from Robin and Lorenzo:
-Added a comment to make it clear that, for now, only straightforward 
 HW topologies are handled while reserving ITS regions(patch #1).

v3 --> v4
Rebased on 4.13-rc1.
Addressed comments from Robin, Will and Lorenzo:
-As suggested by Robin, moved the ITS msi reservation into
 iommu_dma_get_resv_regions().
-Added its_count != resv region failure case(patch #1).

v2 --> v3
Addressed comments from Lorenzo and Robin:
-Removed dev_is_pci() check in smmuV3 driver.
-Don't treat device not having an ITS mapping as an error in
 iort helper function.

v1 --> v2
-patch 2/2: Invoke iort helper fn based on fwnode type(acpi).

RFCv2 -->PATCH
-Incorporated Lorenzo's review comments.

RFC v1 --> RFC v2
Based on Robin's review comments,
-Removed  the generic erratum framework.
-Using IORT/MADT tables to retrieve the ITS base addr instead
 of vendor specific CSRT table.

Shameer Kolothum (3):
  ACPI/IORT: Add ITS address regions reservation helper
  iommu/dma: Add a helper function to reserve HW MSI address regions for
IOMMU drivers
  iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801

 drivers/acpi/arm64/iort.c| 95 ++--
 drivers/iommu/arm-smmu-v3.c  | 27 +---
 drivers/iommu/dma-iommu.c| 19 
 drivers/irqchip/irq-gic-v3-its.c |  3 +-
 include/linux/acpi_iort.h|  7 ++-
 include/linux/dma-iommu.h|  7 +++
 6 files changed, 148 insertions(+), 10 deletions(-)

-- 
1.9.1


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 2/3] iommu/dma: Add a helper function to reserve HW MSI address regions for IOMMU drivers

2017-08-09 Thread Shameer Kolothum
IOMMU drivers can use this to implement their .get_resv_regions callback
for HW MSI specific reservations(e.g. ARM GICv3 ITS MSI region).

Signed-off-by: Shameer Kolothum 
---
 drivers/iommu/dma-iommu.c | 19 +++
 include/linux/dma-iommu.h |  7 +++
 2 files changed, 26 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9d1cebe..952ecdd 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -19,6 +19,7 @@
  * along with this program.  If not, see .
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -198,6 +199,24 @@ void iommu_dma_get_resv_regions(struct device *dev, struct 
list_head *list)
 }
 EXPORT_SYMBOL(iommu_dma_get_resv_regions);
 
+/**
+ * iommu_dma_get_msi_resv_regions - Reserved region driver helper
+ * @dev: Device from iommu_get_resv_regions()
+ * @list: Reserved region list from iommu_get_resv_regions()
+ *
+ * IOMMU drivers can use this to implement their .get_resv_regions
+ * callback for HW MSI specific reservations. For now, this only
+ * covers ITS MSI region reservation using ACPI IORT helper function.
+ */
+int iommu_dma_get_msi_resv_regions(struct device *dev, struct list_head *list)
+{
+   if (!is_of_node(dev->iommu_fwspec->iommu_fwnode))
+   return iort_iommu_its_get_resv_regions(dev, list);
+
+   return -ENODEV;
+}
+EXPORT_SYMBOL(iommu_dma_get_msi_resv_regions);
+
 static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
phys_addr_t start, phys_addr_t end)
 {
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 92f2083..6062ef0 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -74,6 +74,8 @@ void iommu_dma_unmap_resource(struct device *dev, dma_addr_t 
handle,
 void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg);
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
+int iommu_dma_get_msi_resv_regions(struct device *dev, struct list_head *list);
+
 #else
 
 struct iommu_domain;
@@ -107,6 +109,11 @@ static inline void iommu_dma_get_resv_regions(struct 
device *dev, struct list_he
 {
 }
 
+static inline int iommu_dma_get_msi_resv_regions(struct device *dev, struct 
list_head *list)
+{
+   return -ENODEV;
+}
+
 #endif /* CONFIG_IOMMU_DMA */
 #endif /* __KERNEL__ */
 #endif /* __DMA_IOMMU_H */
-- 
1.9.1


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v6 1/3] ACPI/IORT: Add ITS address regions reservation helper

2017-08-09 Thread Shameer Kolothum
On some platforms ITS address regions have to be excluded from normal
IOVA allocation in that they are detected and decoded in a HW specific
way by system components and so they cannot be considered normal IOVA
address space.

Add a helper function that retrieves ITS address regions through IORT
device <-> ITS mappings and reserves them so that these regions will not
be translated by the IOMMU and will be excluded from IOVA allocations.

Signed-off-by: Shameer Kolothum 
[lorenzo.pieral...@arm.com: updated commit log/added comments]
Signed-off-by: Lorenzo Pieralisi 
---
 drivers/acpi/arm64/iort.c| 95 ++--
 drivers/irqchip/irq-gic-v3-its.c |  3 +-
 include/linux/acpi_iort.h|  7 ++-
 3 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index a3215ee..86b5a51 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -39,6 +39,7 @@
 struct iort_its_msi_chip {
struct list_headlist;
struct fwnode_handle*fw_node;
+   phys_addr_t base_addr;
u32 translation_id;
 };
 
@@ -136,14 +137,16 @@ typedef acpi_status (*iort_find_node_callback)
 static DEFINE_SPINLOCK(iort_msi_chip_lock);
 
 /**
- * iort_register_domain_token() - register domain token and related ITS ID
- * to the list from where we can get it back later on.
+ * iort_register_domain_token() - register domain token along with related
+ * ITS ID and base address to the list from where we can get it back later on.
  * @trans_id: ITS ID.
+ * @base: ITS base address.
  * @fw_node: Domain token.
  *
  * Returns: 0 on success, -ENOMEM if no memory when allocating list element
  */
-int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
+int iort_register_domain_token(int trans_id, phys_addr_t base,
+  struct fwnode_handle *fw_node)
 {
struct iort_its_msi_chip *its_msi_chip;
 
@@ -153,6 +156,7 @@ int iort_register_domain_token(int trans_id, struct 
fwnode_handle *fw_node)
 
its_msi_chip->fw_node = fw_node;
its_msi_chip->translation_id = trans_id;
+   its_msi_chip->base_addr = base;
 
spin_lock(&iort_msi_chip_lock);
list_add(&its_msi_chip->list, &iort_msi_chip_list);
@@ -481,6 +485,24 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
return -ENODEV;
 }
 
+static int __maybe_unused iort_find_its_base(u32 its_id, phys_addr_t *base)
+{
+   struct iort_its_msi_chip *its_msi_chip;
+   bool match = false;
+
+   spin_lock(&iort_msi_chip_lock);
+   list_for_each_entry(its_msi_chip, &iort_msi_chip_list, list) {
+   if (its_msi_chip->translation_id == its_id) {
+   *base = its_msi_chip->base_addr;
+   match = true;
+   break;
+   }
+   }
+   spin_unlock(&iort_msi_chip_lock);
+
+   return match ? 0 : -ENODEV;
+}
+
 /**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
@@ -639,6 +661,71 @@ int iort_add_device_replay(const struct iommu_ops *ops, 
struct device *dev)
 
return err;
 }
+
+/**
+ * iort_iommu_its_get_resv_regions - Reserved region driver helper
+ * @dev: Device from iommu_get_resv_regions()
+ * @list: Reserved region list from iommu_get_resv_regions()
+ *
+ * Returns: Number of reserved regions on success(0 if no associated ITS),
+ *  appropriate error value otherwise.
+ */
+int iort_iommu_its_get_resv_regions(struct device *dev, struct list_head *head)
+{
+   struct acpi_iort_its_group *its;
+   struct acpi_iort_node *node, *its_node = NULL;
+   int i, resv = 0;
+
+   node = iort_find_dev_node(dev);
+   if (!node)
+   return -ENODEV;
+
+   /*
+* Current logic to reserve ITS regions relies on HW topologies
+* where a given PCI or named component maps its IDs to only one
+* ITS group; if a PCI or named component can map its IDs to
+* different ITS groups through IORT mappings this function has
+* to be reworked to ensure we reserve regions for all ITS groups
+* a given PCI or named component may map IDs to.
+*/
+   if (dev_is_pci(dev)) {
+   u32 rid;
+
+   pci_for_each_dma_alias(to_pci_dev(dev), __get_pci_rid, &rid);
+   its_node = iort_node_map_id(node, rid, NULL, IORT_MSI_TYPE);
+   } else {
+   for (i = 0; i < node->mapping_count; i++) {
+   its_node = iort_node_map_platform_id(node, NULL,
+IORT_MSI_TYPE, i);
+   if (its_node)
+   break;
+   }
+   }
+
+   if (!its_node)
+   return 0;
+
+   /* Move to ITS specific data */
+   its = (struct acpi_iort_its_group *)its_node->node_data;
+
+   

[PATCH v10 12/12] iommu/amd: Disable iommu only if amd_iommu=off is specified

2017-08-09 Thread Baoquan He
It's OK to disable the IOMMU early in the normal kernel, or in the kdump
kernel when amd_iommu=off is specified. However, we should not disable it
in the kdump kernel while in-flight DMA is still ongoing.

Signed-off-by: Baoquan He 
---
v9->v10:
  Change to call disable_iommus() in the normal kernel and in the case
  that amd_iommu=off is set in the kdump kernel. Otherwise, if we are in
  the kdump kernel but amd_iommu=off is not specified, we just keep the
  IOMMU as it was in the 1st kernel.

 drivers/iommu/amd_iommu_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 88e7a6e950ae..c7d03251c80a 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2499,7 +2499,8 @@ static int __init early_amd_iommu_init(void)
goto out;
 
/* Disable any previously enabled IOMMUs */
-   disable_iommus();
+   if (!is_kdump_kernel() || amd_iommu_disabled)
+   disable_iommus();
 
if (amd_iommu_irq_remap)
amd_iommu_irq_remap = check_ioapic_information();
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 11/12] iommu/amd: Don't copy GCR3 table root pointer

2017-08-09 Thread Baoquan He
When the IOMMU is pre-enabled in the kdump kernel and a device is set up
with guest translations (DTE.GV=1), don't copy the GCR3 table root pointer;
instead, move the device over to an empty guest-cr3 table and handle the
faults in the PPR log (which answers them with INVALID). After all, these
PPR faults are recoverable for the device, and we should not allow the
device to change the old kernel's data when we don't have to.

Signed-off-by: Baoquan He 
---
v9->v10:
  Clear the DTE_FLAG_GV when handling the GCR3 table root pointer.

 drivers/iommu/amd_iommu.c   | 28 +++-
 drivers/iommu/amd_iommu_init.c  | 12 
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h | 24 
 drivers/iommu/amd_iommu_v2.c| 18 +-
 5 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index eebf4590cef9..9e8ea1907796 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -103,30 +103,6 @@ int amd_iommu_max_glx_val = -1;
 static const struct dma_map_ops amd_iommu_dma_ops;
 
 /*
- * This struct contains device specific data for the IOMMU
- */
-struct iommu_dev_data {
-   struct list_head list;/* For domain->dev_list */
-   struct list_head dev_data_list;   /* For global dev_data_list */
-   struct protection_domain *domain; /* Domain the device is bound to */
-   u16 devid;/* PCI Device ID */
-   u16 alias;/* Alias Device ID */
-   bool iommu_v2;/* Device can make use of IOMMUv2 */
-   bool passthrough; /* Device is identity mapped */
-   struct {
-   bool enabled;
-   int qdep;
-   } ats;/* ATS state */
-   bool pri_tlp; /* PASID TLB required for
-PPR completions */
-   u32 errata;   /* Bitmap for errata to apply */
-   bool use_vapic;   /* Enable device to use vapic mode */
-   bool defer_attach;
-
-   struct ratelimit_state rs;/* Ratelimit IOPF messages */
-};
-
-/*
  * general struct to manage commands send to an IOMMU
  */
 struct iommu_cmd {
@@ -386,10 +362,11 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
return dev_data;
 }
 
-static struct iommu_dev_data *get_dev_data(struct device *dev)
+struct iommu_dev_data *get_dev_data(struct device *dev)
 {
return dev->archdata.iommu;
 }
+EXPORT_SYMBOL(get_dev_data);
 
 /*
 * Find or create an IOMMU group for a acpihid device.
@@ -2540,6 +2517,7 @@ static int dir2prot(enum dma_data_direction direction)
else
return 0;
 }
+
 /*
  * This function contains common code for mapping of a physically
  * contiguous memory region into DMA address space. It is used by all
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index c348732f27d7..88e7a6e950ae 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -214,6 +214,7 @@ u16 *amd_iommu_alias_table;
  * for a specific device. It is also indexed by the PCI device id.
  */
 struct amd_iommu **amd_iommu_rlookup_table;
+EXPORT_SYMBOL(amd_iommu_rlookup_table);
 
 /*
  * This table is used to find the irq remapping table for a given device id
@@ -269,6 +270,7 @@ bool translation_pre_enabled(struct amd_iommu *iommu)
 {
return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 }
+EXPORT_SYMBOL(translation_pre_enabled);
 
 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 {
@@ -859,6 +861,7 @@ static bool copy_device_table(void)
struct amd_iommu *iommu;
u16 dom_id, dte_v, irq_v;
gfp_t gfp_flag;
+   u64 tmp;
 
if (!amd_iommu_pre_enabled)
return false;
@@ -910,6 +913,15 @@ static bool copy_device_table(void)
old_dev_tbl_cpy[devid].data[0] = 
old_devtb[devid].data[0];
old_dev_tbl_cpy[devid].data[1] = 
old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   /* If gcr3 table existed, mask it out */
+   if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
+   tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
+   tmp |= DTE_GCR3_VAL_C(~0ULL) << 
DTE_GCR3_SHIFT_C;
+   old_dev_tbl_cpy[devid].data[1] &= ~tmp;
+   tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
+   tmp |= DTE_FLAG_GV;
+   old_dev_tbl_cpy[devid].data[0] &= ~tmp;
+   }
}
 
irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index a

[PATCH v10 09/12] iommu/amd: Use is_attach_deferred call-back

2017-08-09 Thread Baoquan He
Implement the is_attach_deferred call-back and use it to defer the
domain attach from iommu driver init to device driver init when the
iommu is pre-enabled in the kdump kernel.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index dab901b4f0f9..eebf4590cef9 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -121,6 +121,7 @@ struct iommu_dev_data {
 PPR completions */
u32 errata;   /* Bitmap for errata to apply */
bool use_vapic;   /* Enable device to use vapic mode */
+   bool defer_attach;
 
struct ratelimit_state rs;/* Ratelimit IOPF messages */
 };
@@ -371,12 +372,17 @@ static u16 get_alias(struct device *dev)
 static struct iommu_dev_data *find_dev_data(u16 devid)
 {
struct iommu_dev_data *dev_data;
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
 
dev_data = search_dev_data(devid);
 
-   if (dev_data == NULL)
+   if (dev_data == NULL) {
dev_data = alloc_dev_data(devid);
 
+   if (translation_pre_enabled(iommu))
+   dev_data->defer_attach = true;
+   }
+
return dev_data;
 }
 
@@ -2477,11 +2483,18 @@ static struct iommu_group 
*amd_iommu_device_group(struct device *dev)
 static struct protection_domain *get_domain(struct device *dev)
 {
struct protection_domain *domain;
+   struct iommu_domain *io_domain;
 
if (!check_device(dev))
return ERR_PTR(-EINVAL);
 
domain = get_dev_data(dev)->domain;
+   if (domain == NULL && get_dev_data(dev)->defer_attach) {
+   get_dev_data(dev)->defer_attach = false;
+   io_domain = iommu_get_domain_for_dev(dev);
+   domain = to_pdomain(io_domain);
+   attach_device(dev, domain);
+   }
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
 
@@ -3372,6 +3385,13 @@ static void amd_iommu_apply_resv_region(struct device 
*dev,
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
 }
 
+static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+struct device *dev)
+{
+   struct iommu_dev_data *dev_data = dev->archdata.iommu;
+   return dev_data->defer_attach;
+}
+
 const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
@@ -3388,6 +3408,7 @@ const struct iommu_ops amd_iommu_ops = {
.get_resv_regions = amd_iommu_get_resv_regions,
.put_resv_regions = amd_iommu_put_resv_regions,
.apply_resv_region = amd_iommu_apply_resv_region,
+   .is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap  = AMD_IOMMU_PGSIZES,
 };
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 10/12] iommu/amd: Allocate memory below 4G for dev table if translation pre-enabled

2017-08-09 Thread Baoquan He
AMD pointed out it's unsafe to update the device-table while iommu
is enabled. It turns out that device-table pointer update is split
up into two 32bit writes in the IOMMU hardware. So updating it while
the IOMMU is enabled could have some nasty side effects.

The safe way to work around this is to always allocate the device-table
below 4G, including the old device-table in the normal kernel and the
device-table used for copying the content of the old device-table in the
kdump kernel. Meanwhile we need to check whether the address of the old
device-table is above 4G, because it might have been touched accidentally
in the corrupted 1st kernel.

Signed-off-by: Baoquan He 
---
v9->v10:
  The check on the address of old_devtb_phys should be '>= 0x100000000ULL'.

 drivers/iommu/amd_iommu_init.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index d08ad74b0928..c348732f27d7 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -885,11 +885,15 @@ static bool copy_device_table(void)
}
 
old_devtb_phys = entry & PAGE_MASK;
+   if (old_devtb_phys >= 0x1ULL) {
+   pr_err("The address of old device table is above 4G, not 
trustworthy!/n");
+   return false;
+   }
old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
if (!old_devtb)
return false;
 
-   gfp_flag = GFP_KERNEL | __GFP_ZERO;
+   gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
get_order(dev_table_size));
if (old_dev_tbl_cpy == NULL) {
@@ -2432,7 +2436,8 @@ static int __init early_amd_iommu_init(void)
 
/* Device table - directly used by all IOMMUs */
ret = -ENOMEM;
-   amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+   amd_iommu_dev_table = (void *)__get_free_pages(
+ GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
  get_order(dev_table_size));
if (amd_iommu_dev_table == NULL)
goto out;
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 08/12] iommu: Add is_attach_deferred call-back to iommu-ops

2017-08-09 Thread Baoquan He
This new call-back will be used to check whether the domain attach needs to
be deferred for now. If so, the domain attach/detach will return directly.

Signed-off-by: Baoquan He 
---
 drivers/iommu/iommu.c | 8 
 include/linux/iommu.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3f6ea160afed..86581b115b92 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1283,6 +1283,10 @@ static int __iommu_attach_device(struct iommu_domain 
*domain,
 struct device *dev)
 {
int ret;
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return 0;
+
if (unlikely(domain->ops->attach_dev == NULL))
return -ENODEV;
 
@@ -1324,6 +1328,10 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
 static void __iommu_detach_device(struct iommu_domain *domain,
  struct device *dev)
 {
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return;
+
if (unlikely(domain->ops->detach_dev == NULL))
return;
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2cb54adc4a33..63983c9e6c3a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -225,6 +225,7 @@ struct iommu_ops {
u32 (*domain_get_windows)(struct iommu_domain *domain);
 
int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
+   bool (*is_attach_deferred)(struct iommu_domain *domain, struct device 
*dev);
 
unsigned long pgsize_bitmap;
 };
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 02/12] iommu/amd: add several helper functions

2017-08-09 Thread Baoquan He
Move the code that enables a single iommu into a wrapper function,
early_enable_iommu(). This makes the later kdump changes easier.

Also add iommu_disable_command_buffer() and iommu_disable_event_buffer()
for later use.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 42 +++---
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 3f72f44fa2df..277838dbc3a6 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -634,6 +634,14 @@ static void iommu_enable_command_buffer(struct amd_iommu 
*iommu)
amd_iommu_reset_cmd_buffer(iommu);
 }
 
+/*
+ * This function disables the command buffer
+ */
+static void iommu_disable_command_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+}
+
 static void __init free_command_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
@@ -666,6 +674,14 @@ static void iommu_enable_event_buffer(struct amd_iommu 
*iommu)
iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
+/*
+ * This function disables the event log buffer
+ */
+static void iommu_disable_event_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+}
+
 static void __init free_event_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
@@ -2046,6 +2062,19 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
 #endif
 }
 
+static void early_enable_iommu(struct amd_iommu *iommu)
+{
+   iommu_disable(iommu);
+   iommu_init_flags(iommu);
+   iommu_set_device_table(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_set_exclusion_range(iommu);
+   iommu_enable_ga(iommu);
+   iommu_enable(iommu);
+   iommu_flush_all_caches(iommu);
+}
+
 /*
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
@@ -2054,17 +2083,8 @@ static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
 
-   for_each_iommu(iommu) {
-   iommu_disable(iommu);
-   iommu_init_flags(iommu);
-   iommu_set_device_table(iommu);
-   iommu_enable_command_buffer(iommu);
-   iommu_enable_event_buffer(iommu);
-   iommu_set_exclusion_range(iommu);
-   iommu_enable_ga(iommu);
-   iommu_enable(iommu);
-   iommu_flush_all_caches(iommu);
-   }
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
 
 #ifdef CONFIG_IRQ_REMAP
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 01/12] iommu/amd: Detect pre enabled translation

2017-08-09 Thread Baoquan He
Add functions to check whether translation is already enabled in IOMMU.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c  | 24 
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h |  3 +++
 3 files changed, 28 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 372303700566..3f72f44fa2df 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -258,6 +258,25 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+bool translation_pre_enabled(struct amd_iommu *iommu)
+{
+   return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
+}
+
+static void clear_translation_pre_enabled(struct amd_iommu *iommu)
+{
+   iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
+static void init_translation_status(struct amd_iommu *iommu)
+{
+   u32 ctrl;
+
+   ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+   if (ctrl & (1 amd_iommu_last_bdf)
@@ -1399,6 +1418,11 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
 
iommu->int_enabled = false;
 
+   init_translation_status(iommu);
+
+   if (translation_pre_enabled(iommu))
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+
ret = init_iommu_from_acpi(iommu, h);
if (ret)
return ret;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 466260f8a1df..a9666d2005bb 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -87,4 +87,5 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 
f)
return !!(iommu->features & f);
 }
 
+extern bool translation_pre_enabled(struct amd_iommu *iommu);
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 294a409e283b..0c98b2cf04cc 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -435,6 +435,8 @@ struct iommu_domain;
 struct irq_domain;
 struct amd_irte_ops;
 
+#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -569,6 +571,7 @@ struct amd_iommu {
struct amd_irte_ops *irte_ops;
 #endif
 
+   u32 flags;
volatile u64 __aligned(8) cmd_sem;
 };
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 07/12] iommu/amd: Do sanity check for address translation and irq remap of old dev table entry

2017-08-09 Thread Baoquan He
First, split the dev table entry copy into an address translation part
and an irq remapping part, because these two parts can be enabled
independently.

Second, do the sanity checks for address translation and irq remapping of
the old dev table entry separately.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  5 -
 drivers/iommu/amd_iommu_init.c  | 23 ---
 drivers/iommu/amd_iommu_types.h |  8 
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b22b58b33400..dab901b4f0f9 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3776,11 +3776,6 @@ EXPORT_SYMBOL(amd_iommu_device_info);
 
 static struct irq_chip amd_ir_chip;
 
-#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
-#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
-#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
-#define DTE_IRQ_REMAP_ENABLE1ULL
-
 static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
 {
u64 dte;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 959c25d997e1..d08ad74b0928 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -852,12 +852,12 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 
 static bool copy_device_table(void)
 {
+   u64 int_ctl, int_tab_len, entry, last_entry = 0;
struct dev_table_entry *old_devtb = NULL;
u32 lo, hi, devid, old_devtb_size;
phys_addr_t old_devtb_phys;
-   u64 entry, last_entry = 0;
struct amd_iommu *iommu;
-   u16 dom_id, dte_v;
+   u16 dom_id, dte_v, irq_v;
gfp_t gfp_flag;
 
if (!amd_iommu_pre_enabled)
@@ -901,8 +901,25 @@ static bool copy_device_table(void)
old_dev_tbl_cpy[devid] = old_devtb[devid];
dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
-   if (dte_v && dom_id)
+
+   if (dte_v && dom_id) {
+   old_dev_tbl_cpy[devid].data[0] = 
old_devtb[devid].data[0];
+   old_dev_tbl_cpy[devid].data[1] = 
old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+
+   irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
+   int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
+   int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK;
+   if (irq_v && (int_ctl || int_tab_len)) {
+   if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
+   (int_tab_len != DTE_IRQ_TABLE_LEN)) {
+   pr_err("Wrong old irq remapping flag: %#x\n", 
devid);
+   return false;
+   }
+
+   old_dev_tbl_cpy[devid].data[2] = 
old_devtb[devid].data[2];
+   }
}
memunmap(old_devtb);
 
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index a7f6cf8c841e..f0979183ec9b 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -250,6 +250,14 @@
 
 #define GA_GUEST_NR0x1
 
+/* Bit value definition for dte irq remapping fields*/
+#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
+#define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
+#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1)
+#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
+#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
+#define DTE_IRQ_REMAP_ENABLE1ULL
+
 #define PAGE_MODE_NONE0x00
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 06/12] iommu/amd: copy old trans table from old kernel

2017-08-09 Thread Baoquan He
Several things need to be done here:
- If the iommu is pre-enabled in a normal kernel, just disable it and print
  a warning.

- If any one of the IOMMUs is not pre-enabled in the kdump kernel, just
  continue as in a normal kernel.

- If copying the dev table of the old kernel fails, continue to proceed as
  in a normal kernel.

- Only if all IOMMUs are pre-enabled and copy dev table is done well, free
  the dev table allocated in early_amd_iommu_init() and make amd_iommu_dev_table
  point to the copied one.

- Disable and Re-enable event/cmd buffer,  install the copied DTE table
  to reg, and detect and enable guest vapic.

- Flush all caches

Signed-off-by: Baoquan He 
---
v9->v10:
  Add global variable amd_iommu_pre_enabled to mark if all IOMMUs are
  pre-enabled. If any one is not pre-enabled, just set it as false.

  Check amd_iommu_pre_enabled in copy_device_table(), return directly from
  copy_device_table() if it's false.

  Judge by the return value of copy_device_table() to decide if it will go
  through the normal handling or special handling after copy.

 drivers/iommu/amd_iommu_init.c | 59 --
 1 file changed, 51 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index e2857204d32a..959c25d997e1 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 
+#include 
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
@@ -262,6 +263,8 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+static bool __initdata amd_iommu_pre_enabled = true;
+
 bool translation_pre_enabled(struct amd_iommu *iommu)
 {
return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
@@ -857,6 +860,8 @@ static bool copy_device_table(void)
u16 dom_id, dte_v;
gfp_t gfp_flag;
 
+   if (!amd_iommu_pre_enabled)
+   return false;
 
pr_warn("Translation is already enabled - trying to copy translation 
structures\n");
for_each_iommu(iommu) {
@@ -1496,9 +1501,14 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
iommu->int_enabled = false;
 
init_translation_status(iommu);
-
-   if (translation_pre_enabled(iommu))
-   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+   if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
+   iommu_disable(iommu);
+   clear_translation_pre_enabled(iommu);
+   pr_warn("Translation was enabled for IOMMU:%d but we are not in 
kdump mode\n",
+   iommu->index);
+   }
+   if (amd_iommu_pre_enabled)
+   amd_iommu_pre_enabled = translation_pre_enabled(iommu);
 
ret = init_iommu_from_acpi(iommu, h);
if (ret)
@@ -1993,8 +2003,7 @@ static int __init init_memory_definitions(struct 
acpi_table_header *table)
 }
 
 /*
- * Init the device table to not allow DMA access for devices and
- * suppress all page faults
+ * Init the device table to not allow DMA access for devices
  */
 static void init_device_table_dma(void)
 {
@@ -2130,14 +2139,48 @@ static void early_enable_iommu(struct amd_iommu *iommu)
 
 /*
  * This function finally enables all IOMMUs found in the system after
- * they have been initialized
+ * they have been initialized.
+ *
+ * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy
+ * the old content of device table entries. Not this case or copy failed,
+ * just continue as normal kernel does.
  */
 static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
 
-   for_each_iommu(iommu)
-   early_enable_iommu(iommu);
+
+   if (!copy_device_table()) {
+   /*
+* If come here because of failure in copying device table from 
old
+* kernel with all IOMMUs enabled, print error message and try 
to
+* free allocated old_dev_tbl_cpy.
+*/
+   if (amd_iommu_pre_enabled)
+   pr_err("Failed to copy DEV table from previous 
kernel.\n");
+   if (old_dev_tbl_cpy != NULL)
+   free_pages((unsigned long)old_dev_tbl_cpy,
+   get_order(dev_table_size));
+
+   for_each_iommu(iommu) {
+   clear_translation_pre_enabled(iommu);
+   early_enable_iommu(iommu);
+   }
+   } else {
+   pr_info("Copied DEV table from previous kernel.\n");
+   free_pages((unsigned long)amd_iommu_dev_table,
+   get_order(dev_table_size));
+   amd_iommu_dev_table = old_dev_tbl_cpy;
+   for_each_iommu(iommu) {
+   iommu_disable

[PATCH v10 00/12] Fix the on-flight DMA issue on system with amd iommu

2017-08-09 Thread Baoquan He
When the kernel panics and jumps into the kdump kernel, DMA started by the
1st kernel is not stopped; this is called on-flight DMA. The current code
disables the iommu, builds a new translation table and attaches the devices
to it. This will cause:

 1. IO_PAGE_FAULT warning message can be seen.
 2. transfer data to or from incorrect areas of memory.

Sometimes this causes a dump failure or a kernel hang.

The principle of the fix is to copy the old device table so that the on-flight
DMA can continue looking it up and get correct address translation and irq remap
results, while deferring the assignment of devices to domains to the device
driver initialization stage. The old domain ids used in the 1st kernel are
reserved. A new call-back, is_attach_deferred(), is added to iommu-ops; it checks
whether the domain attach/detach needs to be deferred in the iommu-core code. If
deferral is needed, just return directly from the amd iommu attach/detach
function. The attachment will then be done in the device driver initialization
stage when get_domain() is called.

Change history:
v9->v10:
Main changes are as follows according to Joerg's suggestion. The detailed
changes will be added to each patch.
- Drop the old patch 12/13 patch.

- Move the old dev table copy out of iommu loop in copy_dev_tables().

- Add global variable amd_iommu_pre_enabled to optimize code change
  when call copy_dev_tables().

v8->v9:
Made changes according to Joerg's reviewing comments and suggestions:
- Check if all IOMMUs are pre-enabled, otherwise do not copy dev table
  and just continue as normal kernel does.

- Add a new global old_dev_tbl_cpy to point to a newly allocated device
  table. The content of old device table will be copied to the specific
  device table for copying which old_dev_tbl_cpy points at. If copy failed
  we can still use the amd_iommu_dev_table which is allocated in
  early_amd_iommu_init(). This is for better rolling back if copy failed,
  the amd_iommu_dev_table has got necessary initialization since iommu init.

- Always allocate the device tables with the GFP_DMA32 flag to make sure that
  they are below 4G. This works around the issue mentioned in patch 10/13.
  Meanwhile, double check whether the address of the old device table is above
  4G, since it could have been touched accidentally in the corrupted 1st kernel
  and would not be trustworthy any more.

v7->v8:
Rebase patchset v7 on the latest v4.13-rc1.
- And re-enable printing IO_PAGE_FAULT message in kdump kernel.

- Only disable iommu if amd_iommu=off is specified in kdump kernel.


v6->v7:
Two main changes are made according to Joerg's suggestion:
- Add is_attach_deferred call-back to iommu-ops. With this domain
  can be deferred to device driver init cleanly.

- Allocate memory below 4G for dev table if translation pre-enabled.
  AMD engineer pointed out that it's unsafe to update the device-table
  while iommu is enabled. device-table pointer update is split up into
  two 32bit writes in the IOMMU hardware. So updating it while the IOMMU
  is enabled could have some nasty side effects.

v5->v6:
According to Joerg's comments made several below main changes:
- Add sanity check when copy old dev tables.

- If a device is set up with guest translations (DTE.GV=1), then don't
  copy that information but move the device over to an empty guest-cr3
  table and handle the faults in the PPR log (which just answer them
  with INVALID).

v5:
bnx2 NIC can't reset itself during driver init. Post patch to reset
it during driver init. IO_PAGE_FAULT can't be seen anymore.

Below is link of v5 post.
https://lists.linuxfoundation.org/pipermail/iommu/2016-September/018527.html


Baoquan He (12):
  iommu/amd: Detect pre enabled translation
  iommu/amd: add several helper functions
  Revert "iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel"
  iommu/amd: Define bit fields for DTE particularly
  iommu/amd: Add function copy_dev_tables()
  iommu/amd: copy old trans table from old kernel
  iommu/amd: Do sanity check for address translation and irq remap of
old dev table entry
  iommu: Add is_attach_deferred call-back to iommu-ops
  iommu/amd: Use is_attach_deferred call-back
  iommu/amd: Allocate memory below 4G for dev table if translation
pre-enabled
  iommu/amd: Don't copy GCR3 table root pointer
  iommu/amd: Disable iommu only if amd_iommu=off is specified

 drivers/iommu/amd_iommu.c   |  65 ++--
 drivers/iommu/amd_iommu_init.c  | 223 +++-
 drivers/iommu/amd_iommu_proto.h |   2 +
 drivers/iommu/amd_iommu_types.h |  55 +-
 drivers/iommu/amd_iommu_v2.c|  18 +++-
 drivers/iommu/iommu.c   |   8 ++
 include/linux/iommu.h   |   1 +
 7 files changed, 306 insertions(+), 66 deletions(-)

-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.l

[PATCH v10 05/12] iommu/amd: Add function copy_dev_tables()

2017-08-09 Thread Baoquan He
Add function copy_dev_tables to copy the old DEV table entries of the panicked
kernel to the new allocated device table. Since all iommus share the same device
table the copy only need be done one time. Here add a new global old_dev_tbl_cpy
to point to the newly allocated device table which the content of old device
table will be copied to. Besides, we also need to:

  - Check whether all IOMMUs actually use the same device table with the same 
size

  - Verify that the size of the old device table is the expected size.

  - Reserve the old domain id occupied in 1st kernel to avoid touching the old
io-page tables. Then on-flight DMA can continue looking it up.

Also define the macro DEV_DOMID_MASK to replace the magic number 0xffffULL; it
can be reused in copy_dev_tables().

Signed-off-by: Baoquan He 
---
v9->v10:
  Move the copy code block out of the iommu loop.

  Change the return value of copy_device_table() to bool.

 drivers/iommu/amd_iommu.c   |  2 +-
 drivers/iommu/amd_iommu_init.c  | 62 +
 drivers/iommu/amd_iommu_types.h |  1 +
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 097db07354b4..b22b58b33400 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2086,7 +2086,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
flags|= tmp;
}
 
-   flags &= ~(0xUL);
+   flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
 
amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 7044510654fe..e2857204d32a 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -195,6 +195,11 @@ spinlock_t amd_iommu_pd_lock;
  * page table root pointer.
  */
 struct dev_table_entry *amd_iommu_dev_table;
+/*
+ * Pointer to a device table which the content of old device table
+ * will be copied to. It's only be used in kdump kernel.
+ */
+static struct dev_table_entry *old_dev_tbl_cpy;
 
 /*
  * The alias table is a driver specific data structure which contains the
@@ -842,6 +847,63 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 }
 
 
+static bool copy_device_table(void)
+{
+   struct dev_table_entry *old_devtb = NULL;
+   u32 lo, hi, devid, old_devtb_size;
+   phys_addr_t old_devtb_phys;
+   u64 entry, last_entry = 0;
+   struct amd_iommu *iommu;
+   u16 dom_id, dte_v;
+   gfp_t gfp_flag;
+
+
+   pr_warn("Translation is already enabled - trying to copy translation 
structures\n");
+   for_each_iommu(iommu) {
+   /* All IOMMUs should use the same device table with the same 
size */
+   lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
+   hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
+   entry = (((u64) hi) << 32) + lo;
+   if (last_entry && last_entry != entry) {
+   pr_err("IOMMU:%d should use the same dev table as 
others!/n",
+   iommu->index);
+   return false;
+   }
+   last_entry = entry;
+
+   old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
+   if (old_devtb_size != dev_table_size) {
+   pr_err("The device table size of IOMMU:%d is not 
expected!/n",
+   iommu->index);
+   return false;
+   }
+   }
+
+   old_devtb_phys = entry & PAGE_MASK;
+   old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
+   if (!old_devtb)
+   return false;
+
+   gfp_flag = GFP_KERNEL | __GFP_ZERO;
+   old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
+   get_order(dev_table_size));
+   if (old_dev_tbl_cpy == NULL) {
+   pr_err("Failed to allocate memory for copying old device 
table!/n");
+   return false;
+   }
+
+   for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+   old_dev_tbl_cpy[devid] = old_devtb[devid];
+   dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
+   dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
+   if (dte_v && dom_id)
+   __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+   memunmap(old_devtb);
+
+   return true;
+}
+
 void amd_iommu_apply_erratum_63(u16 devid)
 {
int sysmgt;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index f88e802481a3..a7f6cf8c841e 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -336,6 +336,7 @@
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
 #define DTE_GLX_MASK   (3)
+#define DEV_DOMID_MASK 0xULL
 
 #define DTE_GCR3_VAL_A(x)  (((x) >> 12) & 0x7ULL)
 

[PATCH v10 03/12] Revert "iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel"

2017-08-09 Thread Baoquan He
This reverts commit 54bd63570484167cb13edf81e31fff107b879981.

We still need the IO_PAGE_FAULT messages to warn about errors after the
issue of on-flight DMA in the kdump kernel is fixed.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   | 3 +--
 drivers/iommu/amd_iommu_init.c  | 9 -
 drivers/iommu/amd_iommu_types.h | 1 -
 3 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 354cbd6392cd..6d2fc40a086d 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2086,8 +2086,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
flags|= tmp;
}
 
-
-   flags &= ~(DTE_FLAG_SA | 0xULL);
+   flags &= ~(0xUL);
flags |= domain->id;
 
amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 277838dbc3a6..7044510654fe 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -29,7 +29,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -1942,14 +1941,6 @@ static void init_device_table_dma(void)
for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
set_dev_entry_bit(devid, DEV_ENTRY_VALID);
set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
-   /*
-* In kdump kernels in-flight DMA from the old kernel might
-* cause IO_PAGE_FAULTs. There are no reports that a kdump
-* actually failed because of that, so just disable fault
-* reporting in the hardware to get rid of the messages
-*/
-   if (is_kdump_kernel())
-   set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
}
 }
 
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 0c98b2cf04cc..db7ceb4d0957 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -322,7 +322,6 @@
 #define IOMMU_PTE_IW (1ULL << 62)
 
 #define DTE_FLAG_IOTLB (1ULL << 32)
-#define DTE_FLAG_SA(1ULL << 34)
 #define DTE_FLAG_GV(1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 04/12] iommu/amd: Define bit fields for DTE particularly

2017-08-09 Thread Baoquan He
In the AMD-Vi spec several bits of the IO PTE fields and DTE fields are similar,
so both of them can share the same macro definitions. However, defining them
separately makes the code more readable. Do it now.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  8 
 drivers/iommu/amd_iommu_types.h | 18 ++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 6d2fc40a086d..097db07354b4 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1537,9 +1537,9 @@ static int iommu_map_page(struct protection_domain *dom,
 
if (count > 1) {
__pte = PAGE_SIZE_PTE(phys_addr, page_size);
-   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -2053,7 +2053,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
-   pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+   pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
flags = amd_iommu_dev_table[devid].data[1];
 
@@ -2096,7 +2096,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 static void clear_dte_entry(u16 devid)
 {
/* remove entry from the device table seen by the hardware */
-   amd_iommu_dev_table[devid].data[0]  = IOMMU_PTE_P | IOMMU_PTE_TV;
+   amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
 
amd_iommu_apply_erratum_63(devid);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index db7ceb4d0957..f88e802481a3 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -265,7 +265,7 @@
 #define PM_LEVEL_INDEX(x, a)   (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
 #define PM_LEVEL_ENC(x)(((x) << 9) & 0xe00ULL)
 #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
-IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
 #define PM_PTE_LEVEL(pte)  (((pte) >> 9) & 0x7ULL)
 
 #define PM_MAP_4k  0
@@ -314,13 +314,23 @@
 #define PTE_LEVEL_PAGE_SIZE(level) \
(1ULL << (12 + (9 * (level))))
 
-#define IOMMU_PTE_P  (1ULL << 0)
-#define IOMMU_PTE_TV (1ULL << 1)
+/*
+ * Bit value definition for I/O PTE fields
+ */
+#define IOMMU_PTE_PR (1ULL << 0)
 #define IOMMU_PTE_U  (1ULL << 59)
 #define IOMMU_PTE_FC (1ULL << 60)
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+/*
+ * Bit value definition for DTE fields
+ */
+#define DTE_FLAG_V  (1ULL << 0)
+#define DTE_FLAG_TV (1ULL << 1)
+#define DTE_FLAG_IR (1ULL << 61)
+#define DTE_FLAG_IW (1ULL << 62)
+
 #define DTE_FLAG_IOTLB (1ULL << 32)
 #define DTE_FLAG_GV(1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
@@ -342,7 +352,7 @@
 #define GCR3_VALID 0x01ULL
 
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
-#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
+#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
 #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH v5 1/5] iommu: Add capabilities to a group

2017-08-09 Thread David Gibson
On Mon, Aug 07, 2017 at 05:25:44PM +1000, Alexey Kardashevskiy wrote:
> This introduces capabilities to IOMMU groups. The first defined
> capability is IOMMU_GROUP_CAP_ISOLATE_MSIX which tells the IOMMU
> group users that a particular IOMMU group is capable of MSIX message
> filtering; this is useful when deciding whether or not to allow mapping
> of MSIX table to the userspace. Various architectures will enable it
> when they decide that it is safe to do so.
> 
> Signed-off-by: Alexey Kardashevskiy 

Reviewed-by: David Gibson 

This seems like a reasonable concept that's probably useful for
something, whether or not it's the best approach for the problem at
hand.

> ---
>  include/linux/iommu.h | 20 
>  drivers/iommu/iommu.c | 28 
>  2 files changed, 48 insertions(+)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 2cb54adc4a33..6b6f3c2f4904 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -155,6 +155,9 @@ struct iommu_resv_region {
>   enum iommu_resv_typetype;
>  };
>  
> +/* IOMMU group capabilities */
> +#define IOMMU_GROUP_CAP_ISOLATE_MSIX (1U)
> +
>  #ifdef CONFIG_IOMMU_API
>  
>  /**
> @@ -312,6 +315,11 @@ extern void *iommu_group_get_iommudata(struct 
> iommu_group *group);
>  extern void iommu_group_set_iommudata(struct iommu_group *group,
> void *iommu_data,
> void (*release)(void *iommu_data));
> +extern void iommu_group_set_caps(struct iommu_group *group,
> +  unsigned long clearcaps,
> +  unsigned long setcaps);
> +extern bool iommu_group_is_capable(struct iommu_group *group,
> +unsigned long cap);
>  extern int iommu_group_set_name(struct iommu_group *group, const char *name);
>  extern int iommu_group_add_device(struct iommu_group *group,
> struct device *dev);
> @@ -513,6 +521,18 @@ static inline void iommu_group_set_iommudata(struct 
> iommu_group *group,
>  {
>  }
>  
> +static inline void iommu_group_set_caps(struct iommu_group *group,
> + unsigned long clearcaps,
> + unsigned long setcaps)
> +{
> +}
> +
> +static inline bool iommu_group_is_capable(struct iommu_group *group,
> +   unsigned long cap)
> +{
> + return false;
> +}
> +
>  static inline int iommu_group_set_name(struct iommu_group *group,
>  const char *name)
>  {
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 3f6ea160afed..6b2c34fe2c3d 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -52,6 +52,7 @@ struct iommu_group {
>   void (*iommu_data_release)(void *iommu_data);
>   char *name;
>   int id;
> + unsigned long caps;
>   struct iommu_domain *default_domain;
>   struct iommu_domain *domain;
>  };
> @@ -447,6 +448,33 @@ void iommu_group_set_iommudata(struct iommu_group 
> *group, void *iommu_data,
>  EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
>  
>  /**
> + * iommu_group_set_caps - Change the group capabilities
> + * @group: the group
> + * @clearcaps: capabilities mask to remove
> + * @setcaps: capabilities mask to add
> + *
> + * IOMMU groups can be capable of various features which device drivers
> + * may read and adjust the behavior.
> + */
> +void iommu_group_set_caps(struct iommu_group *group,
> + unsigned long clearcaps, unsigned long setcaps)
> +{
> + group->caps &= ~clearcaps;
> + group->caps |= setcaps;
> +}
> +EXPORT_SYMBOL_GPL(iommu_group_set_caps);
> +
> +/**
> + * iommu_group_is_capable - Returns if a group capability is present
> + * @group: the group
> + */
> +bool iommu_group_is_capable(struct iommu_group *group, unsigned long cap)
> +{
> + return !!(group->caps & cap);
> +}
> +EXPORT_SYMBOL_GPL(iommu_group_is_capable);
> +
> +/**
>   * iommu_group_set_name - set name for a group
>   * @group: the group
>   * @name: name

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [RFC PATCH v5 5/5] vfio-pci: Allow to expose MSI-X table to userspace when safe

2017-08-09 Thread David Gibson
On Mon, Aug 07, 2017 at 05:25:48PM +1000, Alexey Kardashevskiy wrote:
> Some devices have a MSIX BAR not aligned to the system page size
> greater than 4K (like 64k for ppc64) which at the moment prevents
> such MMIO pages from being mapped to the userspace for the sake of
> the MSIX BAR content protection. If such page happens to share
> the same system page with some frequently accessed registers,
> the entire system page will be emulated which can seriously affect
> performance.
> 
> This allows mapping of MSI-X tables to userspace if hardware provides
> MSIX isolation via interrupt remapping or filtering; in other words
> allowing direct access to the MSIX BAR won't do any harm to other devices
> or cause spurious interrupts visible to the kernel.
> 
> This adds a wrapping helper to check if a capability is supported by
> an IOMMU group.
> 
> Signed-off-by: Alexey Kardashevskiy 

Reviewed-by: David Gibson 

> ---
>  include/linux/vfio.h |  1 +
>  drivers/vfio/pci/vfio_pci.c  | 20 +---
>  drivers/vfio/pci/vfio_pci_rdwr.c |  5 -
>  drivers/vfio/vfio.c  | 15 +++
>  4 files changed, 37 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 586809abb273..7110bca2fb60 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -46,6 +46,7 @@ struct vfio_device_ops {
>  
>  extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
>  extern void vfio_iommu_group_put(struct iommu_group *group, struct device 
> *dev);
> +extern bool vfio_iommu_group_is_capable(struct device *dev, unsigned long 
> cap);

This diff probably belongs in the earlier patch adding the function,
rather than here where it's first used.  Not worth respinning just for
that, though.

>  extern int vfio_add_group_dev(struct device *dev,
> const struct vfio_device_ops *ops,
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index d87a0a3cda14..c4c39ed64b1e 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -561,11 +561,17 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device 
> *vdev,
>   struct vfio_region_info_cap_sparse_mmap *sparse;
>   size_t end, size;
>   int nr_areas = 2, i = 0, ret;
> + bool is_msix_isolated = vfio_iommu_group_is_capable(&vdev->pdev->dev,
> + IOMMU_GROUP_CAP_ISOLATE_MSIX);
>  
>   end = pci_resource_len(vdev->pdev, vdev->msix_bar);
>  
> - /* If MSI-X table is aligned to the start or end, only one area */
> - if (((vdev->msix_offset & PAGE_MASK) == 0) ||
> + /*
> +  * If MSI-X table is allowed to mmap because of the capability
> +  * of IRQ remapping or aligned to the start or end, only one area
> +  */
> + if (is_msix_isolated ||
> + ((vdev->msix_offset & PAGE_MASK) == 0) ||
>   (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
>   nr_areas = 1;
>  
> @@ -577,6 +583,12 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device 
> *vdev,
>  
>   sparse->nr_areas = nr_areas;
>  
> + if (is_msix_isolated) {
> + sparse->areas[i].offset = 0;
> + sparse->areas[i].size = end;
> + return 0;
> + }
> +
>   if (vdev->msix_offset & PAGE_MASK) {
>   sparse->areas[i].offset = 0;
>   sparse->areas[i].size = vdev->msix_offset & PAGE_MASK;
> @@ -1094,6 +1106,8 @@ static int vfio_pci_mmap(void *device_data, struct 
> vm_area_struct *vma)
>   unsigned int index;
>   u64 phys_len, req_len, pgoff, req_start;
>   int ret;
> + bool is_msix_isolated = vfio_iommu_group_is_capable(&vdev->pdev->dev,
> + IOMMU_GROUP_CAP_ISOLATE_MSIX);
>  
>   index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
>  
> @@ -1115,7 +1129,7 @@ static int vfio_pci_mmap(void *device_data, struct 
> vm_area_struct *vma)
>   if (req_start + req_len > phys_len)
>   return -EINVAL;
>  
> - if (index == vdev->msix_bar) {
> + if (index == vdev->msix_bar && !is_msix_isolated) {
>   /*
>* Disallow mmaps overlapping the MSI-X table; users don't
>* get to touch this directly.  We could find somewhere
> diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c 
> b/drivers/vfio/pci/vfio_pci_rdwr.c
> index 357243d76f10..7514206a5ea7 100644
> --- a/drivers/vfio/pci/vfio_pci_rdwr.c
> +++ b/drivers/vfio/pci/vfio_pci_rdwr.c
> @@ -18,6 +18,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "vfio_pci_private.h"
>  
> @@ -123,6 +124,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, 
> char __user *buf,
>   resource_size_t end;
>   void __iomem *io;
>   ssize_t done;
> + bool is_msix_isolated = vfio_iommu_group_is_capable(&vdev->pdev->dev,
> + IOMMU_GROUP_CAP_ISOLATE_MSIX);
>  
>   if (pci_resource_start(pdev, bar))