[PATCH 5/5] iommu/tegra-smmu: Add pagetable mappings to debugfs

2020-09-26 Thread Nicolin Chen
This patch dumps all active mapping entries from the pagetables
to a debugfs directory named "mappings".

Attaching an example:

SWGROUP: hc
ASID: 0
reg: 0x250
PTB_ASID: 0xe00bb880
as->pd_dma: 0xbb88
{
	[1023] 0xf00bb882 (1)
	{
		PDE   ATTR PHYS          IOVA
		#1023 0x5  0x159f5d000   0xf000
	}
}
Total PDE count: 1
Total PTE count: 1

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 130 +++--
 1 file changed, 125 insertions(+), 5 deletions(-)
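
For readers of the dump above: the IOVA column is reconstructed from the PDE
and PTE indices. A minimal standalone sketch of that arithmetic follows; the
constants are assumed to match the driver's existing SMMU_* definitions and
are listed here only so the snippet compiles on its own.

#include <stdint.h>

#define SMMU_NUM_PDE   1024	/* page-directory entries (assumed driver constant) */
#define SMMU_NUM_PTE   1024	/* page-table entries (assumed driver constant) */
#define SMMU_PDE_SHIFT 22	/* each PDE covers 4 MiB (assumed) */
#define SMMU_PTE_SHIFT 12	/* each PTE covers 4 KiB (assumed) */

/* Rebuild the IOVA shown in the "IOVA" column from the two table indices. */
static inline uint64_t smmu_debug_iova(unsigned int pd_index, unsigned int pt_index)
{
	uint64_t iova;

	iova  = (uint64_t)(pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT;
	iova |= (uint64_t)(pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;

	return iova;
}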

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 9dbc5d7183cc..53160d1ca086 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -20,6 +20,11 @@
 #include 
 #include 
 
+struct tegra_smmu_group_debug {
+   const struct tegra_smmu_swgroup *group;
+   void *priv;
+};
+
 struct tegra_smmu_group {
struct list_head list;
struct tegra_smmu *smmu;
@@ -48,6 +53,8 @@ struct tegra_smmu {
struct dentry *debugfs;
 
struct iommu_device iommu;  /* IOMMU Core code handle */
+
+   struct tegra_smmu_group_debug *group_debug;
 };
 
 struct tegra_smmu_as {
@@ -155,6 +162,9 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
 
 #define SMMU_PDE_ATTR  (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
 SMMU_PDE_NONSECURE)
+#define SMMU_PTE_ATTR  (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
+SMMU_PTE_NONSECURE)
+#define SMMU_PTE_ATTR_SHIFT(29)
 
 static unsigned int iova_pd_index(unsigned long iova)
 {
@@ -337,7 +347,7 @@ static void tegra_smmu_domain_free(struct iommu_domain *domain)
 }
 
 static const struct tegra_smmu_swgroup *
-tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup)
+tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup, int *index)
 {
const struct tegra_smmu_swgroup *group = NULL;
unsigned int i;
@@ -345,6 +355,8 @@ tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup)
for (i = 0; i < smmu->soc->num_swgroups; i++) {
if (smmu->soc->swgroups[i].swgroup == swgroup) {
group = &smmu->soc->swgroups[i];
+   if (index)
+   *index = i;
break;
}
}
@@ -353,19 +365,22 @@ tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup)
 }
 
 static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup,
- unsigned int asid)
+ struct tegra_smmu_as *as)
 {
const struct tegra_smmu_swgroup *group;
+   unsigned int asid = as->id;
unsigned int i;
u32 value;
 
-   group = tegra_smmu_find_swgroup(smmu, swgroup);
+   group = tegra_smmu_find_swgroup(smmu, swgroup, &i);
if (group) {
value = smmu_readl(smmu, group->reg);
value &= ~SMMU_ASID_MASK;
value |= SMMU_ASID_VALUE(asid);
value |= SMMU_ASID_ENABLE;
smmu_writel(smmu, value, group->reg);
+   if (smmu->group_debug)
+   smmu->group_debug[i].priv = as;
} else {
pr_warn("%s group from swgroup %u not found\n", __func__,
swgroup);
@@ -392,13 +407,15 @@ static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup,
unsigned int i;
u32 value;
 
-   group = tegra_smmu_find_swgroup(smmu, swgroup);
+   group = tegra_smmu_find_swgroup(smmu, swgroup, &i);
if (group) {
value = smmu_readl(smmu, group->reg);
value &= ~SMMU_ASID_MASK;
value |= SMMU_ASID_VALUE(asid);
value &= ~SMMU_ASID_ENABLE;
smmu_writel(smmu, value, group->reg);
+   if (smmu->group_debug)
+   smmu->group_debug[i].priv = NULL;
}
 
for (i = 0; i < smmu->soc->num_clients; i++) {
@@ -501,7 +518,7 @@ static int tegra_smmu_attach_dev(struct iommu_domain *domain,
if (err)
goto err_disable;
 
-   tegra_smmu_enable(smmu, fwspec->ids[index], as->id);
+   tegra_smmu_enable(smmu, fwspec->ids[index], as);
}
 
if (index == 0)
@@ -1078,8 +1095,96 @@ static int tegra_smmu_clients_show(struct seq_file *s, void *data)
 
 DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
 
+static int tegra_smmu_mappings_show(struct seq_file *s, void *data)
+{
+   struct tegra_smmu_group_debug *group_debug = s->private;
+   const struct tegra_smmu_swgroup *group;
+   struct tegra_smmu_as *as;
+   struct tegra_smmu *smmu;
+   int pd_index, pt_index;
+   u64 pte_count = 0;
+   u32 pde_count = 0;
+   u32 val, ptb_reg;
+   u32 *pd;
+
+   if (!group

[PATCH 3/5] iommu/tegra-smmu: Use iommu_fwspec in .probe_/.attach_device()

2020-09-26 Thread Nicolin Chen
The tegra_smmu_probe_device() function searches the DT for the iommu
phandle to get the "smmu" pointer. This works for most SMMU clients,
which exist in the DTB, but a PCI device is never added to the IOMMU
this way, since it doesn't have a DT node.

Fortunately, for a client with a DT node, tegra_smmu_probe_device()
calls tegra_smmu_of_xlate() via tegra_smmu_configure(), while for a
PCI device, of_pci_iommu_init() in the IOMMU core calls .of_xlate()
as well, even before tegra_smmu_probe_device() runs. In both cases,
tegra_smmu_of_xlate() prepares a valid iommu_fwspec pointer, which
allows us to get the mc->smmu pointer via dev_get_drvdata() by
calling driver_find_device_by_fwnode().

So this patch uses iommu_fwspec in .probe_device() and related code,
so that clients that do not exist in the DTB, especially PCI devices,
are covered as well.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 89 +++---
 drivers/memory/tegra/mc.c  |  2 +-
 include/soc/tegra/mc.h |  2 +
 3 files changed, 56 insertions(+), 37 deletions(-)
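
As background, here is a rough, simplified sketch of the flow this patch
relies on (illustrative only, assuming the usual <linux/iommu.h> helpers;
names prefixed "example_" are not in the driver): .of_xlate() records the
swgroup ID in the device's iommu_fwspec, and later callbacks read it back
instead of re-parsing the "iommus" DT property.

/* Illustrative only; not the literal driver code. */
static int example_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 id = args->args[0];		/* the swgroup ID from the DT phandle args */

	return iommu_fwspec_add_ids(dev, &id, 1);
}

static unsigned int example_first_swgroup(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	/* a real caller must handle a missing fwspec (e.g. return -ENOENT) */
	return fwspec ? fwspec->ids[0] : 0;
}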

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index b10e02073610..97a7185b4578 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -61,6 +62,8 @@ struct tegra_smmu_as {
u32 attr;
 };
 
+static const struct iommu_ops tegra_smmu_ops;
+
 static struct tegra_smmu_as *to_smmu_as(struct iommu_domain *dom)
 {
return container_of(dom, struct tegra_smmu_as, domain);
@@ -484,60 +487,49 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
 static int tegra_smmu_attach_dev(struct iommu_domain *domain,
 struct device *dev)
 {
+   struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
struct tegra_smmu_as *as = to_smmu_as(domain);
-   struct device_node *np = dev->of_node;
-   struct of_phandle_args args;
-   unsigned int index = 0;
-   int err = 0;
-
-   while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
-  &args)) {
-   unsigned int swgroup = args.args[0];
-
-   if (args.np != smmu->dev->of_node) {
-   of_node_put(args.np);
-   continue;
-   }
+   int index, err = 0;
 
-   of_node_put(args.np);
+   if (!fwspec || fwspec->ops != &tegra_smmu_ops)
+   return -ENOENT;
 
+   for (index = 0; index < fwspec->num_ids; index++) {
err = tegra_smmu_as_prepare(smmu, as);
-   if (err < 0)
-   return err;
+   if (err)
+   goto err_disable;
 
-   tegra_smmu_enable(smmu, swgroup, as->id);
-   index++;
+   tegra_smmu_enable(smmu, fwspec->ids[index], as->id);
}
 
if (index == 0)
return -ENODEV;
 
return 0;
+
+err_disable:
+   for (index--; index >= 0; index--) {
+   tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
+   tegra_smmu_as_unprepare(smmu, as);
+   }
+
+   return err;
 }
 
 static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
 {
+   struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct tegra_smmu_as *as = to_smmu_as(domain);
-   struct device_node *np = dev->of_node;
struct tegra_smmu *smmu = as->smmu;
-   struct of_phandle_args args;
unsigned int index = 0;
 
-   while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
-  &args)) {
-   unsigned int swgroup = args.args[0];
-
-   if (args.np != smmu->dev->of_node) {
-   of_node_put(args.np);
-   continue;
-   }
-
-   of_node_put(args.np);
+   if (!fwspec || fwspec->ops != &tegra_smmu_ops)
+   return;
 
-   tegra_smmu_disable(smmu, swgroup, as->id);
+   for (index = 0; index < fwspec->num_ids; index++) {
+   tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
tegra_smmu_as_unprepare(smmu, as);
-   index++;
}
 }
 
@@ -845,10 +837,25 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
return 0;
 }
 
+static struct tegra_smmu *tegra_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
+{
+   struct device *dev = driver_find_device_by_fwnode(&tegra_mc_driver.driver, fwnode);
+   struct tegra_mc *mc;
+
+   if (!dev)
+   return NULL;
+
+   put_device(dev);
+   mc = dev_get_drvdata(dev);
+
+   return mc->smmu;
+}
+
 static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
 {
struct device_node *np = dev->of_node;
   

[PATCH 1/5] iommu/tegra-smmu: Unwrap tegra_smmu_group_get

2020-09-26 Thread Nicolin Chen
The tegra_smmu_group_get() function was added to group devices in
different SWGROUPs, and it would return a NULL group pointer upon a
mismatch at tegra_smmu_find_group(), so for most clients/devices it
would very likely mismatch and need the generic_device_group()
fallback.

But now tegra_smmu_group_get() handles devices in the same SWGROUP
too, which means that it either allocates a group for every new
SWGROUP or directly returns an existing one upon matching a SWGROUP,
i.e. every device goes through this function.

So the only way to end up with a NULL group pointer in device_group()
is a failure of either devm_kzalloc() or iommu_group_alloc(). In
either case, calling generic_device_group() no longer makes sense.
Especially in the devm_kzalloc() failure case, it would cause a
problem if the fallback generic_device_group() then succeeded,
because no group->list would have been created for other devices to
match against.

This patch simply unwraps the function to clean it up.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 19 ---
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 0becdbfea306..6335285dc373 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -903,11 +903,13 @@ static void tegra_smmu_group_release(void *iommu_data)
mutex_unlock(&smmu->lock);
 }
 
-static struct iommu_group *tegra_smmu_group_get(struct tegra_smmu *smmu,
-   unsigned int swgroup)
+static struct iommu_group *tegra_smmu_device_group(struct device *dev)
 {
+   struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+   struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
const struct tegra_smmu_group_soc *soc;
struct tegra_smmu_group *group;
+   int swgroup = fwspec->ids[0];
struct iommu_group *grp;
 
/* Find group_soc associating with swgroup */
@@ -950,19 +952,6 @@ static struct iommu_group *tegra_smmu_group_get(struct tegra_smmu *smmu,
return group->group;
 }
 
-static struct iommu_group *tegra_smmu_device_group(struct device *dev)
-{
-   struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
-   struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
-   struct iommu_group *group;
-
-   group = tegra_smmu_group_get(smmu, fwspec->ids[0]);
-   if (!group)
-   group = generic_device_group(dev);
-
-   return group;
-}
-
 static int tegra_smmu_of_xlate(struct device *dev,
   struct of_phandle_args *args)
 {
-- 
2.17.1



[PATCH 4/5] iommu/tegra-smmu: Add PCI support

2020-09-26 Thread Nicolin Chen
This patch simply adds support for PCI devices.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 97a7185b4578..9dbc5d7183cc 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -935,6 +936,7 @@ static struct iommu_group *tegra_smmu_device_group(struct device *dev)
const struct tegra_smmu_group_soc *soc;
struct tegra_smmu_group *group;
int swgroup = fwspec->ids[0];
+   bool pci = dev_is_pci(dev);
struct iommu_group *grp;
 
/* Find group_soc associating with swgroup */
@@ -961,7 +963,7 @@ static struct iommu_group *tegra_smmu_device_group(struct device *dev)
group->smmu = smmu;
group->soc = soc;
 
-   group->group = iommu_group_alloc();
+   group->group = pci ? pci_device_group(dev) : iommu_group_alloc();
if (IS_ERR(group->group)) {
devm_kfree(smmu->dev, group);
mutex_unlock(&smmu->lock);
@@ -1180,6 +1182,19 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
return ERR_PTR(err);
}
 
+#ifdef CONFIG_PCI
+   if (!iommu_present(&pci_bus_type)) {
+   pci_request_acs();
+   err = bus_set_iommu(&pci_bus_type, &tegra_smmu_ops);
+   if (err < 0) {
+   bus_set_iommu(&platform_bus_type, NULL);
+   iommu_device_unregister(&smmu->iommu);
+   iommu_device_sysfs_remove(&smmu->iommu);
+   return ERR_PTR(err);
+   }
+   }
+#endif
+
if (IS_ENABLED(CONFIG_DEBUG_FS))
tegra_smmu_debugfs_init(smmu);
 
-- 
2.17.1



[PATCH 2/5] iommu/tegra-smmu: Expand mutex protection range

2020-09-26 Thread Nicolin Chen
This is used to protect against a potential race condition at
use_count, since probes of client drivers, which call attach_dev(),
may run concurrently.

Signed-off-by: Nicolin Chen 
---
 drivers/iommu/tegra-smmu.c | 34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)
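
For context, a minimal sketch of the guarded pattern the patch moves to
(illustrative only, not the literal driver code; example_one_time_setup()
is a made-up stand-in for the dma_map_page()/ASID-allocation sequence):
the use_count check and the one-time setup must sit inside the same
smmu->lock critical section, otherwise two concurrent attach_dev() probes
can both observe use_count == 0 and both perform the setup.

static int example_as_prepare(struct tegra_smmu *smmu, struct tegra_smmu_as *as)
{
	int err = 0;

	mutex_lock(&smmu->lock);

	/* check and increment under the same lock as the one-time setup */
	if (as->use_count++ > 0)
		goto unlock;

	err = example_one_time_setup(smmu, as);	/* hypothetical: map pd, alloc ASID */
	if (err)
		as->use_count--;
unlock:
	mutex_unlock(&smmu->lock);
	return err;
}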

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 6335285dc373..b10e02073610 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -256,26 +256,19 @@ static int tegra_smmu_alloc_asid(struct tegra_smmu *smmu, unsigned int *idp)
 {
unsigned long id;
 
-   mutex_lock(&smmu->lock);
-
id = find_first_zero_bit(smmu->asids, smmu->soc->num_asids);
-   if (id >= smmu->soc->num_asids) {
-   mutex_unlock(&smmu->lock);
+   if (id >= smmu->soc->num_asids)
return -ENOSPC;
-   }
 
set_bit(id, smmu->asids);
*idp = id;
 
-   mutex_unlock(&smmu->lock);
return 0;
 }
 
 static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id)
 {
-   mutex_lock(&smmu->lock);
clear_bit(id, smmu->asids);
-   mutex_unlock(&smmu->lock);
 }
 
 static bool tegra_smmu_capable(enum iommu_cap cap)
@@ -420,17 +413,21 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu,
 struct tegra_smmu_as *as)
 {
u32 value;
-   int err;
+   int err = 0;
+
+   mutex_lock(&smmu->lock);
 
if (as->use_count > 0) {
as->use_count++;
-   return 0;
+   goto err_unlock;
}
 
as->pd_dma = dma_map_page(smmu->dev, as->pd, 0, SMMU_SIZE_PD,
  DMA_TO_DEVICE);
-   if (dma_mapping_error(smmu->dev, as->pd_dma))
-   return -ENOMEM;
+   if (dma_mapping_error(smmu->dev, as->pd_dma)) {
+   err = -ENOMEM;
+   goto err_unlock;
+   }
 
/* We can't handle 64-bit DMA addresses */
if (!smmu_dma_addr_valid(smmu, as->pd_dma)) {
@@ -453,24 +450,35 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu,
as->smmu = smmu;
as->use_count++;
 
+   mutex_unlock(&smmu->lock);
+
return 0;
 
 err_unmap:
dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE);
+err_unlock:
+   mutex_unlock(&smmu->lock);
+
return err;
 }
 
 static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
struct tegra_smmu_as *as)
 {
-   if (--as->use_count > 0)
+   mutex_lock(&smmu->lock);
+
+   if (--as->use_count > 0) {
+   mutex_unlock(&smmu->lock);
return;
+   }
 
tegra_smmu_free_asid(smmu, as->id);
 
dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE);
 
as->smmu = NULL;
+
+   mutex_unlock(&smmu->lock);
 }
 
 static int tegra_smmu_attach_dev(struct iommu_domain *domain,
-- 
2.17.1



[PATCH 0/5] iommu/tegra-smmu: Adding PCI support and mappings debugfs node

2020-09-26 Thread Nicolin Chen
This series contains some follow-up patches for tegra-smmu.

There are four parts:
1. PATCH-1 is a clean-up patch for the recently applied SWGROUP change.
2. PATCH-2 fixes a potential race condition.
3. PATCH-3/4 add PCI device support.
4. PATCH-5 adds a debugfs node for phys<=>iova mappings.

Each of the four parts is more or less functionally independent, so they
may be applied separately or incrementally depending on the reviews.

Nicolin Chen (5):
  iommu/tegra-smmu: Unwrap tegra_smmu_group_get
  iommu/tegra-smmu: Expand mutex protection range
  iommu/tegra-smmu: Use iommu_fwspec in .probe_/.attach_device()
  iommu/tegra-smmu: Add PCI support
  iommu/tegra-smmu: Add pagetable mappings to debugfs

 drivers/iommu/tegra-smmu.c | 287 -
 drivers/memory/tegra/mc.c  |   2 +-
 include/soc/tegra/mc.h |   2 +
 3 files changed, 221 insertions(+), 70 deletions(-)

-- 
2.17.1



[PATCH 1/1] iommu/amd: Fix the overwritten field in IVMD header

2020-09-26 Thread Adrian Huang
From: Adrian Huang 

Commit 387caf0b759a ("iommu/amd: Treat per-device exclusion
ranges as r/w unity-mapped regions") accidentally overwrites
the 'flags' field in IVMD (struct ivmd_header) when the I/O
virtualization memory definition is associated with the
exclusion range entry. This leads to a corrupted IVMD table
(incorrect checksum). The kdump kernel reports the invalid checksum:

ACPI BIOS Warning (bug): Incorrect checksum in table [IVRS] - 0x5C, should be 0x60 (20200717/tbprint-177)
AMD-Vi: [Firmware Bug]: IVRS invalid checksum

Fix the above-mentioned issue by modifying the 'struct unity_map_entry'
member instead of the IVMD header.

Cleanup: The *exclusion_range* functions are not used anymore, so
get rid of them.

Fixes: 387caf0b759a ("iommu/amd: Treat per-device exclusion ranges as r/w unity-mapped regions")
Reported-and-tested-by: Baoquan He 
Signed-off-by: Adrian Huang 
Cc: Jerry Snitselaar 
---
 drivers/iommu/amd/init.c | 56 +++-
 1 file changed, 10 insertions(+), 46 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 445a08d23fed..1ba6b4cc56e8 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1103,25 +1103,6 @@ static int __init add_early_maps(void)
return 0;
 }
 
-/*
- * Reads the device exclusion range from ACPI and initializes the IOMMU with
- * it
- */
-static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
-{
-   if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
-   return;
-
-   /*
-* Treat per-device exclusion ranges as r/w unity-mapped regions
-* since some buggy BIOSes might lead to the overwritten exclusion
-* range (exclusion_start and exclusion_length members). This
-* happens when there are multiple exclusion ranges (IVMD entries)
-* defined in ACPI table.
-*/
-   m->flags = (IVMD_FLAG_IW | IVMD_FLAG_IR | IVMD_FLAG_UNITY_MAP);
-}
-
 /*
  * Takes a pointer to an AMD IOMMU entry in the ACPI table and
  * initializes the hardware and our data structures with it.
@@ -2073,30 +2054,6 @@ static void __init free_unity_maps(void)
}
 }
 
-/* called when we find an exclusion range definition in ACPI */
-static int __init init_exclusion_range(struct ivmd_header *m)
-{
-   int i;
-
-   switch (m->type) {
-   case ACPI_IVMD_TYPE:
-   set_device_exclusion_range(m->devid, m);
-   break;
-   case ACPI_IVMD_TYPE_ALL:
-   for (i = 0; i <= amd_iommu_last_bdf; ++i)
-   set_device_exclusion_range(i, m);
-   break;
-   case ACPI_IVMD_TYPE_RANGE:
-   for (i = m->devid; i <= m->aux; ++i)
-   set_device_exclusion_range(i, m);
-   break;
-   default:
-   break;
-   }
-
-   return 0;
-}
-
 /* called for unity map ACPI definition */
 static int __init init_unity_map_range(struct ivmd_header *m)
 {
@@ -2107,9 +2064,6 @@ static int __init init_unity_map_range(struct ivmd_header 
*m)
if (e == NULL)
return -ENOMEM;
 
-   if (m->flags & IVMD_FLAG_EXCL_RANGE)
-   init_exclusion_range(m);
-
switch (m->type) {
default:
kfree(e);
@@ -2133,6 +2087,16 @@ static int __init init_unity_map_range(struct 
ivmd_header *m)
e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
e->prot = m->flags >> 1;
 
+   /*
+* Treat per-device exclusion ranges as r/w unity-mapped regions
+* since some buggy BIOSes might lead to the overwritten exclusion
+* range (exclusion_start and exclusion_length members). This
+* happens when there are multiple exclusion ranges (IVMD entries)
+* defined in ACPI table.
+*/
+   if (m->flags & IVMD_FLAG_EXCL_RANGE)
+   e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
+
DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
" range_start: %016llx range_end: %016llx flags: %x\n", s,
PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
-- 
2.17.1



Re: [patch V2 34/46] PCI/MSI: Make arch_.*_msi_irq[s] fallbacks selectable

2020-09-26 Thread Vasily Gorbik
On Fri, Sep 25, 2020 at 09:54:52AM -0400, Qian Cai wrote:
> On Wed, 2020-08-26 at 13:17 +0200, Thomas Gleixner wrote:
> > From: Thomas Gleixner 
> > 
> > The arch_.*_msi_irq[s] fallbacks are compiled in whether an architecture
> > requires them or not. Architectures which are fully utilizing hierarchical
> > irq domains should never call into that code.
> > 
> > It's not only architectures which depend on that by implementing one or
> > more of the weak functions, there is also a bunch of drivers which relies
> > on the weak functions which invoke msi_controller::setup_irq[s] and
> > msi_controller::teardown_irq.
> > 
> > Make the architectures and drivers which rely on them select them in Kconfig
> > and if not selected replace them by stub functions which emit a warning and
> > fail the PCI/MSI interrupt allocation.
> > 
> > Signed-off-by: Thomas Gleixner 
> 
> Today's linux-next will have some warnings on s390x:
> 
> .config: https://gitlab.com/cailca/linux-mm/-/blob/master/s390.config
> 
> WARNING: unmet direct dependencies detected for PCI_MSI_ARCH_FALLBACKS
>   Depends on [n]: PCI [=n]
>   Selected by [y]:
>   - S390 [=y]
> 
> WARNING: unmet direct dependencies detected for PCI_MSI_ARCH_FALLBACKS
>   Depends on [n]: PCI [=n]
>   Selected by [y]:
>   - S390 [=y]
>

Yes, as well as on mips and sparc which also don't FORCE_PCI.
This seems to work for s390:

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b0b7acf07eb8..41136fbe909b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -192,3 +192,3 @@ config S390
select PCI_MSI  if PCI
-   select PCI_MSI_ARCH_FALLBACKS
+   select PCI_MSI_ARCH_FALLBACKS   if PCI
select SET_FS


Re: [PATCH 17/18] dma-iommu: implement ->alloc_noncoherent

2020-09-26 Thread Christoph Hellwig
On Fri, Sep 25, 2020 at 06:46:22PM +, Tomasz Figa wrote:
> > +static void *iommu_dma_alloc_noncoherent(struct device *dev, size_t size,
> > +   dma_addr_t *handle, enum dma_data_direction dir, gfp_t gfp)
> > +{
> > +   if (!gfpflags_allow_blocking(gfp)) {
> > +   struct page *page;
> > +
> > +   page = dma_common_alloc_pages(dev, size, handle, dir, gfp);
> > +   if (!page)
> > +   return NULL;
> > +   return page_address(page);
> > +   }
> > +
> > +   return iommu_dma_alloc_remap(dev, size, handle, gfp | __GFP_ZERO,
> > +PAGE_KERNEL, 0);
> 
> iommu_dma_alloc_remap() makes use of the DMA_ATTR_ALLOC_SINGLE_PAGES attribute
> to optimize the allocations for devices which don't care about how contiguous
> the backing memory is. Do you think we could add an attrs argument to this
> function and pass it there?
> 
> As ARM is being moved to the common iommu-dma layer as well, we'll probably
> make use of the argument to support the DMA_ATTR_NO_KERNEL_MAPPING attribute 
> to
> conserve the vmalloc area.

We could probably add it.  However I wonder why this is something the
drivers should care about.  Isn't this really something that should
be a kernel-wide policy for a given system?
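
Concretely, the suggestion would amount to something like the sketch below
(illustrative; it assumes the trailing 0 in the hunk above is
iommu_dma_alloc_remap()'s attrs argument, and simply threads a
caller-supplied attrs through):

static void *iommu_dma_alloc_noncoherent_attrs(struct device *dev, size_t size,
		dma_addr_t *handle, enum dma_data_direction dir, gfp_t gfp,
		unsigned long attrs)
{
	if (!gfpflags_allow_blocking(gfp)) {
		struct page *page;

		page = dma_common_alloc_pages(dev, size, handle, dir, gfp);
		return page ? page_address(page) : NULL;
	}

	/* e.g. DMA_ATTR_ALLOC_SINGLE_PAGES would now reach the remap path */
	return iommu_dma_alloc_remap(dev, size, handle, gfp | __GFP_ZERO,
				     PAGE_KERNEL, attrs);
}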


Re: [PATCH 3/5] iommu/tegra-smmu: Use iommu_fwspec in .probe_/.attach_device()

2020-09-26 Thread Dmitry Osipenko
26.09.2020 11:07, Nicolin Chen wrote:
...
> + /* NULL smmu pointer means that SMMU driver is not probed yet */
> + if (unlikely(!smmu))
> + return ERR_PTR(-EPROBE_DEFER);

Hello, Nicolin!

Please don't pollute the code with likely/unlikely. This is not
performance-critical code.

...
> -static struct platform_driver tegra_mc_driver = {
> +struct platform_driver tegra_mc_driver = {
>   .driver = {
>   .name = "tegra-mc",
>   .of_match_table = tegra_mc_of_match,
> diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
> index 1238e35653d1..49a4cf64c4b9 100644
> --- a/include/soc/tegra/mc.h
> +++ b/include/soc/tegra/mc.h
> @@ -184,4 +184,6 @@ struct tegra_mc {
>  int tegra_mc_write_emem_configuration(struct tegra_mc *mc, unsigned long 
> rate);
>  unsigned int tegra_mc_get_emem_device_count(struct tegra_mc *mc);
>  
> +extern struct platform_driver tegra_mc_driver;

No global variables, please. See this for an example:

https://elixir.bootlin.com/linux/v5.9-rc6/source/drivers/devfreq/tegra20-devfreq.c#L100

tegra_get_memory_controller() is now needed by multiple Tegra
drivers; I think it would be good to add it to the MC driver and
then make it globally available to all drivers by making use of
of_find_matching_node_and_match().

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index e1db209fd2ea..ed1bd6d00aaf 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -43,6 +43,29 @@ static const struct of_device_id tegra_mc_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, tegra_mc_of_match);

+struct tegra_mc *tegra_get_memory_controller(void)
+{
+   struct platform_device *pdev;
+   struct device_node *np;
+   struct tegra_mc *mc;
+
+   np = of_find_matching_node_and_match(NULL, tegra_mc_of_match, NULL);
+   if (!np)
+   return ERR_PTR(-ENOENT);
+
+   pdev = of_find_device_by_node(np);
+   of_node_put(np);
+   if (!pdev)
+   return ERR_PTR(-ENODEV);
+
+   mc = platform_get_drvdata(pdev);
+   if (!mc)
+   return ERR_PTR(-EPROBE_DEFER);
+
+   return mc;
+}
+EXPORT_SYMBOL_GPL(tegra_get_memory_controller);
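
A hypothetical caller sketch (tegra_smmu_from_mc() is a made-up name) of
how the SMMU side could then resolve its tegra_smmu from the helper above:

static struct tegra_smmu *tegra_smmu_from_mc(void)
{
	struct tegra_mc *mc = tegra_get_memory_controller();

	if (IS_ERR(mc))
		return ERR_CAST(mc);

	/* the SMMU part of the MC driver may not have probed yet */
	if (!mc->smmu)
		return ERR_PTR(-EPROBE_DEFER);

	return mc->smmu;
}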

Re: [PATCH 5/5] iommu/tegra-smmu: Add pagetable mappings to debugfs

2020-09-26 Thread Dmitry Osipenko
26.09.2020 11:07, Nicolin Chen wrote:
...
> +static int tegra_smmu_mappings_show(struct seq_file *s, void *data)
> +{
> + struct tegra_smmu_group_debug *group_debug = s->private;
> + const struct tegra_smmu_swgroup *group;
> + struct tegra_smmu_as *as;
> + struct tegra_smmu *smmu;
> + int pd_index, pt_index;
> + u64 pte_count = 0;
> + u32 pde_count = 0;
> + u32 val, ptb_reg;
> + u32 *pd;
> +
> + if (!group_debug || !group_debug->priv || !group_debug->group)
> + return 0;
> +
> + group = group_debug->group;
> + as = group_debug->priv;
> + smmu = as->smmu;
> +
> + val = smmu_readl(smmu, group->reg) & SMMU_ASID_ENABLE;
> + if (!val)
> + return 0;
> +
> + pd = page_address(as->pd);
> + if (!pd)
> + return 0;
> +
> + seq_printf(s, "\nSWGROUP: %s\nASID: %d\nreg: 0x%x\n", group->name,
> + as->id, group->reg);
> +
> + mutex_lock(&smmu->lock);
> + smmu_writel(smmu, as->id & 0x7f, SMMU_PTB_ASID);
> + ptb_reg = smmu_readl(smmu, SMMU_PTB_DATA);

I think the whole traversal needs locking protection, doesn't it?

> + mutex_unlock(&smmu->lock);
> +
> + seq_printf(s, "PTB_ASID: 0x%x\nas->pd_dma: 0x%llx\n", ptb_reg, 
> as->pd_dma);
> + seq_puts(s, "{\n");
> +
> + for (pd_index = 0; pd_index < SMMU_NUM_PDE; pd_index++) {
> + struct page *pt;
> + u32 *addr;
> +
> + if (!as->count[pd_index] || !pd[pd_index])
> + continue;
> +
> + pde_count++;
> + pte_count += as->count[pd_index];
> + seq_printf(s, "\t[%d] 0x%x (%d)\n",
> +pd_index, pd[pd_index], as->count[pd_index]);
> + pt = as->pts[pd_index];
> + addr = page_address(pt);

This needs as->lock protection.

> + seq_puts(s, "\t{\n");
> + seq_printf(s, "\t\t%-5s %-4s %12s %12s\n", "PDE", "ATTR", 
> "PHYS", "IOVA");
> + for (pt_index = 0; pt_index < SMMU_NUM_PTE; pt_index++) {
> + u64 iova;
> +
> + if (!addr[pt_index])
> + continue;
> +
> + iova = ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << 
> SMMU_PDE_SHIFT;
> + iova |= ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << 
> SMMU_PTE_SHIFT;
> +
> + seq_printf(s, "\t\t#%-4d 0x%-4x 0x%-12llx 0x%-12llx\n",
> +pt_index, addr[pt_index] >> 
> SMMU_PTE_ATTR_SHIFT,
> +SMMU_PFN_PHYS(addr[pt_index] & 
> ~SMMU_PTE_ATTR), iova);
> + }
> + seq_puts(s, "\t}\n");
> + }
> + seq_puts(s, "}\n");
> + seq_printf(s, "Total PDE count: %d\n", pde_count);
> + seq_printf(s, "Total PTE count: %lld\n", pte_count);
> +
> + return 0;
> +}
> +
> +DEFINE_SHOW_ATTRIBUTE(tegra_smmu_mappings);
> +
>  static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
>  {
> + const struct tegra_smmu_soc *soc = smmu->soc;
> + struct tegra_smmu_group_debug *group_debug;
> + struct device *dev = smmu->dev;
> + struct dentry *d;
> + int i;
> +
> + group_debug = devm_kzalloc(dev, sizeof(*group_debug) * 
> soc->num_swgroups, GFP_KERNEL);

devm_kcalloc()

Re: [PATCH 17/18] dma-iommu: implement ->alloc_noncoherent

2020-09-26 Thread Tomasz Figa
On Sat, Sep 26, 2020 at 4:14 PM Christoph Hellwig  wrote:
>
> On Fri, Sep 25, 2020 at 06:46:22PM +, Tomasz Figa wrote:
> > > +static void *iommu_dma_alloc_noncoherent(struct device *dev, size_t size,
> > > +   dma_addr_t *handle, enum dma_data_direction dir, gfp_t gfp)
> > > +{
> > > +   if (!gfpflags_allow_blocking(gfp)) {
> > > +   struct page *page;
> > > +
> > > +   page = dma_common_alloc_pages(dev, size, handle, dir, gfp);
> > > +   if (!page)
> > > +   return NULL;
> > > +   return page_address(page);
> > > +   }
> > > +
> > > +   return iommu_dma_alloc_remap(dev, size, handle, gfp | __GFP_ZERO,
> > > +PAGE_KERNEL, 0);
> >
> > iommu_dma_alloc_remap() makes use of the DMA_ATTR_ALLOC_SINGLE_PAGES 
> > attribute
> > to optimize the allocations for devices which don't care about how 
> > contiguous
> > the backing memory is. Do you think we could add an attrs argument to this
> > function and pass it there?
> >
> > As ARM is being moved to the common iommu-dma layer as well, we'll probably
> > make use of the argument to support the DMA_ATTR_NO_KERNEL_MAPPING 
> > attribute to
> > conserve the vmalloc area.
>
> We could probably add it.  However I wonder why this is something the
> drivers should care about.  Isn't this really something that should
> be a kernel-wide policy for a given system?

There are IOMMUs out there which support huge pages and those can
benefit *some* hardware depending on what kind of accesses they
perform, possibly on a per-buffer basis. At the same time, order > 0
allocations can be expensive, significantly affecting allocation
latency, so for devices which don't care about huge pages one would
prefer simple single-page allocations. Currently the drivers know the
best on whether the hardware they drive would care. There are some
decision factors listed in the documentation [1].

I can imagine cases where drivers could not be the best to decide
about this - for example, the workload could vary depending on the
userspace or a product decision regarding the performance vs
allocation latency, but we haven't seen such cases in practice yet.

[1] 
https://www.kernel.org/doc/html/latest/core-api/dma-attributes.html?highlight=dma_attr_alloc_single_pages#dma-attr-alloc-single-pages
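
As a concrete (hypothetical) example of that driver-side decision, a device
that prefers low allocation latency over IOMMU huge pages would simply opt
in per buffer; dma_alloc_attrs() and DMA_ATTR_ALLOC_SINGLE_PAGES are
existing APIs, while the wrapper name and parameters below are placeholders:

static void *example_alloc_small_pages(struct device *dev, size_t size,
				       dma_addr_t *handle)
{
	return dma_alloc_attrs(dev, size, handle, GFP_KERNEL,
			       DMA_ATTR_ALLOC_SINGLE_PAGES);
}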

Best regards,
Tomasz


Re: [PATCH 3/5] iommu/tegra-smmu: Use iommu_fwspec in .probe_/.attach_device()

2020-09-26 Thread Nicolin Chen
Hi Dmitry,

Thank you for the review and comments!

On Sat, Sep 26, 2020 at 05:48:17PM +0300, Dmitry Osipenko wrote:
> 26.09.2020 11:07, Nicolin Chen wrote:
> ...
> > +   /* NULL smmu pointer means that SMMU driver is not probed yet */
> > +   if (unlikely(!smmu))
> > +   return ERR_PTR(-EPROBE_DEFER);
> 
> Hello, Nicolin!
> 
> Please don't pollute code with likely/unlikely. This is not a
> performance-critical code.

Will drop it. Thanks.

> ...
> > -static struct platform_driver tegra_mc_driver = {
> > +struct platform_driver tegra_mc_driver = {
> > .driver = {
> > .name = "tegra-mc",
> > .of_match_table = tegra_mc_of_match,
> > diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
> > index 1238e35653d1..49a4cf64c4b9 100644
> > --- a/include/soc/tegra/mc.h
> > +++ b/include/soc/tegra/mc.h
> > @@ -184,4 +184,6 @@ struct tegra_mc {
> >  int tegra_mc_write_emem_configuration(struct tegra_mc *mc, unsigned long 
> > rate);
> >  unsigned int tegra_mc_get_emem_device_count(struct tegra_mc *mc);
> >  
> > +extern struct platform_driver tegra_mc_driver;
> 
> No global variables, please. See for the example:
> 
> https://elixir.bootlin.com/linux/v5.9-rc6/source/drivers/devfreq/tegra20-devfreq.c#L100

Will fix it. Thanks for the example.

> 
> The tegra_get_memory_controller() is now needed by multiple Tegra
> drivers, I think it should be good to have it added into the MC driver
> and then make it globally available for all drivers by making use of
> of_find_matching_node_and_match().
> 
> diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
> index e1db209fd2ea..ed1bd6d00aaf 100644
> --- a/drivers/memory/tegra/mc.c
> +++ b/drivers/memory/tegra/mc.c
> @@ -43,6 +43,29 @@ static const struct of_device_id tegra_mc_of_match[] = {
>  };
>  MODULE_DEVICE_TABLE(of, tegra_mc_of_match);
> 
> +struct tegra_mc *tegra_get_memory_controller(void)
> +{
> + struct platform_device *pdev;
> + struct device_node *np;
> + struct tegra_mc *mc;
> +
> + np = of_find_matching_node_and_match(NULL, tegra_mc_of_match, NULL);
> + if (!np)
> + return ERR_PTR(-ENOENT);
> +
> + pdev = of_find_device_by_node(np);
> + of_node_put(np);
> + if (!pdev)
> + return ERR_PTR(-ENODEV);
> +
> + mc = platform_get_drvdata(pdev);
> + if (!mc)
> + return ERR_PTR(-EPROBE_DEFER);
> +
> + return mc;
> +}
> +EXPORT_SYMBOL_GPL(tegra_get_memory_controller);

Will try this one and integrate into my next version.

Thanks
Nic

Re: [PATCH 5/5] iommu/tegra-smmu: Add pagetable mappings to debugfs

2020-09-26 Thread Nicolin Chen
Hi Dmitry,

Thank you for the review.

On Sat, Sep 26, 2020 at 05:48:54PM +0300, Dmitry Osipenko wrote:
> 26.09.2020 11:07, Nicolin Chen wrote:
> ...
> > +static int tegra_smmu_mappings_show(struct seq_file *s, void *data)
> > +{
> > +   struct tegra_smmu_group_debug *group_debug = s->private;
> > +   const struct tegra_smmu_swgroup *group;
> > +   struct tegra_smmu_as *as;
> > +   struct tegra_smmu *smmu;
> > +   int pd_index, pt_index;
> > +   u64 pte_count = 0;
> > +   u32 pde_count = 0;
> > +   u32 val, ptb_reg;
> > +   u32 *pd;
> > +
> > +   if (!group_debug || !group_debug->priv || !group_debug->group)
> > +   return 0;
> > +
> > +   group = group_debug->group;
> > +   as = group_debug->priv;
> > +   smmu = as->smmu;
> > +
> > +   val = smmu_readl(smmu, group->reg) & SMMU_ASID_ENABLE;
> > +   if (!val)
> > +   return 0;
> > +
> > +   pd = page_address(as->pd);
> > +   if (!pd)
> > +   return 0;
> > +
> > +   seq_printf(s, "\nSWGROUP: %s\nASID: %d\nreg: 0x%x\n", group->name,
> > +   as->id, group->reg);
> > +
> > +   mutex_lock(&smmu->lock);
> > +   smmu_writel(smmu, as->id & 0x7f, SMMU_PTB_ASID);
> > +   ptb_reg = smmu_readl(smmu, SMMU_PTB_DATA);
> 
> I think the whole traverse needs a locking protection, doesn't it?

Hmm..probably would be nicer. Will move the mutex to the top.

> > +   mutex_unlock(&smmu->lock);
> > +
> > +   seq_printf(s, "PTB_ASID: 0x%x\nas->pd_dma: 0x%llx\n", ptb_reg, 
> > as->pd_dma);
> > +   seq_puts(s, "{\n");
> > +
> > +   for (pd_index = 0; pd_index < SMMU_NUM_PDE; pd_index++) {
> > +   struct page *pt;
> > +   u32 *addr;
> > +
> > +   if (!as->count[pd_index] || !pd[pd_index])
> > +   continue;
> > +
> > +   pde_count++;
> > +   pte_count += as->count[pd_index];
> > +   seq_printf(s, "\t[%d] 0x%x (%d)\n",
> > +  pd_index, pd[pd_index], as->count[pd_index]);
> > +   pt = as->pts[pd_index];
> > +   addr = page_address(pt);
> 
> This needs as->lock protection.

Will add that.

> > +   seq_puts(s, "\t{\n");
> > +   seq_printf(s, "\t\t%-5s %-4s %12s %12s\n", "PDE", "ATTR", 
> > "PHYS", "IOVA");
> > +   for (pt_index = 0; pt_index < SMMU_NUM_PTE; pt_index++) {
> > +   u64 iova;
> > +
> > +   if (!addr[pt_index])
> > +   continue;
> > +
> > +   iova = ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << 
> > SMMU_PDE_SHIFT;
> > +   iova |= ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << 
> > SMMU_PTE_SHIFT;
> > +
> > +   seq_printf(s, "\t\t#%-4d 0x%-4x 0x%-12llx 0x%-12llx\n",
> > +  pt_index, addr[pt_index] >> 
> > SMMU_PTE_ATTR_SHIFT,
> > +  SMMU_PFN_PHYS(addr[pt_index] & 
> > ~SMMU_PTE_ATTR), iova);
> > +   }
> > +   seq_puts(s, "\t}\n");
> > +   }
> > +   seq_puts(s, "}\n");
> > +   seq_printf(s, "Total PDE count: %d\n", pde_count);
> > +   seq_printf(s, "Total PTE count: %lld\n", pte_count);
> > +
> > +   return 0;
> > +}
> > +
> > +DEFINE_SHOW_ATTRIBUTE(tegra_smmu_mappings);
> > +
> >  static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
> >  {
> > +   const struct tegra_smmu_soc *soc = smmu->soc;
> > +   struct tegra_smmu_group_debug *group_debug;
> > +   struct device *dev = smmu->dev;
> > +   struct dentry *d;
> > +   int i;
> > +
> > +   group_debug = devm_kzalloc(dev, sizeof(*group_debug) * 
> > soc->num_swgroups, GFP_KERNEL);
> 
> devm_kcalloc()

Will change it.

Thanks
Nic

Re: [PATCH 5/5] iommu/tegra-smmu: Add pagetable mappings to debugfs

2020-09-26 Thread Dmitry Osipenko
26.09.2020 11:07, Nicolin Chen wrote:
...
> + for (pd_index = 0; pd_index < SMMU_NUM_PDE; pd_index++) {
> + struct page *pt;
> + u32 *addr;
> +
> + if (!as->count[pd_index] || !pd[pd_index])
> + continue;

I guess the idea of this patch is to print out the hardware state, isn't
it? Hence the as->count shouldn't be checked here.

> + pde_count++;
> + pte_count += as->count[pd_index];
> + seq_printf(s, "\t[%d] 0x%x (%d)\n",
> +pd_index, pd[pd_index], as->count[pd_index]);
> + pt = as->pts[pd_index];
> + addr = page_address(pt);
> +
> + seq_puts(s, "\t{\n");
> + seq_printf(s, "\t\t%-5s %-4s %12s %12s\n", "PDE", "ATTR", 
> "PHYS", "IOVA");
> + for (pt_index = 0; pt_index < SMMU_NUM_PTE; pt_index++) {
> + u64 iova;
> +
> + if (!addr[pt_index])
> + continue;
> +
> + iova = ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << 
> SMMU_PDE_SHIFT;
> + iova |= ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << 
> SMMU_PTE_SHIFT;
> +
> + seq_printf(s, "\t\t#%-4d 0x%-4x 0x%-12llx 0x%-12llx\n",
> +pt_index, addr[pt_index] >> 
> SMMU_PTE_ATTR_SHIFT,
> +SMMU_PFN_PHYS(addr[pt_index] & 
> ~SMMU_PTE_ATTR), iova);
>

It would be nice if you could improve the output formatting by printing
out contiguous ranges that have the same ATTRs; otherwise the output could
be too large and impractical when lots of pages are mapped.

Re: [PATCH 5/5] iommu/tegra-smmu: Add pagetable mappings to debugfs

2020-09-26 Thread Dmitry Osipenko
27.09.2020 00:24, Dmitry Osipenko wrote:
> 26.09.2020 11:07, Nicolin Chen wrote:
> ...
>> +for (pd_index = 0; pd_index < SMMU_NUM_PDE; pd_index++) {
>> +struct page *pt;
>> +u32 *addr;
>> +
>> +if (!as->count[pd_index] || !pd[pd_index])
>> +continue;
> 
> I guess the idea of this patch is to print out the hardware state, isn't
> it? Hence the as->count shouldn't be checked here.

Perhaps it would also be good to check whether the state of
!as->count[pd_index] matches the state of !pd[pd_index].

WARN_ON_ONCE(!as->count[pd_index] ^ !pd[pd_index])

Re: [PATCH 5/5] iommu/tegra-smmu: Add pagetable mappings to debugfs

2020-09-26 Thread Dmitry Osipenko
26.09.2020 23:47, Nicolin Chen wrote:
...
>>> +   for (pd_index = 0; pd_index < SMMU_NUM_PDE; pd_index++) {
>>> +   struct page *pt;
>>> +   u32 *addr;
>>> +
>>> +   if (!as->count[pd_index] || !pd[pd_index])
>>> +   continue;
>>> +
>>> +   pde_count++;
>>> +   pte_count += as->count[pd_index];
>>> +   seq_printf(s, "\t[%d] 0x%x (%d)\n",
>>> +  pd_index, pd[pd_index], as->count[pd_index]);
>>> +   pt = as->pts[pd_index];
>>> +   addr = page_address(pt);
>>
>> This needs as->lock protection.
> 
> Will add that.
> 
>>> +   seq_puts(s, "\t{\n");
>>> +   seq_printf(s, "\t\t%-5s %-4s %12s %12s\n", "PDE", "ATTR", 
>>> "PHYS", "IOVA");
>>> +   for (pt_index = 0; pt_index < SMMU_NUM_PTE; pt_index++) {
>>> +   u64 iova;
>>> +
>>> +   if (!addr[pt_index])
>>> +   continue;
>>> +
>>> +   iova = ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << 
>>> SMMU_PDE_SHIFT;
>>> +   iova |= ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << 
>>> SMMU_PTE_SHIFT;
>>> +
>>> +   seq_printf(s, "\t\t#%-4d 0x%-4x 0x%-12llx 0x%-12llx\n",
>>> +  pt_index, addr[pt_index] >> 
>>> SMMU_PTE_ATTR_SHIFT,
>>> +  SMMU_PFN_PHYS(addr[pt_index] & 
>>> ~SMMU_PTE_ATTR), iova);

Please also note that addr[pt_index] needs to be protected as well.
Perhaps you could temporarily bump the as->count.

Re: [PATCH 4/5] iommu/tegra-smmu: Add PCI support

2020-09-26 Thread Dmitry Osipenko
26.09.2020 11:07, Nicolin Chen wrote:
...
> +#ifdef CONFIG_PCI
> + if (!iommu_present(&pci_bus_type)) {

Is this iommu_present() check really needed?

> + pci_request_acs();

Shouldn't pci_request_acs() be invoked *after* bus_set_iommu() succeeds?

> + err = bus_set_iommu(&pci_bus_type, &tegra_smmu_ops);
> + if (err < 0) {
> + bus_set_iommu(&platform_bus_type, NULL);
> + iommu_device_unregister(&smmu->iommu);
> + iommu_device_sysfs_remove(&smmu->iommu);
> + return ERR_PTR(err);
> + }
> + }
> +#endif
> +
>   if (IS_ENABLED(CONFIG_DEBUG_FS))
>   tegra_smmu_debugfs_init(smmu);
>  
> 


[PATCH 1/1] iommu/vt-d: Fix lockdep splat in iommu_flush_dev_iotlb()

2020-09-26 Thread Lu Baolu
Lock(&iommu->lock) without disabling irq causes lockdep warnings.

[   12.703950] 
[   12.703962] WARNING: possible irq lock inversion dependency detected
[   12.703975] 5.9.0-rc6+ #659 Not tainted
[   12.703983] 
[   12.703995] systemd-udevd/284 just changed the state of lock:
[   12.704007] bd6ff4d8 (device_domain_lock){..-.}-{2:2}, at:
   iommu_flush_dev_iotlb.part.57+0x2e/0x90
[   12.704031] but this lock took another, SOFTIRQ-unsafe lock in the past:
[   12.704043]  (&iommu->lock){+.+.}-{2:2}
[   12.704045]

   and interrupts could create inverse lock ordering between
   them.

[   12.704073]
   other info that might help us debug this:
[   12.704085]  Possible interrupt unsafe locking scenario:

[   12.704097]        CPU0                    CPU1
[   12.704106]        ----                    ----
[   12.704115]   lock(&iommu->lock);
[   12.704123]                                local_irq_disable();
[   12.704134]                                lock(device_domain_lock);
[   12.704146]                                lock(&iommu->lock);
[   12.704158]   <Interrupt>
[   12.704164]     lock(device_domain_lock);
[   12.704174]
*** DEADLOCK ***

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 1b7d390beb68..722545f61ba2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2699,7 +2699,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
 
/* Setup the PASID entry for requests without PASID: */
-   spin_lock(&iommu->lock);
+   spin_lock_irqsave(&iommu->lock, flags);
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
@@ -2709,7 +2709,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
-   spin_unlock(&iommu->lock);
+   spin_unlock_irqrestore(&iommu->lock, flags);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
-- 
2.17.1



[PATCH v4 7/7] iommu/vt-d: Cleanup after converting to dma-iommu ops

2020-09-26 Thread Lu Baolu
Some cleanups after converting the driver to use dma-iommu ops.
- Remove nobounce option;
- Cleanup and simplify the path in domain mapping.

Signed-off-by: Lu Baolu 
---
 .../admin-guide/kernel-parameters.txt |  5 --
 drivers/iommu/intel/iommu.c   | 90 ++-
 2 files changed, 28 insertions(+), 67 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index a1068742a6df..0d11ef43d314 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1866,11 +1866,6 @@
Note that using this option lowers the security
provided by tboot because it makes the system
vulnerable to DMA attacks.
-   nobounce [Default off]
-   Disable bounce buffer for untrusted devices such as
-   the Thunderbolt devices. This will treat the untrusted
-   devices as the trusted ones, hence might expose security
-   risks of DMA attacks.
 
intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
0   disables intel_idle and fall back on acpi_idle.
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 69ccf92ab37b..5135d9ba0993 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -355,7 +355,6 @@ static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
-static int intel_no_bounce;
 static int iommu_skip_te_disable;
 
 #define IDENTMAP_GFX   2
@@ -457,9 +456,6 @@ static int __init intel_iommu_setup(char *str)
} else if (!strncmp(str, "tboot_noforce", 13)) {
pr_info("Intel-IOMMU: not forcing on after tboot. This 
could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
-   } else if (!strncmp(str, "nobounce", 8)) {
-   pr_info("Intel-IOMMU: No bounce buffer. This could 
expose security risks of DMA attacks\n");
-   intel_no_bounce = 1;
}
 
str += strcspn(str, ",");
@@ -2277,15 +2273,14 @@ static inline int hardware_largepage_caps(struct 
dmar_domain *domain,
return level;
 }
 
-static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-   struct scatterlist *sg, unsigned long phys_pfn,
-   unsigned long nr_pages, int prot)
+static int
+__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+unsigned long phys_pfn, unsigned long nr_pages, int prot)
 {
struct dma_pte *first_pte = NULL, *pte = NULL;
-   phys_addr_t pteval;
-   unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+   phys_addr_t pteval;
u64 attr;
 
BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
@@ -2297,26 +2292,14 @@ static int __domain_mapping(struct dmar_domain *domain, 
unsigned long iov_pfn,
if (domain_use_first_level(domain))
attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
 
-   if (!sg) {
-   sg_res = nr_pages;
-   pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
-   }
+   pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
 
while (nr_pages > 0) {
uint64_t tmp;
 
-   if (!sg_res) {
-   unsigned int pgoff = sg->offset & ~PAGE_MASK;
-
-   sg_res = aligned_nrpages(sg->offset, sg->length);
-   sg->dma_address = ((dma_addr_t)iov_pfn << 
VTD_PAGE_SHIFT) + pgoff;
-   sg->dma_length = sg->length;
-   pteval = (sg_phys(sg) - pgoff) | attr;
-   phys_pfn = pteval >> VTD_PAGE_SHIFT;
-   }
-
if (!pte) {
-   largepage_lvl = hardware_largepage_caps(domain, 
iov_pfn, phys_pfn, sg_res);
+   largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
+   phys_pfn, nr_pages);
 
first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, 
&largepage_lvl);
if (!pte)
@@ -2328,7 +2311,7 @@ static int __domain_mapping(struct dmar_domain *domain, 
unsigned long iov_pfn,
pteval |= DMA_PTE_LARGE_PAGE;
lvl_pages = lvl_to_nr_pages(largepage_lvl);
 
-   nr_superpages = sg_res / lvl_pages;
+   nr_superpages = nr_pages / lvl_pages;
end_pfn = iov_pfn + nr_superpages * lvl_pages - 
1;
 
/*
@@ -2362,48 +2345,4

[PATCH v4 3/7] iommu: Allow the dma-iommu api to use bounce buffers

2020-09-26 Thread Lu Baolu
From: Tom Murphy 

Allow the dma-iommu api to use bounce buffers for untrusted devices.
This is a copy of the intel bounce buffer code.

Signed-off-by: Tom Murphy 
Co-developed-by: Lu Baolu 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c | 163 +++---
 1 file changed, 150 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index fb84cfa83703..3526db774611 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -21,9 +21,11 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 struct iommu_dma_msi_page {
struct list_headlist;
@@ -500,6 +502,31 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
+static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
+   size_t size, enum dma_data_direction dir,
+   unsigned long attrs)
+{
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   phys_addr_t phys;
+
+   phys = iommu_iova_to_phys(domain, dma_addr);
+   if (WARN_ON(!phys))
+   return;
+
+   __iommu_dma_unmap(dev, dma_addr, size);
+
+   if (unlikely(is_swiotlb_buffer(phys)))
+   swiotlb_tbl_unmap_single(dev, phys, size,
+   iova_align(iovad, size), dir, attrs);
+}
+
+static bool dev_is_untrusted(struct device *dev)
+{
+   return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+}
+
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -525,6 +552,55 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
+static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
+   size_t org_size, dma_addr_t dma_mask, bool coherent,
+   enum dma_data_direction dir, unsigned long attrs)
+{
+   int prot = dma_info_to_prot(dir, coherent, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   size_t aligned_size = org_size;
+   void *padding_start;
+   size_t padding_size;
+   dma_addr_t iova;
+
+   /*
+* If both the physical buffer start address and size are
+* page aligned, we don't need to use a bounce page.
+*/
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   iova_offset(iovad, phys | org_size)) {
+   aligned_size = iova_align(iovad, org_size);
+   phys = swiotlb_tbl_map_single(dev,
+   __phys_to_dma(dev, io_tlb_start),
+   phys, org_size, aligned_size, dir, attrs);
+
+   if (phys == DMA_MAPPING_ERROR)
+   return DMA_MAPPING_ERROR;
+
+   /* Cleanup the padding area. */
+   padding_start = phys_to_virt(phys);
+   padding_size = aligned_size;
+
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+   (dir == DMA_TO_DEVICE ||
+dir == DMA_BIDIRECTIONAL)) {
+   padding_start += org_size;
+   padding_size -= org_size;
+   }
+
+   memset(padding_start, 0, padding_size);
+   }
+
+   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys))
+   swiotlb_tbl_unmap_single(dev, phys, org_size,
+   aligned_size, dir, attrs);
+
+   return iova;
+}
+
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -697,11 +773,15 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev))
+   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-   arch_sync_dma_for_cpu(phys, size, dir);
+   if (!dev_is_dma_coherent(dev))
+   arch_sync_dma_for_cpu(phys, size, dir);
+
+   if (is_swiotlb_buffer(phys))
+   swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
 }
 
 static void iommu_dma_sync_single_for_device(struct device *dev,
@@ -709,11 +789,15 @@ static void iommu_dma_sync_single_for_device(struct 
device *dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev))
+   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev

[PATCH v4 4/7] iommu: Add quirk for Intel graphic devices in map_sg

2020-09-26 Thread Lu Baolu
Combining the sg segments exposes a bug in the Intel i915 driver which
causes visual artifacts and the screen to freeze. This is most likely
because of how the i915 handles the returned list. It probably doesn't
respect the returned value specifying the number of elements in the list
and instead depends on the previous behaviour of the Intel iommu driver
which would return the same number of elements in the output list as in
the input list.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 3526db774611..e7e4d758f51a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -879,6 +879,33 @@ static int __finalise_sg(struct device *dev, struct 
scatterlist *sg, int nents,
unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
int i, count = 0;
 
+   /*
+* The Intel graphic driver is used to assume that the returned
+* sg list is not combound. This blocks the efforts of converting
+* Intel IOMMU driver to dma-iommu api's. Add this quirk to make the
+* device driver work and should be removed once it's fixed in i915
+* driver.
+*/
+   if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) &&
+   to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL &&
+   (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+   for_each_sg(sg, s, nents, i) {
+   unsigned int s_iova_off = sg_dma_address(s);
+   unsigned int s_length = sg_dma_len(s);
+   unsigned int s_iova_len = s->length;
+
+   s->offset += s_iova_off;
+   s->length = s_length;
+   sg_dma_address(s) = dma_addr + s_iova_off;
+   sg_dma_len(s) = s_length;
+   dma_addr += s_iova_len;
+
+   pr_info_once("sg combining disabled due to i915 
driver\n");
+   }
+
+   return nents;
+   }
+
for_each_sg(sg, s, nents, i) {
/* Restore this segment's original unaligned fields first */
unsigned int s_iova_off = sg_dma_address(s);
-- 
2.17.1



[PATCH v4 6/7] iommu/vt-d: Convert intel iommu driver to the iommu ops

2020-09-26 Thread Lu Baolu
From: Tom Murphy 

Convert the intel iommu driver to the dma-iommu api. Remove the iova
handling and reserve region code from the intel iommu driver.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/Kconfig |   1 +
 drivers/iommu/intel/iommu.c | 742 ++--
 2 files changed, 43 insertions(+), 700 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 5337ee1584b0..28a3d1596c76 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -13,6 +13,7 @@ config INTEL_IOMMU
select DMAR_TABLE
select SWIOTLB
select IOASID
+   select IOMMU_DMA
help
  DMA remapping (DMAR) devices support enables independent address
  translations for Direct Memory Access (DMA) from devices.
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7d3c73d1e498..69ccf92ab37b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -41,7 +42,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -382,9 +382,6 @@ struct device_domain_info *get_domain_info(struct device 
*dev)
 DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
-#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&   \
-   to_pci_dev(d)->untrusted)
-
 /*
  * Iterate over elements in device_domain_list and call the specified
  * callback @fn against each element.
@@ -1289,13 +1286,6 @@ static void dma_free_pagelist(struct page *freelist)
}
 }
 
-static void iova_entry_free(unsigned long data)
-{
-   struct page *freelist = (struct page *)data;
-
-   dma_free_pagelist(freelist);
-}
-
 /* iommu handling */
 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
@@ -1660,19 +1650,17 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu,
iommu_flush_write_buffer(iommu);
 }
 
-static void iommu_flush_iova(struct iova_domain *iovad)
+static void intel_flush_iotlb_all(struct iommu_domain *domain)
 {
-   struct dmar_domain *domain;
+   struct dmar_domain *dmar_domain = to_dmar_domain(domain);
int idx;
 
-   domain = container_of(iovad, struct dmar_domain, iovad);
-
-   for_each_domain_iommu(idx, domain) {
+   for_each_domain_iommu(idx, dmar_domain) {
struct intel_iommu *iommu = g_iommus[idx];
-   u16 did = domain->iommu_did[iommu->seq_id];
+   u16 did = dmar_domain->iommu_did[iommu->seq_id];
 
-   if (domain_use_first_level(domain))
-   domain_flush_piotlb(iommu, domain, 0, -1, 0);
+   if (domain_use_first_level(dmar_domain))
+   domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0);
else
iommu->flush.flush_iotlb(iommu, did, 0, 0,
 DMA_TLB_DSI_FLUSH);
@@ -1954,48 +1942,6 @@ static int domain_detach_iommu(struct dmar_domain *domain,
return count;
 }
 
-static struct iova_domain reserved_iova_list;
-static struct lock_class_key reserved_rbtree_key;
-
-static int dmar_init_reserved_ranges(void)
-{
-   struct pci_dev *pdev = NULL;
-   struct iova *iova;
-   int i;
-
-   init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
-
-   lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
-   &reserved_rbtree_key);
-
-   /* IOAPIC ranges shouldn't be accessed by DMA */
-   iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
-   IOVA_PFN(IOAPIC_RANGE_END));
-   if (!iova) {
-   pr_err("Reserve IOAPIC range failed\n");
-   return -ENODEV;
-   }
-
-   /* Reserve all PCI MMIO to avoid peer-to-peer access */
-   for_each_pci_dev(pdev) {
-   struct resource *r;
-
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   r = &pdev->resource[i];
-   if (!r->flags || !(r->flags & IORESOURCE_MEM))
-   continue;
-   iova = reserve_iova(&reserved_iova_list,
-   IOVA_PFN(r->start),
-   IOVA_PFN(r->end));
-   if (!iova) {
-   pci_err(pdev, "Reserve iova for %pR failed\n", r);
-   return -ENODEV;
-   }
-   }
-   }
-   return 0;
-}
-
 static inline int guestwidth_to_adjustwidth(int gaw)
 {
int agaw;
@@ -2018,7 +1964,7 @@ static void domain_exit(struct dmar_domain *domain)
 
/* destroy iovas */
if (domain->domain.type == IOMMU_DOMAIN_DMA)
-   put_iova_domain(&domain->iovad);
+

[PATCH v4 5/7] iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev

2020-09-26 Thread Lu Baolu
The iommu-dma layer constrains IOVA allocation based on the domain
geometry that the driver reports. Update the domain geometry every time
a domain is attached to or detached from a device.
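
For context, the dma-iommu allocator caps allocations at the reported
aperture, roughly as in this paraphrased sketch (not the exact upstream
code; the real logic lives in iommu_dma_alloc_iova()):

#include <linux/kernel.h>
#include <linux/iommu.h>
#include <linux/iova.h>

static unsigned long pick_iova_pfn(struct iommu_domain *domain,
                                   struct iova_domain *iovad,
                                   size_t size, u64 dma_limit)
{
        /* Never allocate beyond the geometry the driver reported. */
        if (domain->geometry.force_aperture)
                dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);

        return alloc_iova_fast(iovad, size >> iova_shift(iovad),
                               dma_limit >> iova_shift(iovad), true);
}

Without an accurate aperture_end, iommu-dma could hand out IOVAs the
hardware cannot translate, or needlessly shrink the usable space.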

Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/iommu.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index fdd514c8b2d4..7d3c73d1e498 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -67,8 +67,8 @@
 #define MAX_AGAW_WIDTH 64
 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
 
-#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
+#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
+#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
 
 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
to match. That way, we can use 'unsigned long' for PFNs with impunity. */
@@ -739,6 +739,18 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
 */
if (domain->nid == NUMA_NO_NODE)
domain->nid = domain_update_device_node(domain);
+
+   /*
+* First-level translation restricts the input-address to a
+* canonical address (i.e., address bits 63:N have the same
+* value as address bit [N-1], where N is 48-bits with 4-level
+* paging and 57-bits with 5-level paging). Hence, skip bit
+* [N-1].
+*/
+   if (domain_use_first_level(domain))
+           domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
+   else
+           domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
 }
 
 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
-- 
2.17.1



[PATCH v4 0/7] Convert the intel iommu driver to the dma-iommu api

2020-09-26 Thread Lu Baolu
Hi,

The previous post of this series can be found here.

https://lore.kernel.org/linux-iommu/20200912032200.11489-1-baolu...@linux.intel.com/

This version introduces a new patch [4/7] to fix an issue reported here.

https://lore.kernel.org/linux-iommu/51a1baec-48d1-c0ac-181b-1fba92aa4...@linux.intel.com/

There aren't any other changes.

Please help to test and review.

Best regards,
baolu

Lu Baolu (3):
  iommu: Add quirk for Intel graphic devices in map_sg
  iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev
  iommu/vt-d: Cleanup after converting to dma-iommu ops

Tom Murphy (4):
  iommu: Handle freelists when using deferred flushing in iommu drivers
  iommu: Add iommu_dma_free_cpu_cached_iovas()
  iommu: Allow the dma-iommu api to use bounce buffers
  iommu/vt-d: Convert intel iommu driver to the iommu ops

 .../admin-guide/kernel-parameters.txt |   5 -
 drivers/iommu/dma-iommu.c | 228 -
 drivers/iommu/intel/Kconfig   |   1 +
 drivers/iommu/intel/iommu.c   | 901 +++---
 include/linux/dma-iommu.h |   8 +
 include/linux/iommu.h |   1 +
 6 files changed, 336 insertions(+), 808 deletions(-)

-- 
2.17.1



[PATCH v4 2/7] iommu: Add iommu_dma_free_cpu_cached_iovas()

2020-09-26 Thread Lu Baolu
From: Tom Murphy 

Add an iommu_dma_free_cpu_cached_iovas() function to allow drivers that
use the dma-iommu ops to free cached per-CPU IOVAs.
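
As a hedged usage sketch (names below are made up), a driver that keeps
a DMA-API domain around could call this from a CPU hotplug "dead"
callback so the departing CPU's cached IOVAs are returned to the pool:

#include <linux/iommu.h>
#include <linux/dma-iommu.h>

/* Stand-in for whatever DMA-API domain the driver actually tracks;
 * registration would typically happen via a cpuhp "dead" state. */
static struct iommu_domain *my_dma_domain;

static int my_iommu_cpu_dead(unsigned int cpu)
{
        if (my_dma_domain)
                iommu_dma_free_cpu_cached_iovas(cpu, my_dma_domain);
        return 0;
}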

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c | 9 +
 include/linux/dma-iommu.h | 8 
 2 files changed, 17 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1b8ef3a2cbc3..fb84cfa83703 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -50,6 +50,15 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
 };
 
+void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain)
+{
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+
+   free_cpu_cached_iovas(cpu, iovad);
+}
+
 static void iommu_dma_entry_dtor(unsigned long data)
 {
struct page *freelist = (struct page *)data;
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 2112f21f73d8..706b68d1359b 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -37,6 +37,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc,
 
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
+void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain);
+
 #else /* CONFIG_IOMMU_DMA */
 
 struct iommu_domain;
@@ -78,5 +81,10 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
 {
 }
 
+static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain)
+{
+}
+
 #endif /* CONFIG_IOMMU_DMA */
 #endif /* __DMA_IOMMU_H */
-- 
2.17.1



[PATCH v4 1/7] iommu: Handle freelists when using deferred flushing in iommu drivers

2020-09-26 Thread Lu Baolu
From: Tom Murphy 

Allow iommu_unmap_fast() to return newly freed page-table pages and
pass the freelist to queue_iova() in the dma-iommu ops path.

This is useful for IOMMU drivers (in this case the Intel IOMMU driver)
which need to wait for the IOTLB to be flushed before newly
freed/unmapped page-table pages can be released. This way we can still
batch IOTLB flushes and handle the freelists.
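
As a rough sketch (not part of this patch; my_clear_pgtables() is a
made-up helper), a driver's unmap op could hand the freed page-table
pages back through the gather structure, relying on the freelist member
this patch adds to struct iommu_iotlb_gather:

#include <linux/iommu.h>

extern struct page *my_clear_pgtables(struct iommu_domain *domain,
                                      unsigned long iova, size_t size);

static size_t my_unmap(struct iommu_domain *domain, unsigned long iova,
                       size_t size, struct iommu_iotlb_gather *gather)
{
        /* Tear down the mappings and collect the freed PT pages. */
        struct page *freed = my_clear_pgtables(domain, iova, size);

        if (gather)
                gather->freelist = freed; /* released after the IOTLB flush */

        return size;
}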

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/dma-iommu.c   | 29 +--
 drivers/iommu/intel/iommu.c | 55 -
 include/linux/iommu.h   |  1 +
 3 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index cd6e3c70ebb3..1b8ef3a2cbc3 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -50,6 +50,18 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
 };
 
+static void iommu_dma_entry_dtor(unsigned long data)
+{
+   struct page *freelist = (struct page *)data;
+
+   while (freelist) {
+   unsigned long p = (unsigned long)page_address(freelist);
+
+   freelist = freelist->freelist;
+   free_page(p);
+   }
+}
+
 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
 {
if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
@@ -344,7 +356,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
-   NULL))
+ iommu_dma_entry_dtor))
pr_warn("iova flush queue initialization failed\n");
else
cookie->fq_domain = domain;
@@ -441,7 +453,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
-   dma_addr_t iova, size_t size)
+   dma_addr_t iova, size_t size, struct page *freelist)
 {
struct iova_domain *iovad = &cookie->iovad;
 
@@ -450,7 +462,8 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
cookie->msi_iova -= size;
else if (cookie->fq_domain) /* non-strict mode */
queue_iova(iovad, iova_pfn(iovad, iova),
-   size >> iova_shift(iovad), 0);
+   size >> iova_shift(iovad),
+   (unsigned long)freelist);
else
free_iova_fast(iovad, iova_pfn(iovad, iova),
size >> iova_shift(iovad));
@@ -475,7 +488,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
 
if (!cookie->fq_domain)
iommu_iotlb_sync(domain, &iotlb_gather);
-   iommu_dma_free_iova(cookie, dma_addr, size);
+   iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -497,7 +510,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
return DMA_MAPPING_ERROR;
 
if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@@ -649,7 +662,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
 out_free_sg:
sg_free_table(&sgt);
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@@ -900,7 +913,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
return __finalise_sg(dev, sg, nents, iova);
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, iova_len);
+   iommu_dma_free_iova(cookie, iova, iova_len, NULL);
 out_restore_sg:
__invalidate_sg(sg, nents);
return 0;
@@ -1194,7 +1207,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
return msi_page;
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_page:
kfree(msi_page);
return NULL;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 722545f61ba2..fdd514c8b2d4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1243,17 +1243,17 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
pages can only be freed after the IOTLB flush has been done. */